From 458821452642fd5dc2377b73cd1323fd4a9653e7 Mon Sep 17 00:00:00 2001 From: Paul Mundt <lethal@linux-sh.org> Date: Wed, 4 Oct 2006 13:21:45 +0900 Subject: sh: First step at generic timeofday support. At the moment we wrap GENERIC_TIME around our existing timer API. As boards start providing their own clocksources, they're able to select GENERIC_TIME accordingly and optimize out most of the timer API. Once the current timers have been reworked as proper clocksource drivers, the rest of the place holders for the timer API can go away and we can flip on GENERIC_TIME unconditionally. Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- include/asm-sh/timer.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/asm-sh/timer.h b/include/asm-sh/timer.h index c7ab28095ba0..ebc78db1a9ea 100644 --- a/include/asm-sh/timer.h +++ b/include/asm-sh/timer.h @@ -8,7 +8,9 @@ struct sys_timer_ops { int (*init)(void); int (*start)(void); int (*stop)(void); +#ifndef CONFIG_GENERIC_TIME unsigned long (*get_offset)(void); +#endif unsigned long (*get_frequency)(void); }; @@ -24,10 +26,12 @@ struct sys_timer { extern struct sys_timer tmu_timer; extern struct sys_timer *sys_timer; +#ifndef CONFIG_GENERIC_TIME static inline unsigned long get_timer_offset(void) { return sys_timer->ops->get_offset(); } +#endif static inline unsigned long get_timer_frequency(void) { -- cgit v1.2.3 From a700f3594d63a85af196ac64984f7375d903afad Mon Sep 17 00:00:00 2001 From: Paul Mundt <lethal@linux-sh.org> Date: Wed, 4 Oct 2006 13:27:32 +0900 Subject: sh: Kill off timer_ops get_frequency(). We're not using this anywhere these days, kill it off. Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- arch/sh/kernel/timers/timer-tmu.c | 58 ++------------------------------------- include/asm-sh/timer.h | 6 ---- 2 files changed, 2 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c index 205816fcf0da..badfedb455a9 100644 --- a/arch/sh/kernel/timers/timer-tmu.c +++ b/arch/sh/kernel/timers/timer-tmu.c @@ -111,60 +111,6 @@ static struct irqaction tmu_irq = { .mask = CPU_MASK_NONE, }; -/* - * Hah! We'll see if this works (switching from usecs to nsecs). - */ -static unsigned long tmu_timer_get_frequency(void) -{ - u32 freq; - struct timespec ts1, ts2; - unsigned long diff_nsec; - unsigned long factor; - - /* Setup the timer: We don't want to generate interrupts, just - * have it count down at its natural rate. - */ - ctrl_outb(0, TMU_TSTR); -#if !defined(CONFIG_CPU_SUBTYPE_SH7300) && !defined(CONFIG_CPU_SUBTYPE_SH7760) - ctrl_outb(TMU_TOCR_INIT, TMU_TOCR); -#endif - ctrl_outw(TMU0_TCR_CALIB, TMU0_TCR); - ctrl_outl(0xffffffff, TMU0_TCOR); - ctrl_outl(0xffffffff, TMU0_TCNT); - - rtc_sh_get_time(&ts2); - - do { - rtc_sh_get_time(&ts1); - } while (ts1.tv_nsec == ts2.tv_nsec && ts1.tv_sec == ts2.tv_sec); - - /* actually start the timer */ - ctrl_outb(TMU_TSTR_INIT, TMU_TSTR); - - do { - rtc_sh_get_time(&ts2); - } while (ts1.tv_nsec == ts2.tv_nsec && ts1.tv_sec == ts2.tv_sec); - - freq = 0xffffffff - ctrl_inl(TMU0_TCNT); - if (ts2.tv_nsec < ts1.tv_nsec) { - ts2.tv_nsec += 1000000000; - ts2.tv_sec--; - } - - diff_nsec = (ts2.tv_sec - ts1.tv_sec) * 1000000000 + (ts2.tv_nsec - ts1.tv_nsec); - - /* this should work well if the RTC has a precision of n Hz, where - * n is an integer. I don't think we have to worry about the other - * cases. 
*/ - factor = (1000000000 + diff_nsec/2) / diff_nsec; - - if (factor * diff_nsec > 1100000000 || - factor * diff_nsec < 900000000) - panic("weird RTC (diff_nsec %ld)", diff_nsec); - - return freq * factor; -} - static void tmu_clk_init(struct clk *clk) { u8 divisor = TMU0_TCR_INIT & 0x7; @@ -232,12 +178,12 @@ struct sys_timer_ops tmu_timer_ops = { .init = tmu_timer_init, .start = tmu_timer_start, .stop = tmu_timer_stop, - .get_frequency = tmu_timer_get_frequency, +#ifndef CONFIG_GENERIC_TIME .get_offset = tmu_timer_get_offset, +#endif }; struct sys_timer tmu_timer = { .name = "tmu", .ops = &tmu_timer_ops, }; - diff --git a/include/asm-sh/timer.h b/include/asm-sh/timer.h index ebc78db1a9ea..341cb71c2f9b 100644 --- a/include/asm-sh/timer.h +++ b/include/asm-sh/timer.h @@ -11,7 +11,6 @@ struct sys_timer_ops { #ifndef CONFIG_GENERIC_TIME unsigned long (*get_offset)(void); #endif - unsigned long (*get_frequency)(void); }; struct sys_timer { @@ -33,11 +32,6 @@ static inline unsigned long get_timer_offset(void) } #endif -static inline unsigned long get_timer_frequency(void) -{ - return sys_timer->ops->get_frequency(); -} - /* arch/sh/kernel/timers/timer.c */ struct sys_timer *get_sys_timer(void); -- cgit v1.2.3 From 35f3c5185b1e28e6591aa649db8bf4fa16f1a7f3 Mon Sep 17 00:00:00 2001 From: Paul Mundt <lethal@linux-sh.org> Date: Fri, 6 Oct 2006 15:31:16 +0900 Subject: sh: Updates for IRQ handler changes. Trivial fixes for build breakage introduced by IRQ handler changes. Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- arch/sh/boards/hp6xx/hp6xx_apm.c | 4 ++-- arch/sh/boards/landisk/landisk_pwb.c | 2 +- arch/sh/boards/mpc1211/setup.c | 5 ++--- arch/sh/boards/snapgear/setup.c | 2 +- arch/sh/cchips/hd6446x/hd64461/setup.c | 2 +- arch/sh/cchips/hd6446x/hd64465/gpio.c | 2 +- arch/sh/cchips/hd6446x/hd64465/setup.c | 2 +- arch/sh/cchips/voyagergx/irq.c | 3 +-- arch/sh/drivers/dma/dma-g2.c | 2 +- arch/sh/drivers/dma/dma-pvr2.c | 2 +- arch/sh/drivers/dma/dma-sh.c | 6 +++--- arch/sh/drivers/pci/pci-st40.c | 2 +- arch/sh/kernel/irq.c | 19 +++++++++++++------ arch/sh/kernel/time.c | 7 ++++--- arch/sh/kernel/timers/timer-tmu.c | 5 ++--- drivers/rtc/rtc-sh.c | 6 +++--- drivers/serial/sh-sci.c | 4 ++-- include/asm-sh/hw_irq.h | 4 ++++ include/asm-sh/irq_regs.h | 1 + include/asm-sh/timer.h | 3 +-- 20 files changed, 46 insertions(+), 37 deletions(-) create mode 100644 include/asm-sh/irq_regs.h (limited to 'include') diff --git a/arch/sh/boards/hp6xx/hp6xx_apm.c b/arch/sh/boards/hp6xx/hp6xx_apm.c index 75f91aaae077..219179114f0f 100644 --- a/arch/sh/boards/hp6xx/hp6xx_apm.c +++ b/arch/sh/boards/hp6xx/hp6xx_apm.c @@ -83,7 +83,7 @@ static int hp6x0_apm_get_info(char *buf, char **start, off_t fpos, int length) return p - buf; } -static irqreturn_t hp6x0_apm_interrupt(int irq, void *dev, struct pt_regs *regs) +static irqreturn_t hp6x0_apm_interrupt(int irq, void *dev) { if (!apm_suspended) apm_queue_event(APM_USER_SUSPEND); @@ -96,7 +96,7 @@ static int __init hp6x0_apm_init(void) int ret; ret = request_irq(HP680_BTN_IRQ, hp6x0_apm_interrupt, - SA_INTERRUPT, MODNAME, 0); + IRQF_DISABLED, MODNAME, 0); if (unlikely(ret < 0)) { printk(KERN_ERR MODNAME ": IRQ %d request failed\n", HP680_BTN_IRQ); diff --git a/arch/sh/boards/landisk/landisk_pwb.c b/arch/sh/boards/landisk/landisk_pwb.c index 0b7bee1a9ca5..e62524978160 100644 --- a/arch/sh/boards/landisk/landisk_pwb.c +++ b/arch/sh/boards/landisk/landisk_pwb.c @@ -135,7 +135,7 @@ static int swdrv_write(struct file *filp, const char *buff, size_t count, return count; } -static 
irqreturn_t sw_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t sw_interrupt(int irq, void *dev_id) { landisk_btn = (0x0ff & (~ctrl_inb(PA_STATUS))); disable_irq(IRQ_BUTTON); diff --git a/arch/sh/boards/mpc1211/setup.c b/arch/sh/boards/mpc1211/setup.c index 01c10fa5c058..7c3d1d304157 100644 --- a/arch/sh/boards/mpc1211/setup.c +++ b/arch/sh/boards/mpc1211/setup.c @@ -69,7 +69,6 @@ static void __init pci_write_config(unsigned long busNo, static unsigned char m_irq_mask = 0xfb; static unsigned char s_irq_mask = 0xff; -volatile unsigned long irq_err_count; static void disable_mpc1211_irq(unsigned int irq) { @@ -118,7 +117,7 @@ static void mask_and_ack_mpc1211(unsigned int irq) if(irq < 8) { if(m_irq_mask & (1<<irq)){ if(!mpc1211_irq_real(irq)){ - irq_err_count++; + atomic_inc(&irq_err_count); printk("spurious 8259A interrupt: IRQ %x\n",irq); } } else { @@ -131,7 +130,7 @@ static void mask_and_ack_mpc1211(unsigned int irq) } else { if(s_irq_mask & (1<<(irq - 8))){ if(!mpc1211_irq_real(irq)){ - irq_err_count++; + atomic_inc(&irq_err_count); printk("spurious 8259A interrupt: IRQ %x\n",irq); } } else { diff --git a/arch/sh/boards/snapgear/setup.c b/arch/sh/boards/snapgear/setup.c index f5e98c56b530..540d0bf16446 100644 --- a/arch/sh/boards/snapgear/setup.c +++ b/arch/sh/boards/snapgear/setup.c @@ -33,7 +33,7 @@ extern void pcibios_init(void); * EraseConfig handling functions */ -static irqreturn_t eraseconfig_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t eraseconfig_interrupt(int irq, void *dev_id) { volatile char dummy __attribute__((unused)) = * (volatile char *) 0xb8000000; diff --git a/arch/sh/cchips/hd6446x/hd64461/setup.c b/arch/sh/cchips/hd6446x/hd64461/setup.c index 38f1e8171a3a..4d49b5cbcc13 100644 --- a/arch/sh/cchips/hd6446x/hd64461/setup.c +++ b/arch/sh/cchips/hd6446x/hd64461/setup.c @@ -71,7 +71,7 @@ static struct hw_interrupt_type hd64461_irq_type = { .end = end_hd64461_irq, }; -static irqreturn_t hd64461_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t hd64461_interrupt(int irq, void *dev_id) { printk(KERN_INFO "HD64461: spurious interrupt, nirr: 0x%x nimr: 0x%x\n", diff --git a/arch/sh/cchips/hd6446x/hd64465/gpio.c b/arch/sh/cchips/hd6446x/hd64465/gpio.c index 72320d02d69a..43431855ec86 100644 --- a/arch/sh/cchips/hd6446x/hd64465/gpio.c +++ b/arch/sh/cchips/hd6446x/hd64465/gpio.c @@ -85,7 +85,7 @@ static struct { void *dev; } handlers[GPIO_NPORTS * 8]; -static irqreturn_t hd64465_gpio_interrupt(int irq, void *dev, struct pt_regs *regs) +static irqreturn_t hd64465_gpio_interrupt(int irq, void *dev) { unsigned short port, pin, isr, mask, portpin; diff --git a/arch/sh/cchips/hd6446x/hd64465/setup.c b/arch/sh/cchips/hd6446x/hd64465/setup.c index 30573d3e1966..d126e1f30dee 100644 --- a/arch/sh/cchips/hd6446x/hd64465/setup.c +++ b/arch/sh/cchips/hd6446x/hd64465/setup.c @@ -84,7 +84,7 @@ static struct hw_interrupt_type hd64465_irq_type = { }; -static irqreturn_t hd64465_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t hd64465_interrupt(int irq, void *dev_id) { printk(KERN_INFO "HD64465: spurious interrupt, nirr: 0x%x nimr: 0x%x\n", diff --git a/arch/sh/cchips/voyagergx/irq.c b/arch/sh/cchips/voyagergx/irq.c index 392c8b12ce36..bf1b28feca06 100644 --- a/arch/sh/cchips/voyagergx/irq.c +++ b/arch/sh/cchips/voyagergx/irq.c @@ -88,8 +88,7 @@ static struct hw_interrupt_type voyagergx_irq_type = { .end = end_voyagergx_irq, }; -static irqreturn_t voyagergx_interrupt(int irq, void *dev_id, -
struct pt_regs *regs) +static irqreturn_t voyagergx_interrupt(int irq, void *dev_id) { printk(KERN_INFO "VoyagerGX: spurious interrupt, status: 0x%x\n", diff --git a/arch/sh/drivers/dma/dma-g2.c b/arch/sh/drivers/dma/dma-g2.c index 9cb070924180..0caf11bb7e27 100644 --- a/arch/sh/drivers/dma/dma-g2.c +++ b/arch/sh/drivers/dma/dma-g2.c @@ -51,7 +51,7 @@ static volatile struct g2_dma_info *g2_dma = (volatile struct g2_dma_info *)0xa0 ((g2_dma->channel[i].size - \ g2_dma->status[i].size) & 0x0fffffff) -static irqreturn_t g2_dma_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t g2_dma_interrupt(int irq, void *dev_id) { int i; diff --git a/arch/sh/drivers/dma/dma-pvr2.c b/arch/sh/drivers/dma/dma-pvr2.c index c1b6bc23c107..838fad566eaf 100644 --- a/arch/sh/drivers/dma/dma-pvr2.c +++ b/arch/sh/drivers/dma/dma-pvr2.c @@ -21,7 +21,7 @@ static unsigned int xfer_complete; static int count; -static irqreturn_t pvr2_dma_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t pvr2_dma_interrupt(int irq, void *dev_id) { if (get_dma_residue(PVR2_CASCADE_CHAN)) { printk(KERN_WARNING "DMA: SH DMAC did not complete transfer " diff --git a/arch/sh/drivers/dma/dma-sh.c b/arch/sh/drivers/dma/dma-sh.c index cbbe8bce3d67..d8ece20bb2cf 100644 --- a/arch/sh/drivers/dma/dma-sh.c +++ b/arch/sh/drivers/dma/dma-sh.c @@ -60,9 +60,9 @@ static inline unsigned int calc_xmit_shift(struct dma_channel *chan) * Besides that it needs to waken any waiting process, which should handle * setting up the next transfer. */ -static irqreturn_t dma_tei(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t dma_tei(int irq, void *dev_id) { - struct dma_channel *chan = (struct dma_channel *)dev_id; + struct dma_channel *chan = dev_id; u32 chcr; chcr = ctrl_inl(CHCR[chan->chan]); @@ -228,7 +228,7 @@ static inline int dmaor_reset(void) } #if defined(CONFIG_CPU_SH4) -static irqreturn_t dma_err(int irq, void *dev_id, struct pt_regs *regs) +static irqreturn_t dma_err(int irq, void *dummy) { dmaor_reset(); disable_irq(irq); diff --git a/arch/sh/drivers/pci/pci-st40.c b/arch/sh/drivers/pci/pci-st40.c index 4ab5ea6b35fb..efecb3d5995c 100644 --- a/arch/sh/drivers/pci/pci-st40.c +++ b/arch/sh/drivers/pci/pci-st40.c @@ -161,7 +161,7 @@ static char * pci_commands[16]={ "Memory Write-and-Invalidate" }; -static irqreturn_t st40_pci_irq(int irq, void *dev_instance, struct pt_regs *regs) +static irqreturn_t st40_pci_irq(int irq, void *dev_instance) { unsigned pci_int, pci_air, pci_cir, pci_aint; static int count=0; diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index c7ebd6aec951..3b93682bf18b 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -17,6 +17,8 @@ #include <asm/thread_info.h> #include <asm/cpu/mmu_context.h> +atomic_t irq_err_count; + /* * 'what should we do if we get a hw irq event on an illegal vector'. 
* each architecture has to answer this themselves, it doesn't deserve @@ -47,8 +49,10 @@ int show_interrupts(struct seq_file *p, void *v) if (!action) goto unlock; seq_printf(p, "%3d: ",i); - seq_printf(p, "%10u ", kstat_irqs(i)); - seq_printf(p, " %14s", irq_desc[i].chip->typename); + for_each_online_cpu(j) + seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); + seq_printf(p, " %14s", irq_desc[i].chip->name); + seq_printf(p, "-%s", handle_irq_name(irq_desc[i].handle_irq)); seq_printf(p, " %s", action->name); for (action=action->next; action; action = action->next) @@ -56,7 +60,9 @@ int show_interrupts(struct seq_file *p, void *v) seq_putc(p, '\n'); unlock: spin_unlock_irqrestore(&irq_desc[i].lock, flags); - } + } else if (i == NR_IRQS) + seq_printf(p, "Err: %10u\n", atomic_read(&irq_err_count)); + return 0; } #endif @@ -78,6 +84,7 @@ asmlinkage int do_IRQ(unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7, struct pt_regs regs) { + struct pt_regs *old_regs = set_irq_regs(®s); int irq = r4; #ifdef CONFIG_4KSTACKS union irq_ctx *curctx, *irqctx; @@ -139,7 +146,6 @@ asmlinkage int do_IRQ(unsigned long r4, unsigned long r5, __asm__ __volatile__ ( "mov %0, r4 \n" - "mov %1, r5 \n" "mov r15, r9 \n" "jsr @%2 \n" /* swith to the irq stack */ @@ -147,17 +153,18 @@ asmlinkage int do_IRQ(unsigned long r4, unsigned long r5, /* restore the stack (ring zero) */ "mov r9, r15 \n" : /* no outputs */ - : "r" (irq), "r" (®s), "r" (__do_IRQ), "r" (isp) + : "r" (irq), "r" (generic_handle_irq), "r" (isp) /* XXX: A somewhat excessive clobber list? -PFM */ : "memory", "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "t", "pr" ); } else #endif - __do_IRQ(irq, ®s); + generic_handle_irq(irq); irq_exit(); + set_irq_regs(old_regs); return 1; } diff --git a/arch/sh/kernel/time.c b/arch/sh/kernel/time.c index 1fbb83c665dd..57e708d7b52d 100644 --- a/arch/sh/kernel/time.c +++ b/arch/sh/kernel/time.c @@ -109,13 +109,14 @@ static long last_rtc_update; * handle_timer_tick() needs to keep up the real-time clock, * as well as call the "do_timer()" routine every clocktick */ -void handle_timer_tick(struct pt_regs *regs) +void handle_timer_tick(void) { do_timer(1); #ifndef CONFIG_SMP - update_process_times(user_mode(regs)); + update_process_times(user_mode(get_irq_regs())); #endif - profile_tick(CPU_PROFILING, regs); + if (current->pid) + profile_tick(CPU_PROFILING); #ifdef CONFIG_HEARTBEAT if (sh_mv.mv_heartbeat != NULL) diff --git a/arch/sh/kernel/timers/timer-tmu.c b/arch/sh/kernel/timers/timer-tmu.c index badfedb455a9..24927015dc31 100644 --- a/arch/sh/kernel/timers/timer-tmu.c +++ b/arch/sh/kernel/timers/timer-tmu.c @@ -80,8 +80,7 @@ static unsigned long tmu_timer_get_offset(void) return count; } -static irqreturn_t tmu_timer_interrupt(int irq, void *dev_id, - struct pt_regs *regs) +static irqreturn_t tmu_timer_interrupt(int irq, void *dummy) { unsigned long timer_status; @@ -98,7 +97,7 @@ static irqreturn_t tmu_timer_interrupt(int irq, void *dev_id, * locally disabled. 
-arca */ write_seqlock(&xtime_lock); - handle_timer_tick(regs); + handle_timer_tick(); write_sequnlock(&xtime_lock); return IRQ_HANDLED; diff --git a/drivers/rtc/rtc-sh.c b/drivers/rtc/rtc-sh.c index 8b6efcc05058..143302a8e79c 100644 --- a/drivers/rtc/rtc-sh.c +++ b/drivers/rtc/rtc-sh.c @@ -160,7 +160,7 @@ static int sh_rtc_open(struct device *dev) tmp |= RCR1_CIE; writeb(tmp, rtc->regbase + RCR1); - ret = request_irq(rtc->periodic_irq, sh_rtc_periodic, SA_INTERRUPT, + ret = request_irq(rtc->periodic_irq, sh_rtc_periodic, IRQF_DISABLED, "sh-rtc period", dev); if (unlikely(ret)) { dev_err(dev, "request period IRQ failed with %d, IRQ %d\n", @@ -168,7 +168,7 @@ static int sh_rtc_open(struct device *dev) return ret; } - ret = request_irq(rtc->carry_irq, sh_rtc_interrupt, SA_INTERRUPT, + ret = request_irq(rtc->carry_irq, sh_rtc_interrupt, IRQF_DISABLED, "sh-rtc carry", dev); if (unlikely(ret)) { dev_err(dev, "request carry IRQ failed with %d, IRQ %d\n", @@ -177,7 +177,7 @@ static int sh_rtc_open(struct device *dev) goto err_bad_carry; } - ret = request_irq(rtc->alarm_irq, sh_rtc_interrupt, SA_INTERRUPT, + ret = request_irq(rtc->alarm_irq, sh_rtc_interrupt, IRQF_DISABLED, "sh-rtc alarm", dev); if (unlikely(ret)) { dev_err(dev, "request alarm IRQ failed with %d, IRQ %d\n", diff --git a/drivers/serial/sh-sci.c b/drivers/serial/sh-sci.c index 266aa325569e..cfcc3caf49d8 100644 --- a/drivers/serial/sh-sci.c +++ b/drivers/serial/sh-sci.c @@ -808,7 +808,7 @@ static int sci_request_irq(struct sci_port *port) } if (request_irq(port->irqs[0], sci_mpxed_interrupt, - SA_INTERRUPT, "sci", port)) { + IRQF_DISABLED, "sci", port)) { printk(KERN_ERR "sci: Cannot allocate irq.\n"); return -ENODEV; } @@ -817,7 +817,7 @@ static int sci_request_irq(struct sci_port *port) if (!port->irqs[i]) continue; if (request_irq(port->irqs[i], handlers[i], - SA_INTERRUPT, desc[i], port)) { + IRQF_DISABLED, desc[i], port)) { printk(KERN_ERR "sci: Cannot allocate irq.\n"); return -ENODEV; } diff --git a/include/asm-sh/hw_irq.h b/include/asm-sh/hw_irq.h index fed26616967a..80ee1cda7498 100644 --- a/include/asm-sh/hw_irq.h +++ b/include/asm-sh/hw_irq.h @@ -1,4 +1,8 @@ #ifndef __ASM_SH_HW_IRQ_H #define __ASM_SH_HW_IRQ_H +#include <asm/atomic.h> + +extern atomic_t irq_err_count; + #endif /* __ASM_SH_HW_IRQ_H */ diff --git a/include/asm-sh/irq_regs.h b/include/asm-sh/irq_regs.h new file mode 100644 index 000000000000..3dd9c0b70270 --- /dev/null +++ b/include/asm-sh/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/include/asm-sh/timer.h b/include/asm-sh/timer.h index 341cb71c2f9b..5df842bcf7b6 100644 --- a/include/asm-sh/timer.h +++ b/include/asm-sh/timer.h @@ -36,7 +36,6 @@ static inline unsigned long get_timer_offset(void) struct sys_timer *get_sys_timer(void); /* arch/sh/kernel/time.c */ -void handle_timer_tick(struct pt_regs *); +void handle_timer_tick(void); #endif /* __ASM_SH_TIMER_H */ - -- cgit v1.2.3 From 525ccc452c79db41874c5edac3f67618a0997d6f Mon Sep 17 00:00:00 2001 From: Paul Mundt <lethal@linux-sh.org> Date: Fri, 6 Oct 2006 17:35:48 +0900 Subject: sh: Convert INTC2 IRQ handler to irq_chip. More struct irq_chip conversions, this time the INTC2 handlers. 
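For reference, the general shape of such a conversion (a minimal sketch with placeholder names: foo_*, struct foo_data and FOO_INTMSK are hypothetical and not the real INTC2 register layout):

	static void foo_mask(unsigned int irq)
	{
		struct foo_data *p = get_irq_chip_data(irq);

		/* set the mask bit for this source (hypothetical register) */
		ctrl_outl(1 << p->msk_shift, FOO_INTMSK + p->msk_offset);
	}

	static struct irq_chip foo_chip = {
		.typename	= "foo",
		.mask		= foo_mask,
		.unmask		= foo_unmask,	/* symmetric write to a mask-clear register */
		.mask_ack	= foo_mask,
	};

	/* at setup time, instead of assigning irq_desc[irq].chip by hand: */
	set_irq_chip_and_handler(irq, &foo_chip, handle_level_irq);
	set_irq_chip_data(irq, p);

The old startup/shutdown/enable/disable/ack/end callbacks collapse into mask/unmask/mask_ack, and the per-IRQ bookkeeping moves into the chip data instead of a private lookup array.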
Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- arch/sh/kernel/cpu/irq/intc2.c | 138 +++++++---------------------------------- include/asm-sh/irq.h | 14 +++-- 2 files changed, 32 insertions(+), 120 deletions(-) (limited to 'include') diff --git a/arch/sh/kernel/cpu/irq/intc2.c b/arch/sh/kernel/cpu/irq/intc2.c index e30e4b7aa70e..d4b2bb7e08c7 100644 --- a/arch/sh/kernel/cpu/irq/intc2.c +++ b/arch/sh/kernel/cpu/irq/intc2.c @@ -10,93 +10,32 @@ * These are the "new Hitachi style" interrupts, as present on the * Hitachi 7751, the STM ST40 STB1, SH7760, and SH7780. */ - #include <linux/kernel.h> #include <linux/init.h> #include <linux/irq.h> #include <asm/system.h> #include <asm/io.h> -#include <asm/machvec.h> - -struct intc2_data { - unsigned char msk_offset; - unsigned char msk_shift; - - int (*clear_irq) (int); -}; - -static struct intc2_data intc2_data[NR_INTC2_IRQS]; - -static void enable_intc2_irq(unsigned int irq); -static void disable_intc2_irq(unsigned int irq); - -/* shutdown is same as "disable" */ -#define shutdown_intc2_irq disable_intc2_irq - -static void mask_and_ack_intc2(unsigned int); -static void end_intc2_irq(unsigned int irq); - -static unsigned int startup_intc2_irq(unsigned int irq) -{ - enable_intc2_irq(irq); - return 0; /* never anything pending */ -} - -static struct hw_interrupt_type intc2_irq_type = { - .typename = "INTC2-IRQ", - .startup = startup_intc2_irq, - .shutdown = shutdown_intc2_irq, - .enable = enable_intc2_irq, - .disable = disable_intc2_irq, - .ack = mask_and_ack_intc2, - .end = end_intc2_irq -}; static void disable_intc2_irq(unsigned int irq) { - int irq_offset = irq - INTC2_FIRST_IRQ; - int msk_shift, msk_offset; - - /* Sanity check */ - if (unlikely(irq_offset < 0 || irq_offset >= NR_INTC2_IRQS)) - return; - - msk_shift = intc2_data[irq_offset].msk_shift; - msk_offset = intc2_data[irq_offset].msk_offset; - - ctrl_outl(1 << msk_shift, - INTC2_BASE + INTC2_INTMSK_OFFSET + msk_offset); + struct intc2_data *p = get_irq_chip_data(irq); + ctrl_outl(1 << p->msk_shift, + INTC2_BASE + INTC2_INTMSK_OFFSET + p->msk_offset); } static void enable_intc2_irq(unsigned int irq) { - int irq_offset = irq - INTC2_FIRST_IRQ; - int msk_shift, msk_offset; - - /* Sanity check */ - if (unlikely(irq_offset < 0 || irq_offset >= NR_INTC2_IRQS)) - return; - - msk_shift = intc2_data[irq_offset].msk_shift; - msk_offset = intc2_data[irq_offset].msk_offset; - - ctrl_outl(1 << msk_shift, - INTC2_BASE + INTC2_INTMSKCLR_OFFSET + msk_offset); -} - -static void mask_and_ack_intc2(unsigned int irq) -{ - disable_intc2_irq(irq); + struct intc2_data *p = get_irq_chip_data(irq); + ctrl_outl(1 << p->msk_shift, + INTC2_BASE + INTC2_INTMSKCLR_OFFSET + p->msk_offset); } -static void end_intc2_irq(unsigned int irq) -{ - if (!(irq_desc[irq].status & (IRQ_DISABLED|IRQ_INPROGRESS))) - enable_intc2_irq(irq); - - if (unlikely(intc2_data[irq - INTC2_FIRST_IRQ].clear_irq)) - intc2_data[irq - INTC2_FIRST_IRQ].clear_irq(irq); -} +static struct irq_chip intc2_irq_chip = { + .typename = "intc2", + .mask = disable_intc2_irq, + .unmask = enable_intc2_irq, + .mask_ack = disable_intc2_irq, +}; /* * Setup an INTC2 style interrupt. 
@@ -108,46 +47,30 @@ static void end_intc2_irq(unsigned int irq) * | | | | * make_intc2_irq(84, 0, 16, 0, 13); */ -void make_intc2_irq(unsigned int irq, - unsigned int ipr_offset, unsigned int ipr_shift, - unsigned int msk_offset, unsigned int msk_shift, - unsigned int priority) +void make_intc2_irq(struct intc2_data *p) { - int irq_offset = irq - INTC2_FIRST_IRQ; unsigned int flags; unsigned long ipr; - if (unlikely(irq_offset < 0 || irq_offset >= NR_INTC2_IRQS)) - return; - - disable_irq_nosync(irq); - - /* Fill the data we need */ - intc2_data[irq_offset].msk_offset = msk_offset; - intc2_data[irq_offset].msk_shift = msk_shift; - intc2_data[irq_offset].clear_irq = NULL; + disable_irq_nosync(p->irq); /* Set the priority level */ local_irq_save(flags); - ipr = ctrl_inl(INTC2_BASE + INTC2_INTPRI_OFFSET + ipr_offset); - ipr &= ~(0xf << ipr_shift); - ipr |= priority << ipr_shift; - ctrl_outl(ipr, INTC2_BASE + INTC2_INTPRI_OFFSET + ipr_offset); + ipr = ctrl_inl(INTC2_BASE + INTC2_INTPRI_OFFSET + p->ipr_offset); + ipr &= ~(0xf << p->ipr_shift); + ipr |= p->priority << p->ipr_shift; + ctrl_outl(ipr, INTC2_BASE + INTC2_INTPRI_OFFSET + p->ipr_offset); local_irq_restore(flags); - irq_desc[irq].chip = &intc2_irq_type; + set_irq_chip_and_handler(p->irq, &intc2_irq_chip, handle_level_irq); + set_irq_chip_data(p->irq, p); - disable_intc2_irq(irq); + enable_intc2_irq(p->irq); } -static struct intc2_init { - unsigned short irq; - unsigned char ipr_offset, ipr_shift; - unsigned char msk_offset, msk_shift; - unsigned char priority; -} intc2_init_data[] __initdata = { +static struct intc2_data intc2_irq_table[] = { #if defined(CONFIG_CPU_SUBTYPE_ST40) {64, 0, 0, 0, 0, 13}, /* PCI serr */ {65, 0, 4, 0, 1, 13}, /* PCI err */ @@ -266,19 +189,6 @@ void __init init_IRQ_intc2(void) { int i; - for (i = 0; i < ARRAY_SIZE(intc2_init_data); i++) { - struct intc2_init *p = intc2_init_data + i; - make_intc2_irq(p->irq, p->ipr_offset, p->ipr_shift, - p-> msk_offset, p->msk_shift, p->priority); - } -} - -/* Adds a termination callback to the interrupt */ -void intc2_add_clear_irq(int irq, int (*fn)(int)) -{ - if (unlikely(irq < INTC2_FIRST_IRQ)) - return; - - intc2_data[irq - INTC2_FIRST_IRQ].clear_irq = fn; + for (i = 0; i < ARRAY_SIZE(intc2_irq_table); i++) + make_intc2_irq(intc2_irq_table + i); } - diff --git a/include/asm-sh/irq.h b/include/asm-sh/irq.h index 0e5f365aff70..28996f9c58cc 100644 --- a/include/asm-sh/irq.h +++ b/include/asm-sh/irq.h @@ -697,13 +697,15 @@ extern int ipr_irq_demux(int irq); #define INTC2_INTPRI_OFFSET 0x00 -void make_intc2_irq(unsigned int irq, - unsigned int ipr_offset, unsigned int ipr_shift, - unsigned int msk_offset, unsigned int msk_shift, - unsigned int priority); +struct intc2_data { + unsigned short irq; + unsigned char ipr_offset, ipr_shift; + unsigned char msk_offset, msk_shift; + unsigned char priority; +}; + +void make_intc2_irq(struct intc2_data *); void init_IRQ_intc2(void); -void intc2_add_clear_irq(int irq, int (*fn)(int)); - #endif extern int shmse_irq_demux(int irq); -- cgit v1.2.3 From 4dfbb9d8c6cbfc32faa5c71145bd2a43e1f8237c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra <a.p.zijlstra@chello.nl> Date: Wed, 11 Oct 2006 01:45:14 -0400 Subject: Lockdep: add lockdep_set_class_and_subclass() and lockdep_set_subclass() This annotation makes it possible to assign a subclass on lock init. This annotation is meant to reduce the _nested() annotations by assigning a default subclass. 
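For example, a driver whose lock always nests at a known level can set the subclass once at init time and then take the lock with a plain call (a sketch; "depth" stands for whatever nesting level the caller tracks, as the serio patch later in this series does with serio->depth):

	mutex_init(&dev->cmd_mutex);
	lockdep_set_subclass(&dev->cmd_mutex, depth);

	/* later: no mutex_lock_nested() annotation needed at the call site */
	mutex_lock(&dev->cmd_mutex);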
One could do without this annotation and rely on lockdep_set_class() exclusively, but that would require a manual stack of struct lock_class_key objects. Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Dmitry Torokhov <dtor@mail.ru> --- include/linux/lockdep.h | 15 +++++++++++---- kernel/lockdep.c | 10 ++++++---- kernel/mutex-debug.c | 2 +- lib/rwsem-spinlock.c | 2 +- lib/rwsem.c | 2 +- lib/spinlock_debug.c | 4 ++-- net/core/sock.c | 2 +- 7 files changed, 23 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 1314ca0f29be..14fec2a23b2e 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -202,7 +202,7 @@ extern int lockdep_internal(void); */ extern void lockdep_init_map(struct lockdep_map *lock, const char *name, - struct lock_class_key *key); + struct lock_class_key *key, int subclass); /* * Reinitialize a lock key - for cases where there is special locking or @@ -211,9 +211,14 @@ extern void lockdep_init_map(struct lockdep_map *lock, const char *name, * or they are too narrow (they suffer from a false class-split): */ #define lockdep_set_class(lock, key) \ - lockdep_init_map(&(lock)->dep_map, #key, key) + lockdep_init_map(&(lock)->dep_map, #key, key, 0) #define lockdep_set_class_and_name(lock, key, name) \ - lockdep_init_map(&(lock)->dep_map, name, key) + lockdep_init_map(&(lock)->dep_map, name, key, 0) +#define lockdep_set_class_and_subclass(lock, key, sub) \ + lockdep_init_map(&(lock)->dep_map, #key, key, sub) +#define lockdep_set_subclass(lock, sub) \ + lockdep_init_map(&(lock)->dep_map, #lock, \ + (lock)->dep_map.key, sub) /* * Acquire a lock. @@ -257,10 +262,12 @@ static inline int lockdep_internal(void) # define lock_release(l, n, i) do { } while (0) # define lockdep_init() do { } while (0) # define lockdep_info() do { } while (0) -# define lockdep_init_map(lock, name, key) do { (void)(key); } while (0) +# define lockdep_init_map(lock, name, key, sub) do { (void)(key); } while (0) # define lockdep_set_class(lock, key) do { (void)(key); } while (0) # define lockdep_set_class_and_name(lock, key, name) \ do { (void)(key); } while (0) +#define lockdep_set_class_and_subclass(lock, key, sub) \ + do { (void)(key); } while (0) # define INIT_LOCKDEP # define lockdep_reset() do { debug_locks = 1; } while (0) # define lockdep_free_key_range(start, size) do { } while (0) diff --git a/kernel/lockdep.c b/kernel/lockdep.c index 4c0553461000..ba7156ac70c1 100644 --- a/kernel/lockdep.c +++ b/kernel/lockdep.c @@ -1177,7 +1177,7 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass) * itself, so actual lookup of the hash should be once per lock object. 
*/ static inline struct lock_class * -register_lock_class(struct lockdep_map *lock, unsigned int subclass) +register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force) { struct lockdep_subclass_key *key; struct list_head *hash_head; @@ -1249,7 +1249,7 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass) out_unlock_set: __raw_spin_unlock(&hash_lock); - if (!subclass) + if (!subclass || force) lock->class_cache = class; DEBUG_LOCKS_WARN_ON(class->subclass != subclass); @@ -1937,7 +1937,7 @@ void trace_softirqs_off(unsigned long ip) * Initialize a lock instance's lock-class mapping info: */ void lockdep_init_map(struct lockdep_map *lock, const char *name, - struct lock_class_key *key) + struct lock_class_key *key, int subclass) { if (unlikely(!debug_locks)) return; @@ -1957,6 +1957,8 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name, lock->name = name; lock->key = key; lock->class_cache = NULL; + if (subclass) + register_lock_class(lock, subclass, 1); } EXPORT_SYMBOL_GPL(lockdep_init_map); @@ -1995,7 +1997,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, * Not cached yet or subclass? */ if (unlikely(!class)) { - class = register_lock_class(lock, subclass); + class = register_lock_class(lock, subclass, 0); if (!class) return 0; } diff --git a/kernel/mutex-debug.c b/kernel/mutex-debug.c index e3203c654dda..18651641a7b5 100644 --- a/kernel/mutex-debug.c +++ b/kernel/mutex-debug.c @@ -91,7 +91,7 @@ void debug_mutex_init(struct mutex *lock, const char *name, * Make sure we are not reinitializing a held lock: */ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); - lockdep_init_map(&lock->dep_map, name, key); + lockdep_init_map(&lock->dep_map, name, key, 0); #endif lock->owner = NULL; lock->magic = lock; diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c index db4fed74b940..c4cfd6c0342f 100644 --- a/lib/rwsem-spinlock.c +++ b/lib/rwsem-spinlock.c @@ -28,7 +28,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, * Make sure we are not reinitializing a held semaphore: */ debug_check_no_locks_freed((void *)sem, sizeof(*sem)); - lockdep_init_map(&sem->dep_map, name, key); + lockdep_init_map(&sem->dep_map, name, key, 0); #endif sem->activity = 0; spin_lock_init(&sem->wait_lock); diff --git a/lib/rwsem.c b/lib/rwsem.c index 901d0e7da892..cdb4e3d05607 100644 --- a/lib/rwsem.c +++ b/lib/rwsem.c @@ -19,7 +19,7 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name, * Make sure we are not reinitializing a held semaphore: */ debug_check_no_locks_freed((void *)sem, sizeof(*sem)); - lockdep_init_map(&sem->dep_map, name, key); + lockdep_init_map(&sem->dep_map, name, key, 0); #endif sem->count = RWSEM_UNLOCKED_VALUE; spin_lock_init(&sem->wait_lock); diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c index dafaf1de2491..b6c4f898197c 100644 --- a/lib/spinlock_debug.c +++ b/lib/spinlock_debug.c @@ -20,7 +20,7 @@ void __spin_lock_init(spinlock_t *lock, const char *name, * Make sure we are not reinitializing a held lock: */ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); - lockdep_init_map(&lock->dep_map, name, key); + lockdep_init_map(&lock->dep_map, name, key, 0); #endif lock->raw_lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED; lock->magic = SPINLOCK_MAGIC; @@ -38,7 +38,7 @@ void __rwlock_init(rwlock_t *lock, const char *name, * Make sure we are not reinitializing a held lock: */ debug_check_no_locks_freed((void *)lock, sizeof(*lock)); - lockdep_init_map(&lock->dep_map, name, 
key); + lockdep_init_map(&lock->dep_map, name, key, 0); #endif lock->raw_lock = (raw_rwlock_t) __RAW_RW_LOCK_UNLOCKED; lock->magic = RWLOCK_MAGIC; diff --git a/net/core/sock.c b/net/core/sock.c index b77e155cbe6c..d472db4776c3 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -823,7 +823,7 @@ static void inline sock_lock_init(struct sock *sk) af_family_slock_key_strings[sk->sk_family]); lockdep_init_map(&sk->sk_lock.dep_map, af_family_key_strings[sk->sk_family], - af_family_keys + sk->sk_family); + af_family_keys + sk->sk_family, 0); } /** -- cgit v1.2.3 From 88aa0103e408616e433c209e80169ab8d6eda99e Mon Sep 17 00:00:00 2001 From: Jiri Kosina <jikos@jikos.cz> Date: Wed, 11 Oct 2006 01:45:31 -0400 Subject: Input: serio - add lockdep annotations Signed-off-by: Jiri Kosina <jikos@jikos.cz> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Dmitry Torokhov <dtor@mail.ru> --- drivers/input/serio/libps2.c | 3 ++- drivers/input/serio/serio.c | 6 +++++- include/linux/serio.h | 1 + 3 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/input/serio/libps2.c b/drivers/input/serio/libps2.c index dcb16b5cbec0..e5b1b60757bb 100644 --- a/drivers/input/serio/libps2.c +++ b/drivers/input/serio/libps2.c @@ -189,7 +189,7 @@ int ps2_command(struct ps2dev *ps2dev, unsigned char *param, int command) return -1; } - mutex_lock_nested(&ps2dev->cmd_mutex, SINGLE_DEPTH_NESTING); + mutex_lock(&ps2dev->cmd_mutex); serio_pause_rx(ps2dev->serio); ps2dev->flags = command == PS2_CMD_GETID ? PS2_FLAG_WAITID : 0; @@ -296,6 +296,7 @@ EXPORT_SYMBOL(ps2_schedule_command); void ps2_init(struct ps2dev *ps2dev, struct serio *serio) { mutex_init(&ps2dev->cmd_mutex); + lockdep_set_subclass(&ps2dev->cmd_mutex, serio->depth); init_waitqueue_head(&ps2dev->wait); ps2dev->serio = serio; } diff --git a/drivers/input/serio/serio.c b/drivers/input/serio/serio.c index 960fae3c3cea..480fdc5d20b3 100644 --- a/drivers/input/serio/serio.c +++ b/drivers/input/serio/serio.c @@ -538,8 +538,12 @@ static void serio_init_port(struct serio *serio) "serio%ld", (long)atomic_inc_return(&serio_no) - 1); serio->dev.bus = &serio_bus; serio->dev.release = serio_release_port; - if (serio->parent) + if (serio->parent) { serio->dev.parent = &serio->parent->dev; + serio->depth = serio->parent->depth + 1; + } else + serio->depth = 0; + lockdep_set_subclass(&serio->lock, serio->depth); } /* diff --git a/include/linux/serio.h b/include/linux/serio.h index 3a697cc6ecae..b99c5ca9708d 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -41,6 +41,7 @@ struct serio { void (*stop)(struct serio *); struct serio *parent, *child; + unsigned int depth; /* level of nesting in serio hierarchy */ struct serio_driver *drv; /* accessed from interrupt, must be protected by serio->lock and serio->sem */ struct mutex drv_mutex; /* protects serio->drv so attributes can pin driver */ -- cgit v1.2.3 From 9d0a57cbdb4976f382eb1c03baee338e467b6592 Mon Sep 17 00:00:00 2001 From: Heiko Carstens <heiko.carstens@de.ibm.com> Date: Wed, 11 Oct 2006 15:31:26 +0200 Subject: [S390] irq change improvements. Remove the last few places where a pointer to pt_regs gets passed. Also make sure we call set_irq_regs() before irq_enter() and after irq_exit(). This doesn't fix anything but makes sure s390 looks the same like all other architectures. 
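The resulting interrupt entry sequence is the one common to all architectures (sketch):

	struct pt_regs *old_regs = set_irq_regs(regs);

	irq_enter();
	/* ... dispatch the interrupt ... */
	irq_exit();

	set_irq_regs(old_regs);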
Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- arch/s390/appldata/appldata_base.c | 2 +- arch/s390/kernel/s390_ext.c | 4 ++-- arch/s390/kernel/vtime.c | 8 ++++---- drivers/s390/cio/cio.c | 4 ++-- include/asm-s390/timer.h | 2 +- 5 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index 2b1e6c9a6e0e..45c9fa7d7545 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -109,7 +109,7 @@ static LIST_HEAD(appldata_ops_list); * * schedule work and reschedule timer */ -static void appldata_timer_function(unsigned long data, struct pt_regs *regs) +static void appldata_timer_function(unsigned long data) { P_DEBUG(" -= Timer =-\n"); P_DEBUG("CPU: %i, expire_count: %i\n", smp_processor_id(), diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c index c49ab8c784d2..4faf96f8a834 100644 --- a/arch/s390/kernel/s390_ext.c +++ b/arch/s390/kernel/s390_ext.c @@ -117,8 +117,8 @@ void do_extint(struct pt_regs *regs, unsigned short code) int index; struct pt_regs *old_regs; - irq_enter(); old_regs = set_irq_regs(regs); + irq_enter(); asm volatile ("mc 0,0"); if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer) /** @@ -134,8 +134,8 @@ void do_extint(struct pt_regs *regs, unsigned short code) p->handler(code); } } - set_irq_regs(old_regs); irq_exit(); + set_irq_regs(old_regs); } EXPORT_SYMBOL(register_external_interrupt); diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 1d7d3938b2b1..21baaf5496d6 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -209,11 +209,11 @@ static void list_add_sorted(struct vtimer_list *timer, struct list_head *head) * Do the callback functions of expired vtimer events. * Called from within the interrupt handler. */ -static void do_callbacks(struct list_head *cb_list, struct pt_regs *regs) +static void do_callbacks(struct list_head *cb_list) { struct vtimer_queue *vt_list; struct vtimer_list *event, *tmp; - void (*fn)(unsigned long, struct pt_regs*); + void (*fn)(unsigned long); unsigned long data; if (list_empty(cb_list)) @@ -224,7 +224,7 @@ static void do_callbacks(struct list_head *cb_list, struct pt_regs *regs) list_for_each_entry_safe(event, tmp, cb_list, entry) { fn = event->function; data = event->data; - fn(data, regs); + fn(data); if (!event->interval) /* delete one shot timer */ @@ -275,7 +275,7 @@ static void do_cpu_timer_interrupt(__u16 error_code) list_move_tail(&event->entry, &cb_list); } spin_unlock(&vt_list->lock); - do_callbacks(&cb_list, get_irq_regs()); + do_callbacks(&cb_list); /* next event is first in list */ spin_lock(&vt_list->lock); diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index f18b1623cad7..8936e460a807 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -609,8 +609,8 @@ do_IRQ (struct pt_regs *regs) struct irb *irb; struct pt_regs *old_regs; - irq_enter (); old_regs = set_irq_regs(regs); + irq_enter(); asm volatile ("mc 0,0"); if (S390_lowcore.int_clock >= S390_lowcore.jiffy_timer) /** @@ -655,8 +655,8 @@ do_IRQ (struct pt_regs *regs) * out of the sie which costs more cycles than it saves. 
*/ } while (!MACHINE_IS_VM && tpi (NULL) != 0); + irq_exit(); set_irq_regs(old_regs); - irq_exit (); } #ifdef CONFIG_CCW_CONSOLE diff --git a/include/asm-s390/timer.h b/include/asm-s390/timer.h index fcd6c256a2d1..30e5cbe570f2 100644 --- a/include/asm-s390/timer.h +++ b/include/asm-s390/timer.h @@ -26,7 +26,7 @@ struct vtimer_list { spinlock_t lock; unsigned long magic; - void (*function)(unsigned long, struct pt_regs*); + void (*function)(unsigned long); unsigned long data; }; -- cgit v1.2.3 From 789642680518b28e7dc13f96061460a8238ec622 Mon Sep 17 00:00:00 2001 From: Cornelia Huck <cornelia.huck@de.ibm.com> Date: Wed, 11 Oct 2006 15:31:38 +0200 Subject: [S390] cio: Use ccw_dev_id and subchannel_id in ccw_device_private Use the proper structures to identify device and subchannel. Change get_disc_ccwdev_by_devno() to get_disc_ccwdev_by_dev_id(). Signed-off-by: Cornelia Huck <cornelia.huck@de.ibm.com> Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- drivers/s390/cio/css.h | 5 ++--- drivers/s390/cio/device.c | 32 +++++++++++++++----------------- drivers/s390/cio/device_fsm.c | 14 ++++++++------ drivers/s390/cio/device_id.c | 14 ++++++++------ drivers/s390/cio/device_ops.c | 4 ++-- drivers/s390/cio/device_pgid.c | 23 +++++++++++++---------- drivers/s390/cio/device_status.c | 7 +++---- drivers/s390/cio/qdio.c | 10 +++++----- include/asm-s390/cio.h | 6 ++++++ 9 files changed, 62 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/drivers/s390/cio/css.h b/drivers/s390/cio/css.h index 8aabb4adeb5f..15bd1e28ed70 100644 --- a/drivers/s390/cio/css.h +++ b/drivers/s390/cio/css.h @@ -76,9 +76,8 @@ struct ccw_device_private { int state; /* device state */ atomic_t onoff; unsigned long registered; - __u16 devno; /* device number */ - __u16 sch_no; /* subchannel number */ - __u8 ssid; /* subchannel set id */ + struct ccw_dev_id dev_id; /* device id */ + struct subchannel_id schid; /* subchannel number */ __u8 imask; /* lpm mask for SNID/SID/SPGID */ int iretry; /* retry counter SNID/SID/SPGID */ struct { diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 688945662c15..7646a9930ecb 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -552,21 +552,19 @@ ccw_device_register(struct ccw_device *cdev) } struct match_data { - unsigned int devno; - unsigned int ssid; + struct ccw_dev_id dev_id; struct ccw_device * sibling; }; static int match_devno(struct device * dev, void * data) { - struct match_data * d = (struct match_data *)data; + struct match_data * d = data; struct ccw_device * cdev; cdev = to_ccwdev(dev); if ((cdev->private->state == DEV_STATE_DISCONNECTED) && - (cdev->private->devno == d->devno) && - (cdev->private->ssid == d->ssid) && + ccw_dev_id_is_equal(&cdev->private->dev_id, &d->dev_id) && (cdev != d->sibling)) { cdev->private->state = DEV_STATE_NOT_OPER; return 1; @@ -574,15 +572,13 @@ match_devno(struct device * dev, void * data) return 0; } -static struct ccw_device * -get_disc_ccwdev_by_devno(unsigned int devno, unsigned int ssid, - struct ccw_device *sibling) +static struct ccw_device * get_disc_ccwdev_by_dev_id(struct ccw_dev_id *dev_id, + struct ccw_device *sibling) { struct device *dev; struct match_data data; - data.devno = devno; - data.ssid = ssid; + data.dev_id = *dev_id; data.sibling = sibling; dev = bus_find_device(&ccw_bus_type, NULL, &data, match_devno); @@ -618,7 +614,7 @@ ccw_device_do_unreg_rereg(void *data) cdev = (struct ccw_device *)data; sch = to_subchannel(cdev->dev.parent); - if (cdev->private->devno 
!= sch->schib.pmcw.dev) { + if (cdev->private->dev_id.devno != sch->schib.pmcw.dev) { /* * The device number has changed. This is usually only when * a device has been detached under VM and then re-appeared @@ -633,10 +629,12 @@ ccw_device_do_unreg_rereg(void *data) * get possibly sick... */ struct ccw_device *other_cdev; + struct ccw_dev_id dev_id; need_rename = 1; - other_cdev = get_disc_ccwdev_by_devno(sch->schib.pmcw.dev, - sch->schid.ssid, cdev); + dev_id.devno = sch->schib.pmcw.dev; + dev_id.ssid = sch->schid.ssid; + other_cdev = get_disc_ccwdev_by_dev_id(&dev_id, cdev); if (other_cdev) { struct subchannel *other_sch; @@ -652,7 +650,7 @@ ccw_device_do_unreg_rereg(void *data) } /* Update ssd info here. */ css_get_ssd_info(sch); - cdev->private->devno = sch->schib.pmcw.dev; + cdev->private->dev_id.devno = sch->schib.pmcw.dev; } else need_rename = 0; device_remove_files(&cdev->dev); @@ -792,9 +790,9 @@ io_subchannel_recog(struct ccw_device *cdev, struct subchannel *sch) /* Init private data. */ priv = cdev->private; - priv->devno = sch->schib.pmcw.dev; - priv->ssid = sch->schid.ssid; - priv->sch_no = sch->schid.sch_no; + priv->dev_id.devno = sch->schib.pmcw.dev; + priv->dev_id.ssid = sch->schid.ssid; + priv->schid = sch->schid; priv->state = DEV_STATE_NOT_OPER; INIT_LIST_HEAD(&priv->cmb_list); init_waitqueue_head(&priv->wait_q); diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c index c36d8b6fdb04..392eb33f3a9c 100644 --- a/drivers/s390/cio/device_fsm.c +++ b/drivers/s390/cio/device_fsm.c @@ -183,7 +183,7 @@ ccw_device_handle_oper(struct ccw_device *cdev) cdev->id.cu_model != cdev->private->senseid.cu_model || cdev->id.dev_type != cdev->private->senseid.dev_type || cdev->id.dev_model != cdev->private->senseid.dev_model || - cdev->private->devno != sch->schib.pmcw.dev) { + cdev->private->dev_id.devno != sch->schib.pmcw.dev) { PREPARE_WORK(&cdev->private->kick_work, ccw_device_do_unreg_rereg, (void *)cdev); queue_work(ccw_device_work, &cdev->private->kick_work); @@ -255,7 +255,7 @@ ccw_device_recog_done(struct ccw_device *cdev, int state) case DEV_STATE_NOT_OPER: CIO_DEBUG(KERN_WARNING, 2, "SenseID : unknown device %04x on subchannel " - "0.%x.%04x\n", cdev->private->devno, + "0.%x.%04x\n", cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no); break; case DEV_STATE_OFFLINE: @@ -282,14 +282,15 @@ ccw_device_recog_done(struct ccw_device *cdev, int state) CIO_DEBUG(KERN_INFO, 2, "SenseID : device 0.%x.%04x reports: " "CU Type/Mod = %04X/%02X, Dev Type/Mod = " "%04X/%02X\n", - cdev->private->ssid, cdev->private->devno, + cdev->private->dev_id.ssid, + cdev->private->dev_id.devno, cdev->id.cu_type, cdev->id.cu_model, cdev->id.dev_type, cdev->id.dev_model); break; case DEV_STATE_BOXED: CIO_DEBUG(KERN_WARNING, 2, "SenseID : boxed device %04x on subchannel " - "0.%x.%04x\n", cdev->private->devno, + "0.%x.%04x\n", cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no); break; } @@ -363,7 +364,7 @@ ccw_device_done(struct ccw_device *cdev, int state) if (state == DEV_STATE_BOXED) CIO_DEBUG(KERN_WARNING, 2, "Boxed device %04x on subchannel %04x\n", - cdev->private->devno, sch->schid.sch_no); + cdev->private->dev_id.devno, sch->schid.sch_no); if (cdev->private->flags.donotify) { cdev->private->flags.donotify = 0; @@ -412,7 +413,8 @@ static void __ccw_device_get_common_pgid(struct ccw_device *cdev) /* PGID mismatch, can't pathgroup. 
*/ CIO_MSG_EVENT(0, "SNID - pgid mismatch for device " "0.%x.%04x, can't pathgroup\n", - cdev->private->ssid, cdev->private->devno); + cdev->private->dev_id.ssid, + cdev->private->dev_id.devno); cdev->private->options.pgroup = 0; return; } diff --git a/drivers/s390/cio/device_id.c b/drivers/s390/cio/device_id.c index 1398367b5f68..a74785b9e4eb 100644 --- a/drivers/s390/cio/device_id.c +++ b/drivers/s390/cio/device_id.c @@ -251,7 +251,7 @@ ccw_device_check_sense_id(struct ccw_device *cdev) */ CIO_MSG_EVENT(2, "SenseID : device %04x on Subchannel " "0.%x.%04x reports cmd reject\n", - cdev->private->devno, sch->schid.ssid, + cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no); return -EOPNOTSUPP; } @@ -259,7 +259,8 @@ ccw_device_check_sense_id(struct ccw_device *cdev) CIO_MSG_EVENT(2, "SenseID : UC on dev 0.%x.%04x, " "lpum %02X, cnt %02d, sns :" " %02X%02X%02X%02X %02X%02X%02X%02X ...\n", - cdev->private->ssid, cdev->private->devno, + cdev->private->dev_id.ssid, + cdev->private->dev_id.devno, irb->esw.esw0.sublog.lpum, irb->esw.esw0.erw.scnt, irb->ecw[0], irb->ecw[1], @@ -274,14 +275,15 @@ ccw_device_check_sense_id(struct ccw_device *cdev) CIO_MSG_EVENT(2, "SenseID : path %02X for device %04x " "on subchannel 0.%x.%04x is " "'not operational'\n", sch->orb.lpm, - cdev->private->devno, sch->schid.ssid, - sch->schid.sch_no); + cdev->private->dev_id.devno, + sch->schid.ssid, sch->schid.sch_no); return -EACCES; } /* Hmm, whatever happened, try again. */ CIO_MSG_EVENT(2, "SenseID : start_IO() for device %04x on " "subchannel 0.%x.%04x returns status %02X%02X\n", - cdev->private->devno, sch->schid.ssid, sch->schid.sch_no, + cdev->private->dev_id.devno, sch->schid.ssid, + sch->schid.sch_no, irb->scsw.dstat, irb->scsw.cstat); return -EAGAIN; } @@ -330,7 +332,7 @@ ccw_device_sense_id_irq(struct ccw_device *cdev, enum dev_event dev_event) /* fall through. */ default: /* Sense ID failed. Try asking VM. */ if (MACHINE_IS_VM) { - VM_virtual_device_info (cdev->private->devno, + VM_virtual_device_info (cdev->private->dev_id.devno, &cdev->private->senseid); if (cdev->private->senseid.cu_type != 0xFFFF) { /* Got the device information from VM. 
*/ diff --git a/drivers/s390/cio/device_ops.c b/drivers/s390/cio/device_ops.c index 84b9b18eabc2..96219935a06a 100644 --- a/drivers/s390/cio/device_ops.c +++ b/drivers/s390/cio/device_ops.c @@ -592,13 +592,13 @@ ccw_device_get_chp_desc(struct ccw_device *cdev, int chp_no) int _ccw_device_get_subchannel_number(struct ccw_device *cdev) { - return cdev->private->sch_no; + return cdev->private->schid.sch_no; } int _ccw_device_get_device_number(struct ccw_device *cdev) { - return cdev->private->devno; + return cdev->private->dev_id.devno; } diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c index 84917b39de45..2975ce888c19 100644 --- a/drivers/s390/cio/device_pgid.c +++ b/drivers/s390/cio/device_pgid.c @@ -79,7 +79,8 @@ __ccw_device_sense_pgid_start(struct ccw_device *cdev) CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel " "0.%x.%04x, lpm %02X, became 'not " "operational'\n", - cdev->private->devno, sch->schid.ssid, + cdev->private->dev_id.devno, + sch->schid.ssid, sch->schid.sch_no, cdev->private->imask); } @@ -135,7 +136,8 @@ __ccw_device_check_sense_pgid(struct ccw_device *cdev) CIO_MSG_EVENT(2, "SNID - device 0.%x.%04x, unit check, " "lpum %02X, cnt %02d, sns : " "%02X%02X%02X%02X %02X%02X%02X%02X ...\n", - cdev->private->ssid, cdev->private->devno, + cdev->private->dev_id.ssid, + cdev->private->dev_id.devno, irb->esw.esw0.sublog.lpum, irb->esw.esw0.erw.scnt, irb->ecw[0], irb->ecw[1], @@ -147,7 +149,7 @@ __ccw_device_check_sense_pgid(struct ccw_device *cdev) if (irb->scsw.cc == 3) { CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel 0.%x.%04x," " lpm %02X, became 'not operational'\n", - cdev->private->devno, sch->schid.ssid, + cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no, sch->orb.lpm); return -EACCES; } @@ -155,7 +157,7 @@ __ccw_device_check_sense_pgid(struct ccw_device *cdev) if (cdev->private->pgid[i].inf.ps.state2 == SNID_STATE2_RESVD_ELSE) { CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel 0.%x.%04x " "is reserved by someone else\n", - cdev->private->devno, sch->schid.ssid, + cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no); return -EUSERS; } @@ -261,7 +263,7 @@ __ccw_device_do_pgid(struct ccw_device *cdev, __u8 func) /* PGID command failed on this path. */ CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel " "0.%x.%04x, lpm %02X, became 'not operational'\n", - cdev->private->devno, sch->schid.ssid, + cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no, cdev->private->imask); return ret; } @@ -301,7 +303,7 @@ static int __ccw_device_do_nop(struct ccw_device *cdev) /* nop command failed on this path. 
*/ CIO_MSG_EVENT(2, "NOP - Device %04x on Subchannel " "0.%x.%04x, lpm %02X, became 'not operational'\n", - cdev->private->devno, sch->schid.ssid, + cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no, cdev->private->imask); return ret; } @@ -328,8 +330,9 @@ __ccw_device_check_pgid(struct ccw_device *cdev) CIO_MSG_EVENT(2, "SPID - device 0.%x.%04x, unit check, " "cnt %02d, " "sns : %02X%02X%02X%02X %02X%02X%02X%02X ...\n", - cdev->private->ssid, - cdev->private->devno, irb->esw.esw0.erw.scnt, + cdev->private->dev_id.ssid, + cdev->private->dev_id.devno, + irb->esw.esw0.erw.scnt, irb->ecw[0], irb->ecw[1], irb->ecw[2], irb->ecw[3], irb->ecw[4], irb->ecw[5], @@ -339,7 +342,7 @@ __ccw_device_check_pgid(struct ccw_device *cdev) if (irb->scsw.cc == 3) { CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel 0.%x.%04x," " lpm %02X, became 'not operational'\n", - cdev->private->devno, sch->schid.ssid, + cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no, cdev->private->imask); return -EACCES; } @@ -362,7 +365,7 @@ static int __ccw_device_check_nop(struct ccw_device *cdev) if (irb->scsw.cc == 3) { CIO_MSG_EVENT(2, "NOP - Device %04x on Subchannel 0.%x.%04x," " lpm %02X, became 'not operational'\n", - cdev->private->devno, sch->schid.ssid, + cdev->private->dev_id.devno, sch->schid.ssid, sch->schid.sch_no, cdev->private->imask); return -EACCES; } diff --git a/drivers/s390/cio/device_status.c b/drivers/s390/cio/device_status.c index caf148d5caad..3f7cbce4cd87 100644 --- a/drivers/s390/cio/device_status.c +++ b/drivers/s390/cio/device_status.c @@ -32,19 +32,18 @@ ccw_device_msg_control_check(struct ccw_device *cdev, struct irb *irb) SCHN_STAT_CHN_CTRL_CHK | SCHN_STAT_INTF_CTRL_CHK))) return; - CIO_MSG_EVENT(0, "Channel-Check or Interface-Control-Check " "received" " ... 
device %04x on subchannel 0.%x.%04x, dev_stat " ": %02X sch_stat : %02X\n", - cdev->private->devno, cdev->private->ssid, - cdev->private->sch_no, + cdev->private->dev_id.devno, cdev->private->schid.ssid, + cdev->private->schid.sch_no, irb->scsw.dstat, irb->scsw.cstat); if (irb->scsw.cc != 3) { char dbf_text[15]; - sprintf(dbf_text, "chk%x", cdev->private->sch_no); + sprintf(dbf_text, "chk%x", cdev->private->schid.sch_no); CIO_TRACE_EVENT(0, dbf_text); CIO_HEX_EVENT(0, irb, sizeof (struct irb)); } diff --git a/drivers/s390/cio/qdio.c b/drivers/s390/cio/qdio.c index cde822d8b5c8..0648ce5bb684 100644 --- a/drivers/s390/cio/qdio.c +++ b/drivers/s390/cio/qdio.c @@ -1741,7 +1741,7 @@ qdio_fill_qs(struct qdio_irq *irq_ptr, struct ccw_device *cdev, void *ptr; int available; - sprintf(dbf_text,"qfqs%4x",cdev->private->sch_no); + sprintf(dbf_text,"qfqs%4x",cdev->private->schid.sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); for (i=0;i<no_input_qs;i++) { q=irq_ptr->input_qs[i]; @@ -2924,7 +2924,7 @@ qdio_establish_handle_irq(struct ccw_device *cdev, int cstat, int dstat) irq_ptr = cdev->private->qdio_data; - sprintf(dbf_text,"qehi%4x",cdev->private->sch_no); + sprintf(dbf_text,"qehi%4x",cdev->private->schid.sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); QDIO_DBF_TEXT0(0,trace,dbf_text); @@ -2943,7 +2943,7 @@ qdio_initialize(struct qdio_initialize *init_data) int rc; char dbf_text[15]; - sprintf(dbf_text,"qini%4x",init_data->cdev->private->sch_no); + sprintf(dbf_text,"qini%4x",init_data->cdev->private->schid.sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); QDIO_DBF_TEXT0(0,trace,dbf_text); @@ -2964,7 +2964,7 @@ qdio_allocate(struct qdio_initialize *init_data) struct qdio_irq *irq_ptr; char dbf_text[15]; - sprintf(dbf_text,"qalc%4x",init_data->cdev->private->sch_no); + sprintf(dbf_text,"qalc%4x",init_data->cdev->private->schid.sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); QDIO_DBF_TEXT0(0,trace,dbf_text); if ( (init_data->no_input_qs>QDIO_MAX_QUEUES_PER_IRQ) || @@ -3187,7 +3187,7 @@ qdio_establish(struct qdio_initialize *init_data) tiqdio_set_delay_target(irq_ptr,TIQDIO_DELAY_TARGET); } - sprintf(dbf_text,"qest%4x",cdev->private->sch_no); + sprintf(dbf_text,"qest%4x",cdev->private->schid.sch_no); QDIO_DBF_TEXT0(0,setup,dbf_text); QDIO_DBF_TEXT0(0,trace,dbf_text); diff --git a/include/asm-s390/cio.h b/include/asm-s390/cio.h index da063cd5f0a0..81287d86329d 100644 --- a/include/asm-s390/cio.h +++ b/include/asm-s390/cio.h @@ -275,6 +275,12 @@ struct ccw_dev_id { u16 devno; }; +static inline int ccw_dev_id_is_equal(struct ccw_dev_id *dev_id1, + struct ccw_dev_id *dev_id2) +{ + return !memcmp(dev_id1, dev_id2, sizeof(struct ccw_dev_id)); +} + extern int diag210(struct diag210 *addr); extern void wait_cons_dev(void); -- cgit v1.2.3 From 4e0fadfcf62e252d2b14de0e0927eb2830c0c28c Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen <hskinnemoen@atmel.com> Date: Wed, 11 Oct 2006 01:20:37 -0700 Subject: [PATCH] IRQ: Fix AVR32 breakage Make the necessary changes to AVR32 required by the irq regs stuff. 
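Handlers lose their struct pt_regs argument and fetch the registers on demand instead; a sketch of the converted form (foo_interrupt is a placeholder name):

	static irqreturn_t foo_interrupt(int irq, void *dev_id)
	{
		struct pt_regs *regs = get_irq_regs();

		/* only the code that actually needs the registers asks for them */
		update_process_times(user_mode(regs));

		return IRQ_HANDLED;
	}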
Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- arch/avr32/kernel/time.c | 10 +++++----- arch/avr32/mach-at32ap/extint.c | 5 ++--- arch/avr32/mach-at32ap/intc.c | 7 ++++++- include/asm-avr32/irq_regs.h | 1 + 4 files changed, 14 insertions(+), 9 deletions(-) create mode 100644 include/asm-avr32/irq_regs.h (limited to 'include') diff --git a/arch/avr32/kernel/time.c b/arch/avr32/kernel/time.c index 3e56b9f4358a..5a247ba71a72 100644 --- a/arch/avr32/kernel/time.c +++ b/arch/avr32/kernel/time.c @@ -124,15 +124,15 @@ unsigned long long sched_clock(void) * * In UP mode, it is invoked from the (global) timer_interrupt. */ -static void local_timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +static void local_timer_interrupt(int irq, void *dev_id) { if (current->pid) - profile_tick(CPU_PROFILING, regs); - update_process_times(user_mode(regs)); + profile_tick(CPU_PROFILING); + update_process_times(user_mode(get_irq_regs())); } static irqreturn_t -timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) +timer_interrupt(int irq, void *dev_id) { unsigned int count; @@ -157,7 +157,7 @@ timer_interrupt(int irq, void *dev_id, struct pt_regs *regs) * * SMP is not supported yet. */ - local_timer_interrupt(irq, dev_id, regs); + local_timer_interrupt(irq, dev_id); return IRQ_HANDLED; } diff --git a/arch/avr32/mach-at32ap/extint.c b/arch/avr32/mach-at32ap/extint.c index 7da9c5f7a0eb..4dff1f988900 100644 --- a/arch/avr32/mach-at32ap/extint.c +++ b/arch/avr32/mach-at32ap/extint.c @@ -102,8 +102,7 @@ struct irq_chip eim_chip = { .set_type = eim_set_irq_type, }; -static void demux_eim_irq(unsigned int irq, struct irq_desc *desc, - struct pt_regs *regs) +static void demux_eim_irq(unsigned int irq, struct irq_desc *desc) { struct at32_sm *sm = desc->handler_data; struct irq_desc *ext_desc; @@ -121,7 +120,7 @@ static void demux_eim_irq(unsigned int irq, struct irq_desc *desc, ext_irq = i + sm->eim_first_irq; ext_desc = irq_desc + ext_irq; - ext_desc->handle_irq(ext_irq, ext_desc, regs); + ext_desc->handle_irq(ext_irq, ext_desc); } spin_unlock(&sm->lock); diff --git a/arch/avr32/mach-at32ap/intc.c b/arch/avr32/mach-at32ap/intc.c index 74f8c9f2f03d..eb87a18ad7b2 100644 --- a/arch/avr32/mach-at32ap/intc.c +++ b/arch/avr32/mach-at32ap/intc.c @@ -52,16 +52,19 @@ static struct intc intc0 = { asmlinkage void do_IRQ(int level, struct pt_regs *regs) { struct irq_desc *desc; + struct pt_regs *old_regs; unsigned int irq; unsigned long status_reg; local_irq_disable(); + old_regs = set_irq_regs(regs); + irq_enter(); irq = intc_readl(&intc0, INTCAUSE0 - 4 * level); desc = irq_desc + irq; - desc->handle_irq(irq, desc, regs); + desc->handle_irq(irq, desc); /* * Clear all interrupt level masks so that we may handle @@ -75,6 +78,8 @@ asmlinkage void do_IRQ(int level, struct pt_regs *regs) sysreg_write(SR, status_reg); irq_exit(); + + set_irq_regs(old_regs); } void __init init_IRQ(void) diff --git a/include/asm-avr32/irq_regs.h b/include/asm-avr32/irq_regs.h new file mode 100644 index 000000000000..3dd9c0b70270 --- /dev/null +++ b/include/asm-avr32/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> -- cgit v1.2.3 From 502717f4e112b18d9c37753a32f675bec9f2838b Mon Sep 17 00:00:00 2001 From: "Chen, Kenneth W" <kenneth.w.chen@intel.com> Date: Wed, 11 Oct 2006 01:20:46 -0700 Subject: [PATCH] hugetlb: fix linked list corruption in unmap_hugepage_range() commit fe1668ae5bf0145014c71797febd9ad5670d5d05 causes 
the kernel to oops with the libhugetlbfs test suite. The problem is that
hugetlb pages can be shared by multiple mappings. Multiple threads can
fight over page->lru in the unmap path and bad things happen. We now
serialize __unmap_hugepage_range to avoid concurrent linked list
manipulation. Such serialization is also needed for shared page table
pages on hugetlb areas. This patch fixes the bug and also serves as a
preparatory patch for shared page tables.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Cc: Hugh Dickins <hugh@veritas.com>
Cc: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hugetlbfs/inode.c | 2 +-
 include/linux/hugetlb.h | 1 +
 mm/hugetlb.c | 22 ++++++++++++++++++++--
 3 files changed, 22 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 5e03b2f67b93..4ee3f006b861 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -293,7 +293,7 @@ hugetlb_vmtruncate_list(struct prio_tree_root *root, unsigned long h_pgoff)
 		if (h_vm_pgoff >= h_pgoff)
 			v_offset = 0;
 
-		unmap_hugepage_range(vma,
+		__unmap_hugepage_range(vma,
 				vma->vm_start + v_offset, vma->vm_end);
 	}
 }
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index c25a38d8f600..5081d27bfa27 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -17,6 +17,7 @@ int hugetlb_sysctl_handler(struct ctl_table *, int, struct file *, void __user *
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
 int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, struct page **, struct vm_area_struct **, unsigned long *, int *, int);
 void unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
+void __unmap_hugepage_range(struct vm_area_struct *, unsigned long, unsigned long);
 int hugetlb_prefault(struct address_space *, struct vm_area_struct *);
 int hugetlb_report_meminfo(char *);
 int hugetlb_report_node_meminfo(int, char *);
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 1d709ff528e1..2dbec90dc3ba 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -356,8 +356,8 @@ nomem:
 	return -ENOMEM;
 }
 
-void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
-			  unsigned long end)
+void __unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
+			    unsigned long end)
 {
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long address;
@@ -398,6 +398,24 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 	}
 }
 
+void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
+			  unsigned long end)
+{
+	/*
+	 * It is undesirable to test vma->vm_file as it should be non-null
+	 * for valid hugetlb area. However, vm_file will be NULL in the error
+	 * cleanup path of do_mmap_pgoff. When hugetlbfs ->mmap method fails,
+	 * do_mmap_pgoff() nullifies vma->vm_file before calling this function
+	 * to clean up. Since no pte has actually been setup, it is safe to
+	 * do nothing in this case.
+	 */
+	if (vma->vm_file) {
+		spin_lock(&vma->vm_file->f_mapping->i_mmap_lock);
+		__unmap_hugepage_range(vma, start, end);
+		spin_unlock(&vma->vm_file->f_mapping->i_mmap_lock);
+	}
+}
+
 static int hugetlb_cow(struct mm_struct *mm, struct vm_area_struct *vma,
 			unsigned long address, pte_t *ptep, pte_t pte)
 {
--
cgit v1.2.3


From ac27a0ec112a089f1a5102bc8dffc79c8c815571 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Wed, 11 Oct 2006 01:20:50 -0700
Subject: [PATCH] ext4: initial copy of files from ext3

Start of the ext4 patch series. See Documentation/filesystems/ext4.txt
for details.

This is a simple copy of the files in fs/ext3 to fs/ext4 and
/usr/include/linux/ext3* to /usr/include/linux/ext4*.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext4/Makefile | 12 +
 fs/ext4/acl.c | 551 ++++++++
 fs/ext4/acl.h | 81 ++
 fs/ext4/balloc.c | 1818 +++++++++++++++++++++++++
 fs/ext4/bitmap.c | 32 +
 fs/ext4/dir.c | 518 +++++++
 fs/ext4/file.c | 139 ++
 fs/ext4/fsync.c | 88 ++
 fs/ext4/hash.c | 152 +++
 fs/ext4/ialloc.c | 758 +++++++++++
 fs/ext4/inode.c | 3219 ++++++++++++++++++++++++++++++++++++++++++++
 fs/ext4/ioctl.c | 307 +++++
 fs/ext4/namei.c | 2397 +++++++++++++++++++++++++++
 fs/ext4/namei.h | 8 +
 fs/ext4/resize.c | 1042 ++++++++++++++
 fs/ext4/super.c | 2754 +++++++++++++++++++++++++++++++++++++
 fs/ext4/symlink.c | 54 +
 fs/ext4/xattr.c | 1317 ++++++++++++++++++
 fs/ext4/xattr.h | 145 ++
 fs/ext4/xattr_security.c | 77 ++
 fs/ext4/xattr_trusted.c | 62 +
 fs/ext4/xattr_user.c | 64 +
 include/linux/ext4_fs.h | 885 ++++++++++++
 include/linux/ext4_fs_i.h | 147 ++
 include/linux/ext4_fs_sb.h | 83 ++
 include/linux/ext4_jbd.h | 268 ++++
 26 files changed, 16978 insertions(+)
 create mode 100644 fs/ext4/Makefile
 create mode 100644 fs/ext4/acl.c
 create mode 100644 fs/ext4/acl.h
 create mode 100644 fs/ext4/balloc.c
 create mode 100644 fs/ext4/bitmap.c
 create mode 100644 fs/ext4/dir.c
 create mode 100644 fs/ext4/file.c
 create mode 100644 fs/ext4/fsync.c
 create mode 100644 fs/ext4/hash.c
 create mode 100644 fs/ext4/ialloc.c
 create mode 100644 fs/ext4/inode.c
 create mode 100644 fs/ext4/ioctl.c
 create mode 100644 fs/ext4/namei.c
 create mode 100644 fs/ext4/namei.h
 create mode 100644 fs/ext4/resize.c
 create mode 100644 fs/ext4/super.c
 create mode 100644 fs/ext4/symlink.c
 create mode 100644 fs/ext4/xattr.c
 create mode 100644 fs/ext4/xattr.h
 create mode 100644 fs/ext4/xattr_security.c
 create mode 100644 fs/ext4/xattr_trusted.c
 create mode 100644 fs/ext4/xattr_user.c
 create mode 100644 include/linux/ext4_fs.h
 create mode 100644 include/linux/ext4_fs_i.h
 create mode 100644 include/linux/ext4_fs_sb.h
 create mode 100644 include/linux/ext4_jbd.h

(limited to 'include')

diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile
new file mode 100644
index 000000000000..704cd44a40c2
--- /dev/null
+++ b/fs/ext4/Makefile
@@ -0,0 +1,12 @@
+#
+# Makefile for the linux ext3-filesystem routines.
+# + +obj-$(CONFIG_EXT3_FS) += ext3.o + +ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ + ioctl.o namei.o super.o symlink.o hash.o resize.o + +ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o +ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o +ext3-$(CONFIG_EXT3_FS_SECURITY) += xattr_security.o diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c new file mode 100644 index 000000000000..1e5038d9a01b --- /dev/null +++ b/fs/ext4/acl.c @@ -0,0 +1,551 @@ +/* + * linux/fs/ext3/acl.c + * + * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> + */ + +#include <linux/init.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/capability.h> +#include <linux/fs.h> +#include <linux/ext3_jbd.h> +#include <linux/ext3_fs.h> +#include "xattr.h" +#include "acl.h" + +/* + * Convert from filesystem to in-memory representation. + */ +static struct posix_acl * +ext3_acl_from_disk(const void *value, size_t size) +{ + const char *end = (char *)value + size; + int n, count; + struct posix_acl *acl; + + if (!value) + return NULL; + if (size < sizeof(ext3_acl_header)) + return ERR_PTR(-EINVAL); + if (((ext3_acl_header *)value)->a_version != + cpu_to_le32(EXT3_ACL_VERSION)) + return ERR_PTR(-EINVAL); + value = (char *)value + sizeof(ext3_acl_header); + count = ext3_acl_count(size); + if (count < 0) + return ERR_PTR(-EINVAL); + if (count == 0) + return NULL; + acl = posix_acl_alloc(count, GFP_KERNEL); + if (!acl) + return ERR_PTR(-ENOMEM); + for (n=0; n < count; n++) { + ext3_acl_entry *entry = + (ext3_acl_entry *)value; + if ((char *)value + sizeof(ext3_acl_entry_short) > end) + goto fail; + acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); + acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); + switch(acl->a_entries[n].e_tag) { + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + value = (char *)value + + sizeof(ext3_acl_entry_short); + acl->a_entries[n].e_id = ACL_UNDEFINED_ID; + break; + + case ACL_USER: + case ACL_GROUP: + value = (char *)value + sizeof(ext3_acl_entry); + if ((char *)value > end) + goto fail; + acl->a_entries[n].e_id = + le32_to_cpu(entry->e_id); + break; + + default: + goto fail; + } + } + if (value != end) + goto fail; + return acl; + +fail: + posix_acl_release(acl); + return ERR_PTR(-EINVAL); +} + +/* + * Convert from in-memory to filesystem representation. 
+ */ +static void * +ext3_acl_to_disk(const struct posix_acl *acl, size_t *size) +{ + ext3_acl_header *ext_acl; + char *e; + size_t n; + + *size = ext3_acl_size(acl->a_count); + ext_acl = kmalloc(sizeof(ext3_acl_header) + acl->a_count * + sizeof(ext3_acl_entry), GFP_KERNEL); + if (!ext_acl) + return ERR_PTR(-ENOMEM); + ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION); + e = (char *)ext_acl + sizeof(ext3_acl_header); + for (n=0; n < acl->a_count; n++) { + ext3_acl_entry *entry = (ext3_acl_entry *)e; + entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); + entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); + switch(acl->a_entries[n].e_tag) { + case ACL_USER: + case ACL_GROUP: + entry->e_id = + cpu_to_le32(acl->a_entries[n].e_id); + e += sizeof(ext3_acl_entry); + break; + + case ACL_USER_OBJ: + case ACL_GROUP_OBJ: + case ACL_MASK: + case ACL_OTHER: + e += sizeof(ext3_acl_entry_short); + break; + + default: + goto fail; + } + } + return (char *)ext_acl; + +fail: + kfree(ext_acl); + return ERR_PTR(-EINVAL); +} + +static inline struct posix_acl * +ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl) +{ + struct posix_acl *acl = EXT3_ACL_NOT_CACHED; + + spin_lock(&inode->i_lock); + if (*i_acl != EXT3_ACL_NOT_CACHED) + acl = posix_acl_dup(*i_acl); + spin_unlock(&inode->i_lock); + + return acl; +} + +static inline void +ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl, + struct posix_acl *acl) +{ + spin_lock(&inode->i_lock); + if (*i_acl != EXT3_ACL_NOT_CACHED) + posix_acl_release(*i_acl); + *i_acl = posix_acl_dup(acl); + spin_unlock(&inode->i_lock); +} + +/* + * Inode operation get_posix_acl(). + * + * inode->i_mutex: don't care + */ +static struct posix_acl * +ext3_get_acl(struct inode *inode, int type) +{ + struct ext3_inode_info *ei = EXT3_I(inode); + int name_index; + char *value = NULL; + struct posix_acl *acl; + int retval; + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return NULL; + + switch(type) { + case ACL_TYPE_ACCESS: + acl = ext3_iget_acl(inode, &ei->i_acl); + if (acl != EXT3_ACL_NOT_CACHED) + return acl; + name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; + break; + + case ACL_TYPE_DEFAULT: + acl = ext3_iget_acl(inode, &ei->i_default_acl); + if (acl != EXT3_ACL_NOT_CACHED) + return acl; + name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; + break; + + default: + return ERR_PTR(-EINVAL); + } + retval = ext3_xattr_get(inode, name_index, "", NULL, 0); + if (retval > 0) { + value = kmalloc(retval, GFP_KERNEL); + if (!value) + return ERR_PTR(-ENOMEM); + retval = ext3_xattr_get(inode, name_index, "", value, retval); + } + if (retval > 0) + acl = ext3_acl_from_disk(value, retval); + else if (retval == -ENODATA || retval == -ENOSYS) + acl = NULL; + else + acl = ERR_PTR(retval); + kfree(value); + + if (!IS_ERR(acl)) { + switch(type) { + case ACL_TYPE_ACCESS: + ext3_iset_acl(inode, &ei->i_acl, acl); + break; + + case ACL_TYPE_DEFAULT: + ext3_iset_acl(inode, &ei->i_default_acl, acl); + break; + } + } + return acl; +} + +/* + * Set the access or default ACL of an inode. 
+ * + * inode->i_mutex: down unless called from ext3_new_inode + */ +static int +ext3_set_acl(handle_t *handle, struct inode *inode, int type, + struct posix_acl *acl) +{ + struct ext3_inode_info *ei = EXT3_I(inode); + int name_index; + void *value = NULL; + size_t size = 0; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + + switch(type) { + case ACL_TYPE_ACCESS: + name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; + if (acl) { + mode_t mode = inode->i_mode; + error = posix_acl_equiv_mode(acl, &mode); + if (error < 0) + return error; + else { + inode->i_mode = mode; + ext3_mark_inode_dirty(handle, inode); + if (error == 0) + acl = NULL; + } + } + break; + + case ACL_TYPE_DEFAULT: + name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; + if (!S_ISDIR(inode->i_mode)) + return acl ? -EACCES : 0; + break; + + default: + return -EINVAL; + } + if (acl) { + value = ext3_acl_to_disk(acl, &size); + if (IS_ERR(value)) + return (int)PTR_ERR(value); + } + + error = ext3_xattr_set_handle(handle, inode, name_index, "", + value, size, 0); + + kfree(value); + if (!error) { + switch(type) { + case ACL_TYPE_ACCESS: + ext3_iset_acl(inode, &ei->i_acl, acl); + break; + + case ACL_TYPE_DEFAULT: + ext3_iset_acl(inode, &ei->i_default_acl, acl); + break; + } + } + return error; +} + +static int +ext3_check_acl(struct inode *inode, int mask) +{ + struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); + + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl) { + int error = posix_acl_permission(inode, acl, mask); + posix_acl_release(acl); + return error; + } + + return -EAGAIN; +} + +int +ext3_permission(struct inode *inode, int mask, struct nameidata *nd) +{ + return generic_permission(inode, mask, ext3_check_acl); +} + +/* + * Initialize the ACLs of a new inode. Called from ext3_new_inode. + * + * dir->i_mutex: down + * inode->i_mutex: up (access to inode is still exclusive) + */ +int +ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) +{ + struct posix_acl *acl = NULL; + int error = 0; + + if (!S_ISLNK(inode->i_mode)) { + if (test_opt(dir->i_sb, POSIX_ACL)) { + acl = ext3_get_acl(dir, ACL_TYPE_DEFAULT); + if (IS_ERR(acl)) + return PTR_ERR(acl); + } + if (!acl) + inode->i_mode &= ~current->fs->umask; + } + if (test_opt(inode->i_sb, POSIX_ACL) && acl) { + struct posix_acl *clone; + mode_t mode; + + if (S_ISDIR(inode->i_mode)) { + error = ext3_set_acl(handle, inode, + ACL_TYPE_DEFAULT, acl); + if (error) + goto cleanup; + } + clone = posix_acl_clone(acl, GFP_KERNEL); + error = -ENOMEM; + if (!clone) + goto cleanup; + + mode = inode->i_mode; + error = posix_acl_create_masq(clone, &mode); + if (error >= 0) { + inode->i_mode = mode; + if (error > 0) { + /* This is an extended ACL */ + error = ext3_set_acl(handle, inode, + ACL_TYPE_ACCESS, clone); + } + } + posix_acl_release(clone); + } +cleanup: + posix_acl_release(acl); + return error; +} + +/* + * Does chmod for an inode that may have an Access Control List. The + * inode->i_mode field must be updated to the desired value by the caller + * before calling this function. + * Returns 0 on success, or a negative error number. + * + * We change the ACL rather than storing some ACL entries in the file + * mode permission bits (which would be more efficient), because that + * would break once additional permissions (like ACL_APPEND, ACL_DELETE + * for directories) are added. There are no more bits available in the + * file mode. 
+ * + * inode->i_mutex: down + */ +int +ext3_acl_chmod(struct inode *inode) +{ + struct posix_acl *acl, *clone; + int error; + + if (S_ISLNK(inode->i_mode)) + return -EOPNOTSUPP; + if (!test_opt(inode->i_sb, POSIX_ACL)) + return 0; + acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); + if (IS_ERR(acl) || !acl) + return PTR_ERR(acl); + clone = posix_acl_clone(acl, GFP_KERNEL); + posix_acl_release(acl); + if (!clone) + return -ENOMEM; + error = posix_acl_chmod_masq(clone, inode->i_mode); + if (!error) { + handle_t *handle; + int retries = 0; + + retry: + handle = ext3_journal_start(inode, + EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + ext3_std_error(inode->i_sb, error); + goto out; + } + error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, clone); + ext3_journal_stop(handle); + if (error == -ENOSPC && + ext3_should_retry_alloc(inode->i_sb, &retries)) + goto retry; + } +out: + posix_acl_release(clone); + return error; +} + +/* + * Extended attribute handlers + */ +static size_t +ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, + const char *name, size_t name_len) +{ + const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return 0; + if (list && size <= list_len) + memcpy(list, POSIX_ACL_XATTR_ACCESS, size); + return size; +} + +static size_t +ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, + const char *name, size_t name_len) +{ + const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return 0; + if (list && size <= list_len) + memcpy(list, POSIX_ACL_XATTR_DEFAULT, size); + return size; +} + +static int +ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) +{ + struct posix_acl *acl; + int error; + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return -EOPNOTSUPP; + + acl = ext3_get_acl(inode, type); + if (IS_ERR(acl)) + return PTR_ERR(acl); + if (acl == NULL) + return -ENODATA; + error = posix_acl_to_xattr(acl, buffer, size); + posix_acl_release(acl); + + return error; +} + +static int +ext3_xattr_get_acl_access(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext3_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); +} + +static int +ext3_xattr_get_acl_default(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext3_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); +} + +static int +ext3_xattr_set_acl(struct inode *inode, int type, const void *value, + size_t size) +{ + handle_t *handle; + struct posix_acl *acl; + int error, retries = 0; + + if (!test_opt(inode->i_sb, POSIX_ACL)) + return -EOPNOTSUPP; + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + + if (value) { + acl = posix_acl_from_xattr(value, size); + if (IS_ERR(acl)) + return PTR_ERR(acl); + else if (acl) { + error = posix_acl_valid(acl); + if (error) + goto release_and_out; + } + } else + acl = NULL; + +retry: + handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + error = ext3_set_acl(handle, inode, type, acl); + ext3_journal_stop(handle); + if (error == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) + goto retry; + +release_and_out: + posix_acl_release(acl); + return error; +} + +static int +ext3_xattr_set_acl_access(struct inode *inode, const char *name, + const 
void *value, size_t size, int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext3_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); +} + +static int +ext3_xattr_set_acl_default(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") != 0) + return -EINVAL; + return ext3_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); +} + +struct xattr_handler ext3_xattr_acl_access_handler = { + .prefix = POSIX_ACL_XATTR_ACCESS, + .list = ext3_xattr_list_acl_access, + .get = ext3_xattr_get_acl_access, + .set = ext3_xattr_set_acl_access, +}; + +struct xattr_handler ext3_xattr_acl_default_handler = { + .prefix = POSIX_ACL_XATTR_DEFAULT, + .list = ext3_xattr_list_acl_default, + .get = ext3_xattr_get_acl_default, + .set = ext3_xattr_set_acl_default, +}; diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h new file mode 100644 index 000000000000..0d1e6279cbfd --- /dev/null +++ b/fs/ext4/acl.h @@ -0,0 +1,81 @@ +/* + File: fs/ext3/acl.h + + (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> +*/ + +#include <linux/posix_acl_xattr.h> + +#define EXT3_ACL_VERSION 0x0001 + +typedef struct { + __le16 e_tag; + __le16 e_perm; + __le32 e_id; +} ext3_acl_entry; + +typedef struct { + __le16 e_tag; + __le16 e_perm; +} ext3_acl_entry_short; + +typedef struct { + __le32 a_version; +} ext3_acl_header; + +static inline size_t ext3_acl_size(int count) +{ + if (count <= 4) { + return sizeof(ext3_acl_header) + + count * sizeof(ext3_acl_entry_short); + } else { + return sizeof(ext3_acl_header) + + 4 * sizeof(ext3_acl_entry_short) + + (count - 4) * sizeof(ext3_acl_entry); + } +} + +static inline int ext3_acl_count(size_t size) +{ + ssize_t s; + size -= sizeof(ext3_acl_header); + s = size - 4 * sizeof(ext3_acl_entry_short); + if (s < 0) { + if (size % sizeof(ext3_acl_entry_short)) + return -1; + return size / sizeof(ext3_acl_entry_short); + } else { + if (s % sizeof(ext3_acl_entry)) + return -1; + return s / sizeof(ext3_acl_entry) + 4; + } +} + +#ifdef CONFIG_EXT3_FS_POSIX_ACL + +/* Value for inode->u.ext3_i.i_acl and inode->u.ext3_i.i_default_acl + if the ACL has not been cached */ +#define EXT3_ACL_NOT_CACHED ((void *)-1) + +/* acl.c */ +extern int ext3_permission (struct inode *, int, struct nameidata *); +extern int ext3_acl_chmod (struct inode *); +extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); + +#else /* CONFIG_EXT3_FS_POSIX_ACL */ +#include <linux/sched.h> +#define ext3_permission NULL + +static inline int +ext3_acl_chmod(struct inode *inode) +{ + return 0; +} + +static inline int +ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) +{ + return 0; +} +#endif /* CONFIG_EXT3_FS_POSIX_ACL */ + diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c new file mode 100644 index 000000000000..b41a7d7e20f0 --- /dev/null +++ b/fs/ext4/balloc.c @@ -0,0 +1,1818 @@ +/* + * linux/fs/ext3/balloc.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * Enhanced block allocation by Stephen Tweedie (sct@redhat.com), 1993 + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. 
Miller (davem@caip.rutgers.edu), 1995 + */ + +#include <linux/time.h> +#include <linux/capability.h> +#include <linux/fs.h> +#include <linux/jbd.h> +#include <linux/ext3_fs.h> +#include <linux/ext3_jbd.h> +#include <linux/quotaops.h> +#include <linux/buffer_head.h> + +/* + * balloc.c contains the blocks allocation and deallocation routines + */ + +/* + * The free blocks are managed by bitmaps. A file system contains several + * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap + * block for inodes, N blocks for the inode table and data blocks. + * + * The file system contains group descriptors which are located after the + * super block. Each descriptor contains the number of the bitmap block and + * the free blocks count in the block. The descriptors are loaded in memory + * when a file system is mounted (see ext3_read_super). + */ + + +#define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) + +/** + * ext3_get_group_desc() -- load group descriptor from disk + * @sb: super block + * @block_group: given block group + * @bh: pointer to the buffer head to store the block + * group descriptor + */ +struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, + unsigned int block_group, + struct buffer_head ** bh) +{ + unsigned long group_desc; + unsigned long offset; + struct ext3_group_desc * desc; + struct ext3_sb_info *sbi = EXT3_SB(sb); + + if (block_group >= sbi->s_groups_count) { + ext3_error (sb, "ext3_get_group_desc", + "block_group >= groups_count - " + "block_group = %d, groups_count = %lu", + block_group, sbi->s_groups_count); + + return NULL; + } + smp_rmb(); + + group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb); + offset = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1); + if (!sbi->s_group_desc[group_desc]) { + ext3_error (sb, "ext3_get_group_desc", + "Group descriptor not loaded - " + "block_group = %d, group_desc = %lu, desc = %lu", + block_group, group_desc, offset); + return NULL; + } + + desc = (struct ext3_group_desc *) sbi->s_group_desc[group_desc]->b_data; + if (bh) + *bh = sbi->s_group_desc[group_desc]; + return desc + offset; +} + +/** + * read_block_bitmap() + * @sb: super block + * @block_group: given block group + * + * Read the bitmap for a given block_group, reading into the specified + * slot in the superblock's bitmap cache. + * + * Return buffer_head on success or NULL in case of failure. + */ +static struct buffer_head * +read_block_bitmap(struct super_block *sb, unsigned int block_group) +{ + struct ext3_group_desc * desc; + struct buffer_head * bh = NULL; + + desc = ext3_get_group_desc (sb, block_group, NULL); + if (!desc) + goto error_out; + bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); + if (!bh) + ext3_error (sb, "read_block_bitmap", + "Cannot read block bitmap - " + "block_group = %d, block_bitmap = %u", + block_group, le32_to_cpu(desc->bg_block_bitmap)); +error_out: + return bh; +} +/* + * The reservation window structure operations + * -------------------------------------------- + * Operations include: + * dump, find, add, remove, is_empty, find_next_reservable_window, etc. + * + * We use a red-black tree to represent per-filesystem reservation + * windows. 
+ * + */ + +/** + * __rsv_window_dump() -- Dump the filesystem block allocation reservation map + * @rb_root: root of per-filesystem reservation rb tree + * @verbose: verbose mode + * @fn: function which wishes to dump the reservation map + * + * If verbose is turned on, it will print the whole block reservation + * windows(start, end). Otherwise, it will only print out the "bad" windows, + * those windows that overlap with their immediate neighbors. + */ +#if 1 +static void __rsv_window_dump(struct rb_root *root, int verbose, + const char *fn) +{ + struct rb_node *n; + struct ext3_reserve_window_node *rsv, *prev; + int bad; + +restart: + n = rb_first(root); + bad = 0; + prev = NULL; + + printk("Block Allocation Reservation Windows Map (%s):\n", fn); + while (n) { + rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node); + if (verbose) + printk("reservation window 0x%p " + "start: %lu, end: %lu\n", + rsv, rsv->rsv_start, rsv->rsv_end); + if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { + printk("Bad reservation %p (start >= end)\n", + rsv); + bad = 1; + } + if (prev && prev->rsv_end >= rsv->rsv_start) { + printk("Bad reservation %p (prev->end >= start)\n", + rsv); + bad = 1; + } + if (bad) { + if (!verbose) { + printk("Restarting reservation walk in verbose mode\n"); + verbose = 1; + goto restart; + } + } + n = rb_next(n); + prev = rsv; + } + printk("Window map complete.\n"); + if (bad) + BUG(); +} +#define rsv_window_dump(root, verbose) \ + __rsv_window_dump((root), (verbose), __FUNCTION__) +#else +#define rsv_window_dump(root, verbose) do {} while (0) +#endif + +/** + * goal_in_my_reservation() + * @rsv: inode's reservation window + * @grp_goal: given goal block relative to the allocation block group + * @group: the current allocation block group + * @sb: filesystem super block + * + * Test if the given goal block (group relative) is within the file's + * own block reservation window range. + * + * If the reservation window is outside the goal allocation group, return 0; + * grp_goal (given goal block) could be -1, which means no specific + * goal block. In this case, always return 1. + * If the goal block is within the reservation window, return 1; + * otherwise, return 0; + */ +static int +goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal, + unsigned int group, struct super_block * sb) +{ + ext3_fsblk_t group_first_block, group_last_block; + + group_first_block = ext3_group_first_block_no(sb, group); + group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); + + if ((rsv->_rsv_start > group_last_block) || + (rsv->_rsv_end < group_first_block)) + return 0; + if ((grp_goal >= 0) && ((grp_goal + group_first_block < rsv->_rsv_start) + || (grp_goal + group_first_block > rsv->_rsv_end))) + return 0; + return 1; +} + +/** + * search_reserve_window() + * @rb_root: root of reservation tree + * @goal: target allocation block + * + * Find the reserved window which includes the goal, or the previous one + * if the goal is not in any window. + * Returns NULL if there are no windows or if all windows start after the goal. 
+ */ +static struct ext3_reserve_window_node * +search_reserve_window(struct rb_root *root, ext3_fsblk_t goal) +{ + struct rb_node *n = root->rb_node; + struct ext3_reserve_window_node *rsv; + + if (!n) + return NULL; + + do { + rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); + + if (goal < rsv->rsv_start) + n = n->rb_left; + else if (goal > rsv->rsv_end) + n = n->rb_right; + else + return rsv; + } while (n); + /* + * We've fallen off the end of the tree: the goal wasn't inside + * any particular node. OK, the previous node must be to one + * side of the interval containing the goal. If it's the RHS, + * we need to back up one. + */ + if (rsv->rsv_start > goal) { + n = rb_prev(&rsv->rsv_node); + rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); + } + return rsv; +} + +/** + * ext3_rsv_window_add() -- Insert a window to the block reservation rb tree. + * @sb: super block + * @rsv: reservation window to add + * + * Must be called with rsv_lock hold. + */ +void ext3_rsv_window_add(struct super_block *sb, + struct ext3_reserve_window_node *rsv) +{ + struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root; + struct rb_node *node = &rsv->rsv_node; + ext3_fsblk_t start = rsv->rsv_start; + + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; + struct ext3_reserve_window_node *this; + + while (*p) + { + parent = *p; + this = rb_entry(parent, struct ext3_reserve_window_node, rsv_node); + + if (start < this->rsv_start) + p = &(*p)->rb_left; + else if (start > this->rsv_end) + p = &(*p)->rb_right; + else { + rsv_window_dump(root, 1); + BUG(); + } + } + + rb_link_node(node, parent, p); + rb_insert_color(node, root); +} + +/** + * ext3_rsv_window_remove() -- unlink a window from the reservation rb tree + * @sb: super block + * @rsv: reservation window to remove + * + * Mark the block reservation window as not allocated, and unlink it + * from the filesystem reservation window rb tree. Must be called with + * rsv_lock hold. + */ +static void rsv_window_remove(struct super_block *sb, + struct ext3_reserve_window_node *rsv) +{ + rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; + rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; + rsv->rsv_alloc_hit = 0; + rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root); +} + +/* + * rsv_is_empty() -- Check if the reservation window is allocated. + * @rsv: given reservation window to check + * + * returns 1 if the end block is EXT3_RESERVE_WINDOW_NOT_ALLOCATED. + */ +static inline int rsv_is_empty(struct ext3_reserve_window *rsv) +{ + /* a valid reservation end block could not be 0 */ + return rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED; +} + +/** + * ext3_init_block_alloc_info() + * @inode: file inode structure + * + * Allocate and initialize the reservation window structure, and + * link the window to the ext3 inode structure at last + * + * The reservation window structure is only dynamically allocated + * and linked to ext3 inode the first time the open file + * needs a new block. So, before every ext3_new_block(s) call, for + * regular files, we should check whether the reservation window + * structure exists or not. In the latter case, this function is called. + * Fail to do so will result in block reservation being turned off for that + * open file. + * + * This function is called from ext3_get_blocks_handle(), also called + * when setting the reservation window size through ioctl before the file + * is open for write (needs block allocation). 
+ * + * Needs truncate_mutex protection prior to call this function. + */ +void ext3_init_block_alloc_info(struct inode *inode) +{ + struct ext3_inode_info *ei = EXT3_I(inode); + struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info; + struct super_block *sb = inode->i_sb; + + block_i = kmalloc(sizeof(*block_i), GFP_NOFS); + if (block_i) { + struct ext3_reserve_window_node *rsv = &block_i->rsv_window_node; + + rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; + rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; + + /* + * if filesystem is mounted with NORESERVATION, the goal + * reservation window size is set to zero to indicate + * block reservation is off + */ + if (!test_opt(sb, RESERVATION)) + rsv->rsv_goal_size = 0; + else + rsv->rsv_goal_size = EXT3_DEFAULT_RESERVE_BLOCKS; + rsv->rsv_alloc_hit = 0; + block_i->last_alloc_logical_block = 0; + block_i->last_alloc_physical_block = 0; + } + ei->i_block_alloc_info = block_i; +} + +/** + * ext3_discard_reservation() + * @inode: inode + * + * Discard(free) block reservation window on last file close, or truncate + * or at last iput(). + * + * It is being called in three cases: + * ext3_release_file(): last writer close the file + * ext3_clear_inode(): last iput(), when nobody link to this file. + * ext3_truncate(): when the block indirect map is about to change. + * + */ +void ext3_discard_reservation(struct inode *inode) +{ + struct ext3_inode_info *ei = EXT3_I(inode); + struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info; + struct ext3_reserve_window_node *rsv; + spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; + + if (!block_i) + return; + + rsv = &block_i->rsv_window_node; + if (!rsv_is_empty(&rsv->rsv_window)) { + spin_lock(rsv_lock); + if (!rsv_is_empty(&rsv->rsv_window)) + rsv_window_remove(inode->i_sb, rsv); + spin_unlock(rsv_lock); + } +} + +/** + * ext3_free_blocks_sb() -- Free given blocks and update quota + * @handle: handle to this transaction + * @sb: super block + * @block: start physcial block to free + * @count: number of blocks to free + * @pdquot_freed_blocks: pointer to quota + */ +void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb, + ext3_fsblk_t block, unsigned long count, + unsigned long *pdquot_freed_blocks) +{ + struct buffer_head *bitmap_bh = NULL; + struct buffer_head *gd_bh; + unsigned long block_group; + ext3_grpblk_t bit; + unsigned long i; + unsigned long overflow; + struct ext3_group_desc * desc; + struct ext3_super_block * es; + struct ext3_sb_info *sbi; + int err = 0, ret; + ext3_grpblk_t group_freed; + + *pdquot_freed_blocks = 0; + sbi = EXT3_SB(sb); + es = sbi->s_es; + if (block < le32_to_cpu(es->s_first_data_block) || + block + count < block || + block + count > le32_to_cpu(es->s_blocks_count)) { + ext3_error (sb, "ext3_free_blocks", + "Freeing blocks not in datazone - " + "block = "E3FSBLK", count = %lu", block, count); + goto error_return; + } + + ext3_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1); + +do_more: + overflow = 0; + block_group = (block - le32_to_cpu(es->s_first_data_block)) / + EXT3_BLOCKS_PER_GROUP(sb); + bit = (block - le32_to_cpu(es->s_first_data_block)) % + EXT3_BLOCKS_PER_GROUP(sb); + /* + * Check to see if we are freeing blocks across a group + * boundary. 
+ */ + if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) { + overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb); + count -= overflow; + } + brelse(bitmap_bh); + bitmap_bh = read_block_bitmap(sb, block_group); + if (!bitmap_bh) + goto error_return; + desc = ext3_get_group_desc (sb, block_group, &gd_bh); + if (!desc) + goto error_return; + + if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) || + in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) || + in_range (block, le32_to_cpu(desc->bg_inode_table), + sbi->s_itb_per_group) || + in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table), + sbi->s_itb_per_group)) + ext3_error (sb, "ext3_free_blocks", + "Freeing blocks in system zones - " + "Block = "E3FSBLK", count = %lu", + block, count); + + /* + * We are about to start releasing blocks in the bitmap, + * so we need undo access. + */ + /* @@@ check errors */ + BUFFER_TRACE(bitmap_bh, "getting undo access"); + err = ext3_journal_get_undo_access(handle, bitmap_bh); + if (err) + goto error_return; + + /* + * We are about to modify some metadata. Call the journal APIs + * to unshare ->b_data if a currently-committing transaction is + * using it + */ + BUFFER_TRACE(gd_bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, gd_bh); + if (err) + goto error_return; + + jbd_lock_bh_state(bitmap_bh); + + for (i = 0, group_freed = 0; i < count; i++) { + /* + * An HJ special. This is expensive... + */ +#ifdef CONFIG_JBD_DEBUG + jbd_unlock_bh_state(bitmap_bh); + { + struct buffer_head *debug_bh; + debug_bh = sb_find_get_block(sb, block + i); + if (debug_bh) { + BUFFER_TRACE(debug_bh, "Deleted!"); + if (!bh2jh(bitmap_bh)->b_committed_data) + BUFFER_TRACE(debug_bh, + "No commited data in bitmap"); + BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap"); + __brelse(debug_bh); + } + } + jbd_lock_bh_state(bitmap_bh); +#endif + if (need_resched()) { + jbd_unlock_bh_state(bitmap_bh); + cond_resched(); + jbd_lock_bh_state(bitmap_bh); + } + /* @@@ This prevents newly-allocated data from being + * freed and then reallocated within the same + * transaction. + * + * Ideally we would want to allow that to happen, but to + * do so requires making journal_forget() capable of + * revoking the queued write of a data block, which + * implies blocking on the journal lock. *forget() + * cannot block due to truncate races. + * + * Eventually we can fix this by making journal_forget() + * return a status indicating whether or not it was able + * to revoke the buffer. On successful revoke, it is + * safe not to set the allocation bit in the committed + * bitmap, because we know that there is no outstanding + * activity on the buffer any more and so it is safe to + * reallocate it. + */ + BUFFER_TRACE(bitmap_bh, "set in b_committed_data"); + J_ASSERT_BH(bitmap_bh, + bh2jh(bitmap_bh)->b_committed_data != NULL); + ext3_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i, + bh2jh(bitmap_bh)->b_committed_data); + + /* + * We clear the bit in the bitmap after setting the committed + * data bit, because this is the reverse order to that which + * the allocator uses. 
+ */ + BUFFER_TRACE(bitmap_bh, "clear bit"); + if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group), + bit + i, bitmap_bh->b_data)) { + jbd_unlock_bh_state(bitmap_bh); + ext3_error(sb, __FUNCTION__, + "bit already cleared for block "E3FSBLK, + block + i); + jbd_lock_bh_state(bitmap_bh); + BUFFER_TRACE(bitmap_bh, "bit already cleared"); + } else { + group_freed++; + } + } + jbd_unlock_bh_state(bitmap_bh); + + spin_lock(sb_bgl_lock(sbi, block_group)); + desc->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) + + group_freed); + spin_unlock(sb_bgl_lock(sbi, block_group)); + percpu_counter_mod(&sbi->s_freeblocks_counter, count); + + /* We dirtied the bitmap block */ + BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); + err = ext3_journal_dirty_metadata(handle, bitmap_bh); + + /* And the group descriptor block */ + BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); + ret = ext3_journal_dirty_metadata(handle, gd_bh); + if (!err) err = ret; + *pdquot_freed_blocks += group_freed; + + if (overflow && !err) { + block += count; + count = overflow; + goto do_more; + } + sb->s_dirt = 1; +error_return: + brelse(bitmap_bh); + ext3_std_error(sb, err); + return; +} + +/** + * ext3_free_blocks() -- Free given blocks and update quota + * @handle: handle for this transaction + * @inode: inode + * @block: start physical block to free + * @count: number of blocks to count + */ +void ext3_free_blocks(handle_t *handle, struct inode *inode, + ext3_fsblk_t block, unsigned long count) +{ + struct super_block * sb; + unsigned long dquot_freed_blocks; + + sb = inode->i_sb; + if (!sb) { + printk ("ext3_free_blocks: nonexistent device"); + return; + } + ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); + if (dquot_freed_blocks) + DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); + return; +} + +/** + * ext3_test_allocatable() + * @nr: given allocation block group + * @bh: bufferhead contains the bitmap of the given block group + * + * For ext3 allocations, we must not reuse any blocks which are + * allocated in the bitmap buffer's "last committed data" copy. This + * prevents deletes from freeing up the page for reuse until we have + * committed the delete transaction. + * + * If we didn't do this, then deleting something and reallocating it as + * data would allow the old block to be overwritten before the + * transaction committed (because we force data to disk before commit). + * This would lead to corruption if we crashed between overwriting the + * data and committing the delete. + * + * @@@ We may want to make this allocation behaviour conditional on + * data-writes at some point, and disable it for metadata allocations or + * sync-data inodes. + */ +static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh) +{ + int ret; + struct journal_head *jh = bh2jh(bh); + + if (ext3_test_bit(nr, bh->b_data)) + return 0; + + jbd_lock_bh_state(bh); + if (!jh->b_committed_data) + ret = 1; + else + ret = !ext3_test_bit(nr, jh->b_committed_data); + jbd_unlock_bh_state(bh); + return ret; +} + +/** + * bitmap_search_next_usable_block() + * @start: the starting block (group relative) of the search + * @bh: bufferhead contains the block group bitmap + * @maxblocks: the ending block (group relative) of the reservation + * + * The bitmap search --- search forward alternately through the actual + * bitmap on disk and the last-committed copy in journal, until we find a + * bit free in both bitmaps. 
+ */ +static ext3_grpblk_t +bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, + ext3_grpblk_t maxblocks) +{ + ext3_grpblk_t next; + struct journal_head *jh = bh2jh(bh); + + while (start < maxblocks) { + next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start); + if (next >= maxblocks) + return -1; + if (ext3_test_allocatable(next, bh)) + return next; + jbd_lock_bh_state(bh); + if (jh->b_committed_data) + start = ext3_find_next_zero_bit(jh->b_committed_data, + maxblocks, next); + jbd_unlock_bh_state(bh); + } + return -1; +} + +/** + * find_next_usable_block() + * @start: the starting block (group relative) to find next + * allocatable block in bitmap. + * @bh: bufferhead contains the block group bitmap + * @maxblocks: the ending block (group relative) for the search + * + * Find an allocatable block in a bitmap. We honor both the bitmap and + * its last-committed copy (if that exists), and perform the "most + * appropriate allocation" algorithm of looking for a free block near + * the initial goal; then for a free byte somewhere in the bitmap; then + * for any free bit in the bitmap. + */ +static ext3_grpblk_t +find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, + ext3_grpblk_t maxblocks) +{ + ext3_grpblk_t here, next; + char *p, *r; + + if (start > 0) { + /* + * The goal was occupied; search forward for a free + * block within the next XX blocks. + * + * end_goal is more or less random, but it has to be + * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the + * next 64-bit boundary is simple.. + */ + ext3_grpblk_t end_goal = (start + 63) & ~63; + if (end_goal > maxblocks) + end_goal = maxblocks; + here = ext3_find_next_zero_bit(bh->b_data, end_goal, start); + if (here < end_goal && ext3_test_allocatable(here, bh)) + return here; + ext3_debug("Bit not found near goal\n"); + } + + here = start; + if (here < 0) + here = 0; + + p = ((char *)bh->b_data) + (here >> 3); + r = memscan(p, 0, (maxblocks - here + 7) >> 3); + next = (r - ((char *)bh->b_data)) << 3; + + if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh)) + return next; + + /* + * The bitmap search --- search forward alternately through the actual + * bitmap and the last-committed copy until we find a bit free in + * both + */ + here = bitmap_search_next_usable_block(here, bh, maxblocks); + return here; +} + +/** + * claim_block() + * @block: the free block (group relative) to allocate + * @bh: the bufferhead containts the block group bitmap + * + * We think we can allocate this block in this bitmap. Try to set the bit. + * If that succeeds then check that nobody has allocated and then freed the + * block since we saw that is was not marked in b_committed_data. If it _was_ + * allocated and freed then clear the bit in the bitmap again and return + * zero (failure). 
+ */ +static inline int +claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh) +{ + struct journal_head *jh = bh2jh(bh); + int ret; + + if (ext3_set_bit_atomic(lock, block, bh->b_data)) + return 0; + jbd_lock_bh_state(bh); + if (jh->b_committed_data && ext3_test_bit(block,jh->b_committed_data)) { + ext3_clear_bit_atomic(lock, block, bh->b_data); + ret = 0; + } else { + ret = 1; + } + jbd_unlock_bh_state(bh); + return ret; +} + +/** + * ext3_try_to_allocate() + * @sb: superblock + * @handle: handle to this transaction + * @group: given allocation block group + * @bitmap_bh: bufferhead holds the block bitmap + * @grp_goal: given target block within the group + * @count: target number of blocks to allocate + * @my_rsv: reservation window + * + * Attempt to allocate blocks within a give range. Set the range of allocation + * first, then find the first free bit(s) from the bitmap (within the range), + * and at last, allocate the blocks by claiming the found free bit as allocated. + * + * To set the range of this allocation: + * if there is a reservation window, only try to allocate block(s) from the + * file's own reservation window; + * Otherwise, the allocation range starts from the give goal block, ends at + * the block group's last block. + * + * If we failed to allocate the desired block then we may end up crossing to a + * new bitmap. In that case we must release write access to the old one via + * ext3_journal_release_buffer(), else we'll run out of credits. + */ +static ext3_grpblk_t +ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group, + struct buffer_head *bitmap_bh, ext3_grpblk_t grp_goal, + unsigned long *count, struct ext3_reserve_window *my_rsv) +{ + ext3_fsblk_t group_first_block; + ext3_grpblk_t start, end; + unsigned long num = 0; + + /* we do allocation within the reservation window if we have a window */ + if (my_rsv) { + group_first_block = ext3_group_first_block_no(sb, group); + if (my_rsv->_rsv_start >= group_first_block) + start = my_rsv->_rsv_start - group_first_block; + else + /* reservation window cross group boundary */ + start = 0; + end = my_rsv->_rsv_end - group_first_block + 1; + if (end > EXT3_BLOCKS_PER_GROUP(sb)) + /* reservation window crosses group boundary */ + end = EXT3_BLOCKS_PER_GROUP(sb); + if ((start <= grp_goal) && (grp_goal < end)) + start = grp_goal; + else + grp_goal = -1; + } else { + if (grp_goal > 0) + start = grp_goal; + else + start = 0; + end = EXT3_BLOCKS_PER_GROUP(sb); + } + + BUG_ON(start > EXT3_BLOCKS_PER_GROUP(sb)); + +repeat: + if (grp_goal < 0 || !ext3_test_allocatable(grp_goal, bitmap_bh)) { + grp_goal = find_next_usable_block(start, bitmap_bh, end); + if (grp_goal < 0) + goto fail_access; + if (!my_rsv) { + int i; + + for (i = 0; i < 7 && grp_goal > start && + ext3_test_allocatable(grp_goal - 1, + bitmap_bh); + i++, grp_goal--) + ; + } + } + start = grp_goal; + + if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), + grp_goal, bitmap_bh)) { + /* + * The block was allocated by another thread, or it was + * allocated and then freed by another thread + */ + start++; + grp_goal++; + if (start >= end) + goto fail_access; + goto repeat; + } + num++; + grp_goal++; + while (num < *count && grp_goal < end + && ext3_test_allocatable(grp_goal, bitmap_bh) + && claim_block(sb_bgl_lock(EXT3_SB(sb), group), + grp_goal, bitmap_bh)) { + num++; + grp_goal++; + } + *count = num; + return grp_goal - num; +fail_access: + *count = num; + return -1; +} + +/** + * find_next_reservable_window(): + * find a reservable 
space within the given range. + * It does not allocate the reservation window for now: + * alloc_new_reservation() will do the work later. + * + * @search_head: the head of the searching list; + * This is not necessarily the list head of the whole filesystem + * + * We have both head and start_block to assist the search + * for the reservable space. The list starts from head, + * but we will shift to the place where start_block is, + * then start from there, when looking for a reservable space. + * + * @size: the target new reservation window size + * + * @group_first_block: the first block we consider to start + * the real search from + * + * @last_block: + * the maximum block number that our goal reservable space + * could start from. This is normally the last block in this + * group. The search will end when we found the start of next + * possible reservable space is out of this boundary. + * This could handle the cross boundary reservation window + * request. + * + * basically we search from the given range, rather than the whole + * reservation double linked list, (start_block, last_block) + * to find a free region that is of my size and has not + * been reserved. + * + */ +static int find_next_reservable_window( + struct ext3_reserve_window_node *search_head, + struct ext3_reserve_window_node *my_rsv, + struct super_block * sb, + ext3_fsblk_t start_block, + ext3_fsblk_t last_block) +{ + struct rb_node *next; + struct ext3_reserve_window_node *rsv, *prev; + ext3_fsblk_t cur; + int size = my_rsv->rsv_goal_size; + + /* TODO: make the start of the reservation window byte-aligned */ + /* cur = *start_block & ~7;*/ + cur = start_block; + rsv = search_head; + if (!rsv) + return -1; + + while (1) { + if (cur <= rsv->rsv_end) + cur = rsv->rsv_end + 1; + + /* TODO? + * in the case we could not find a reservable space + * that is what is expected, during the re-search, we could + * remember what's the largest reservable space we could have + * and return that one. + * + * For now it will fail if we could not find the reservable + * space with expected-size (or more)... + */ + if (cur > last_block) + return -1; /* fail */ + + prev = rsv; + next = rb_next(&rsv->rsv_node); + rsv = list_entry(next,struct ext3_reserve_window_node,rsv_node); + + /* + * Reached the last reservation, we can just append to the + * previous one. + */ + if (!next) + break; + + if (cur + size <= rsv->rsv_start) { + /* + * Found a reserveable space big enough. We could + * have a reservation across the group boundary here + */ + break; + } + } + /* + * we come here either : + * when we reach the end of the whole list, + * and there is empty reservable space after last entry in the list. + * append it to the end of the list. + * + * or we found one reservable space in the middle of the list, + * return the reservation window that we could append to. + * succeed. + */ + + if ((prev != my_rsv) && (!rsv_is_empty(&my_rsv->rsv_window))) + rsv_window_remove(sb, my_rsv); + + /* + * Let's book the whole avaliable window for now. We will check the + * disk bitmap later and then, if there are free blocks then we adjust + * the window size if it's larger than requested. + * Otherwise, we will remove this node from the tree next time + * call find_next_reservable_window. 
+ */ + my_rsv->rsv_start = cur; + my_rsv->rsv_end = cur + size - 1; + my_rsv->rsv_alloc_hit = 0; + + if (prev != my_rsv) + ext3_rsv_window_add(sb, my_rsv); + + return 0; +} + +/** + * alloc_new_reservation()--allocate a new reservation window + * + * To make a new reservation, we search part of the filesystem + * reservation list (the list that inside the group). We try to + * allocate a new reservation window near the allocation goal, + * or the beginning of the group, if there is no goal. + * + * We first find a reservable space after the goal, then from + * there, we check the bitmap for the first free block after + * it. If there is no free block until the end of group, then the + * whole group is full, we failed. Otherwise, check if the free + * block is inside the expected reservable space, if so, we + * succeed. + * If the first free block is outside the reservable space, then + * start from the first free block, we search for next available + * space, and go on. + * + * on succeed, a new reservation will be found and inserted into the list + * It contains at least one free block, and it does not overlap with other + * reservation windows. + * + * failed: we failed to find a reservation window in this group + * + * @rsv: the reservation + * + * @grp_goal: The goal (group-relative). It is where the search for a + * free reservable space should start from. + * if we have a grp_goal(grp_goal >0 ), then start from there, + * no grp_goal(grp_goal = -1), we start from the first block + * of the group. + * + * @sb: the super block + * @group: the group we are trying to allocate in + * @bitmap_bh: the block group block bitmap + * + */ +static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, + ext3_grpblk_t grp_goal, struct super_block *sb, + unsigned int group, struct buffer_head *bitmap_bh) +{ + struct ext3_reserve_window_node *search_head; + ext3_fsblk_t group_first_block, group_end_block, start_block; + ext3_grpblk_t first_free_block; + struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root; + unsigned long size; + int ret; + spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; + + group_first_block = ext3_group_first_block_no(sb, group); + group_end_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); + + if (grp_goal < 0) + start_block = group_first_block; + else + start_block = grp_goal + group_first_block; + + size = my_rsv->rsv_goal_size; + + if (!rsv_is_empty(&my_rsv->rsv_window)) { + /* + * if the old reservation is cross group boundary + * and if the goal is inside the old reservation window, + * we will come here when we just failed to allocate from + * the first part of the window. We still have another part + * that belongs to the next group. In this case, there is no + * point to discard our window and try to allocate a new one + * in this group(which will fail). we should + * keep the reservation window, just simply move on. + * + * Maybe we could shift the start block of the reservation + * window to the first block of next group. 
+ */ + + if ((my_rsv->rsv_start <= group_end_block) && + (my_rsv->rsv_end > group_end_block) && + (start_block >= my_rsv->rsv_start)) + return -1; + + if ((my_rsv->rsv_alloc_hit > + (my_rsv->rsv_end - my_rsv->rsv_start + 1) / 2)) { + /* + * if the previously allocation hit ratio is + * greater than 1/2, then we double the size of + * the reservation window the next time, + * otherwise we keep the same size window + */ + size = size * 2; + if (size > EXT3_MAX_RESERVE_BLOCKS) + size = EXT3_MAX_RESERVE_BLOCKS; + my_rsv->rsv_goal_size= size; + } + } + + spin_lock(rsv_lock); + /* + * shift the search start to the window near the goal block + */ + search_head = search_reserve_window(fs_rsv_root, start_block); + + /* + * find_next_reservable_window() simply finds a reservable window + * inside the given range(start_block, group_end_block). + * + * To make sure the reservation window has a free bit inside it, we + * need to check the bitmap after we found a reservable window. + */ +retry: + ret = find_next_reservable_window(search_head, my_rsv, sb, + start_block, group_end_block); + + if (ret == -1) { + if (!rsv_is_empty(&my_rsv->rsv_window)) + rsv_window_remove(sb, my_rsv); + spin_unlock(rsv_lock); + return -1; + } + + /* + * On success, find_next_reservable_window() returns the + * reservation window where there is a reservable space after it. + * Before we reserve this reservable space, we need + * to make sure there is at least a free block inside this region. + * + * searching the first free bit on the block bitmap and copy of + * last committed bitmap alternatively, until we found a allocatable + * block. Search start from the start block of the reservable space + * we just found. + */ + spin_unlock(rsv_lock); + first_free_block = bitmap_search_next_usable_block( + my_rsv->rsv_start - group_first_block, + bitmap_bh, group_end_block - group_first_block + 1); + + if (first_free_block < 0) { + /* + * no free block left on the bitmap, no point + * to reserve the space. return failed. + */ + spin_lock(rsv_lock); + if (!rsv_is_empty(&my_rsv->rsv_window)) + rsv_window_remove(sb, my_rsv); + spin_unlock(rsv_lock); + return -1; /* failed */ + } + + start_block = first_free_block + group_first_block; + /* + * check if the first free block is within the + * free space we just reserved + */ + if (start_block >= my_rsv->rsv_start && start_block < my_rsv->rsv_end) + return 0; /* success */ + /* + * if the first free bit we found is out of the reservable space + * continue search for next reservable space, + * start from where the free block is, + * we also shift the list head to where we stopped last time + */ + search_head = my_rsv; + spin_lock(rsv_lock); + goto retry; +} + +/** + * try_to_extend_reservation() + * @my_rsv: given reservation window + * @sb: super block + * @size: the delta to extend + * + * Attempt to expand the reservation window large enough to have + * required number of free blocks + * + * Since ext3_try_to_allocate() will always allocate blocks within + * the reservation window range, if the window size is too small, + * multiple blocks allocation has to stop at the end of the reservation + * window. 
To make this more efficient, given the total number of
+ * blocks needed and the current size of the window, we try to
+ * expand the reservation window size if necessary on a best-effort
+ * basis before ext3_new_blocks() tries to allocate blocks.
+ */
+static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv,
+			struct super_block *sb, int size)
+{
+	struct ext3_reserve_window_node *next_rsv;
+	struct rb_node *next;
+	spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock;
+
+	if (!spin_trylock(rsv_lock))
+		return;
+
+	next = rb_next(&my_rsv->rsv_node);
+
+	if (!next)
+		my_rsv->rsv_end += size;
+	else {
+		next_rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node);
+
+		if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size)
+			my_rsv->rsv_end += size;
+		else
+			my_rsv->rsv_end = next_rsv->rsv_start - 1;
+	}
+	spin_unlock(rsv_lock);
+}
+
+/**
+ * ext3_try_to_allocate_with_rsv()
+ * @sb:			superblock
+ * @handle:		handle to this transaction
+ * @group:		given allocation block group
+ * @bitmap_bh:		bufferhead holds the block bitmap
+ * @grp_goal:		given target block within the group
+ * @count:		target number of blocks to allocate
+ * @my_rsv:		reservation window
+ * @errp:		pointer to store the error code
+ *
+ * This is the main function used to allocate a new block and its reservation
+ * window.
+ *
+ * Each time a new block allocation is needed, first try to allocate from
+ * the file's own reservation.  If it does not yet have a reservation
+ * window, then rather than looking for a free bit in the bitmap first and
+ * then checking the reservation list to see whether that bit falls inside
+ * somebody else's reservation window, we try to allocate a reservation
+ * window for the file, starting from the goal.  The block allocation is
+ * then done within that reservation window.
+ *
+ * This avoids searching the reservation list again and
+ * again when somebody is looking for a free block (without
+ * reservation), and there are lots of free blocks, but they are all
+ * being reserved.
+ *
+ * We use a red-black tree for the per-filesystem reservation list.
+ *
+ */
+static ext3_grpblk_t
+ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,
+			unsigned int group, struct buffer_head *bitmap_bh,
+			ext3_grpblk_t grp_goal,
+			struct ext3_reserve_window_node * my_rsv,
+			unsigned long *count, int *errp)
+{
+	ext3_fsblk_t group_first_block, group_last_block;
+	ext3_grpblk_t ret = 0;
+	int fatal;
+	unsigned long num = *count;
+
+	*errp = 0;
+
+	/*
+	 * Make sure we use undo access for the bitmap, because it is critical
+	 * that we do the frozen_data COW on bitmap buffers in all cases even
+	 * if the buffer is in BJ_Forget state in the committing transaction.
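+	 * In other words, undo access makes jbd keep a b_committed_data
+	 * copy of the bitmap, so blocks freed by the committing
+	 * transaction are not handed out again before that commit
+	 * reaches disk.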
+ */
+	BUFFER_TRACE(bitmap_bh, "get undo access for new block");
+	fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
+	if (fatal) {
+		*errp = fatal;
+		return -1;
+	}
+
+	/*
+	 * We don't deal with reservations when the filesystem is
+	 * mounted without reservations, or the file is not a regular
+	 * file, or the last attempt to allocate a block with
+	 * reservations turned on failed.
+	 */
+	if (my_rsv == NULL) {
+		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
+						grp_goal, count, NULL);
+		goto out;
+	}
+	/*
+	 * grp_goal is a group-relative block number (if there is a goal):
+	 * 0 <= grp_goal < EXT3_BLOCKS_PER_GROUP(sb).
+	 * group_first_block is a filesystem-wide block number; it is the
+	 * number of the first block in this group.
+	 */
+	group_first_block = ext3_group_first_block_no(sb, group);
+	group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
+
+	/*
+	 * Basically we will allocate a new block from inode's reservation
+	 * window.
+	 *
+	 * We need to allocate a new reservation window, if:
+	 * a) inode does not have a reservation window; or
+	 * b) last attempt to allocate a block from existing reservation
+	 *    failed; or
+	 * c) we come here with a goal and with a reservation window
+	 *
+	 * We do not need to allocate a new reservation window if we come here
+	 * at the beginning with a goal and the goal is inside the window, or
+	 * when we don't have a goal but already have a reservation window;
+	 * then we can allocate directly from the reservation window.
+	 */
+	while (1) {
+		if (rsv_is_empty(&my_rsv->rsv_window) || (ret < 0) ||
+			!goal_in_my_reservation(&my_rsv->rsv_window,
+						grp_goal, group, sb)) {
+			if (my_rsv->rsv_goal_size < *count)
+				my_rsv->rsv_goal_size = *count;
+			ret = alloc_new_reservation(my_rsv, grp_goal, sb,
+							group, bitmap_bh);
+			if (ret < 0)
+				break;			/* failed */
+
+			if (!goal_in_my_reservation(&my_rsv->rsv_window,
+							grp_goal, group, sb))
+				grp_goal = -1;
+		} else if (grp_goal > 0 &&
+			   (my_rsv->rsv_end-grp_goal+1) < *count)
+			try_to_extend_reservation(my_rsv, sb,
+					*count-my_rsv->rsv_end + grp_goal - 1);
+
+		if ((my_rsv->rsv_start > group_last_block) ||
+		    (my_rsv->rsv_end < group_first_block)) {
+			rsv_window_dump(&EXT3_SB(sb)->s_rsv_window_root, 1);
+			BUG();
+		}
+		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
+					   grp_goal, &num, &my_rsv->rsv_window);
+		if (ret >= 0) {
+			my_rsv->rsv_alloc_hit += num;
+			*count = num;
+			break;				/* succeed */
+		}
+		num = *count;
+	}
+out:
+	if (ret >= 0) {
+		BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for "
+					"bitmap block");
+		fatal = ext3_journal_dirty_metadata(handle, bitmap_bh);
+		if (fatal) {
+			*errp = fatal;
+			return -1;
+		}
+		return ret;
+	}
+
+	BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
+	ext3_journal_release_buffer(handle, bitmap_bh);
+	return ret;
+}
+
+/**
+ * ext3_has_free_blocks()
+ * @sbi:		in-core super block structure.
+ *
+ * Check if filesystem has at least 1 free block available for allocation.
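+ *
+ * Beyond the raw counter this honours the reserved-blocks policy.
+ * For illustration (made-up numbers): with s_r_blocks_count = 1024,
+ * an ordinary user starts seeing ENOSPC once fewer than 1025 free
+ * blocks remain, while root (CAP_SYS_RESOURCE), the resuid owner or
+ * a resgid member may keep allocating.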
+ */
+static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
+{
+	ext3_fsblk_t free_blocks, root_blocks;
+
+	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
+	root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
+	if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) &&
+		sbi->s_resuid != current->fsuid &&
+		(sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) {
+		return 0;
+	}
+	return 1;
+}
+
+/**
+ * ext3_should_retry_alloc()
+ * @sb:			super block
+ * @retries:		number of attempts that have been made
+ *
+ * ext3_should_retry_alloc() is called when ENOSPC is returned, and if
+ * it is profitable to retry the operation, this function will wait
+ * for the current or committing transaction to complete, and then
+ * return TRUE.
+ *
+ * If the total number of retries exceeds three, return FALSE.
+ */
+int ext3_should_retry_alloc(struct super_block *sb, int *retries)
+{
+	if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3)
+		return 0;
+
+	jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
+
+	return journal_force_commit_nested(EXT3_SB(sb)->s_journal);
+}
+
+/**
+ * ext3_new_blocks() -- core block(s) allocation function
+ * @handle:		handle to this transaction
+ * @inode:		file inode
+ * @goal:		given target block (filesystem-wide)
+ * @count:		target number of blocks to allocate
+ * @errp:		error code
+ *
+ * ext3_new_blocks uses a goal block to assist allocation.  It tries to
+ * allocate block(s) from the block group that contains the goal block
+ * first.  If that fails, it will try to allocate block(s) from other
+ * block groups without any specific goal block.
+ *
+ */
+ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
+			ext3_fsblk_t goal, unsigned long *count, int *errp)
+{
+	struct buffer_head *bitmap_bh = NULL;
+	struct buffer_head *gdp_bh;
+	int group_no;
+	int goal_group;
+	ext3_grpblk_t grp_target_blk;	/* blockgroup relative goal block */
+	ext3_grpblk_t grp_alloc_blk;	/* blockgroup-relative allocated block*/
+	ext3_fsblk_t ret_block;		/* filesystem-wide allocated block */
+	int bgi;			/* blockgroup iteration index */
+	int fatal = 0, err;
+	int performed_allocation = 0;
+	ext3_grpblk_t free_blocks;	/* number of free blocks in a group */
+	struct super_block *sb;
+	struct ext3_group_desc *gdp;
+	struct ext3_super_block *es;
+	struct ext3_sb_info *sbi;
+	struct ext3_reserve_window_node *my_rsv = NULL;
+	struct ext3_block_alloc_info *block_i;
+	unsigned short windowsz = 0;
+#ifdef EXT3FS_DEBUG
+	static int goal_hits, goal_attempts;
+#endif
+	unsigned long ngroups;
+	unsigned long num = *count;
+
+	*errp = -ENOSPC;
+	sb = inode->i_sb;
+	if (!sb) {
+		printk("ext3_new_block: nonexistent device");
+		return 0;
+	}
+
+	/*
+	 * Check quota for allocation of this block.
+ */
+	if (DQUOT_ALLOC_BLOCK(inode, num)) {
+		*errp = -EDQUOT;
+		return 0;
+	}
+
+	sbi = EXT3_SB(sb);
+	es = EXT3_SB(sb)->s_es;
+	ext3_debug("goal=%lu.\n", goal);
+	/*
+	 * Allocate a block from the reservation only when the
+	 * filesystem is mounted with reservations (the default,
+	 * -o reservation), it's a regular file, and
+	 * the desired window size is greater than 0 (one could use the
+	 * ioctl command EXT3_IOC_SETRSVSZ to set the window size to 0
+	 * to turn off reservation on that particular file)
+	 */
+	block_i = EXT3_I(inode)->i_block_alloc_info;
+	if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0))
+		my_rsv = &block_i->rsv_window_node;
+
+	if (!ext3_has_free_blocks(sbi)) {
+		*errp = -ENOSPC;
+		goto out;
+	}
+
+	/*
+	 * First, test whether the goal block is free.
+	 */
+	if (goal < le32_to_cpu(es->s_first_data_block) ||
+	    goal >= le32_to_cpu(es->s_blocks_count))
+		goal = le32_to_cpu(es->s_first_data_block);
+	group_no = (goal - le32_to_cpu(es->s_first_data_block)) /
+			EXT3_BLOCKS_PER_GROUP(sb);
+	goal_group = group_no;
+retry_alloc:
+	gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
+	if (!gdp)
+		goto io_error;
+
+	free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
+	/*
+	 * if there are not enough free blocks to make a new reservation,
+	 * turn off reservation for this allocation
+	 */
+	if (my_rsv && (free_blocks < windowsz)
+		&& (rsv_is_empty(&my_rsv->rsv_window)))
+		my_rsv = NULL;
+
+	if (free_blocks > 0) {
+		grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) %
+				EXT3_BLOCKS_PER_GROUP(sb));
+		bitmap_bh = read_block_bitmap(sb, group_no);
+		if (!bitmap_bh)
+			goto io_error;
+		grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
+					group_no, bitmap_bh, grp_target_blk,
+					my_rsv,	&num, &fatal);
+		if (fatal)
+			goto out;
+		if (grp_alloc_blk >= 0)
+			goto allocated;
+	}
+
+	ngroups = EXT3_SB(sb)->s_groups_count;
+	smp_rmb();
+
+	/*
+	 * Now search the rest of the groups.  We assume that
+	 * group_no and gdp correctly point to the last group visited.
+	 */
+	for (bgi = 0; bgi < ngroups; bgi++) {
+		group_no++;
+		if (group_no >= ngroups)
+			group_no = 0;
+		gdp = ext3_get_group_desc(sb, group_no, &gdp_bh);
+		if (!gdp) {
+			*errp = -EIO;
+			goto out;
+		}
+		free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
+		/*
+		 * skip this group if the number of
+		 * free blocks is less than half of the reservation
+		 * window size.
+		 */
+		if (free_blocks <= (windowsz/2))
+			continue;
+
+		brelse(bitmap_bh);
+		bitmap_bh = read_block_bitmap(sb, group_no);
+		if (!bitmap_bh)
+			goto io_error;
+		/*
+		 * try to allocate block(s) from this group, without a goal(-1).
+		 */
+		grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle,
+					group_no, bitmap_bh, -1, my_rsv,
+					&num, &fatal);
+		if (fatal)
+			goto out;
+		if (grp_alloc_blk >= 0)
+			goto allocated;
+	}
+	/*
+	 * We may end up with a bogus earlier ENOSPC error because the
+	 * filesystem is "full" of reservations while there may in fact
+	 * be free blocks available on disk.  In this case, we just
+	 * forget about the reservations and do the block allocation as
+	 * if there were no reservations.
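+	 * (This can happen, for instance, when every remaining free
+	 * block sits inside some other file's reservation window.)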
+ */ + if (my_rsv) { + my_rsv = NULL; + group_no = goal_group; + goto retry_alloc; + } + /* No space left on the device */ + *errp = -ENOSPC; + goto out; + +allocated: + + ext3_debug("using block group %d(%d)\n", + group_no, gdp->bg_free_blocks_count); + + BUFFER_TRACE(gdp_bh, "get_write_access"); + fatal = ext3_journal_get_write_access(handle, gdp_bh); + if (fatal) + goto out; + + ret_block = grp_alloc_blk + ext3_group_first_block_no(sb, group_no); + + if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) || + in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) || + in_range(ret_block, le32_to_cpu(gdp->bg_inode_table), + EXT3_SB(sb)->s_itb_per_group) || + in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table), + EXT3_SB(sb)->s_itb_per_group)) + ext3_error(sb, "ext3_new_block", + "Allocating block in system zone - " + "blocks from "E3FSBLK", length %lu", + ret_block, num); + + performed_allocation = 1; + +#ifdef CONFIG_JBD_DEBUG + { + struct buffer_head *debug_bh; + + /* Record bitmap buffer state in the newly allocated block */ + debug_bh = sb_find_get_block(sb, ret_block); + if (debug_bh) { + BUFFER_TRACE(debug_bh, "state when allocated"); + BUFFER_TRACE2(debug_bh, bitmap_bh, "bitmap state"); + brelse(debug_bh); + } + } + jbd_lock_bh_state(bitmap_bh); + spin_lock(sb_bgl_lock(sbi, group_no)); + if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data) { + int i; + + for (i = 0; i < num; i++) { + if (ext3_test_bit(grp_alloc_blk+i, + bh2jh(bitmap_bh)->b_committed_data)) { + printk("%s: block was unexpectedly set in " + "b_committed_data\n", __FUNCTION__); + } + } + } + ext3_debug("found bit %d\n", grp_alloc_blk); + spin_unlock(sb_bgl_lock(sbi, group_no)); + jbd_unlock_bh_state(bitmap_bh); +#endif + + if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) { + ext3_error(sb, "ext3_new_block", + "block("E3FSBLK") >= blocks count(%d) - " + "block_group = %d, es == %p ", ret_block, + le32_to_cpu(es->s_blocks_count), group_no, es); + goto out; + } + + /* + * It is up to the caller to add the new buffer to a journal + * list of some description. We don't know in advance whether + * the caller wants to use it as metadata or data. + */ + ext3_debug("allocating block %lu. Goal hits %d of %d.\n", + ret_block, goal_hits, goal_attempts); + + spin_lock(sb_bgl_lock(sbi, group_no)); + gdp->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)-num); + spin_unlock(sb_bgl_lock(sbi, group_no)); + percpu_counter_mod(&sbi->s_freeblocks_counter, -num); + + BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); + err = ext3_journal_dirty_metadata(handle, gdp_bh); + if (!fatal) + fatal = err; + + sb->s_dirt = 1; + if (fatal) + goto out; + + *errp = 0; + brelse(bitmap_bh); + DQUOT_FREE_BLOCK(inode, *count-num); + *count = num; + return ret_block; + +io_error: + *errp = -EIO; +out: + if (fatal) { + *errp = fatal; + ext3_std_error(sb, fatal); + } + /* + * Undo the block allocation + */ + if (!performed_allocation) + DQUOT_FREE_BLOCK(inode, *count); + brelse(bitmap_bh); + return 0; +} + +ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode, + ext3_fsblk_t goal, int *errp) +{ + unsigned long count = 1; + + return ext3_new_blocks(handle, inode, goal, &count, errp); +} + +/** + * ext3_count_free_blocks() -- count filesystem free blocks + * @sb: superblock + * + * Adds up the number of free blocks from each block group. 
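+ *
+ * Note that the EXT3FS_DEBUG build below returns the count taken
+ * from the bitmaps (after printing per-group and total figures),
+ * while the normal build returns the group-descriptor sum.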
+ */ +ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb) +{ + ext3_fsblk_t desc_count; + struct ext3_group_desc *gdp; + int i; + unsigned long ngroups = EXT3_SB(sb)->s_groups_count; +#ifdef EXT3FS_DEBUG + struct ext3_super_block *es; + ext3_fsblk_t bitmap_count; + unsigned long x; + struct buffer_head *bitmap_bh = NULL; + + es = EXT3_SB(sb)->s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + + smp_rmb(); + for (i = 0; i < ngroups; i++) { + gdp = ext3_get_group_desc(sb, i, NULL); + if (!gdp) + continue; + desc_count += le16_to_cpu(gdp->bg_free_blocks_count); + brelse(bitmap_bh); + bitmap_bh = read_block_bitmap(sb, i); + if (bitmap_bh == NULL) + continue; + + x = ext3_count_free(bitmap_bh, sb->s_blocksize); + printk("group %d: stored = %d, counted = %lu\n", + i, le16_to_cpu(gdp->bg_free_blocks_count), x); + bitmap_count += x; + } + brelse(bitmap_bh); + printk("ext3_count_free_blocks: stored = "E3FSBLK + ", computed = "E3FSBLK", "E3FSBLK"\n", + le32_to_cpu(es->s_free_blocks_count), + desc_count, bitmap_count); + return bitmap_count; +#else + desc_count = 0; + smp_rmb(); + for (i = 0; i < ngroups; i++) { + gdp = ext3_get_group_desc(sb, i, NULL); + if (!gdp) + continue; + desc_count += le16_to_cpu(gdp->bg_free_blocks_count); + } + + return desc_count; +#endif +} + +static inline int +block_in_use(ext3_fsblk_t block, struct super_block *sb, unsigned char *map) +{ + return ext3_test_bit ((block - + le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) % + EXT3_BLOCKS_PER_GROUP(sb), map); +} + +static inline int test_root(int a, int b) +{ + int num = b; + + while (a > num) + num *= b; + return num == a; +} + +static int ext3_group_sparse(int group) +{ + if (group <= 1) + return 1; + if (!(group & 1)) + return 0; + return (test_root(group, 7) || test_root(group, 5) || + test_root(group, 3)); +} + +/** + * ext3_bg_has_super - number of blocks used by the superblock in group + * @sb: superblock for filesystem + * @group: group number to check + * + * Return the number of blocks used by the superblock (primary or backup) + * in this group. Currently this will be only 0 or 1. + */ +int ext3_bg_has_super(struct super_block *sb, int group) +{ + if (EXT3_HAS_RO_COMPAT_FEATURE(sb, + EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) && + !ext3_group_sparse(group)) + return 0; + return 1; +} + +static unsigned long ext3_bg_num_gdb_meta(struct super_block *sb, int group) +{ + unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb); + unsigned long first = metagroup * EXT3_DESC_PER_BLOCK(sb); + unsigned long last = first + EXT3_DESC_PER_BLOCK(sb) - 1; + + if (group == first || group == first + 1 || group == last) + return 1; + return 0; +} + +static unsigned long ext3_bg_num_gdb_nometa(struct super_block *sb, int group) +{ + if (EXT3_HAS_RO_COMPAT_FEATURE(sb, + EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) && + !ext3_group_sparse(group)) + return 0; + return EXT3_SB(sb)->s_gdb_count; +} + +/** + * ext3_bg_num_gdb - number of blocks used by the group table in group + * @sb: superblock for filesystem + * @group: group number to check + * + * Return the number of blocks used by the group descriptor table + * (primary or backup) in this group. In the future there may be a + * different number of descriptor blocks in each group. 
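+ *
+ * An illustration with a hypothetical geometry: at 128 descriptors
+ * per block, metagroup 0 spans groups 0-127, and in the META_BG
+ * layout only the first, second and last group of each metagroup
+ * (here 0, 1 and 127) carry a descriptor block, which is what
+ * ext3_bg_num_gdb_meta() above encodes.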
+ */ +unsigned long ext3_bg_num_gdb(struct super_block *sb, int group) +{ + unsigned long first_meta_bg = + le32_to_cpu(EXT3_SB(sb)->s_es->s_first_meta_bg); + unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb); + + if (!EXT3_HAS_INCOMPAT_FEATURE(sb,EXT3_FEATURE_INCOMPAT_META_BG) || + metagroup < first_meta_bg) + return ext3_bg_num_gdb_nometa(sb,group); + + return ext3_bg_num_gdb_meta(sb,group); + +} diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c new file mode 100644 index 000000000000..b9176eed98d1 --- /dev/null +++ b/fs/ext4/bitmap.c @@ -0,0 +1,32 @@ +/* + * linux/fs/ext3/bitmap.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + */ + +#include <linux/buffer_head.h> +#include <linux/jbd.h> +#include <linux/ext3_fs.h> + +#ifdef EXT3FS_DEBUG + +static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; + +unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars) +{ + unsigned int i; + unsigned long sum = 0; + + if (!map) + return (0); + for (i = 0; i < numchars; i++) + sum += nibblemap[map->b_data[i] & 0xf] + + nibblemap[(map->b_data[i] >> 4) & 0xf]; + return (sum); +} + +#endif /* EXT3FS_DEBUG */ + diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c new file mode 100644 index 000000000000..d0b54f30b914 --- /dev/null +++ b/fs/ext4/dir.c @@ -0,0 +1,518 @@ +/* + * linux/fs/ext3/dir.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/dir.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext3 directory handling functions + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + * + * Hash Tree Directory indexing (c) 2001 Daniel Phillips + * + */ + +#include <linux/fs.h> +#include <linux/jbd.h> +#include <linux/ext3_fs.h> +#include <linux/buffer_head.h> +#include <linux/smp_lock.h> +#include <linux/slab.h> +#include <linux/rbtree.h> + +static unsigned char ext3_filetype_table[] = { + DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK +}; + +static int ext3_readdir(struct file *, void *, filldir_t); +static int ext3_dx_readdir(struct file * filp, + void * dirent, filldir_t filldir); +static int ext3_release_dir (struct inode * inode, + struct file * filp); + +const struct file_operations ext3_dir_operations = { + .llseek = generic_file_llseek, + .read = generic_read_dir, + .readdir = ext3_readdir, /* we take BKL. 
needed?*/ + .ioctl = ext3_ioctl, /* BKL held */ +#ifdef CONFIG_COMPAT + .compat_ioctl = ext3_compat_ioctl, +#endif + .fsync = ext3_sync_file, /* BKL held */ +#ifdef CONFIG_EXT3_INDEX + .release = ext3_release_dir, +#endif +}; + + +static unsigned char get_dtype(struct super_block *sb, int filetype) +{ + if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) || + (filetype >= EXT3_FT_MAX)) + return DT_UNKNOWN; + + return (ext3_filetype_table[filetype]); +} + + +int ext3_check_dir_entry (const char * function, struct inode * dir, + struct ext3_dir_entry_2 * de, + struct buffer_head * bh, + unsigned long offset) +{ + const char * error_msg = NULL; + const int rlen = le16_to_cpu(de->rec_len); + + if (rlen < EXT3_DIR_REC_LEN(1)) + error_msg = "rec_len is smaller than minimal"; + else if (rlen % 4 != 0) + error_msg = "rec_len % 4 != 0"; + else if (rlen < EXT3_DIR_REC_LEN(de->name_len)) + error_msg = "rec_len is too small for name_len"; + else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) + error_msg = "directory entry across blocks"; + else if (le32_to_cpu(de->inode) > + le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) + error_msg = "inode out of bounds"; + + if (error_msg != NULL) + ext3_error (dir->i_sb, function, + "bad entry in directory #%lu: %s - " + "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", + dir->i_ino, error_msg, offset, + (unsigned long) le32_to_cpu(de->inode), + rlen, de->name_len); + return error_msg == NULL ? 1 : 0; +} + +static int ext3_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + int error = 0; + unsigned long offset; + int i, stored; + struct ext3_dir_entry_2 *de; + struct super_block *sb; + int err; + struct inode *inode = filp->f_dentry->d_inode; + int ret = 0; + + sb = inode->i_sb; + +#ifdef CONFIG_EXT3_INDEX + if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, + EXT3_FEATURE_COMPAT_DIR_INDEX) && + ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || + ((inode->i_size >> sb->s_blocksize_bits) == 1))) { + err = ext3_dx_readdir(filp, dirent, filldir); + if (err != ERR_BAD_DX_DIR) { + ret = err; + goto out; + } + /* + * We don't set the inode dirty flag since it's not + * critical that it get flushed back to the disk. + */ + EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL; + } +#endif + stored = 0; + offset = filp->f_pos & (sb->s_blocksize - 1); + + while (!error && !stored && filp->f_pos < inode->i_size) { + unsigned long blk = filp->f_pos >> EXT3_BLOCK_SIZE_BITS(sb); + struct buffer_head map_bh; + struct buffer_head *bh = NULL; + + map_bh.b_state = 0; + err = ext3_get_blocks_handle(NULL, inode, blk, 1, + &map_bh, 0, 0); + if (err > 0) { + page_cache_readahead(sb->s_bdev->bd_inode->i_mapping, + &filp->f_ra, + filp, + map_bh.b_blocknr >> + (PAGE_CACHE_SHIFT - inode->i_blkbits), + 1); + bh = ext3_bread(NULL, inode, blk, 0, &err); + } + + /* + * We ignore I/O errors on directories so users have a chance + * of recovering data when there's a bad sector + */ + if (!bh) { + ext3_error (sb, "ext3_readdir", + "directory #%lu contains a hole at offset %lu", + inode->i_ino, (unsigned long)filp->f_pos); + filp->f_pos += sb->s_blocksize - offset; + continue; + } + +revalidate: + /* If the dir block has changed since the last call to + * readdir(2), then we might be pointing to an invalid + * dirent right now. Scan from the start of the block + * to make sure. 
*/
+		if (filp->f_version != inode->i_version) {
+			for (i = 0; i < sb->s_blocksize && i < offset; ) {
+				de = (struct ext3_dir_entry_2 *)
+					(bh->b_data + i);
+				/* It's too expensive to do a full
+				 * dirent test each time round this
+				 * loop, but we do have to test at
+				 * least that it is non-zero.  A
+				 * failure will be detected in the
+				 * dirent test below. */
+				if (le16_to_cpu(de->rec_len) <
+						EXT3_DIR_REC_LEN(1))
+					break;
+				i += le16_to_cpu(de->rec_len);
+			}
+			offset = i;
+			filp->f_pos = (filp->f_pos & ~(sb->s_blocksize - 1))
+				| offset;
+			filp->f_version = inode->i_version;
+		}
+
+		while (!error && filp->f_pos < inode->i_size
+		       && offset < sb->s_blocksize) {
+			de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
+			if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
+						   bh, offset)) {
+				/* On error, skip the f_pos to the
+				   next block. */
+				filp->f_pos = (filp->f_pos |
+						(sb->s_blocksize - 1)) + 1;
+				brelse (bh);
+				ret = stored;
+				goto out;
+			}
+			offset += le16_to_cpu(de->rec_len);
+			if (le32_to_cpu(de->inode)) {
+				/* We might block in the next section
+				 * if the data destination is
+				 * currently swapped out.  So, use a
+				 * version stamp to detect whether or
+				 * not the directory has been modified
+				 * during the copy operation.
+				 */
+				unsigned long version = filp->f_version;
+
+				error = filldir(dirent, de->name,
+						de->name_len,
+						filp->f_pos,
+						le32_to_cpu(de->inode),
+						get_dtype(sb, de->file_type));
+				if (error)
+					break;
+				if (version != filp->f_version)
+					goto revalidate;
+				stored++;
+			}
+			filp->f_pos += le16_to_cpu(de->rec_len);
+		}
+		offset = 0;
+		brelse (bh);
+	}
+out:
+	return ret;
+}
+
+#ifdef CONFIG_EXT3_INDEX
+/*
+ * These functions convert from the major/minor hash to an f_pos
+ * value.
+ *
+ * Currently we only use the major hash number.  This is unfortunate, but
+ * on 32-bit machines, the same VFS interface is used for lseek and
+ * llseek, so if we use the 64 bit offset, then the 32-bit versions of
+ * lseek/telldir/seekdir will blow out spectacularly, and from within
+ * the ext2 low-level routine, we don't know if we're being called by
+ * a 64-bit version of the system call or the 32-bit version of the
+ * system call.  Worse yet, NFSv2 only allows for a 32-bit readdir
+ * cookie.  Sigh.
+ */
+#define hash2pos(major, minor)	(major >> 1)
+#define pos2maj_hash(pos)	((pos << 1) & 0xffffffff)
+#define pos2min_hash(pos)	(0)
+
+/*
+ * This structure holds the nodes of the red-black tree used to store
+ * the directory entries in hash order.
+ */
+struct fname {
+	__u32		hash;
+	__u32		minor_hash;
+	struct rb_node	rb_hash;
+	struct fname	*next;
+	__u32		inode;
+	__u8		name_len;
+	__u8		file_type;
+	char		name[0];
+};
+
+/*
+ * This function implements a non-recursive way of freeing all of the
+ * nodes in the red-black tree.
+ */
+static void free_rb_tree_fname(struct rb_root *root)
+{
+	struct rb_node	*n = root->rb_node;
+	struct rb_node	*parent;
+	struct fname	*fname;
+
+	while (n) {
+		/* Do the node's children first */
+		if ((n)->rb_left) {
+			n = n->rb_left;
+			continue;
+		}
+		if (n->rb_right) {
+			n = n->rb_right;
+			continue;
+		}
+		/*
+		 * The node has no children; free it, and then zero
+		 * out parent's link to it.  Finally go to the
+		 * beginning of the loop and try to free the parent
+		 * node.
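+		 * (Freeing iteratively, bottom-up, rather than by
+		 * recursion presumably keeps kernel stack usage bounded
+		 * on large directories.)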
+ */ + parent = rb_parent(n); + fname = rb_entry(n, struct fname, rb_hash); + while (fname) { + struct fname * old = fname; + fname = fname->next; + kfree (old); + } + if (!parent) + root->rb_node = NULL; + else if (parent->rb_left == n) + parent->rb_left = NULL; + else if (parent->rb_right == n) + parent->rb_right = NULL; + n = parent; + } + root->rb_node = NULL; +} + + +static struct dir_private_info *create_dir_info(loff_t pos) +{ + struct dir_private_info *p; + + p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); + if (!p) + return NULL; + p->root.rb_node = NULL; + p->curr_node = NULL; + p->extra_fname = NULL; + p->last_pos = 0; + p->curr_hash = pos2maj_hash(pos); + p->curr_minor_hash = pos2min_hash(pos); + p->next_hash = 0; + return p; +} + +void ext3_htree_free_dir_info(struct dir_private_info *p) +{ + free_rb_tree_fname(&p->root); + kfree(p); +} + +/* + * Given a directory entry, enter it into the fname rb tree. + */ +int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, + __u32 minor_hash, + struct ext3_dir_entry_2 *dirent) +{ + struct rb_node **p, *parent = NULL; + struct fname * fname, *new_fn; + struct dir_private_info *info; + int len; + + info = (struct dir_private_info *) dir_file->private_data; + p = &info->root.rb_node; + + /* Create and allocate the fname structure */ + len = sizeof(struct fname) + dirent->name_len + 1; + new_fn = kzalloc(len, GFP_KERNEL); + if (!new_fn) + return -ENOMEM; + new_fn->hash = hash; + new_fn->minor_hash = minor_hash; + new_fn->inode = le32_to_cpu(dirent->inode); + new_fn->name_len = dirent->name_len; + new_fn->file_type = dirent->file_type; + memcpy(new_fn->name, dirent->name, dirent->name_len); + new_fn->name[dirent->name_len] = 0; + + while (*p) { + parent = *p; + fname = rb_entry(parent, struct fname, rb_hash); + + /* + * If the hash and minor hash match up, then we put + * them on a linked list. This rarely happens... + */ + if ((new_fn->hash == fname->hash) && + (new_fn->minor_hash == fname->minor_hash)) { + new_fn->next = fname->next; + fname->next = new_fn; + return 0; + } + + if (new_fn->hash < fname->hash) + p = &(*p)->rb_left; + else if (new_fn->hash > fname->hash) + p = &(*p)->rb_right; + else if (new_fn->minor_hash < fname->minor_hash) + p = &(*p)->rb_left; + else /* if (new_fn->minor_hash > fname->minor_hash) */ + p = &(*p)->rb_right; + } + + rb_link_node(&new_fn->rb_hash, parent, p); + rb_insert_color(&new_fn->rb_hash, &info->root); + return 0; +} + + + +/* + * This is a helper function for ext3_dx_readdir. It calls filldir + * for all entres on the fname linked list. (Normally there is only + * one entry on the linked list, unless there are 62 bit hash collisions.) 
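+ *
+ * A nonzero return from filldir means the caller's buffer filled up;
+ * in that case call_filldir() saves f_pos and parks the rest of the
+ * collision chain in info->extra_fname so a later readdir() can
+ * resume from there.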
+ */ +static int call_filldir(struct file * filp, void * dirent, + filldir_t filldir, struct fname *fname) +{ + struct dir_private_info *info = filp->private_data; + loff_t curr_pos; + struct inode *inode = filp->f_dentry->d_inode; + struct super_block * sb; + int error; + + sb = inode->i_sb; + + if (!fname) { + printk("call_filldir: called with null fname?!?\n"); + return 0; + } + curr_pos = hash2pos(fname->hash, fname->minor_hash); + while (fname) { + error = filldir(dirent, fname->name, + fname->name_len, curr_pos, + fname->inode, + get_dtype(sb, fname->file_type)); + if (error) { + filp->f_pos = curr_pos; + info->extra_fname = fname->next; + return error; + } + fname = fname->next; + } + return 0; +} + +static int ext3_dx_readdir(struct file * filp, + void * dirent, filldir_t filldir) +{ + struct dir_private_info *info = filp->private_data; + struct inode *inode = filp->f_dentry->d_inode; + struct fname *fname; + int ret; + + if (!info) { + info = create_dir_info(filp->f_pos); + if (!info) + return -ENOMEM; + filp->private_data = info; + } + + if (filp->f_pos == EXT3_HTREE_EOF) + return 0; /* EOF */ + + /* Some one has messed with f_pos; reset the world */ + if (info->last_pos != filp->f_pos) { + free_rb_tree_fname(&info->root); + info->curr_node = NULL; + info->extra_fname = NULL; + info->curr_hash = pos2maj_hash(filp->f_pos); + info->curr_minor_hash = pos2min_hash(filp->f_pos); + } + + /* + * If there are any leftover names on the hash collision + * chain, return them first. + */ + if (info->extra_fname && + call_filldir(filp, dirent, filldir, info->extra_fname)) + goto finished; + + if (!info->curr_node) + info->curr_node = rb_first(&info->root); + + while (1) { + /* + * Fill the rbtree if we have no more entries, + * or the inode has changed since we last read in the + * cached entries. 
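+		 * (The refill below restarts from info->curr_hash, so a
+		 * rebuilt tree resumes at the hash position the previous
+		 * pass reached rather than at the start of the directory.)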
+ */ + if ((!info->curr_node) || + (filp->f_version != inode->i_version)) { + info->curr_node = NULL; + free_rb_tree_fname(&info->root); + filp->f_version = inode->i_version; + ret = ext3_htree_fill_tree(filp, info->curr_hash, + info->curr_minor_hash, + &info->next_hash); + if (ret < 0) + return ret; + if (ret == 0) { + filp->f_pos = EXT3_HTREE_EOF; + break; + } + info->curr_node = rb_first(&info->root); + } + + fname = rb_entry(info->curr_node, struct fname, rb_hash); + info->curr_hash = fname->hash; + info->curr_minor_hash = fname->minor_hash; + if (call_filldir(filp, dirent, filldir, fname)) + break; + + info->curr_node = rb_next(info->curr_node); + if (!info->curr_node) { + if (info->next_hash == ~0) { + filp->f_pos = EXT3_HTREE_EOF; + break; + } + info->curr_hash = info->next_hash; + info->curr_minor_hash = 0; + } + } +finished: + info->last_pos = filp->f_pos; + return 0; +} + +static int ext3_release_dir (struct inode * inode, struct file * filp) +{ + if (filp->private_data) + ext3_htree_free_dir_info(filp->private_data); + + return 0; +} + +#endif diff --git a/fs/ext4/file.c b/fs/ext4/file.c new file mode 100644 index 000000000000..e96c388047e0 --- /dev/null +++ b/fs/ext4/file.c @@ -0,0 +1,139 @@ +/* + * linux/fs/ext3/file.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/file.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext3 fs regular file handling primitives + * + * 64-bit file support on 64-bit platforms by Jakub Jelinek + * (jj@sunsite.ms.mff.cuni.cz) + */ + +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/jbd.h> +#include <linux/ext3_fs.h> +#include <linux/ext3_jbd.h> +#include "xattr.h" +#include "acl.h" + +/* + * Called when an inode is released. Note that this is different + * from ext3_file_open: open gets called at every open, but release + * gets called only when /all/ the files are closed. + */ +static int ext3_release_file (struct inode * inode, struct file * filp) +{ + /* if we are the last writer on the inode, drop the block reservation */ + if ((filp->f_mode & FMODE_WRITE) && + (atomic_read(&inode->i_writecount) == 1)) + { + mutex_lock(&EXT3_I(inode)->truncate_mutex); + ext3_discard_reservation(inode); + mutex_unlock(&EXT3_I(inode)->truncate_mutex); + } + if (is_dx(inode) && filp->private_data) + ext3_htree_free_dir_info(filp->private_data); + + return 0; +} + +static ssize_t +ext3_file_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_dentry->d_inode; + ssize_t ret; + int err; + + ret = generic_file_aio_write(iocb, iov, nr_segs, pos); + + /* + * Skip flushing if there was an error, or if nothing was written. + */ + if (ret <= 0) + return ret; + + /* + * If the inode is IS_SYNC, or is O_SYNC and we are doing data + * journalling then we need to make sure that we force the transaction + * to disk to keep all metadata uptodate synchronously. + */ + if (file->f_flags & O_SYNC) { + /* + * If we are non-data-journaled, then the dirty data has + * already been flushed to backing store by generic_osync_inode, + * and the inode has been flushed too if there have been any + * modifications other than mere timestamp updates. + * + * Open question --- do we care about flushing timestamps too + * if the inode is IS_SYNC? 
+ */ + if (!ext3_should_journal_data(inode)) + return ret; + + goto force_commit; + } + + /* + * So we know that there has been no forced data flush. If the inode + * is marked IS_SYNC, we need to force one ourselves. + */ + if (!IS_SYNC(inode)) + return ret; + + /* + * Open question #2 --- should we force data to disk here too? If we + * don't, the only impact is that data=writeback filesystems won't + * flush data to disk automatically on IS_SYNC, only metadata (but + * historically, that is what ext2 has done.) + */ + +force_commit: + err = ext3_force_commit(inode->i_sb); + if (err) + return err; + return ret; +} + +const struct file_operations ext3_file_operations = { + .llseek = generic_file_llseek, + .read = do_sync_read, + .write = do_sync_write, + .aio_read = generic_file_aio_read, + .aio_write = ext3_file_write, + .ioctl = ext3_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = ext3_compat_ioctl, +#endif + .mmap = generic_file_mmap, + .open = generic_file_open, + .release = ext3_release_file, + .fsync = ext3_sync_file, + .sendfile = generic_file_sendfile, + .splice_read = generic_file_splice_read, + .splice_write = generic_file_splice_write, +}; + +struct inode_operations ext3_file_inode_operations = { + .truncate = ext3_truncate, + .setattr = ext3_setattr, +#ifdef CONFIG_EXT3_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ext3_listxattr, + .removexattr = generic_removexattr, +#endif + .permission = ext3_permission, +}; + diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c new file mode 100644 index 000000000000..dd1fd3c0fc05 --- /dev/null +++ b/fs/ext4/fsync.c @@ -0,0 +1,88 @@ +/* + * linux/fs/ext3/fsync.c + * + * Copyright (C) 1993 Stephen Tweedie (sct@redhat.com) + * from + * Copyright (C) 1992 Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * from + * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds + * + * ext3fs fsync primitive + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + * + * Removed unnecessary code duplication for little endian machines + * and excessive __inline__s. + * Andi Kleen, 1997 + * + * Major simplications and cleanup - we only need to do the metadata, because + * we can depend on generic_block_fdatasync() to sync the data blocks. + */ + +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/sched.h> +#include <linux/writeback.h> +#include <linux/jbd.h> +#include <linux/ext3_fs.h> +#include <linux/ext3_jbd.h> + +/* + * akpm: A new design for ext3_sync_file(). + * + * This is only called from sys_fsync(), sys_fdatasync() and sys_msync(). + * There cannot be a transaction open by this task. + * Another task could have dirtied this inode. Its data can be in any + * state in the journalling system. + * + * What we do is just kick off a commit and wait on it. This will snapshot the + * inode to disk. + */ + +int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) +{ + struct inode *inode = dentry->d_inode; + int ret = 0; + + J_ASSERT(ext3_journal_current_handle() == 0); + + /* + * data=writeback: + * The caller's filemap_fdatawrite()/wait will sync the data. + * sync_inode() will sync the metadata + * + * data=ordered: + * The caller's filemap_fdatawrite() will write the data and + * sync_inode() will write the inode if it is dirty. Then the caller's + * filemap_fdatawait() will wait on the pages. 
+ * + * data=journal: + * filemap_fdatawrite won't do anything (the buffers are clean). + * ext3_force_commit will write the file data into the journal and + * will wait on that. + * filemap_fdatawait() will encounter a ton of newly-dirtied pages + * (they were dirtied by commit). But that's OK - the blocks are + * safe in-journal, which is all fsync() needs to ensure. + */ + if (ext3_should_journal_data(inode)) { + ret = ext3_force_commit(inode->i_sb); + goto out; + } + + /* + * The VFS has written the file data. If the inode is unaltered + * then we need not start a commit. + */ + if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) { + struct writeback_control wbc = { + .sync_mode = WB_SYNC_ALL, + .nr_to_write = 0, /* sys_fsync did this */ + }; + ret = sync_inode(inode, &wbc); + } +out: + return ret; +} diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c new file mode 100644 index 000000000000..deeb27b5ba83 --- /dev/null +++ b/fs/ext4/hash.c @@ -0,0 +1,152 @@ +/* + * linux/fs/ext3/hash.c + * + * Copyright (C) 2002 by Theodore Ts'o + * + * This file is released under the GPL v2. + * + * This file may be redistributed under the terms of the GNU Public + * License. + */ + +#include <linux/fs.h> +#include <linux/jbd.h> +#include <linux/sched.h> +#include <linux/ext3_fs.h> +#include <linux/cryptohash.h> + +#define DELTA 0x9E3779B9 + +static void TEA_transform(__u32 buf[4], __u32 const in[]) +{ + __u32 sum = 0; + __u32 b0 = buf[0], b1 = buf[1]; + __u32 a = in[0], b = in[1], c = in[2], d = in[3]; + int n = 16; + + do { + sum += DELTA; + b0 += ((b1 << 4)+a) ^ (b1+sum) ^ ((b1 >> 5)+b); + b1 += ((b0 << 4)+c) ^ (b0+sum) ^ ((b0 >> 5)+d); + } while(--n); + + buf[0] += b0; + buf[1] += b1; +} + + +/* The old legacy hash */ +static __u32 dx_hack_hash (const char *name, int len) +{ + __u32 hash0 = 0x12a3fe2d, hash1 = 0x37abe8f9; + while (len--) { + __u32 hash = hash1 + (hash0 ^ (*name++ * 7152373)); + + if (hash & 0x80000000) hash -= 0x7fffffff; + hash1 = hash0; + hash0 = hash; + } + return (hash0 << 1); +} + +static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) +{ + __u32 pad, val; + int i; + + pad = (__u32)len | ((__u32)len << 8); + pad |= pad << 16; + + val = pad; + if (len > num*4) + len = num * 4; + for (i=0; i < len; i++) { + if ((i % 4) == 0) + val = pad; + val = msg[i] + (val << 8); + if ((i % 4) == 3) { + *buf++ = val; + val = pad; + num--; + } + } + if (--num >= 0) + *buf++ = val; + while (--num >= 0) + *buf++ = pad; +} + +/* + * Returns the hash of a filename. If len is 0 and name is NULL, then + * this function can be used to test whether or not a hash version is + * supported. + * + * The seed is an 4 longword (32 bits) "secret" which can be used to + * uniquify a hash. If the seed is all zero's, then some default seed + * may be used. + * + * A particular hash version specifies whether or not the seed is + * represented, and whether or not the returned hash is 32 bits or 64 + * bits. 32 bit hashes will return 0 for the minor hash. 
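+ *
+ * A minimal usage sketch (hypothetical caller; the s_hash_seed field
+ * and use() are assumed, not shown in this patch):
+ *
+ *	struct dx_hash_info hinfo;
+ *
+ *	hinfo.hash_version = DX_HASH_HALF_MD4;
+ *	hinfo.seed = EXT3_SB(sb)->s_hash_seed;
+ *	if (ext3fs_dirhash(de->name, de->name_len, &hinfo) == 0)
+ *		use(hinfo.hash, hinfo.minor_hash);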
+ */ +int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) +{ + __u32 hash; + __u32 minor_hash = 0; + const char *p; + int i; + __u32 in[8], buf[4]; + + /* Initialize the default seed for the hash checksum functions */ + buf[0] = 0x67452301; + buf[1] = 0xefcdab89; + buf[2] = 0x98badcfe; + buf[3] = 0x10325476; + + /* Check to see if the seed is all zero's */ + if (hinfo->seed) { + for (i=0; i < 4; i++) { + if (hinfo->seed[i]) + break; + } + if (i < 4) + memcpy(buf, hinfo->seed, sizeof(buf)); + } + + switch (hinfo->hash_version) { + case DX_HASH_LEGACY: + hash = dx_hack_hash(name, len); + break; + case DX_HASH_HALF_MD4: + p = name; + while (len > 0) { + str2hashbuf(p, len, in, 8); + half_md4_transform(buf, in); + len -= 32; + p += 32; + } + minor_hash = buf[2]; + hash = buf[1]; + break; + case DX_HASH_TEA: + p = name; + while (len > 0) { + str2hashbuf(p, len, in, 4); + TEA_transform(buf, in); + len -= 16; + p += 16; + } + hash = buf[0]; + minor_hash = buf[1]; + break; + default: + hinfo->hash = 0; + return -1; + } + hash = hash & ~1; + if (hash == (EXT3_HTREE_EOF << 1)) + hash = (EXT3_HTREE_EOF-1) << 1; + hinfo->hash = hash; + hinfo->minor_hash = minor_hash; + return 0; +} diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c new file mode 100644 index 000000000000..e45dbd651736 --- /dev/null +++ b/fs/ext4/ialloc.c @@ -0,0 +1,758 @@ +/* + * linux/fs/ext3/ialloc.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * BSD ufs-inspired inode and directory allocation by + * Stephen Tweedie (sct@redhat.com), 1993 + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + */ + +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/jbd.h> +#include <linux/ext3_fs.h> +#include <linux/ext3_jbd.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/quotaops.h> +#include <linux/buffer_head.h> +#include <linux/random.h> +#include <linux/bitops.h> + +#include <asm/byteorder.h> + +#include "xattr.h" +#include "acl.h" + +/* + * ialloc.c contains the inodes allocation and deallocation routines + */ + +/* + * The free inodes are managed by bitmaps. A file system contains several + * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap + * block for inodes, N blocks for the inode table and data blocks. + * + * The file system contains group descriptors which are located after the + * super block. Each descriptor contains the number of the bitmap block and + * the free blocks count in the block. + */ + + +/* + * Read the inode allocation bitmap for a given block_group, reading + * into the specified slot in the superblock's bitmap cache. + * + * Return buffer_head of bitmap on success or NULL. + */ +static struct buffer_head * +read_inode_bitmap(struct super_block * sb, unsigned long block_group) +{ + struct ext3_group_desc *desc; + struct buffer_head *bh = NULL; + + desc = ext3_get_group_desc(sb, block_group, NULL); + if (!desc) + goto error_out; + + bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); + if (!bh) + ext3_error(sb, "read_inode_bitmap", + "Cannot read inode bitmap - " + "block_group = %lu, inode_bitmap = %u", + block_group, le32_to_cpu(desc->bg_inode_bitmap)); +error_out: + return bh; +} + +/* + * NOTE! When we get the inode, we're the only people + * that have access to it, and as such there are no + * race conditions we have to worry about. 
The inode + * is not on the hash-lists, and it cannot be reached + * through the filesystem because the directory entry + * has been deleted earlier. + * + * HOWEVER: we must make sure that we get no aliases, + * which means that we have to call "clear_inode()" + * _before_ we mark the inode not in use in the inode + * bitmaps. Otherwise a newly created file might use + * the same inode number (not actually the same pointer + * though), and then we'd have two inodes sharing the + * same inode number and space on the harddisk. + */ +void ext3_free_inode (handle_t *handle, struct inode * inode) +{ + struct super_block * sb = inode->i_sb; + int is_directory; + unsigned long ino; + struct buffer_head *bitmap_bh = NULL; + struct buffer_head *bh2; + unsigned long block_group; + unsigned long bit; + struct ext3_group_desc * gdp; + struct ext3_super_block * es; + struct ext3_sb_info *sbi; + int fatal = 0, err; + + if (atomic_read(&inode->i_count) > 1) { + printk ("ext3_free_inode: inode has count=%d\n", + atomic_read(&inode->i_count)); + return; + } + if (inode->i_nlink) { + printk ("ext3_free_inode: inode has nlink=%d\n", + inode->i_nlink); + return; + } + if (!sb) { + printk("ext3_free_inode: inode on nonexistent device\n"); + return; + } + sbi = EXT3_SB(sb); + + ino = inode->i_ino; + ext3_debug ("freeing inode %lu\n", ino); + + /* + * Note: we must free any quota before locking the superblock, + * as writing the quota to disk may need the lock as well. + */ + DQUOT_INIT(inode); + ext3_xattr_delete_inode(handle, inode); + DQUOT_FREE_INODE(inode); + DQUOT_DROP(inode); + + is_directory = S_ISDIR(inode->i_mode); + + /* Do this BEFORE marking the inode not in use or returning an error */ + clear_inode (inode); + + es = EXT3_SB(sb)->s_es; + if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { + ext3_error (sb, "ext3_free_inode", + "reserved or nonexistent inode %lu", ino); + goto error_return; + } + block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); + bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb); + bitmap_bh = read_inode_bitmap(sb, block_group); + if (!bitmap_bh) + goto error_return; + + BUFFER_TRACE(bitmap_bh, "get_write_access"); + fatal = ext3_journal_get_write_access(handle, bitmap_bh); + if (fatal) + goto error_return; + + /* Ok, now we can actually update the inode bitmaps.. */ + if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group), + bit, bitmap_bh->b_data)) + ext3_error (sb, "ext3_free_inode", + "bit already cleared for inode %lu", ino); + else { + gdp = ext3_get_group_desc (sb, block_group, &bh2); + + BUFFER_TRACE(bh2, "get_write_access"); + fatal = ext3_journal_get_write_access(handle, bh2); + if (fatal) goto error_return; + + if (gdp) { + spin_lock(sb_bgl_lock(sbi, block_group)); + gdp->bg_free_inodes_count = cpu_to_le16( + le16_to_cpu(gdp->bg_free_inodes_count) + 1); + if (is_directory) + gdp->bg_used_dirs_count = cpu_to_le16( + le16_to_cpu(gdp->bg_used_dirs_count) - 1); + spin_unlock(sb_bgl_lock(sbi, block_group)); + percpu_counter_inc(&sbi->s_freeinodes_counter); + if (is_directory) + percpu_counter_dec(&sbi->s_dirs_counter); + + } + BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, bh2); + if (!fatal) fatal = err; + } + BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, bitmap_bh); + if (!fatal) + fatal = err; + sb->s_dirt = 1; +error_return: + brelse(bitmap_bh); + ext3_std_error(sb, fatal); +} + +/* + * There are two policies for allocating an inode. 
If the new inode is
+ * a directory, then a forward search is made for a block group with both
+ * free space and a low directory-to-inode ratio; if that fails, then of
+ * the groups with above-average free space, that group with the fewest
+ * directories already is chosen.
+ *
+ * For other inodes, search forward from the parent directory's block
+ * group to find a free inode.
+ */
+static int find_group_dir(struct super_block *sb, struct inode *parent)
+{
+	int ngroups = EXT3_SB(sb)->s_groups_count;
+	unsigned int freei, avefreei;
+	struct ext3_group_desc *desc, *best_desc = NULL;
+	struct buffer_head *bh;
+	int group, best_group = -1;
+
+	freei = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter);
+	avefreei = freei / ngroups;
+
+	for (group = 0; group < ngroups; group++) {
+		desc = ext3_get_group_desc (sb, group, &bh);
+		if (!desc || !desc->bg_free_inodes_count)
+			continue;
+		if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
+			continue;
+		if (!best_desc ||
+		    (le16_to_cpu(desc->bg_free_blocks_count) >
+		     le16_to_cpu(best_desc->bg_free_blocks_count))) {
+			best_group = group;
+			best_desc = desc;
+		}
+	}
+	return best_group;
+}
+
+/*
+ * Orlov's allocator for directories.
+ *
+ * We always try to spread first-level directories.
+ *
+ * If there are blockgroups with both free-inode and free-block counts
+ * no worse than average, we return the one with the smallest directory
+ * count.  Otherwise we simply return a random group.
+ *
+ * For the rest, the rules look like this:
+ *
+ * It's OK to put a directory into a group unless
+ * it has too many directories already (max_dirs) or
+ * it has too few free inodes left (min_inodes) or
+ * it has too few free blocks left (min_blocks) or
+ * it is already running up too large a debt (max_debt).
+ * The parent's group is preferred; if it doesn't satisfy these
+ * conditions we search cyclically through the rest.  If none
+ * of the groups look good we just look for a group with more
+ * free inodes than average (starting at parent's group).
+ *
+ * Debt is incremented each time we allocate a directory and decremented
+ * when we allocate an inode, within 0--255.
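+ *
+ * A worked example with made-up numbers: ngroups = 16,
+ * inodes_per_group = 8192, ndirs = 1024 gives
+ * max_dirs = 1024/16 + 8192/16 = 576 and
+ * min_inodes = avefreei - 8192/4; a group qualifies only while its
+ * directory count stays below the former and its free-inode and
+ * free-block counts stay at or above min_inodes and min_blocks.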
+ */ + +#define INODE_COST 64 +#define BLOCK_COST 256 + +static int find_group_orlov(struct super_block *sb, struct inode *parent) +{ + int parent_group = EXT3_I(parent)->i_block_group; + struct ext3_sb_info *sbi = EXT3_SB(sb); + struct ext3_super_block *es = sbi->s_es; + int ngroups = sbi->s_groups_count; + int inodes_per_group = EXT3_INODES_PER_GROUP(sb); + unsigned int freei, avefreei; + ext3_fsblk_t freeb, avefreeb; + ext3_fsblk_t blocks_per_dir; + unsigned int ndirs; + int max_debt, max_dirs, min_inodes; + ext3_grpblk_t min_blocks; + int group = -1, i; + struct ext3_group_desc *desc; + struct buffer_head *bh; + + freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); + avefreei = freei / ngroups; + freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter); + avefreeb = freeb / ngroups; + ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); + + if ((parent == sb->s_root->d_inode) || + (EXT3_I(parent)->i_flags & EXT3_TOPDIR_FL)) { + int best_ndir = inodes_per_group; + int best_group = -1; + + get_random_bytes(&group, sizeof(group)); + parent_group = (unsigned)group % ngroups; + for (i = 0; i < ngroups; i++) { + group = (parent_group + i) % ngroups; + desc = ext3_get_group_desc (sb, group, &bh); + if (!desc || !desc->bg_free_inodes_count) + continue; + if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) + continue; + if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) + continue; + if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) + continue; + best_group = group; + best_ndir = le16_to_cpu(desc->bg_used_dirs_count); + } + if (best_group >= 0) + return best_group; + goto fallback; + } + + blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - freeb) / ndirs; + + max_dirs = ndirs / ngroups + inodes_per_group / 16; + min_inodes = avefreei - inodes_per_group / 4; + min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4; + + max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext3_fsblk_t)BLOCK_COST); + if (max_debt * INODE_COST > inodes_per_group) + max_debt = inodes_per_group / INODE_COST; + if (max_debt > 255) + max_debt = 255; + if (max_debt == 0) + max_debt = 1; + + for (i = 0; i < ngroups; i++) { + group = (parent_group + i) % ngroups; + desc = ext3_get_group_desc (sb, group, &bh); + if (!desc || !desc->bg_free_inodes_count) + continue; + if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) + continue; + if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes) + continue; + if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) + continue; + return group; + } + +fallback: + for (i = 0; i < ngroups; i++) { + group = (parent_group + i) % ngroups; + desc = ext3_get_group_desc (sb, group, &bh); + if (!desc || !desc->bg_free_inodes_count) + continue; + if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) + return group; + } + + if (avefreei) { + /* + * The free-inodes counter is approximate, and for really small + * filesystems the above test can fail to find any blockgroups + */ + avefreei = 0; + goto fallback; + } + + return -1; +} + +static int find_group_other(struct super_block *sb, struct inode *parent) +{ + int parent_group = EXT3_I(parent)->i_block_group; + int ngroups = EXT3_SB(sb)->s_groups_count; + struct ext3_group_desc *desc; + struct buffer_head *bh; + int group, i; + + /* + * Try to place the inode in its parent directory + */ + group = parent_group; + desc = ext3_get_group_desc (sb, group, &bh); + if (desc && le16_to_cpu(desc->bg_free_inodes_count) && + le16_to_cpu(desc->bg_free_blocks_count)) + return group; + + /* + * 
We're going to place this inode in a different blockgroup from its + * parent. We want to cause files in a common directory to all land in + * the same blockgroup. But we want files which are in a different + * directory which shares a blockgroup with our parent to land in a + * different blockgroup. + * + * So add our directory's i_ino into the starting point for the hash. + */ + group = (group + parent->i_ino) % ngroups; + + /* + * Use a quadratic hash to find a group with a free inode and some free + * blocks. + */ + for (i = 1; i < ngroups; i <<= 1) { + group += i; + if (group >= ngroups) + group -= ngroups; + desc = ext3_get_group_desc (sb, group, &bh); + if (desc && le16_to_cpu(desc->bg_free_inodes_count) && + le16_to_cpu(desc->bg_free_blocks_count)) + return group; + } + + /* + * That failed: try linear search for a free inode, even if that group + * has no free blocks. + */ + group = parent_group; + for (i = 0; i < ngroups; i++) { + if (++group >= ngroups) + group = 0; + desc = ext3_get_group_desc (sb, group, &bh); + if (desc && le16_to_cpu(desc->bg_free_inodes_count)) + return group; + } + + return -1; +} + +/* + * There are two policies for allocating an inode. If the new inode is + * a directory, then a forward search is made for a block group with both + * free space and a low directory-to-inode ratio; if that fails, then of + * the groups with above-average free space, that group with the fewest + * directories already is chosen. + * + * For other inodes, search forward from the parent directory's block + * group to find a free inode. + */ +struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) +{ + struct super_block *sb; + struct buffer_head *bitmap_bh = NULL; + struct buffer_head *bh2; + int group; + unsigned long ino = 0; + struct inode * inode; + struct ext3_group_desc * gdp = NULL; + struct ext3_super_block * es; + struct ext3_inode_info *ei; + struct ext3_sb_info *sbi; + int err = 0; + struct inode *ret; + int i; + + /* Cannot create files in a deleted directory */ + if (!dir || !dir->i_nlink) + return ERR_PTR(-EPERM); + + sb = dir->i_sb; + inode = new_inode(sb); + if (!inode) + return ERR_PTR(-ENOMEM); + ei = EXT3_I(inode); + + sbi = EXT3_SB(sb); + es = sbi->s_es; + if (S_ISDIR(mode)) { + if (test_opt (sb, OLDALLOC)) + group = find_group_dir(sb, dir); + else + group = find_group_orlov(sb, dir); + } else + group = find_group_other(sb, dir); + + err = -ENOSPC; + if (group == -1) + goto out; + + for (i = 0; i < sbi->s_groups_count; i++) { + err = -EIO; + + gdp = ext3_get_group_desc(sb, group, &bh2); + if (!gdp) + goto fail; + + brelse(bitmap_bh); + bitmap_bh = read_inode_bitmap(sb, group); + if (!bitmap_bh) + goto fail; + + ino = 0; + +repeat_in_this_group: + ino = ext3_find_next_zero_bit((unsigned long *) + bitmap_bh->b_data, EXT3_INODES_PER_GROUP(sb), ino); + if (ino < EXT3_INODES_PER_GROUP(sb)) { + + BUFFER_TRACE(bitmap_bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, bitmap_bh); + if (err) + goto fail; + + if (!ext3_set_bit_atomic(sb_bgl_lock(sbi, group), + ino, bitmap_bh->b_data)) { + /* we won it */ + BUFFER_TRACE(bitmap_bh, + "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, + bitmap_bh); + if (err) + goto fail; + goto got; + } + /* we lost it */ + journal_release_buffer(handle, bitmap_bh); + + if (++ino < EXT3_INODES_PER_GROUP(sb)) + goto repeat_in_this_group; + } + + /* + * This case is possible in concurrent environment. It is very + * rare. 
We cannot repeat the find_group_xxx() call because + * that will simply return the same blockgroup, because the + * group descriptor metadata has not yet been updated. + * So we just go onto the next blockgroup. + */ + if (++group == sbi->s_groups_count) + group = 0; + } + err = -ENOSPC; + goto out; + +got: + ino += group * EXT3_INODES_PER_GROUP(sb) + 1; + if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { + ext3_error (sb, "ext3_new_inode", + "reserved inode or inode > inodes count - " + "block_group = %d, inode=%lu", group, ino); + err = -EIO; + goto fail; + } + + BUFFER_TRACE(bh2, "get_write_access"); + err = ext3_journal_get_write_access(handle, bh2); + if (err) goto fail; + spin_lock(sb_bgl_lock(sbi, group)); + gdp->bg_free_inodes_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); + if (S_ISDIR(mode)) { + gdp->bg_used_dirs_count = + cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); + } + spin_unlock(sb_bgl_lock(sbi, group)); + BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, bh2); + if (err) goto fail; + + percpu_counter_dec(&sbi->s_freeinodes_counter); + if (S_ISDIR(mode)) + percpu_counter_inc(&sbi->s_dirs_counter); + sb->s_dirt = 1; + + inode->i_uid = current->fsuid; + if (test_opt (sb, GRPID)) + inode->i_gid = dir->i_gid; + else if (dir->i_mode & S_ISGID) { + inode->i_gid = dir->i_gid; + if (S_ISDIR(mode)) + mode |= S_ISGID; + } else + inode->i_gid = current->fsgid; + inode->i_mode = mode; + + inode->i_ino = ino; + /* This is the optimal IO size (for stat), not the fs block size */ + inode->i_blocks = 0; + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC; + + memset(ei->i_data, 0, sizeof(ei->i_data)); + ei->i_dir_start_lookup = 0; + ei->i_disksize = 0; + + ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; + if (S_ISLNK(mode)) + ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); + /* dirsync only applies to directories */ + if (!S_ISDIR(mode)) + ei->i_flags &= ~EXT3_DIRSYNC_FL; +#ifdef EXT3_FRAGMENTS + ei->i_faddr = 0; + ei->i_frag_no = 0; + ei->i_frag_size = 0; +#endif + ei->i_file_acl = 0; + ei->i_dir_acl = 0; + ei->i_dtime = 0; + ei->i_block_alloc_info = NULL; + ei->i_block_group = group; + + ext3_set_inode_flags(inode); + if (IS_DIRSYNC(inode)) + handle->h_sync = 1; + insert_inode_hash(inode); + spin_lock(&sbi->s_next_gen_lock); + inode->i_generation = sbi->s_next_generation++; + spin_unlock(&sbi->s_next_gen_lock); + + ei->i_state = EXT3_STATE_NEW; + ei->i_extra_isize = + (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? 
+ sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; + + ret = inode; + if(DQUOT_ALLOC_INODE(inode)) { + err = -EDQUOT; + goto fail_drop; + } + + err = ext3_init_acl(handle, inode, dir); + if (err) + goto fail_free_drop; + + err = ext3_init_security(handle,inode, dir); + if (err) + goto fail_free_drop; + + err = ext3_mark_inode_dirty(handle, inode); + if (err) { + ext3_std_error(sb, err); + goto fail_free_drop; + } + + ext3_debug("allocating inode %lu\n", inode->i_ino); + goto really_out; +fail: + ext3_std_error(sb, err); +out: + iput(inode); + ret = ERR_PTR(err); +really_out: + brelse(bitmap_bh); + return ret; + +fail_free_drop: + DQUOT_FREE_INODE(inode); + +fail_drop: + DQUOT_DROP(inode); + inode->i_flags |= S_NOQUOTA; + inode->i_nlink = 0; + iput(inode); + brelse(bitmap_bh); + return ERR_PTR(err); +} + +/* Verify that we are loading a valid orphan from disk */ +struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) +{ + unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count); + unsigned long block_group; + int bit; + struct buffer_head *bitmap_bh = NULL; + struct inode *inode = NULL; + + /* Error cases - e2fsck has already cleaned up for us */ + if (ino > max_ino) { + ext3_warning(sb, __FUNCTION__, + "bad orphan ino %lu! e2fsck was run?", ino); + goto out; + } + + block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); + bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb); + bitmap_bh = read_inode_bitmap(sb, block_group); + if (!bitmap_bh) { + ext3_warning(sb, __FUNCTION__, + "inode bitmap error for orphan %lu", ino); + goto out; + } + + /* Having the inode bit set should be a 100% indicator that this + * is a valid orphan (no e2fsck run on fs). Orphans also include + * inodes that were being truncated, so we can't check i_nlink==0. + */ + if (!ext3_test_bit(bit, bitmap_bh->b_data) || + !(inode = iget(sb, ino)) || is_bad_inode(inode) || + NEXT_ORPHAN(inode) > max_ino) { + ext3_warning(sb, __FUNCTION__, + "bad orphan inode %lu! 
e2fsck was run?", ino); + printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n", + bit, (unsigned long long)bitmap_bh->b_blocknr, + ext3_test_bit(bit, bitmap_bh->b_data)); + printk(KERN_NOTICE "inode=%p\n", inode); + if (inode) { + printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", + is_bad_inode(inode)); + printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n", + NEXT_ORPHAN(inode)); + printk(KERN_NOTICE "max_ino=%lu\n", max_ino); + } + /* Avoid freeing blocks if we got a bad deleted inode */ + if (inode && inode->i_nlink == 0) + inode->i_blocks = 0; + iput(inode); + inode = NULL; + } +out: + brelse(bitmap_bh); + return inode; +} + +unsigned long ext3_count_free_inodes (struct super_block * sb) +{ + unsigned long desc_count; + struct ext3_group_desc *gdp; + int i; +#ifdef EXT3FS_DEBUG + struct ext3_super_block *es; + unsigned long bitmap_count, x; + struct buffer_head *bitmap_bh = NULL; + + es = EXT3_SB(sb)->s_es; + desc_count = 0; + bitmap_count = 0; + gdp = NULL; + for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { + gdp = ext3_get_group_desc (sb, i, NULL); + if (!gdp) + continue; + desc_count += le16_to_cpu(gdp->bg_free_inodes_count); + brelse(bitmap_bh); + bitmap_bh = read_inode_bitmap(sb, i); + if (!bitmap_bh) + continue; + + x = ext3_count_free(bitmap_bh, EXT3_INODES_PER_GROUP(sb) / 8); + printk("group %d: stored = %d, counted = %lu\n", + i, le16_to_cpu(gdp->bg_free_inodes_count), x); + bitmap_count += x; + } + brelse(bitmap_bh); + printk("ext3_count_free_inodes: stored = %u, computed = %lu, %lu\n", + le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); + return desc_count; +#else + desc_count = 0; + for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { + gdp = ext3_get_group_desc (sb, i, NULL); + if (!gdp) + continue; + desc_count += le16_to_cpu(gdp->bg_free_inodes_count); + cond_resched(); + } + return desc_count; +#endif +} + +/* Called at mount-time, super-block is locked */ +unsigned long ext3_count_dirs (struct super_block * sb) +{ + unsigned long count = 0; + int i; + + for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { + struct ext3_group_desc *gdp = ext3_get_group_desc (sb, i, NULL); + if (!gdp) + continue; + count += le16_to_cpu(gdp->bg_used_dirs_count); + } + return count; +} + diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c new file mode 100644 index 000000000000..03ba5bcab186 --- /dev/null +++ b/fs/ext4/inode.c @@ -0,0 +1,3219 @@ +/* + * linux/fs/ext3/inode.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Goal-directed block allocation by Stephen Tweedie + * (sct@redhat.com), 1993, 1998 + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. 
Miller (davem@caip.rutgers.edu), 1995 + * 64-bit file support on 64-bit platforms by Jakub Jelinek + * (jj@sunsite.ms.mff.cuni.cz) + * + * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/time.h> +#include <linux/ext3_jbd.h> +#include <linux/jbd.h> +#include <linux/smp_lock.h> +#include <linux/highuid.h> +#include <linux/pagemap.h> +#include <linux/quotaops.h> +#include <linux/string.h> +#include <linux/buffer_head.h> +#include <linux/writeback.h> +#include <linux/mpage.h> +#include <linux/uio.h> +#include <linux/bio.h> +#include "xattr.h" +#include "acl.h" + +static int ext3_writepage_trans_blocks(struct inode *inode); + +/* + * Test whether an inode is a fast symlink. + */ +static int ext3_inode_is_fast_symlink(struct inode *inode) +{ + int ea_blocks = EXT3_I(inode)->i_file_acl ? + (inode->i_sb->s_blocksize >> 9) : 0; + + return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); +} + +/* + * The ext3 forget function must perform a revoke if we are freeing data + * which has been journaled. Metadata (eg. indirect blocks) must be + * revoked in all cases. + * + * "bh" may be NULL: a metadata block may have been freed from memory + * but there may still be a record of it in the journal, and that record + * still needs to be revoked. + */ +int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, + struct buffer_head *bh, ext3_fsblk_t blocknr) +{ + int err; + + might_sleep(); + + BUFFER_TRACE(bh, "enter"); + + jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, " + "data mode %lx\n", + bh, is_metadata, inode->i_mode, + test_opt(inode->i_sb, DATA_FLAGS)); + + /* Never use the revoke function if we are doing full data + * journaling: there is no need to, and a V1 superblock won't + * support it. Otherwise, only skip the revoke on un-journaled + * data blocks. */ + + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA || + (!is_metadata && !ext3_should_journal_data(inode))) { + if (bh) { + BUFFER_TRACE(bh, "call journal_forget"); + return ext3_journal_forget(handle, bh); + } + return 0; + } + + /* + * data!=journal && (is_metadata || should_journal_data(inode)) + */ + BUFFER_TRACE(bh, "call ext3_journal_revoke"); + err = ext3_journal_revoke(handle, blocknr, bh); + if (err) + ext3_abort(inode->i_sb, __FUNCTION__, + "error %d when attempting revoke", err); + BUFFER_TRACE(bh, "exit"); + return err; +} + +/* + * Work out how many blocks we need to proceed with the next chunk of a + * truncate transaction. + */ +static unsigned long blocks_for_truncate(struct inode *inode) +{ + unsigned long needed; + + needed = inode->i_blocks >> (inode->i_sb->s_blocksize_bits - 9); + + /* Give ourselves just enough room to cope with inodes in which + * i_blocks is corrupt: we've seen disk corruptions in the past + * which resulted in random data in an inode which looked enough + * like a regular file for ext3 to try to delete it. Things + * will go a bit crazy if that happens, but at least we should + * try not to panic the whole kernel. */ + if (needed < 2) + needed = 2; + + /* But we need to bound the transaction so we don't overflow the + * journal. */ + if (needed > EXT3_MAX_TRANS_DATA) + needed = EXT3_MAX_TRANS_DATA; + + return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed; +} + +/* + * Truncate transactions can be complex and absolutely huge. So we need to + * be able to restart the transaction at a conventient checkpoint to make + * sure we don't overflow the journal. 
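+ *
+ * (Editor's aside, not part of the original patch: callers are expected
+ * to use the helpers below roughly as
+ *
+ *	if (try_to_extend_transaction(handle, inode))
+ *		ext3_journal_test_restart(handle, inode);
+ *
+ * trying the cheap extend first, and forcing a commit via the restart
+ * only when the journal cannot grant more credits.)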
+ * + * start_transaction gets us a new handle for a truncate transaction, + * and extend_transaction tries to extend the existing one a bit. If + * extend fails, we need to propagate the failure up and restart the + * transaction in the top-level truncate loop. --sct + */ +static handle_t *start_transaction(struct inode *inode) +{ + handle_t *result; + + result = ext3_journal_start(inode, blocks_for_truncate(inode)); + if (!IS_ERR(result)) + return result; + + ext3_std_error(inode->i_sb, PTR_ERR(result)); + return result; +} + +/* + * Try to extend this transaction for the purposes of truncation. + * + * Returns 0 if we managed to create more room. If we can't create more + * room, and the transaction must be restarted we return 1. + */ +static int try_to_extend_transaction(handle_t *handle, struct inode *inode) +{ + if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS) + return 0; + if (!ext3_journal_extend(handle, blocks_for_truncate(inode))) + return 0; + return 1; +} + +/* + * Restart the transaction associated with *handle. This does a commit, + * so before we call here everything must be consistently dirtied against + * this transaction. + */ +static int ext3_journal_test_restart(handle_t *handle, struct inode *inode) +{ + jbd_debug(2, "restarting handle %p\n", handle); + return ext3_journal_restart(handle, blocks_for_truncate(inode)); +} + +/* + * Called at the last iput() if i_nlink is zero. + */ +void ext3_delete_inode (struct inode * inode) +{ + handle_t *handle; + + truncate_inode_pages(&inode->i_data, 0); + + if (is_bad_inode(inode)) + goto no_delete; + + handle = start_transaction(inode); + if (IS_ERR(handle)) { + /* + * If we're going to skip the normal cleanup, we still need to + * make sure that the in-core orphan linked list is properly + * cleaned up. + */ + ext3_orphan_del(NULL, inode); + goto no_delete; + } + + if (IS_SYNC(inode)) + handle->h_sync = 1; + inode->i_size = 0; + if (inode->i_blocks) + ext3_truncate(inode); + /* + * Kill off the orphan record which ext3_truncate created. + * AKPM: I think this can be inside the above `if'. + * Note that ext3_orphan_del() has to be able to cope with the + * deletion of a non-existent orphan - this is because we don't + * know if ext3_truncate() actually created an orphan record. + * (Well, we could do this if we need to, but heck - it works) + */ + ext3_orphan_del(handle, inode); + EXT3_I(inode)->i_dtime = get_seconds(); + + /* + * One subtle ordering requirement: if anything has gone wrong + * (transaction abort, IO errors, whatever), then we can still + * do these next steps (the fs will already have been marked as + * having errors), but we can't free the inode if the mark_dirty + * fails. + */ + if (ext3_mark_inode_dirty(handle, inode)) + /* If that failed, just do the required in-core inode clear. */ + clear_inode(inode); + else + ext3_free_inode(handle, inode); + ext3_journal_stop(handle); + return; +no_delete: + clear_inode(inode); /* We must guarantee clearing of inode... 
*/ +} + +typedef struct { + __le32 *p; + __le32 key; + struct buffer_head *bh; +} Indirect; + +static inline void add_chain(Indirect *p, struct buffer_head *bh, __le32 *v) +{ + p->key = *(p->p = v); + p->bh = bh; +} + +static int verify_chain(Indirect *from, Indirect *to) +{ + while (from <= to && from->key == *from->p) + from++; + return (from > to); +} + +/** + * ext3_block_to_path - parse the block number into array of offsets + * @inode: inode in question (we are only interested in its superblock) + * @i_block: block number to be parsed + * @offsets: array to store the offsets in + * @boundary: set this non-zero if the referred-to block is likely to be + * followed (on disk) by an indirect block. + * + * To store the locations of file's data ext3 uses a data structure common + * for UNIX filesystems - tree of pointers anchored in the inode, with + * data blocks at leaves and indirect blocks in intermediate nodes. + * This function translates the block number into path in that tree - + * return value is the path length and @offsets[n] is the offset of + * pointer to (n+1)th node in the nth one. If @block is out of range + * (negative or too large) warning is printed and zero returned. + * + * Note: function doesn't find node addresses, so no IO is needed. All + * we need to know is the capacity of indirect blocks (taken from the + * inode->i_sb). + */ + +/* + * Portability note: the last comparison (check that we fit into triple + * indirect block) is spelled differently, because otherwise on an + * architecture with 32-bit longs and 8Kb pages we might get into trouble + * if our filesystem had 8Kb blocks. We might use long long, but that would + * kill us on x86. Oh, well, at least the sign propagation does not matter - + * i_block would have to be negative in the very beginning, so we would not + * get there at all. + */ + +static int ext3_block_to_path(struct inode *inode, + long i_block, int offsets[4], int *boundary) +{ + int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb); + int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb); + const long direct_blocks = EXT3_NDIR_BLOCKS, + indirect_blocks = ptrs, + double_blocks = (1 << (ptrs_bits * 2)); + int n = 0; + int final = 0; + + if (i_block < 0) { + ext3_warning (inode->i_sb, "ext3_block_to_path", "block < 0"); + } else if (i_block < direct_blocks) { + offsets[n++] = i_block; + final = direct_blocks; + } else if ( (i_block -= direct_blocks) < indirect_blocks) { + offsets[n++] = EXT3_IND_BLOCK; + offsets[n++] = i_block; + final = ptrs; + } else if ((i_block -= indirect_blocks) < double_blocks) { + offsets[n++] = EXT3_DIND_BLOCK; + offsets[n++] = i_block >> ptrs_bits; + offsets[n++] = i_block & (ptrs - 1); + final = ptrs; + } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { + offsets[n++] = EXT3_TIND_BLOCK; + offsets[n++] = i_block >> (ptrs_bits * 2); + offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); + offsets[n++] = i_block & (ptrs - 1); + final = ptrs; + } else { + ext3_warning(inode->i_sb, "ext3_block_to_path", "block > big"); + } + if (boundary) + *boundary = final - 1 - (i_block & (ptrs - 1)); + return n; +} + +/** + * ext3_get_branch - read the chain of indirect blocks leading to data + * @inode: inode in question + * @depth: depth of the chain (1 - direct pointer, etc.) 
+ * @offsets: offsets of pointers in inode/indirect blocks + * @chain: place to store the result + * @err: here we store the error value + * + * Function fills the array of triples <key, p, bh> and returns %NULL + * if everything went OK or the pointer to the last filled triple + * (incomplete one) otherwise. Upon the return chain[i].key contains + * the number of (i+1)-th block in the chain (as it is stored in memory, + * i.e. little-endian 32-bit), chain[i].p contains the address of that + * number (it points into struct inode for i==0 and into the bh->b_data + * for i>0) and chain[i].bh points to the buffer_head of i-th indirect + * block for i>0 and NULL for i==0. In other words, it holds the block + * numbers of the chain, addresses they were taken from (and where we can + * verify that chain did not change) and buffer_heads hosting these + * numbers. + * + * Function stops when it stumbles upon zero pointer (absent block) + * (pointer to last triple returned, *@err == 0) + * or when it gets an IO error reading an indirect block + * (ditto, *@err == -EIO) + * or when it notices that chain had been changed while it was reading + * (ditto, *@err == -EAGAIN) + * or when it reads all @depth-1 indirect blocks successfully and finds + * the whole chain, all way to the data (returns %NULL, *err == 0). + */ +static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets, + Indirect chain[4], int *err) +{ + struct super_block *sb = inode->i_sb; + Indirect *p = chain; + struct buffer_head *bh; + + *err = 0; + /* i_data is not going away, no lock needed */ + add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets); + if (!p->key) + goto no_block; + while (--depth) { + bh = sb_bread(sb, le32_to_cpu(p->key)); + if (!bh) + goto failure; + /* Reader: pointers */ + if (!verify_chain(chain, p)) + goto changed; + add_chain(++p, bh, (__le32*)bh->b_data + *++offsets); + /* Reader: end */ + if (!p->key) + goto no_block; + } + return NULL; + +changed: + brelse(bh); + *err = -EAGAIN; + goto no_block; +failure: + *err = -EIO; +no_block: + return p; +} + +/** + * ext3_find_near - find a place for allocation with sufficient locality + * @inode: owner + * @ind: descriptor of indirect block. + * + * This function returns the prefered place for block allocation. + * It is used when heuristic for sequential allocation fails. + * Rules are: + * + if there is a block to the left of our position - allocate near it. + * + if pointer will live in indirect block - allocate near that block. + * + if pointer will live in inode - allocate in the same + * cylinder group. + * + * In the latter case we colour the starting block by the callers PID to + * prevent it from clashing with concurrent allocations for a different inode + * in the same block group. The PID is used here so that functionally related + * files will be close-by on-disk. + * + * Caller must make sure that @ind is valid and will stay that way. + */ +static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind) +{ + struct ext3_inode_info *ei = EXT3_I(inode); + __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; + __le32 *p; + ext3_fsblk_t bg_start; + ext3_grpblk_t colour; + + /* Try to find previous block */ + for (p = ind->p - 1; p >= start; p--) { + if (*p) + return le32_to_cpu(*p); + } + + /* No such thing, so let's try location of indirect block */ + if (ind->bh) + return ind->bh->b_blocknr; + + /* + * It is going to be referred to from the inode itself? OK, just put it + * into the same cylinder group then. 
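+	 *
+	 * (Editor's note, illustration only: with the arithmetic below and,
+	 * say, 32768 blocks per group, a task with pid 4103 gets
+	 *
+	 *	colour = (4103 % 16) * (32768 / 16) = 7 * 2048 = 14336
+	 *
+	 * so up to 16 concurrent allocators start their searches in
+	 * disjoint 2048-block windows of the group.)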
+ */ + bg_start = ext3_group_first_block_no(inode->i_sb, ei->i_block_group); + colour = (current->pid % 16) * + (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); + return bg_start + colour; +} + +/** + * ext3_find_goal - find a prefered place for allocation. + * @inode: owner + * @block: block we want + * @chain: chain of indirect blocks + * @partial: pointer to the last triple within a chain + * @goal: place to store the result. + * + * Normally this function find the prefered place for block allocation, + * stores it in *@goal and returns zero. + */ + +static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block, + Indirect chain[4], Indirect *partial) +{ + struct ext3_block_alloc_info *block_i; + + block_i = EXT3_I(inode)->i_block_alloc_info; + + /* + * try the heuristic for sequential allocation, + * failing that at least try to get decent locality. + */ + if (block_i && (block == block_i->last_alloc_logical_block + 1) + && (block_i->last_alloc_physical_block != 0)) { + return block_i->last_alloc_physical_block + 1; + } + + return ext3_find_near(inode, partial); +} + +/** + * ext3_blks_to_allocate: Look up the block map and count the number + * of direct blocks need to be allocated for the given branch. + * + * @branch: chain of indirect blocks + * @k: number of blocks need for indirect blocks + * @blks: number of data blocks to be mapped. + * @blocks_to_boundary: the offset in the indirect block + * + * return the total number of blocks to be allocate, including the + * direct and indirect blocks. + */ +static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks, + int blocks_to_boundary) +{ + unsigned long count = 0; + + /* + * Simple case, [t,d]Indirect block(s) has not allocated yet + * then it's clear blocks on that path have not allocated + */ + if (k > 0) { + /* right now we don't handle cross boundary allocation */ + if (blks < blocks_to_boundary + 1) + count += blks; + else + count += blocks_to_boundary + 1; + return count; + } + + count++; + while (count < blks && count <= blocks_to_boundary && + le32_to_cpu(*(branch[0].p + count)) == 0) { + count++; + } + return count; +} + +/** + * ext3_alloc_blocks: multiple allocate blocks needed for a branch + * @indirect_blks: the number of blocks need to allocate for indirect + * blocks + * + * @new_blocks: on return it will store the new block numbers for + * the indirect blocks(if needed) and the first direct block, + * @blks: on return it will store the total number of allocated + * direct blocks + */ +static int ext3_alloc_blocks(handle_t *handle, struct inode *inode, + ext3_fsblk_t goal, int indirect_blks, int blks, + ext3_fsblk_t new_blocks[4], int *err) +{ + int target, i; + unsigned long count = 0; + int index = 0; + ext3_fsblk_t current_block = 0; + int ret = 0; + + /* + * Here we try to allocate the requested multiple blocks at once, + * on a best-effort basis. + * To build a branch, we should allocate blocks for + * the indirect blocks(if not allocated yet), and at least + * the first direct block of this branch. 
That's the + * minimum number of blocks need to allocate(required) + */ + target = blks + indirect_blks; + + while (1) { + count = target; + /* allocating blocks for indirect blocks and direct blocks */ + current_block = ext3_new_blocks(handle,inode,goal,&count,err); + if (*err) + goto failed_out; + + target -= count; + /* allocate blocks for indirect blocks */ + while (index < indirect_blks && count) { + new_blocks[index++] = current_block++; + count--; + } + + if (count > 0) + break; + } + + /* save the new block number for the first direct block */ + new_blocks[index] = current_block; + + /* total number of blocks allocated for direct blocks */ + ret = count; + *err = 0; + return ret; +failed_out: + for (i = 0; i <index; i++) + ext3_free_blocks(handle, inode, new_blocks[i], 1); + return ret; +} + +/** + * ext3_alloc_branch - allocate and set up a chain of blocks. + * @inode: owner + * @indirect_blks: number of allocated indirect blocks + * @blks: number of allocated direct blocks + * @offsets: offsets (in the blocks) to store the pointers to next. + * @branch: place to store the chain in. + * + * This function allocates blocks, zeroes out all but the last one, + * links them into chain and (if we are synchronous) writes them to disk. + * In other words, it prepares a branch that can be spliced onto the + * inode. It stores the information about that chain in the branch[], in + * the same format as ext3_get_branch() would do. We are calling it after + * we had read the existing part of chain and partial points to the last + * triple of that (one with zero ->key). Upon the exit we have the same + * picture as after the successful ext3_get_block(), except that in one + * place chain is disconnected - *branch->p is still zero (we did not + * set the last link), but branch->key contains the number that should + * be placed into *branch->p to fill that gap. + * + * If allocation fails we free all blocks we've allocated (and forget + * their buffer_heads) and return the error value the from failed + * ext3_alloc_block() (normally -ENOSPC). Otherwise we set the chain + * as described above and return 0. + */ +static int ext3_alloc_branch(handle_t *handle, struct inode *inode, + int indirect_blks, int *blks, ext3_fsblk_t goal, + int *offsets, Indirect *branch) +{ + int blocksize = inode->i_sb->s_blocksize; + int i, n = 0; + int err = 0; + struct buffer_head *bh; + int num; + ext3_fsblk_t new_blocks[4]; + ext3_fsblk_t current_block; + + num = ext3_alloc_blocks(handle, inode, goal, indirect_blks, + *blks, new_blocks, &err); + if (err) + return err; + + branch[0].key = cpu_to_le32(new_blocks[0]); + /* + * metadata blocks and data blocks are allocated. + */ + for (n = 1; n <= indirect_blks; n++) { + /* + * Get buffer_head for parent block, zero it out + * and set the pointer to new one, then send + * parent to disk. 
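+		 *
+		 * (Editor's summary of the loop body below, names as in
+		 * this function:
+		 *
+		 *	bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+		 *	ext3_journal_get_create_access(handle, bh);
+		 *	memset(bh->b_data, 0, blocksize);
+		 *	branch[n].key = cpu_to_le32(new_blocks[n]);
+		 *	ext3_journal_dirty_metadata(handle, bh);
+		 *
+		 * i.e. get the buffer, declare it newly created to the
+		 * journal, zero it, plant the link, then journal it.)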
+ */ + bh = sb_getblk(inode->i_sb, new_blocks[n-1]); + branch[n].bh = bh; + lock_buffer(bh); + BUFFER_TRACE(bh, "call get_create_access"); + err = ext3_journal_get_create_access(handle, bh); + if (err) { + unlock_buffer(bh); + brelse(bh); + goto failed; + } + + memset(bh->b_data, 0, blocksize); + branch[n].p = (__le32 *) bh->b_data + offsets[n]; + branch[n].key = cpu_to_le32(new_blocks[n]); + *branch[n].p = branch[n].key; + if ( n == indirect_blks) { + current_block = new_blocks[n]; + /* + * End of chain, update the last new metablock of + * the chain to point to the new allocated + * data blocks numbers + */ + for (i=1; i < num; i++) + *(branch[n].p + i) = cpu_to_le32(++current_block); + } + BUFFER_TRACE(bh, "marking uptodate"); + set_buffer_uptodate(bh); + unlock_buffer(bh); + + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, bh); + if (err) + goto failed; + } + *blks = num; + return err; +failed: + /* Allocation failed, free what we already allocated */ + for (i = 1; i <= n ; i++) { + BUFFER_TRACE(branch[i].bh, "call journal_forget"); + ext3_journal_forget(handle, branch[i].bh); + } + for (i = 0; i <indirect_blks; i++) + ext3_free_blocks(handle, inode, new_blocks[i], 1); + + ext3_free_blocks(handle, inode, new_blocks[i], num); + + return err; +} + +/** + * ext3_splice_branch - splice the allocated branch onto inode. + * @inode: owner + * @block: (logical) number of block we are adding + * @chain: chain of indirect blocks (with a missing link - see + * ext3_alloc_branch) + * @where: location of missing link + * @num: number of indirect blocks we are adding + * @blks: number of direct blocks we are adding + * + * This function fills the missing link and does all housekeeping needed in + * inode (->i_blocks, etc.). In case of success we end up with the full + * chain to new block and return 0. + */ +static int ext3_splice_branch(handle_t *handle, struct inode *inode, + long block, Indirect *where, int num, int blks) +{ + int i; + int err = 0; + struct ext3_block_alloc_info *block_i; + ext3_fsblk_t current_block; + + block_i = EXT3_I(inode)->i_block_alloc_info; + /* + * If we're splicing into a [td]indirect block (as opposed to the + * inode) then we need to get write access to the [td]indirect block + * before the splice. + */ + if (where->bh) { + BUFFER_TRACE(where->bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, where->bh); + if (err) + goto err_out; + } + /* That's it */ + + *where->p = where->key; + + /* + * Update the host buffer_head or inode to point to more just allocated + * direct blocks blocks + */ + if (num == 0 && blks > 1) { + current_block = le32_to_cpu(where->key) + 1; + for (i = 1; i < blks; i++) + *(where->p + i ) = cpu_to_le32(current_block++); + } + + /* + * update the most recently allocated logical & physical block + * in i_block_alloc_info, to assist find the proper goal block for next + * allocation + */ + if (block_i) { + block_i->last_alloc_logical_block = block + blks - 1; + block_i->last_alloc_physical_block = + le32_to_cpu(where[num].key) + blks - 1; + } + + /* We are done with atomic stuff, now do the rest of housekeeping */ + + inode->i_ctime = CURRENT_TIME_SEC; + ext3_mark_inode_dirty(handle, inode); + + /* had we spliced it onto indirect block? */ + if (where->bh) { + /* + * If we spliced it onto an indirect block, we haven't + * altered the inode. 
Note however that if it is being spliced + * onto an indirect block at the very end of the file (the + * file is growing) then we *will* alter the inode to reflect + * the new i_size. But that is not done here - it is done in + * generic_commit_write->__mark_inode_dirty->ext3_dirty_inode. + */ + jbd_debug(5, "splicing indirect only\n"); + BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, where->bh); + if (err) + goto err_out; + } else { + /* + * OK, we spliced it into the inode itself on a direct block. + * Inode was dirtied above. + */ + jbd_debug(5, "splicing direct\n"); + } + return err; + +err_out: + for (i = 1; i <= num; i++) { + BUFFER_TRACE(where[i].bh, "call journal_forget"); + ext3_journal_forget(handle, where[i].bh); + ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1); + } + ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks); + + return err; +} + +/* + * Allocation strategy is simple: if we have to allocate something, we will + * have to go the whole way to leaf. So let's do it before attaching anything + * to tree, set linkage between the newborn blocks, write them if sync is + * required, recheck the path, free and repeat if check fails, otherwise + * set the last missing link (that will protect us from any truncate-generated + * removals - all blocks on the path are immune now) and possibly force the + * write on the parent block. + * That has a nice additional property: no special recovery from the failed + * allocations is needed - we simply release blocks and do not touch anything + * reachable from inode. + * + * `handle' can be NULL if create == 0. + * + * The BKL may not be held on entry here. Be sure to take it early. + * return > 0, # of blocks mapped or allocated. + * return = 0, if plain lookup failed. + * return < 0, error case. + */ +int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, + sector_t iblock, unsigned long maxblocks, + struct buffer_head *bh_result, + int create, int extend_disksize) +{ + int err = -EIO; + int offsets[4]; + Indirect chain[4]; + Indirect *partial; + ext3_fsblk_t goal; + int indirect_blks; + int blocks_to_boundary = 0; + int depth; + struct ext3_inode_info *ei = EXT3_I(inode); + int count = 0; + ext3_fsblk_t first_block = 0; + + + J_ASSERT(handle != NULL || create == 0); + depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary); + + if (depth == 0) + goto out; + + partial = ext3_get_branch(inode, depth, offsets, chain, &err); + + /* Simplest case - block found, no allocation needed */ + if (!partial) { + first_block = le32_to_cpu(chain[depth - 1].key); + clear_buffer_new(bh_result); + count++; + /*map more blocks*/ + while (count < maxblocks && count <= blocks_to_boundary) { + ext3_fsblk_t blk; + + if (!verify_chain(chain, partial)) { + /* + * Indirect block might be removed by + * truncate while we were reading it. + * Handling of that case: forget what we've + * got now. Flag the err as EAGAIN, so it + * will reread. 
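+				 *
+				 * (Editor's note: the -EAGAIN set here is
+				 * consumed further down, where the chain is
+				 * re-read under truncate_mutex:
+				 *
+				 *	if (err == -EAGAIN ||
+				 *	    !verify_chain(chain, partial))
+				 *		partial = ext3_get_branch(...);
+				 *
+				 * so -EAGAIN is normally absorbed there
+				 * rather than returned to the caller.)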
+ */ + err = -EAGAIN; + count = 0; + break; + } + blk = le32_to_cpu(*(chain[depth-1].p + count)); + + if (blk == first_block + count) + count++; + else + break; + } + if (err != -EAGAIN) + goto got_it; + } + + /* Next simple case - plain lookup or failed read of indirect block */ + if (!create || err == -EIO) + goto cleanup; + + mutex_lock(&ei->truncate_mutex); + + /* + * If the indirect block is missing while we are reading + * the chain(ext3_get_branch() returns -EAGAIN err), or + * if the chain has been changed after we grab the semaphore, + * (either because another process truncated this branch, or + * another get_block allocated this branch) re-grab the chain to see if + * the request block has been allocated or not. + * + * Since we already block the truncate/other get_block + * at this point, we will have the current copy of the chain when we + * splice the branch into the tree. + */ + if (err == -EAGAIN || !verify_chain(chain, partial)) { + while (partial > chain) { + brelse(partial->bh); + partial--; + } + partial = ext3_get_branch(inode, depth, offsets, chain, &err); + if (!partial) { + count++; + mutex_unlock(&ei->truncate_mutex); + if (err) + goto cleanup; + clear_buffer_new(bh_result); + goto got_it; + } + } + + /* + * Okay, we need to do block allocation. Lazily initialize the block + * allocation info here if necessary + */ + if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) + ext3_init_block_alloc_info(inode); + + goal = ext3_find_goal(inode, iblock, chain, partial); + + /* the number of blocks need to allocate for [d,t]indirect blocks */ + indirect_blks = (chain + depth) - partial - 1; + + /* + * Next look up the indirect map to count the totoal number of + * direct blocks to allocate for this branch. + */ + count = ext3_blks_to_allocate(partial, indirect_blks, + maxblocks, blocks_to_boundary); + /* + * Block out ext3_truncate while we alter the tree + */ + err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal, + offsets + (partial - chain), partial); + + /* + * The ext3_splice_branch call will free and forget any buffers + * on the new chain if there is a failure, but that risks using + * up transaction credits, especially for bitmaps where the + * credits cannot be returned. Can we handle this somehow? We + * may need to return -EAGAIN upwards in the worst case. --sct + */ + if (!err) + err = ext3_splice_branch(handle, inode, iblock, + partial, indirect_blks, count); + /* + * i_disksize growing is protected by truncate_mutex. 
Don't forget to + * protect it if you're about to implement concurrent + * ext3_get_block() -bzzz + */ + if (!err && extend_disksize && inode->i_size > ei->i_disksize) + ei->i_disksize = inode->i_size; + mutex_unlock(&ei->truncate_mutex); + if (err) + goto cleanup; + + set_buffer_new(bh_result); +got_it: + map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); + if (count > blocks_to_boundary) + set_buffer_boundary(bh_result); + err = count; + /* Clean up and exit */ + partial = chain + depth - 1; /* the whole chain */ +cleanup: + while (partial > chain) { + BUFFER_TRACE(partial->bh, "call brelse"); + brelse(partial->bh); + partial--; + } + BUFFER_TRACE(bh_result, "returned"); +out: + return err; +} + +#define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32) + +static int ext3_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) +{ + handle_t *handle = journal_current_handle(); + int ret = 0; + unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; + + if (!create) + goto get_block; /* A read */ + + if (max_blocks == 1) + goto get_block; /* A single block get */ + + if (handle->h_transaction->t_state == T_LOCKED) { + /* + * Huge direct-io writes can hold off commits for long + * periods of time. Let this commit run. + */ + ext3_journal_stop(handle); + handle = ext3_journal_start(inode, DIO_CREDITS); + if (IS_ERR(handle)) + ret = PTR_ERR(handle); + goto get_block; + } + + if (handle->h_buffer_credits <= EXT3_RESERVE_TRANS_BLOCKS) { + /* + * Getting low on buffer credits... + */ + ret = ext3_journal_extend(handle, DIO_CREDITS); + if (ret > 0) { + /* + * Couldn't extend the transaction. Start a new one. + */ + ret = ext3_journal_restart(handle, DIO_CREDITS); + } + } + +get_block: + if (ret == 0) { + ret = ext3_get_blocks_handle(handle, inode, iblock, + max_blocks, bh_result, create, 0); + if (ret > 0) { + bh_result->b_size = (ret << inode->i_blkbits); + ret = 0; + } + } + return ret; +} + +/* + * `handle' can be NULL if create is zero + */ +struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode, + long block, int create, int *errp) +{ + struct buffer_head dummy; + int fatal = 0, err; + + J_ASSERT(handle != NULL || create == 0); + + dummy.b_state = 0; + dummy.b_blocknr = -1000; + buffer_trace_init(&dummy.b_history); + err = ext3_get_blocks_handle(handle, inode, block, 1, + &dummy, create, 1); + /* + * ext3_get_blocks_handle() returns number of blocks + * mapped. 0 in case of a HOLE. + */ + if (err > 0) { + if (err > 1) + WARN_ON(1); + err = 0; + } + *errp = err; + if (!err && buffer_mapped(&dummy)) { + struct buffer_head *bh; + bh = sb_getblk(inode->i_sb, dummy.b_blocknr); + if (!bh) { + *errp = -EIO; + goto err; + } + if (buffer_new(&dummy)) { + J_ASSERT(create != 0); + J_ASSERT(handle != 0); + + /* + * Now that we do not always journal data, we should + * keep in mind whether this should always journal the + * new buffer as metadata. For now, regular file + * writes use ext3_get_block instead, so it's not a + * problem. 
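+			 *
+			 * (Editor's summary of the steps below: the fresh
+			 * block is zeroed and journalled as metadata,
+			 *
+			 *	ext3_journal_get_create_access(handle, bh);
+			 *	memset(bh->b_data, 0, blocksize);
+			 *	set_buffer_uptodate(bh);
+			 *	ext3_journal_dirty_metadata(handle, bh);
+			 *
+			 * which is the treatment the directory and symlink
+			 * callers of ext3_getblk()/ext3_bread() rely on.)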
+ */ + lock_buffer(bh); + BUFFER_TRACE(bh, "call get_create_access"); + fatal = ext3_journal_get_create_access(handle, bh); + if (!fatal && !buffer_uptodate(bh)) { + memset(bh->b_data,0,inode->i_sb->s_blocksize); + set_buffer_uptodate(bh); + } + unlock_buffer(bh); + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, bh); + if (!fatal) + fatal = err; + } else { + BUFFER_TRACE(bh, "not a new buffer"); + } + if (fatal) { + *errp = fatal; + brelse(bh); + bh = NULL; + } + return bh; + } +err: + return NULL; +} + +struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, + int block, int create, int *err) +{ + struct buffer_head * bh; + + bh = ext3_getblk(handle, inode, block, create, err); + if (!bh) + return bh; + if (buffer_uptodate(bh)) + return bh; + ll_rw_block(READ_META, 1, &bh); + wait_on_buffer(bh); + if (buffer_uptodate(bh)) + return bh; + put_bh(bh); + *err = -EIO; + return NULL; +} + +static int walk_page_buffers( handle_t *handle, + struct buffer_head *head, + unsigned from, + unsigned to, + int *partial, + int (*fn)( handle_t *handle, + struct buffer_head *bh)) +{ + struct buffer_head *bh; + unsigned block_start, block_end; + unsigned blocksize = head->b_size; + int err, ret = 0; + struct buffer_head *next; + + for ( bh = head, block_start = 0; + ret == 0 && (bh != head || !block_start); + block_start = block_end, bh = next) + { + next = bh->b_this_page; + block_end = block_start + blocksize; + if (block_end <= from || block_start >= to) { + if (partial && !buffer_uptodate(bh)) + *partial = 1; + continue; + } + err = (*fn)(handle, bh); + if (!ret) + ret = err; + } + return ret; +} + +/* + * To preserve ordering, it is essential that the hole instantiation and + * the data write be encapsulated in a single transaction. We cannot + * close off a transaction and start a new one between the ext3_get_block() + * and the commit_write(). So doing the journal_start at the start of + * prepare_write() is the right place. + * + * Also, this function can nest inside ext3_writepage() -> + * block_write_full_page(). In that case, we *know* that ext3_writepage() + * has generated enough buffer credits to do the whole page. So we won't + * block on the journal in that case, which is good, because the caller may + * be PF_MEMALLOC. + * + * By accident, ext3 can be reentered when a transaction is open via + * quota file writes. If we were to commit the transaction while thus + * reentered, there can be a deadlock - we would be holding a quota + * lock, and the commit would never complete if another thread had a + * transaction open and was blocking on the quota lock - a ranking + * violation. + * + * So what we do is to rely on the fact that journal_stop/journal_start + * will _not_ run commit under these circumstances because handle->h_ref + * is elevated. We'll still have enough credits for the tiny quotafile + * write. 
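+ *
+ * (Editor's illustration, not part of the original patch: because
+ * handle->h_ref is elevated, a reentered start/stop pair only moves the
+ * refcount,
+ *
+ *	handle = ext3_journal_start(inode, n);	(h_ref: 1 -> 2)
+ *	...
+ *	ext3_journal_stop(handle);		(h_ref: 2 -> 1, no commit)
+ *
+ * so the outer transaction stays open and the quota write cannot
+ * trigger the deadlock-prone commit described above.)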
+ */ +static int do_journal_get_write_access(handle_t *handle, + struct buffer_head *bh) +{ + if (!buffer_mapped(bh) || buffer_freed(bh)) + return 0; + return ext3_journal_get_write_access(handle, bh); +} + +static int ext3_prepare_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + struct inode *inode = page->mapping->host; + int ret, needed_blocks = ext3_writepage_trans_blocks(inode); + handle_t *handle; + int retries = 0; + +retry: + handle = ext3_journal_start(inode, needed_blocks); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } + if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) + ret = nobh_prepare_write(page, from, to, ext3_get_block); + else + ret = block_prepare_write(page, from, to, ext3_get_block); + if (ret) + goto prepare_write_failed; + + if (ext3_should_journal_data(inode)) { + ret = walk_page_buffers(handle, page_buffers(page), + from, to, NULL, do_journal_get_write_access); + } +prepare_write_failed: + if (ret) + ext3_journal_stop(handle); + if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) + goto retry; +out: + return ret; +} + +int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh) +{ + int err = journal_dirty_data(handle, bh); + if (err) + ext3_journal_abort_handle(__FUNCTION__, __FUNCTION__, + bh, handle,err); + return err; +} + +/* For commit_write() in data=journal mode */ +static int commit_write_fn(handle_t *handle, struct buffer_head *bh) +{ + if (!buffer_mapped(bh) || buffer_freed(bh)) + return 0; + set_buffer_uptodate(bh); + return ext3_journal_dirty_metadata(handle, bh); +} + +/* + * We need to pick up the new inode size which generic_commit_write gave us + * `file' can be NULL - eg, when called from page_symlink(). + * + * ext3 never places buffers on inode->i_mapping->private_list. metadata + * buffers are managed internally. + */ +static int ext3_ordered_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + handle_t *handle = ext3_journal_current_handle(); + struct inode *inode = page->mapping->host; + int ret = 0, ret2; + + ret = walk_page_buffers(handle, page_buffers(page), + from, to, NULL, ext3_journal_dirty_data); + + if (ret == 0) { + /* + * generic_commit_write() will run mark_inode_dirty() if i_size + * changes. So let's piggyback the i_disksize mark_inode_dirty + * into that. 
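+		 *
+		 * (Editor's note: the size reached by this write is
+		 * computed below as
+		 *
+		 *	new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
+		 *
+		 * e.g. with 4K pages, bytes [0,100) of page index 3 give
+		 * 3 * 4096 + 100 = 12388; note i_disksize is only ever
+		 * ratcheted upwards here, never shrunk.)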
+ */ + loff_t new_i_size; + + new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + if (new_i_size > EXT3_I(inode)->i_disksize) + EXT3_I(inode)->i_disksize = new_i_size; + ret = generic_commit_write(file, page, from, to); + } + ret2 = ext3_journal_stop(handle); + if (!ret) + ret = ret2; + return ret; +} + +static int ext3_writeback_commit_write(struct file *file, struct page *page, + unsigned from, unsigned to) +{ + handle_t *handle = ext3_journal_current_handle(); + struct inode *inode = page->mapping->host; + int ret = 0, ret2; + loff_t new_i_size; + + new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + if (new_i_size > EXT3_I(inode)->i_disksize) + EXT3_I(inode)->i_disksize = new_i_size; + + if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) + ret = nobh_commit_write(file, page, from, to); + else + ret = generic_commit_write(file, page, from, to); + + ret2 = ext3_journal_stop(handle); + if (!ret) + ret = ret2; + return ret; +} + +static int ext3_journalled_commit_write(struct file *file, + struct page *page, unsigned from, unsigned to) +{ + handle_t *handle = ext3_journal_current_handle(); + struct inode *inode = page->mapping->host; + int ret = 0, ret2; + int partial = 0; + loff_t pos; + + /* + * Here we duplicate the generic_commit_write() functionality + */ + pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; + + ret = walk_page_buffers(handle, page_buffers(page), from, + to, &partial, commit_write_fn); + if (!partial) + SetPageUptodate(page); + if (pos > inode->i_size) + i_size_write(inode, pos); + EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; + if (inode->i_size > EXT3_I(inode)->i_disksize) { + EXT3_I(inode)->i_disksize = inode->i_size; + ret2 = ext3_mark_inode_dirty(handle, inode); + if (!ret) + ret = ret2; + } + ret2 = ext3_journal_stop(handle); + if (!ret) + ret = ret2; + return ret; +} + +/* + * bmap() is special. It gets used by applications such as lilo and by + * the swapper to find the on-disk block of a specific piece of data. + * + * Naturally, this is dangerous if the block concerned is still in the + * journal. If somebody makes a swapfile on an ext3 data-journaling + * filesystem and enables swap, then they may get a nasty shock when the + * data getting swapped to that swapfile suddenly gets overwritten by + * the original zero's written out previously to the journal and + * awaiting writeback in the kernel's buffer cache. + * + * So, if we see any bmap calls here on a modified, data-journaled file, + * take extra steps to flush any blocks which might be in the cache. + */ +static sector_t ext3_bmap(struct address_space *mapping, sector_t block) +{ + struct inode *inode = mapping->host; + journal_t *journal; + int err; + + if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) { + /* + * This is a REALLY heavyweight approach, but the use of + * bmap on dirty files is expected to be extremely rare: + * only if we run lilo or swapon on a freshly made file + * do we expect this to happen. + * + * (bmap requires CAP_SYS_RAWIO so this does not + * represent an unprivileged user DOS attack --- we'd be + * in trouble if mortal users could trigger this path at + * will.) + * + * NB. EXT3_STATE_JDATA is not set on files other than + * regular files. If somebody wants to bmap a directory + * or symlink and gets confused because the buffer + * hasn't yet been flushed to disk, they deserve + * everything they get. 
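+		 *
+		 * (Editor's summary of the flush below:
+		 *
+		 *	journal_lock_updates(journal);
+		 *	err = journal_flush(journal);
+		 *	journal_unlock_updates(journal);
+		 *
+		 * i.e. block new transactions, drain and checkpoint the
+		 * whole journal, then let updates resume.)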
+ */ + + EXT3_I(inode)->i_state &= ~EXT3_STATE_JDATA; + journal = EXT3_JOURNAL(inode); + journal_lock_updates(journal); + err = journal_flush(journal); + journal_unlock_updates(journal); + + if (err) + return 0; + } + + return generic_block_bmap(mapping,block,ext3_get_block); +} + +static int bget_one(handle_t *handle, struct buffer_head *bh) +{ + get_bh(bh); + return 0; +} + +static int bput_one(handle_t *handle, struct buffer_head *bh) +{ + put_bh(bh); + return 0; +} + +static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) +{ + if (buffer_mapped(bh)) + return ext3_journal_dirty_data(handle, bh); + return 0; +} + +/* + * Note that we always start a transaction even if we're not journalling + * data. This is to preserve ordering: any hole instantiation within + * __block_write_full_page -> ext3_get_block() should be journalled + * along with the data so we don't crash and then get metadata which + * refers to old data. + * + * In all journalling modes block_write_full_page() will start the I/O. + * + * Problem: + * + * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> + * ext3_writepage() + * + * Similar for: + * + * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ... + * + * Same applies to ext3_get_block(). We will deadlock on various things like + * lock_journal and i_truncate_mutex. + * + * Setting PF_MEMALLOC here doesn't work - too many internal memory + * allocations fail. + * + * 16May01: If we're reentered then journal_current_handle() will be + * non-zero. We simply *return*. + * + * 1 July 2001: @@@ FIXME: + * In journalled data mode, a data buffer may be metadata against the + * current transaction. But the same file is part of a shared mapping + * and someone does a writepage() on it. + * + * We will move the buffer onto the async_data list, but *after* it has + * been dirtied. So there's a small window where we have dirty data on + * BJ_Metadata. + * + * Note that this only applies to the last partial page in the file. The + * bit which block_write_full_page() uses prepare/commit for. (That's + * broken code anyway: it's wrong for msync()). + * + * It's a rare case: affects the final partial page, for journalled data + * where the file is subject to bith write() and writepage() in the same + * transction. To fix it we'll need a custom block_write_full_page(). + * We'll probably need that anyway for journalling writepage() output. + * + * We don't honour synchronous mounts for writepage(). That would be + * disastrous. Any write() or metadata operation will sync the fs for + * us. + * + * AKPM2: if all the page's buffers are mapped to disk and !data=journal, + * we don't need to open a transaction here. + */ +static int ext3_ordered_writepage(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + struct buffer_head *page_bufs; + handle_t *handle = NULL; + int ret = 0; + int err; + + J_ASSERT(PageLocked(page)); + + /* + * We give up here if we're reentered, because it might be for a + * different filesystem. 
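+	 *
+	 * (Editor's note: "reentered" is detected below simply as
+	 *
+	 *	if (ext3_journal_current_handle())
+	 *		goto out_fail;
+	 *
+	 * and out_fail redirties the page, so the write is deferred to a
+	 * later writepage call rather than lost.)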
+ */ + if (ext3_journal_current_handle()) + goto out_fail; + + handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); + + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out_fail; + } + + if (!page_has_buffers(page)) { + create_empty_buffers(page, inode->i_sb->s_blocksize, + (1 << BH_Dirty)|(1 << BH_Uptodate)); + } + page_bufs = page_buffers(page); + walk_page_buffers(handle, page_bufs, 0, + PAGE_CACHE_SIZE, NULL, bget_one); + + ret = block_write_full_page(page, ext3_get_block, wbc); + + /* + * The page can become unlocked at any point now, and + * truncate can then come in and change things. So we + * can't touch *page from now on. But *page_bufs is + * safe due to elevated refcount. + */ + + /* + * And attach them to the current transaction. But only if + * block_write_full_page() succeeded. Otherwise they are unmapped, + * and generally junk. + */ + if (ret == 0) { + err = walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, + NULL, journal_dirty_data_fn); + if (!ret) + ret = err; + } + walk_page_buffers(handle, page_bufs, 0, + PAGE_CACHE_SIZE, NULL, bput_one); + err = ext3_journal_stop(handle); + if (!ret) + ret = err; + return ret; + +out_fail: + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return ret; +} + +static int ext3_writeback_writepage(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + handle_t *handle = NULL; + int ret = 0; + int err; + + if (ext3_journal_current_handle()) + goto out_fail; + + handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out_fail; + } + + if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) + ret = nobh_writepage(page, ext3_get_block, wbc); + else + ret = block_write_full_page(page, ext3_get_block, wbc); + + err = ext3_journal_stop(handle); + if (!ret) + ret = err; + return ret; + +out_fail: + redirty_page_for_writepage(wbc, page); + unlock_page(page); + return ret; +} + +static int ext3_journalled_writepage(struct page *page, + struct writeback_control *wbc) +{ + struct inode *inode = page->mapping->host; + handle_t *handle = NULL; + int ret = 0; + int err; + + if (ext3_journal_current_handle()) + goto no_write; + + handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto no_write; + } + + if (!page_has_buffers(page) || PageChecked(page)) { + /* + * It's mmapped pagecache. Add buffers and journal it. There + * doesn't seem much point in redirtying the page here. + */ + ClearPageChecked(page); + ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, + ext3_get_block); + if (ret != 0) { + ext3_journal_stop(handle); + goto out_unlock; + } + ret = walk_page_buffers(handle, page_buffers(page), 0, + PAGE_CACHE_SIZE, NULL, do_journal_get_write_access); + + err = walk_page_buffers(handle, page_buffers(page), 0, + PAGE_CACHE_SIZE, NULL, commit_write_fn); + if (ret == 0) + ret = err; + EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; + unlock_page(page); + } else { + /* + * It may be a page full of checkpoint-mode buffers. We don't + * really know unless we go poke around in the buffer_heads. + * But block_write_full_page will do the right thing. 
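+		 *
+		 * (Editor's note: in that case the call below is ordinary
+		 * buffer writeback,
+		 *
+		 *	ret = block_write_full_page(page, ext3_get_block, wbc);
+		 *
+		 * with the handle opened at the top of this function still
+		 * covering any allocation ext3_get_block() may do.)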
+ */ + ret = block_write_full_page(page, ext3_get_block, wbc); + } + err = ext3_journal_stop(handle); + if (!ret) + ret = err; +out: + return ret; + +no_write: + redirty_page_for_writepage(wbc, page); +out_unlock: + unlock_page(page); + goto out; +} + +static int ext3_readpage(struct file *file, struct page *page) +{ + return mpage_readpage(page, ext3_get_block); +} + +static int +ext3_readpages(struct file *file, struct address_space *mapping, + struct list_head *pages, unsigned nr_pages) +{ + return mpage_readpages(mapping, pages, nr_pages, ext3_get_block); +} + +static void ext3_invalidatepage(struct page *page, unsigned long offset) +{ + journal_t *journal = EXT3_JOURNAL(page->mapping->host); + + /* + * If it's a full truncate we just forget about the pending dirtying + */ + if (offset == 0) + ClearPageChecked(page); + + journal_invalidatepage(journal, page, offset); +} + +static int ext3_releasepage(struct page *page, gfp_t wait) +{ + journal_t *journal = EXT3_JOURNAL(page->mapping->host); + + WARN_ON(PageChecked(page)); + if (!page_has_buffers(page)) + return 0; + return journal_try_to_free_buffers(journal, page, wait); +} + +/* + * If the O_DIRECT write will extend the file then add this inode to the + * orphan list. So recovery will truncate it back to the original size + * if the machine crashes during the write. + * + * If the O_DIRECT write is intantiating holes inside i_size and the machine + * crashes then stale disk data _may_ be exposed inside the file. + */ +static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, + const struct iovec *iov, loff_t offset, + unsigned long nr_segs) +{ + struct file *file = iocb->ki_filp; + struct inode *inode = file->f_mapping->host; + struct ext3_inode_info *ei = EXT3_I(inode); + handle_t *handle = NULL; + ssize_t ret; + int orphan = 0; + size_t count = iov_length(iov, nr_segs); + + if (rw == WRITE) { + loff_t final_size = offset + count; + + handle = ext3_journal_start(inode, DIO_CREDITS); + if (IS_ERR(handle)) { + ret = PTR_ERR(handle); + goto out; + } + if (final_size > inode->i_size) { + ret = ext3_orphan_add(handle, inode); + if (ret) + goto out_stop; + orphan = 1; + ei->i_disksize = inode->i_size; + } + } + + ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, + offset, nr_segs, + ext3_get_block, NULL); + + /* + * Reacquire the handle: ext3_get_block() can restart the transaction + */ + handle = journal_current_handle(); + +out_stop: + if (handle) { + int err; + + if (orphan && inode->i_nlink) + ext3_orphan_del(handle, inode); + if (orphan && ret > 0) { + loff_t end = offset + ret; + if (end > inode->i_size) { + ei->i_disksize = end; + i_size_write(inode, end); + /* + * We're going to return a positive `ret' + * here due to non-zero-length I/O, so there's + * no way of reporting error returns from + * ext3_mark_inode_dirty() to userspace. So + * ignore it. + */ + ext3_mark_inode_dirty(handle, inode); + } + } + err = ext3_journal_stop(handle); + if (ret == 0) + ret = err; + } +out: + return ret; +} + +/* + * Pages can be marked dirty completely asynchronously from ext3's journalling + * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do + * much here because ->set_page_dirty is called under VFS locks. The page is + * not necessarily locked. + * + * We cannot just dirty the page and leave attached buffers clean, because the + * buffers' dirty state is "definitive". We cannot just set the buffers dirty + * or jbddirty because all the journalling code will explode. 
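+ *
+ * (Editor's note: concretely the split is
+ *
+ *	ext3_journalled_set_page_dirty()  ->  SetPageChecked(page)
+ *	ext3_journalled_writepage()       ->  ClearPageChecked(page),
+ *					      then journal the buffers
+ *
+ * so all buffer-state manipulation happens with the page locked, in
+ * writepage, as the next paragraph explains.)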
+ * + * So what we do is to mark the page "pending dirty" and next time writepage + * is called, propagate that into the buffers appropriately. + */ +static int ext3_journalled_set_page_dirty(struct page *page) +{ + SetPageChecked(page); + return __set_page_dirty_nobuffers(page); +} + +static const struct address_space_operations ext3_ordered_aops = { + .readpage = ext3_readpage, + .readpages = ext3_readpages, + .writepage = ext3_ordered_writepage, + .sync_page = block_sync_page, + .prepare_write = ext3_prepare_write, + .commit_write = ext3_ordered_commit_write, + .bmap = ext3_bmap, + .invalidatepage = ext3_invalidatepage, + .releasepage = ext3_releasepage, + .direct_IO = ext3_direct_IO, + .migratepage = buffer_migrate_page, +}; + +static const struct address_space_operations ext3_writeback_aops = { + .readpage = ext3_readpage, + .readpages = ext3_readpages, + .writepage = ext3_writeback_writepage, + .sync_page = block_sync_page, + .prepare_write = ext3_prepare_write, + .commit_write = ext3_writeback_commit_write, + .bmap = ext3_bmap, + .invalidatepage = ext3_invalidatepage, + .releasepage = ext3_releasepage, + .direct_IO = ext3_direct_IO, + .migratepage = buffer_migrate_page, +}; + +static const struct address_space_operations ext3_journalled_aops = { + .readpage = ext3_readpage, + .readpages = ext3_readpages, + .writepage = ext3_journalled_writepage, + .sync_page = block_sync_page, + .prepare_write = ext3_prepare_write, + .commit_write = ext3_journalled_commit_write, + .set_page_dirty = ext3_journalled_set_page_dirty, + .bmap = ext3_bmap, + .invalidatepage = ext3_invalidatepage, + .releasepage = ext3_releasepage, +}; + +void ext3_set_aops(struct inode *inode) +{ + if (ext3_should_order_data(inode)) + inode->i_mapping->a_ops = &ext3_ordered_aops; + else if (ext3_should_writeback_data(inode)) + inode->i_mapping->a_ops = &ext3_writeback_aops; + else + inode->i_mapping->a_ops = &ext3_journalled_aops; +} + +/* + * ext3_block_truncate_page() zeroes out a mapping from file offset `from' + * up to the end of the block which corresponds to `from'. + * This required during truncate. We need to physically zero the tail end + * of that block so it doesn't yield old data if the file is later grown. + */ +static int ext3_block_truncate_page(handle_t *handle, struct page *page, + struct address_space *mapping, loff_t from) +{ + ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT; + unsigned offset = from & (PAGE_CACHE_SIZE-1); + unsigned blocksize, iblock, length, pos; + struct inode *inode = mapping->host; + struct buffer_head *bh; + int err = 0; + void *kaddr; + + blocksize = inode->i_sb->s_blocksize; + length = blocksize - (offset & (blocksize - 1)); + iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); + + /* + * For "nobh" option, we can only work if we don't need to + * read-in the page - otherwise we create buffers to do the IO. 
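To make the index/offset arithmetic above concrete, a hypothetical worked example (4096-byte pages, 1024-byte blocks; all numbers invented for illustration):

        /*
         * Truncate to from = 10000, PAGE_CACHE_SIZE = 4096,
         * blocksize = 1024:
         *
         *   index  = 10000 >> 12          = 2     (page containing EOF)
         *   offset = 10000 & 4095         = 1808  (EOF offset in that page)
         *   length = 1024 - (1808 & 1023) = 240   (bytes to zero, through
         *                                          the end of the EOF block)
         *   iblock = 2 << (12 - 10)       = 8     (first block of the page)
         */
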
+ */ + if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && + ext3_should_writeback_data(inode) && PageUptodate(page)) { + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + offset, 0, length); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + set_page_dirty(page); + goto unlock; + } + + if (!page_has_buffers(page)) + create_empty_buffers(page, blocksize, 0); + + /* Find the buffer that contains "offset" */ + bh = page_buffers(page); + pos = blocksize; + while (offset >= pos) { + bh = bh->b_this_page; + iblock++; + pos += blocksize; + } + + err = 0; + if (buffer_freed(bh)) { + BUFFER_TRACE(bh, "freed: skip"); + goto unlock; + } + + if (!buffer_mapped(bh)) { + BUFFER_TRACE(bh, "unmapped"); + ext3_get_block(inode, iblock, bh, 0); + /* unmapped? It's a hole - nothing to do */ + if (!buffer_mapped(bh)) { + BUFFER_TRACE(bh, "still unmapped"); + goto unlock; + } + } + + /* Ok, it's mapped. Make sure it's up-to-date */ + if (PageUptodate(page)) + set_buffer_uptodate(bh); + + if (!buffer_uptodate(bh)) { + err = -EIO; + ll_rw_block(READ, 1, &bh); + wait_on_buffer(bh); + /* Uhhuh. Read error. Complain and punt. */ + if (!buffer_uptodate(bh)) + goto unlock; + } + + if (ext3_should_journal_data(inode)) { + BUFFER_TRACE(bh, "get write access"); + err = ext3_journal_get_write_access(handle, bh); + if (err) + goto unlock; + } + + kaddr = kmap_atomic(page, KM_USER0); + memset(kaddr + offset, 0, length); + flush_dcache_page(page); + kunmap_atomic(kaddr, KM_USER0); + + BUFFER_TRACE(bh, "zeroed end of block"); + + err = 0; + if (ext3_should_journal_data(inode)) { + err = ext3_journal_dirty_metadata(handle, bh); + } else { + if (ext3_should_order_data(inode)) + err = ext3_journal_dirty_data(handle, bh); + mark_buffer_dirty(bh); + } + +unlock: + unlock_page(page); + page_cache_release(page); + return err; +} + +/* + * Probably it should be a library function... search for first non-zero word + * or memcmp with zero_page, whatever is better for particular architecture. + * Linus? + */ +static inline int all_zeroes(__le32 *p, __le32 *q) +{ + while (p < q) + if (*p++) + return 0; + return 1; +} + +/** + * ext3_find_shared - find the indirect blocks for partial truncation. + * @inode: inode in question + * @depth: depth of the affected branch + * @offsets: offsets of pointers in that branch (see ext3_block_to_path) + * @chain: place to store the pointers to partial indirect blocks + * @top: place to the (detached) top of branch + * + * This is a helper function used by ext3_truncate(). + * + * When we do truncate() we may have to clean the ends of several + * indirect blocks but leave the blocks themselves alive. Block is + * partially truncated if some data below the new i_size is refered + * from it (and it is on the path to the first completely truncated + * data block, indeed). We have to free the top of that path along + * with everything to the right of the path. Since no allocation + * past the truncation point is possible until ext3_truncate() + * finishes, we may safely do the latter, but top of branch may + * require special attention - pageout below the truncation point + * might try to populate it. + * + * We atomically detach the top of branch from the tree, store the + * block number of its root in *@top, pointers to buffer_heads of + * partially truncated blocks - in @chain[].bh and pointers to + * their last elements that should not be removed - in + * @chain[].p. Return value is the pointer to last filled element + * of @chain. 
+ * + * The work left to caller to do the actual freeing of subtrees: + * a) free the subtree starting from *@top + * b) free the subtrees whose roots are stored in + * (@chain[i].p+1 .. end of @chain[i].bh->b_data) + * c) free the subtrees growing from the inode past the @chain[0]. + * (no partially truncated stuff there). */ + +static Indirect *ext3_find_shared(struct inode *inode, int depth, + int offsets[4], Indirect chain[4], __le32 *top) +{ + Indirect *partial, *p; + int k, err; + + *top = 0; + /* Make k index the deepest non-null offest + 1 */ + for (k = depth; k > 1 && !offsets[k-1]; k--) + ; + partial = ext3_get_branch(inode, k, offsets, chain, &err); + /* Writer: pointers */ + if (!partial) + partial = chain + k-1; + /* + * If the branch acquired continuation since we've looked at it - + * fine, it should all survive and (new) top doesn't belong to us. + */ + if (!partial->key && *partial->p) + /* Writer: end */ + goto no_top; + for (p=partial; p>chain && all_zeroes((__le32*)p->bh->b_data,p->p); p--) + ; + /* + * OK, we've found the last block that must survive. The rest of our + * branch should be detached before unlocking. However, if that rest + * of branch is all ours and does not grow immediately from the inode + * it's easier to cheat and just decrement partial->p. + */ + if (p == chain + k - 1 && p > chain) { + p->p--; + } else { + *top = *p->p; + /* Nope, don't do this in ext3. Must leave the tree intact */ +#if 0 + *p->p = 0; +#endif + } + /* Writer: end */ + + while(partial > p) { + brelse(partial->bh); + partial--; + } +no_top: + return partial; +} + +/* + * Zero a number of block pointers in either an inode or an indirect block. + * If we restart the transaction we must again get write access to the + * indirect block for further modification. + * + * We release `count' blocks on disk, but (last - first) may be greater + * than `count' because there can be holes in there. + */ +static void ext3_clear_blocks(handle_t *handle, struct inode *inode, + struct buffer_head *bh, ext3_fsblk_t block_to_free, + unsigned long count, __le32 *first, __le32 *last) +{ + __le32 *p; + if (try_to_extend_transaction(handle, inode)) { + if (bh) { + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); + ext3_journal_dirty_metadata(handle, bh); + } + ext3_mark_inode_dirty(handle, inode); + ext3_journal_test_restart(handle, inode); + if (bh) { + BUFFER_TRACE(bh, "retaking write access"); + ext3_journal_get_write_access(handle, bh); + } + } + + /* + * Any buffers which are on the journal will be in memory. We find + * them on the hash table so journal_revoke() will run journal_forget() + * on them. We've already detached each block from the file, so + * bforget() in journal_forget() should be safe. + * + * AKPM: turn on bforget in journal_forget()!!! + */ + for (p = first; p < last; p++) { + u32 nr = le32_to_cpu(*p); + if (nr) { + struct buffer_head *bh; + + *p = 0; + bh = sb_find_get_block(inode->i_sb, nr); + ext3_forget(handle, 0, inode, bh, nr); + } + } + + ext3_free_blocks(handle, inode, block_to_free, count); +} + +/** + * ext3_free_data - free a list of data blocks + * @handle: handle for this transaction + * @inode: inode we are dealing with + * @this_bh: indirect buffer_head which contains *@first and *@last + * @first: array of block numbers + * @last: points immediately past the end of array + * + * We are freeing all blocks refered from that array (numbers are stored as + * little-endian 32-bit) and updating @inode->i_blocks appropriately. 
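A worked example of the batching this leads to may help; the loop in ext3_free_data() below coalesces contiguous pointers into single ext3_clear_blocks() calls (array contents invented for illustration):

        /*
         *   *first..*last: [ 100, 101, 102, 0, 200, 201, 550 ]
         *
         *   -> ext3_clear_blocks(..., block_to_free = 100, count = 3, ...)
         *   -> ext3_clear_blocks(..., block_to_free = 200, count = 2, ...)
         *   -> ext3_clear_blocks(..., block_to_free = 550, count = 1, ...)
         *
         * The zero entry is a hole and is skipped; the final call is the
         * count > 0 flush issued after the loop ends.
         */
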
+ * + * We accumulate contiguous runs of blocks to free. Conveniently, if these + * blocks are contiguous then releasing them at one time will only affect one + * or two bitmap blocks (+ group descriptor(s) and superblock) and we won't + * actually use a lot of journal space. + * + * @this_bh will be %NULL if @first and @last point into the inode's direct + * block pointers. + */ +static void ext3_free_data(handle_t *handle, struct inode *inode, + struct buffer_head *this_bh, + __le32 *first, __le32 *last) +{ + ext3_fsblk_t block_to_free = 0; /* Starting block # of a run */ + unsigned long count = 0; /* Number of blocks in the run */ + __le32 *block_to_free_p = NULL; /* Pointer into inode/ind + corresponding to + block_to_free */ + ext3_fsblk_t nr; /* Current block # */ + __le32 *p; /* Pointer into inode/ind + for current block */ + int err; + + if (this_bh) { /* For indirect block */ + BUFFER_TRACE(this_bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, this_bh); + /* Important: if we can't update the indirect pointers + * to the blocks, we can't free them. */ + if (err) + return; + } + + for (p = first; p < last; p++) { + nr = le32_to_cpu(*p); + if (nr) { + /* accumulate blocks to free if they're contiguous */ + if (count == 0) { + block_to_free = nr; + block_to_free_p = p; + count = 1; + } else if (nr == block_to_free + count) { + count++; + } else { + ext3_clear_blocks(handle, inode, this_bh, + block_to_free, + count, block_to_free_p, p); + block_to_free = nr; + block_to_free_p = p; + count = 1; + } + } + } + + if (count > 0) + ext3_clear_blocks(handle, inode, this_bh, block_to_free, + count, block_to_free_p, p); + + if (this_bh) { + BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata"); + ext3_journal_dirty_metadata(handle, this_bh); + } +} + +/** + * ext3_free_branches - free an array of branches + * @handle: JBD handle for this transaction + * @inode: inode we are dealing with + * @parent_bh: the buffer_head which contains *@first and *@last + * @first: array of block numbers + * @last: pointer immediately past the end of array + * @depth: depth of the branches to free + * + * We are freeing all blocks refered from these branches (numbers are + * stored as little-endian 32-bit) and updating @inode->i_blocks + * appropriately. + */ +static void ext3_free_branches(handle_t *handle, struct inode *inode, + struct buffer_head *parent_bh, + __le32 *first, __le32 *last, int depth) +{ + ext3_fsblk_t nr; + __le32 *p; + + if (is_handle_aborted(handle)) + return; + + if (depth--) { + struct buffer_head *bh; + int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); + p = last; + while (--p >= first) { + nr = le32_to_cpu(*p); + if (!nr) + continue; /* A hole */ + + /* Go read the buffer for the next level down */ + bh = sb_bread(inode->i_sb, nr); + + /* + * A read failure? Report error and clear slot + * (should be rare). + */ + if (!bh) { + ext3_error(inode->i_sb, "ext3_free_branches", + "Read failure, inode=%lu, block="E3FSBLK, + inode->i_ino, nr); + continue; + } + + /* This zaps the entire block. Bottom up. */ + BUFFER_TRACE(bh, "free child branches"); + ext3_free_branches(handle, inode, bh, + (__le32*)bh->b_data, + (__le32*)bh->b_data + addr_per_block, + depth); + + /* + * We've probably journalled the indirect block several + * times during the truncate. But it's no longer + * needed and we now drop it from the transaction via + * journal_revoke(). + * + * That's easy if it's exclusively part of this + * transaction. 
But if it's part of the committing + * transaction then journal_forget() will simply + * brelse() it. That means that if the underlying + * block is reallocated in ext3_get_block(), + * unmap_underlying_metadata() will find this block + * and will try to get rid of it. damn, damn. + * + * If this block has already been committed to the + * journal, a revoke record will be written. And + * revoke records must be emitted *before* clearing + * this block's bit in the bitmaps. + */ + ext3_forget(handle, 1, inode, bh, bh->b_blocknr); + + /* + * Everything below this this pointer has been + * released. Now let this top-of-subtree go. + * + * We want the freeing of this indirect block to be + * atomic in the journal with the updating of the + * bitmap block which owns it. So make some room in + * the journal. + * + * We zero the parent pointer *after* freeing its + * pointee in the bitmaps, so if extend_transaction() + * for some reason fails to put the bitmap changes and + * the release into the same transaction, recovery + * will merely complain about releasing a free block, + * rather than leaking blocks. + */ + if (is_handle_aborted(handle)) + return; + if (try_to_extend_transaction(handle, inode)) { + ext3_mark_inode_dirty(handle, inode); + ext3_journal_test_restart(handle, inode); + } + + ext3_free_blocks(handle, inode, nr, 1); + + if (parent_bh) { + /* + * The block which we have just freed is + * pointed to by an indirect block: journal it + */ + BUFFER_TRACE(parent_bh, "get_write_access"); + if (!ext3_journal_get_write_access(handle, + parent_bh)){ + *p = 0; + BUFFER_TRACE(parent_bh, + "call ext3_journal_dirty_metadata"); + ext3_journal_dirty_metadata(handle, + parent_bh); + } + } + } + } else { + /* We have reached the bottom of the tree. */ + BUFFER_TRACE(parent_bh, "free data blocks"); + ext3_free_data(handle, inode, parent_bh, first, last); + } +} + +/* + * ext3_truncate() + * + * We block out ext3_get_block() block instantiations across the entire + * transaction, and VFS/VM ensures that ext3_truncate() cannot run + * simultaneously on behalf of the same inode. + * + * As we work through the truncate and commmit bits of it to the journal there + * is one core, guiding principle: the file's tree must always be consistent on + * disk. We must be able to restart the truncate after a crash. + * + * The file's tree may be transiently inconsistent in memory (although it + * probably isn't), but whenever we close off and commit a journal transaction, + * the contents of (the filesystem + the journal) must be consistent and + * restartable. It's pretty simple, really: bottom up, right to left (although + * left-to-right works OK too). + * + * Note that at recovery time, journal replay occurs *before* the restart of + * truncate against the orphan inode list. + * + * The committed inode has the new, desired i_size (which is the same as + * i_disksize in this case). After a crash, ext3_orphan_cleanup() will see + * that this inode's truncate did not complete and it will again call + * ext3_truncate() to have another go. So there will be instantiated blocks + * to the right of the truncation point in a crashed ext3 filesystem. But + * that's fine - as long as they are linked from the inode, the post-crash + * ext3_truncate() run will find them and release them. 
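Condensed, the crash-safety ordering described above amounts to the following sequence (an illustrative sketch, not a quotation of the function body that follows):

        /*
         *   ext3_orphan_add(handle, inode);   (1) a crash from here on makes
         *                                         recovery re-run the truncate
         *   ei->i_disksize = inode->i_size;   (2) the new, shorter size goes
         *                                         to the on-disk inode
         *   ... free blocks bottom-up,        (3) the tree stays consistent
         *       right to left ...                 at every commit boundary
         *   ext3_orphan_del(handle, inode);   (4) truncate complete, no
         *                                         recovery action needed
         */
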
+ */ +void ext3_truncate(struct inode *inode) +{ + handle_t *handle; + struct ext3_inode_info *ei = EXT3_I(inode); + __le32 *i_data = ei->i_data; + int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); + struct address_space *mapping = inode->i_mapping; + int offsets[4]; + Indirect chain[4]; + Indirect *partial; + __le32 nr = 0; + int n; + long last_block; + unsigned blocksize = inode->i_sb->s_blocksize; + struct page *page; + + if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode))) + return; + if (ext3_inode_is_fast_symlink(inode)) + return; + if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) + return; + + /* + * We have to lock the EOF page here, because lock_page() nests + * outside journal_start(). + */ + if ((inode->i_size & (blocksize - 1)) == 0) { + /* Block boundary? Nothing to do */ + page = NULL; + } else { + page = grab_cache_page(mapping, + inode->i_size >> PAGE_CACHE_SHIFT); + if (!page) + return; + } + + handle = start_transaction(inode); + if (IS_ERR(handle)) { + if (page) { + clear_highpage(page); + flush_dcache_page(page); + unlock_page(page); + page_cache_release(page); + } + return; /* AKPM: return what? */ + } + + last_block = (inode->i_size + blocksize-1) + >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); + + if (page) + ext3_block_truncate_page(handle, page, mapping, inode->i_size); + + n = ext3_block_to_path(inode, last_block, offsets, NULL); + if (n == 0) + goto out_stop; /* error */ + + /* + * OK. This truncate is going to happen. We add the inode to the + * orphan list, so that if this truncate spans multiple transactions, + * and we crash, we will resume the truncate when the filesystem + * recovers. It also marks the inode dirty, to catch the new size. + * + * Implication: the file must always be in a sane, consistent + * truncatable state while each transaction commits. + */ + if (ext3_orphan_add(handle, inode)) + goto out_stop; + + /* + * The orphan list entry will now protect us from any crash which + * occurs before the truncate completes, so it is now safe to propagate + * the new, shorter inode size (held for now in i_size) into the + * on-disk inode. We do this via i_disksize, which is the value which + * ext3 *really* writes onto the disk inode. + */ + ei->i_disksize = inode->i_size; + + /* + * From here we block out all ext3_get_block() callers who want to + * modify the block allocation tree. + */ + mutex_lock(&ei->truncate_mutex); + + if (n == 1) { /* direct blocks */ + ext3_free_data(handle, inode, NULL, i_data+offsets[0], + i_data + EXT3_NDIR_BLOCKS); + goto do_indirects; + } + + partial = ext3_find_shared(inode, n, offsets, chain, &nr); + /* Kill the top of shared branch (not detached) */ + if (nr) { + if (partial == chain) { + /* Shared branch grows from the inode */ + ext3_free_branches(handle, inode, NULL, + &nr, &nr+1, (chain+n-1) - partial); + *partial->p = 0; + /* + * We mark the inode dirty prior to restart, + * and prior to stop. No need for it here. 
+ */ + } else { + /* Shared branch grows from an indirect block */ + BUFFER_TRACE(partial->bh, "get_write_access"); + ext3_free_branches(handle, inode, partial->bh, + partial->p, + partial->p+1, (chain+n-1) - partial); + } + } + /* Clear the ends of indirect blocks on the shared branch */ + while (partial > chain) { + ext3_free_branches(handle, inode, partial->bh, partial->p + 1, + (__le32*)partial->bh->b_data+addr_per_block, + (chain+n-1) - partial); + BUFFER_TRACE(partial->bh, "call brelse"); + brelse (partial->bh); + partial--; + } +do_indirects: + /* Kill the remaining (whole) subtrees */ + switch (offsets[0]) { + default: + nr = i_data[EXT3_IND_BLOCK]; + if (nr) { + ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 1); + i_data[EXT3_IND_BLOCK] = 0; + } + case EXT3_IND_BLOCK: + nr = i_data[EXT3_DIND_BLOCK]; + if (nr) { + ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 2); + i_data[EXT3_DIND_BLOCK] = 0; + } + case EXT3_DIND_BLOCK: + nr = i_data[EXT3_TIND_BLOCK]; + if (nr) { + ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 3); + i_data[EXT3_TIND_BLOCK] = 0; + } + case EXT3_TIND_BLOCK: + ; + } + + ext3_discard_reservation(inode); + + mutex_unlock(&ei->truncate_mutex); + inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; + ext3_mark_inode_dirty(handle, inode); + + /* + * In a multi-transaction truncate, we only make the final transaction + * synchronous + */ + if (IS_SYNC(inode)) + handle->h_sync = 1; +out_stop: + /* + * If this was a simple ftruncate(), and the file will remain alive + * then we need to clear up the orphan record which we created above. + * However, if this was a real unlink then we were called by + * ext3_delete_inode(), and we allow that function to clean up the + * orphan info for us. + */ + if (inode->i_nlink) + ext3_orphan_del(handle, inode); + + ext3_journal_stop(handle); +} + +static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, + unsigned long ino, struct ext3_iloc *iloc) +{ + unsigned long desc, group_desc, block_group; + unsigned long offset; + ext3_fsblk_t block; + struct buffer_head *bh; + struct ext3_group_desc * gdp; + + if (!ext3_valid_inum(sb, ino)) { + /* + * This error is already checked for in namei.c unless we are + * looking at an NFS filehandle, in which case no error + * report is needed + */ + return 0; + } + + block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); + if (block_group >= EXT3_SB(sb)->s_groups_count) { + ext3_error(sb,"ext3_get_inode_block","group >= groups count"); + return 0; + } + smp_rmb(); + group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb); + desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1); + bh = EXT3_SB(sb)->s_group_desc[group_desc]; + if (!bh) { + ext3_error (sb, "ext3_get_inode_block", + "Descriptor not loaded"); + return 0; + } + + gdp = (struct ext3_group_desc *)bh->b_data; + /* + * Figure out the offset within the block group inode table + */ + offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) * + EXT3_INODE_SIZE(sb); + block = le32_to_cpu(gdp[desc].bg_inode_table) + + (offset >> EXT3_BLOCK_SIZE_BITS(sb)); + + iloc->block_group = block_group; + iloc->offset = offset & (EXT3_BLOCK_SIZE(sb) - 1); + return block; +} + +/* + * ext3_get_inode_loc returns with an extra refcount against the inode's + * underlying buffer_head on success. If 'in_mem' is true, we have all + * data in memory that is needed to recreate the on-disk version of this + * inode. 
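A hypothetical worked example of the location arithmetic above (assuming 4096-byte blocks, 128-byte inodes and 32768 inodes per group; values invented):

        /*
         *   ino = 40000:
         *
         *   block_group  = 39999 / 32768         = 1
         *   offset       = (39999 % 32768) * 128 = 925568
         *   block        = bg_inode_table + (925568 >> 12)
         *                = bg_inode_table + 225
         *   iloc->offset = 925568 & 4095         = 3968
         */
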
+ */ +static int __ext3_get_inode_loc(struct inode *inode, + struct ext3_iloc *iloc, int in_mem) +{ + ext3_fsblk_t block; + struct buffer_head *bh; + + block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc); + if (!block) + return -EIO; + + bh = sb_getblk(inode->i_sb, block); + if (!bh) { + ext3_error (inode->i_sb, "ext3_get_inode_loc", + "unable to read inode block - " + "inode=%lu, block="E3FSBLK, + inode->i_ino, block); + return -EIO; + } + if (!buffer_uptodate(bh)) { + lock_buffer(bh); + if (buffer_uptodate(bh)) { + /* someone brought it uptodate while we waited */ + unlock_buffer(bh); + goto has_buffer; + } + + /* + * If we have all information of the inode in memory and this + * is the only valid inode in the block, we need not read the + * block. + */ + if (in_mem) { + struct buffer_head *bitmap_bh; + struct ext3_group_desc *desc; + int inodes_per_buffer; + int inode_offset, i; + int block_group; + int start; + + block_group = (inode->i_ino - 1) / + EXT3_INODES_PER_GROUP(inode->i_sb); + inodes_per_buffer = bh->b_size / + EXT3_INODE_SIZE(inode->i_sb); + inode_offset = ((inode->i_ino - 1) % + EXT3_INODES_PER_GROUP(inode->i_sb)); + start = inode_offset & ~(inodes_per_buffer - 1); + + /* Is the inode bitmap in cache? */ + desc = ext3_get_group_desc(inode->i_sb, + block_group, NULL); + if (!desc) + goto make_io; + + bitmap_bh = sb_getblk(inode->i_sb, + le32_to_cpu(desc->bg_inode_bitmap)); + if (!bitmap_bh) + goto make_io; + + /* + * If the inode bitmap isn't in cache then the + * optimisation may end up performing two reads instead + * of one, so skip it. + */ + if (!buffer_uptodate(bitmap_bh)) { + brelse(bitmap_bh); + goto make_io; + } + for (i = start; i < start + inodes_per_buffer; i++) { + if (i == inode_offset) + continue; + if (ext3_test_bit(i, bitmap_bh->b_data)) + break; + } + brelse(bitmap_bh); + if (i == start + inodes_per_buffer) { + /* all other inodes are free, so skip I/O */ + memset(bh->b_data, 0, bh->b_size); + set_buffer_uptodate(bh); + unlock_buffer(bh); + goto has_buffer; + } + } + +make_io: + /* + * There are other valid inodes in the buffer, this inode + * has in-inode xattrs, or we don't have this inode in memory. + * Read the block from disk. + */ + get_bh(bh); + bh->b_end_io = end_buffer_read_sync; + submit_bh(READ_META, bh); + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { + ext3_error(inode->i_sb, "ext3_get_inode_loc", + "unable to read inode block - " + "inode=%lu, block="E3FSBLK, + inode->i_ino, block); + brelse(bh); + return -EIO; + } + } +has_buffer: + iloc->bh = bh; + return 0; +} + +int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc) +{ + /* We have all inode data except xattrs in memory here. 
*/ + return __ext3_get_inode_loc(inode, iloc, + !(EXT3_I(inode)->i_state & EXT3_STATE_XATTR)); +} + +void ext3_set_inode_flags(struct inode *inode) +{ + unsigned int flags = EXT3_I(inode)->i_flags; + + inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); + if (flags & EXT3_SYNC_FL) + inode->i_flags |= S_SYNC; + if (flags & EXT3_APPEND_FL) + inode->i_flags |= S_APPEND; + if (flags & EXT3_IMMUTABLE_FL) + inode->i_flags |= S_IMMUTABLE; + if (flags & EXT3_NOATIME_FL) + inode->i_flags |= S_NOATIME; + if (flags & EXT3_DIRSYNC_FL) + inode->i_flags |= S_DIRSYNC; +} + +void ext3_read_inode(struct inode * inode) +{ + struct ext3_iloc iloc; + struct ext3_inode *raw_inode; + struct ext3_inode_info *ei = EXT3_I(inode); + struct buffer_head *bh; + int block; + +#ifdef CONFIG_EXT3_FS_POSIX_ACL + ei->i_acl = EXT3_ACL_NOT_CACHED; + ei->i_default_acl = EXT3_ACL_NOT_CACHED; +#endif + ei->i_block_alloc_info = NULL; + + if (__ext3_get_inode_loc(inode, &iloc, 0)) + goto bad_inode; + bh = iloc.bh; + raw_inode = ext3_raw_inode(&iloc); + inode->i_mode = le16_to_cpu(raw_inode->i_mode); + inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); + inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); + if(!(test_opt (inode->i_sb, NO_UID32))) { + inode->i_uid |= le16_to_cpu(raw_inode->i_uid_high) << 16; + inode->i_gid |= le16_to_cpu(raw_inode->i_gid_high) << 16; + } + inode->i_nlink = le16_to_cpu(raw_inode->i_links_count); + inode->i_size = le32_to_cpu(raw_inode->i_size); + inode->i_atime.tv_sec = le32_to_cpu(raw_inode->i_atime); + inode->i_ctime.tv_sec = le32_to_cpu(raw_inode->i_ctime); + inode->i_mtime.tv_sec = le32_to_cpu(raw_inode->i_mtime); + inode->i_atime.tv_nsec = inode->i_ctime.tv_nsec = inode->i_mtime.tv_nsec = 0; + + ei->i_state = 0; + ei->i_dir_start_lookup = 0; + ei->i_dtime = le32_to_cpu(raw_inode->i_dtime); + /* We now have enough fields to check if the inode was active or not. + * This is needed because nfsd might try to access dead inodes + * the test is that same one that e2fsck uses + * NeilBrown 1999oct15 + */ + if (inode->i_nlink == 0) { + if (inode->i_mode == 0 || + !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { + /* this inode is deleted */ + brelse (bh); + goto bad_inode; + } + /* The only unlinked inodes we let through here have + * valid i_mode and are being read by the orphan + * recovery code: that's fine, we're about to complete + * the process of deleting those. */ + } + inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); + ei->i_flags = le32_to_cpu(raw_inode->i_flags); +#ifdef EXT3_FRAGMENTS + ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); + ei->i_frag_no = raw_inode->i_frag; + ei->i_frag_size = raw_inode->i_fsize; +#endif + ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); + if (!S_ISREG(inode->i_mode)) { + ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); + } else { + inode->i_size |= + ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32; + } + ei->i_disksize = inode->i_size; + inode->i_generation = le32_to_cpu(raw_inode->i_generation); + ei->i_block_group = iloc.block_group; + /* + * NOTE! The in-memory inode i_data array is in little-endian order + * even on big-endian machines: we do NOT byteswap the block numbers! 
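Put differently, i_data keeps the on-disk (little-endian) representation and every consumer converts at the point of use, as the block-freeing code earlier in this file does. A small sketch of the consequence on a big-endian CPU:

        /*
         *   disk bytes 01 02 00 00  ->  stored in ei->i_data[0] as-is
         *   ext3_fsblk_t nr = le32_to_cpu(ei->i_data[0]);  -> 513 (0x201)
         *
         * so reading an inode in never needs a byteswap pass over the
         * fifteen block pointers.
         */
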
+ */ + for (block = 0; block < EXT3_N_BLOCKS; block++) + ei->i_data[block] = raw_inode->i_block[block]; + INIT_LIST_HEAD(&ei->i_orphan); + + if (inode->i_ino >= EXT3_FIRST_INO(inode->i_sb) + 1 && + EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) { + /* + * When mke2fs creates big inodes it does not zero out + * the unused bytes above EXT3_GOOD_OLD_INODE_SIZE, + * so ignore those first few inodes. + */ + ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); + if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > + EXT3_INODE_SIZE(inode->i_sb)) + goto bad_inode; + if (ei->i_extra_isize == 0) { + /* The extra space is currently unused. Use it. */ + ei->i_extra_isize = sizeof(struct ext3_inode) - + EXT3_GOOD_OLD_INODE_SIZE; + } else { + __le32 *magic = (void *)raw_inode + + EXT3_GOOD_OLD_INODE_SIZE + + ei->i_extra_isize; + if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC)) + ei->i_state |= EXT3_STATE_XATTR; + } + } else + ei->i_extra_isize = 0; + + if (S_ISREG(inode->i_mode)) { + inode->i_op = &ext3_file_inode_operations; + inode->i_fop = &ext3_file_operations; + ext3_set_aops(inode); + } else if (S_ISDIR(inode->i_mode)) { + inode->i_op = &ext3_dir_inode_operations; + inode->i_fop = &ext3_dir_operations; + } else if (S_ISLNK(inode->i_mode)) { + if (ext3_inode_is_fast_symlink(inode)) + inode->i_op = &ext3_fast_symlink_inode_operations; + else { + inode->i_op = &ext3_symlink_inode_operations; + ext3_set_aops(inode); + } + } else { + inode->i_op = &ext3_special_inode_operations; + if (raw_inode->i_block[0]) + init_special_inode(inode, inode->i_mode, + old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); + else + init_special_inode(inode, inode->i_mode, + new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); + } + brelse (iloc.bh); + ext3_set_inode_flags(inode); + return; + +bad_inode: + make_bad_inode(inode); + return; +} + +/* + * Post the struct inode info into an on-disk inode location in the + * buffer-cache. This gobbles the caller's reference to the + * buffer_head in the inode location struct. + * + * The caller must have write access to iloc->bh. + */ +static int ext3_do_update_inode(handle_t *handle, + struct inode *inode, + struct ext3_iloc *iloc) +{ + struct ext3_inode *raw_inode = ext3_raw_inode(iloc); + struct ext3_inode_info *ei = EXT3_I(inode); + struct buffer_head *bh = iloc->bh; + int err = 0, rc, block; + + /* For fields not not tracking in the in-memory inode, + * initialise them to zero for new inodes. */ + if (ei->i_state & EXT3_STATE_NEW) + memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); + + raw_inode->i_mode = cpu_to_le16(inode->i_mode); + if(!(test_opt(inode->i_sb, NO_UID32))) { + raw_inode->i_uid_low = cpu_to_le16(low_16_bits(inode->i_uid)); + raw_inode->i_gid_low = cpu_to_le16(low_16_bits(inode->i_gid)); +/* + * Fix up interoperability with old kernels. 
Otherwise, old inodes get + * re-used with the upper 16 bits of the uid/gid intact + */ + if(!ei->i_dtime) { + raw_inode->i_uid_high = + cpu_to_le16(high_16_bits(inode->i_uid)); + raw_inode->i_gid_high = + cpu_to_le16(high_16_bits(inode->i_gid)); + } else { + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } + } else { + raw_inode->i_uid_low = + cpu_to_le16(fs_high2lowuid(inode->i_uid)); + raw_inode->i_gid_low = + cpu_to_le16(fs_high2lowgid(inode->i_gid)); + raw_inode->i_uid_high = 0; + raw_inode->i_gid_high = 0; + } + raw_inode->i_links_count = cpu_to_le16(inode->i_nlink); + raw_inode->i_size = cpu_to_le32(ei->i_disksize); + raw_inode->i_atime = cpu_to_le32(inode->i_atime.tv_sec); + raw_inode->i_ctime = cpu_to_le32(inode->i_ctime.tv_sec); + raw_inode->i_mtime = cpu_to_le32(inode->i_mtime.tv_sec); + raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); + raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); + raw_inode->i_flags = cpu_to_le32(ei->i_flags); +#ifdef EXT3_FRAGMENTS + raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); + raw_inode->i_frag = ei->i_frag_no; + raw_inode->i_fsize = ei->i_frag_size; +#endif + raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); + if (!S_ISREG(inode->i_mode)) { + raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); + } else { + raw_inode->i_size_high = + cpu_to_le32(ei->i_disksize >> 32); + if (ei->i_disksize > 0x7fffffffULL) { + struct super_block *sb = inode->i_sb; + if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, + EXT3_FEATURE_RO_COMPAT_LARGE_FILE) || + EXT3_SB(sb)->s_es->s_rev_level == + cpu_to_le32(EXT3_GOOD_OLD_REV)) { + /* If this is the first large file + * created, add a flag to the superblock. + */ + err = ext3_journal_get_write_access(handle, + EXT3_SB(sb)->s_sbh); + if (err) + goto out_brelse; + ext3_update_dynamic_rev(sb); + EXT3_SET_RO_COMPAT_FEATURE(sb, + EXT3_FEATURE_RO_COMPAT_LARGE_FILE); + sb->s_dirt = 1; + handle->h_sync = 1; + err = ext3_journal_dirty_metadata(handle, + EXT3_SB(sb)->s_sbh); + } + } + } + raw_inode->i_generation = cpu_to_le32(inode->i_generation); + if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { + if (old_valid_dev(inode->i_rdev)) { + raw_inode->i_block[0] = + cpu_to_le32(old_encode_dev(inode->i_rdev)); + raw_inode->i_block[1] = 0; + } else { + raw_inode->i_block[0] = 0; + raw_inode->i_block[1] = + cpu_to_le32(new_encode_dev(inode->i_rdev)); + raw_inode->i_block[2] = 0; + } + } else for (block = 0; block < EXT3_N_BLOCKS; block++) + raw_inode->i_block[block] = ei->i_data[block]; + + if (ei->i_extra_isize) + raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); + + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); + rc = ext3_journal_dirty_metadata(handle, bh); + if (!err) + err = rc; + ei->i_state &= ~EXT3_STATE_NEW; + +out_brelse: + brelse (bh); + ext3_std_error(inode->i_sb, err); + return err; +} + +/* + * ext3_write_inode() + * + * We are called from a few places: + * + * - Within generic_file_write() for O_SYNC files. + * Here, there will be no transaction running. We wait for any running + * trasnaction to commit. + * + * - Within sys_sync(), kupdate and such. + * We wait on commit, if tol to. + * + * - Within prune_icache() (PF_MEMALLOC == true) + * Here we simply return. We can't afford to block kswapd on the + * journal commit. + * + * In all cases it is actually safe for us to return without doing anything, + * because the inode has been copied into a raw inode buffer in + * ext3_mark_inode_dirty(). This is a correctness thing for O_SYNC and for + * knfsd. 
+ * + * Note that we are absolutely dependent upon all inode dirtiers doing the + * right thing: they *must* call mark_inode_dirty() after dirtying info in + * which we are interested. + * + * It would be a bug for them to not do this. The code: + * + * mark_inode_dirty(inode) + * stuff(); + * inode->i_size = expr; + * + * is in error because a kswapd-driven write_inode() could occur while + * `stuff()' is running, and the new i_size will be lost. Plus the inode + * will no longer be on the superblock's dirty inode list. + */ +int ext3_write_inode(struct inode *inode, int wait) +{ + if (current->flags & PF_MEMALLOC) + return 0; + + if (ext3_journal_current_handle()) { + jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n"); + dump_stack(); + return -EIO; + } + + if (!wait) + return 0; + + return ext3_force_commit(inode->i_sb); +} + +/* + * ext3_setattr() + * + * Called from notify_change. + * + * We want to trap VFS attempts to truncate the file as soon as + * possible. In particular, we want to make sure that when the VFS + * shrinks i_size, we put the inode on the orphan list and modify + * i_disksize immediately, so that during the subsequent flushing of + * dirty pages and freeing of disk blocks, we can guarantee that any + * commit will leave the blocks being flushed in an unused state on + * disk. (On recovery, the inode will get truncated and the blocks will + * be freed, so we have a strong guarantee that no future commit will + * leave these blocks visible to the user.) + * + * Called with inode->sem down. + */ +int ext3_setattr(struct dentry *dentry, struct iattr *attr) +{ + struct inode *inode = dentry->d_inode; + int error, rc = 0; + const unsigned int ia_valid = attr->ia_valid; + + error = inode_change_ok(inode, attr); + if (error) + return error; + + if ((ia_valid & ATTR_UID && attr->ia_uid != inode->i_uid) || + (ia_valid & ATTR_GID && attr->ia_gid != inode->i_gid)) { + handle_t *handle; + + /* (user+group)*(old+new) structure, inode write (sb, + * inode block, ? - but truncate inode update has it) */ + handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+ + EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + goto err_out; + } + error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; + if (error) { + ext3_journal_stop(handle); + return error; + } + /* Update corresponding info in inode so that everything is in + * one transaction */ + if (attr->ia_valid & ATTR_UID) + inode->i_uid = attr->ia_uid; + if (attr->ia_valid & ATTR_GID) + inode->i_gid = attr->ia_gid; + error = ext3_mark_inode_dirty(handle, inode); + ext3_journal_stop(handle); + } + + if (S_ISREG(inode->i_mode) && + attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { + handle_t *handle; + + handle = ext3_journal_start(inode, 3); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + goto err_out; + } + + error = ext3_orphan_add(handle, inode); + EXT3_I(inode)->i_disksize = attr->ia_size; + rc = ext3_mark_inode_dirty(handle, inode); + if (!error) + error = rc; + ext3_journal_stop(handle); + } + + rc = inode_setattr(inode, attr); + + /* If inode_setattr's call to ext3_truncate failed to get a + * transaction handle at all, we need to clean up the in-core + * orphan list manually. */ + if (inode->i_nlink) + ext3_orphan_del(NULL, inode); + + if (!rc && (ia_valid & ATTR_MODE)) + rc = ext3_acl_chmod(inode); + +err_out: + ext3_std_error(inode->i_sb, error); + if (!error) + error = rc; + return error; +} + + +/* + * How many blocks doth make a writepage()? 
+ * + * With N blocks per page, it may be: + * N data blocks + * 2 indirect block + * 2 dindirect + * 1 tindirect + * N+5 bitmap blocks (from the above) + * N+5 group descriptor summary blocks + * 1 inode block + * 1 superblock. + * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quote files + * + * 3 * (N + 5) + 2 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS + * + * With ordered or writeback data it's the same, less the N data blocks. + * + * If the inode's direct blocks can hold an integral number of pages then a + * page cannot straddle two indirect blocks, and we can only touch one indirect + * and dindirect block, and the "5" above becomes "3". + * + * This still overestimates under most circumstances. If we were to pass the + * start and end offsets in here as well we could do block_to_path() on each + * block and work out the exact number of indirects which are touched. Pah. + */ + +static int ext3_writepage_trans_blocks(struct inode *inode) +{ + int bpp = ext3_journal_blocks_per_page(inode); + int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; + int ret; + + if (ext3_should_journal_data(inode)) + ret = 3 * (bpp + indirects) + 2; + else + ret = 2 * (bpp + indirects) + 2; + +#ifdef CONFIG_QUOTA + /* We know that structure was already allocated during DQUOT_INIT so + * we will be updating only the data blocks + inodes */ + ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb); +#endif + + return ret; +} + +/* + * The caller must have previously called ext3_reserve_inode_write(). + * Give this, we know that the caller already has write access to iloc->bh. + */ +int ext3_mark_iloc_dirty(handle_t *handle, + struct inode *inode, struct ext3_iloc *iloc) +{ + int err = 0; + + /* the do_update_inode consumes one bh->b_count */ + get_bh(iloc->bh); + + /* ext3_do_update_inode() does journal_dirty_metadata */ + err = ext3_do_update_inode(handle, inode, iloc); + put_bh(iloc->bh); + return err; +} + +/* + * On success, We end up with an outstanding reference count against + * iloc->bh. This _must_ be cleaned up later. + */ + +int +ext3_reserve_inode_write(handle_t *handle, struct inode *inode, + struct ext3_iloc *iloc) +{ + int err = 0; + if (handle) { + err = ext3_get_inode_loc(inode, iloc); + if (!err) { + BUFFER_TRACE(iloc->bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, iloc->bh); + if (err) { + brelse(iloc->bh); + iloc->bh = NULL; + } + } + } + ext3_std_error(inode->i_sb, err); + return err; +} + +/* + * What we do here is to mark the in-core inode as clean with respect to inode + * dirtiness (it may still be data-dirty). + * This means that the in-core inode may be reaped by prune_icache + * without having to perform any I/O. This is a very good thing, + * because *any* task may call prune_icache - even ones which + * have a transaction open against a different journal. + * + * Is this cheating? Not really. Sure, we haven't written the + * inode out, but prune_icache isn't a user-visible syncing function. + * Whenever the user wants stuff synced (sys_sync, sys_msync, sys_fsync) + * we start and wait on commits. + * + * Is this efficient/effective? Well, we're being nice to the system + * by cleaning up our inodes proactively so they can be reaped + * without I/O. But we are potentially leaving up to five seconds' + * worth of inodes floating about which prune_icache wants us to + * write out. One way to fix that would be to get prune_icache() + * to do a write_super() to free up some memory. It has the desired + * effect. 
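Stepping back to ext3_writepage_trans_blocks() above, plugging in numbers makes the estimate concrete (hypothetical 4096-byte page with 1024-byte blocks, so bpp = 4):

        /*
         *   EXT3_NDIR_BLOCKS % bpp == 12 % 4 == 0   ->  indirects = 3
         *
         *   data=journal:            ret = 3 * (4 + 3) + 2 = 23
         *   data=ordered/writeback:  ret = 2 * (4 + 3) + 2 = 16
         *   with CONFIG_QUOTA:       ret += 2 * EXT3_QUOTA_TRANS_BLOCKS(sb)
         */
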
+ */ +int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) +{ + struct ext3_iloc iloc; + int err; + + might_sleep(); + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (!err) + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + return err; +} + +/* + * ext3_dirty_inode() is called from __mark_inode_dirty() + * + * We're really interested in the case where a file is being extended. + * i_size has been changed by generic_commit_write() and we thus need + * to include the updated inode in the current transaction. + * + * Also, DQUOT_ALLOC_SPACE() will always dirty the inode when blocks + * are allocated to the file. + * + * If the inode is marked synchronous, we don't honour that here - doing + * so would cause a commit on atime updates, which we don't bother doing. + * We handle synchronous inodes at the highest possible level. + */ +void ext3_dirty_inode(struct inode *inode) +{ + handle_t *current_handle = ext3_journal_current_handle(); + handle_t *handle; + + handle = ext3_journal_start(inode, 2); + if (IS_ERR(handle)) + goto out; + if (current_handle && + current_handle->h_transaction != handle->h_transaction) { + /* This task has a transaction open against a different fs */ + printk(KERN_EMERG "%s: transactions do not match!\n", + __FUNCTION__); + } else { + jbd_debug(5, "marking dirty. outer handle=%p\n", + current_handle); + ext3_mark_inode_dirty(handle, inode); + } + ext3_journal_stop(handle); +out: + return; +} + +#if 0 +/* + * Bind an inode's backing buffer_head into this transaction, to prevent + * it from being flushed to disk early. Unlike + * ext3_reserve_inode_write, this leaves behind no bh reference and + * returns no iloc structure, so the caller needs to repeat the iloc + * lookup to mark the inode dirty later. + */ +static int ext3_pin_inode(handle_t *handle, struct inode *inode) +{ + struct ext3_iloc iloc; + + int err = 0; + if (handle) { + err = ext3_get_inode_loc(inode, &iloc); + if (!err) { + BUFFER_TRACE(iloc.bh, "get_write_access"); + err = journal_get_write_access(handle, iloc.bh); + if (!err) + err = ext3_journal_dirty_metadata(handle, + iloc.bh); + brelse(iloc.bh); + } + } + ext3_std_error(inode->i_sb, err); + return err; +} +#endif + +int ext3_change_inode_journal_flag(struct inode *inode, int val) +{ + journal_t *journal; + handle_t *handle; + int err; + + /* + * We have to be very careful here: changing a data block's + * journaling status dynamically is dangerous. If we write a + * data block to the journal, change the status and then delete + * that block, we risk forgetting to revoke the old log record + * from the journal and so a subsequent replay can corrupt data. + * So, first we make sure that the journal is empty and that + * nobody is changing anything. + */ + + journal = EXT3_JOURNAL(inode); + if (is_journal_aborted(journal) || IS_RDONLY(inode)) + return -EROFS; + + journal_lock_updates(journal); + journal_flush(journal); + + /* + * OK, there are no updates running now, and all cached data is + * synced to disk. We are now in a completely consistent state + * which doesn't have anything in the journal, and we know that + * no filesystem updates are running, so it is safe to modify + * the inode's in-core data-journaling state flag now. + */ + + if (val) + EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL; + else + EXT3_I(inode)->i_flags &= ~EXT3_JOURNAL_DATA_FL; + ext3_set_aops(inode); + + journal_unlock_updates(journal); + + /* Finally we can mark the inode as dirty. 
*/ + + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + err = ext3_mark_inode_dirty(handle, inode); + handle->h_sync = 1; + ext3_journal_stop(handle); + ext3_std_error(inode->i_sb, err); + + return err; +} diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c new file mode 100644 index 000000000000..12daa6869572 --- /dev/null +++ b/fs/ext4/ioctl.c @@ -0,0 +1,307 @@ +/* + * linux/fs/ext3/ioctl.c + * + * Copyright (C) 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + */ + +#include <linux/fs.h> +#include <linux/jbd.h> +#include <linux/capability.h> +#include <linux/ext3_fs.h> +#include <linux/ext3_jbd.h> +#include <linux/time.h> +#include <linux/compat.h> +#include <linux/smp_lock.h> +#include <asm/uaccess.h> + +int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, + unsigned long arg) +{ + struct ext3_inode_info *ei = EXT3_I(inode); + unsigned int flags; + unsigned short rsv_window_size; + + ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); + + switch (cmd) { + case EXT3_IOC_GETFLAGS: + flags = ei->i_flags & EXT3_FL_USER_VISIBLE; + return put_user(flags, (int __user *) arg); + case EXT3_IOC_SETFLAGS: { + handle_t *handle = NULL; + int err; + struct ext3_iloc iloc; + unsigned int oldflags; + unsigned int jflag; + + if (IS_RDONLY(inode)) + return -EROFS; + + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EACCES; + + if (get_user(flags, (int __user *) arg)) + return -EFAULT; + + if (!S_ISDIR(inode->i_mode)) + flags &= ~EXT3_DIRSYNC_FL; + + mutex_lock(&inode->i_mutex); + oldflags = ei->i_flags; + + /* The JOURNAL_DATA flag is modifiable only by root */ + jflag = flags & EXT3_JOURNAL_DATA_FL; + + /* + * The IMMUTABLE and APPEND_ONLY flags can only be changed by + * the relevant capability. + * + * This test looks nicer. Thanks to Pauline Middelink + */ + if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { + if (!capable(CAP_LINUX_IMMUTABLE)) { + mutex_unlock(&inode->i_mutex); + return -EPERM; + } + } + + /* + * The JOURNAL_DATA flag can only be changed by + * the relevant capability. 
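For context, this GETFLAGS/SETFLAGS pair is what chattr(1)-style tools drive from userspace. A hedged sketch (error handling simplified; the EXT3_* constants are assumed visible to userspace via linux/ext3_fs.h, as they historically were):

        #include <sys/ioctl.h>
        #include <linux/ext3_fs.h>

        int make_append_only(int fd)
        {
                int flags;

                if (ioctl(fd, EXT3_IOC_GETFLAGS, &flags) < 0)
                        return -1;
                flags |= EXT3_APPEND_FL;  /* needs CAP_LINUX_IMMUTABLE */
                return ioctl(fd, EXT3_IOC_SETFLAGS, &flags);
        }
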
+ */ + if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { + if (!capable(CAP_SYS_RESOURCE)) { + mutex_unlock(&inode->i_mutex); + return -EPERM; + } + } + + + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) { + mutex_unlock(&inode->i_mutex); + return PTR_ERR(handle); + } + if (IS_SYNC(inode)) + handle->h_sync = 1; + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (err) + goto flags_err; + + flags = flags & EXT3_FL_USER_MODIFIABLE; + flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE; + ei->i_flags = flags; + + ext3_set_inode_flags(inode); + inode->i_ctime = CURRENT_TIME_SEC; + + err = ext3_mark_iloc_dirty(handle, inode, &iloc); +flags_err: + ext3_journal_stop(handle); + if (err) { + mutex_unlock(&inode->i_mutex); + return err; + } + + if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) + err = ext3_change_inode_journal_flag(inode, jflag); + mutex_unlock(&inode->i_mutex); + return err; + } + case EXT3_IOC_GETVERSION: + case EXT3_IOC_GETVERSION_OLD: + return put_user(inode->i_generation, (int __user *) arg); + case EXT3_IOC_SETVERSION: + case EXT3_IOC_SETVERSION_OLD: { + handle_t *handle; + struct ext3_iloc iloc; + __u32 generation; + int err; + + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EPERM; + if (IS_RDONLY(inode)) + return -EROFS; + if (get_user(generation, (int __user *) arg)) + return -EFAULT; + + handle = ext3_journal_start(inode, 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (err == 0) { + inode->i_ctime = CURRENT_TIME_SEC; + inode->i_generation = generation; + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + } + ext3_journal_stop(handle); + return err; + } +#ifdef CONFIG_JBD_DEBUG + case EXT3_IOC_WAIT_FOR_READONLY: + /* + * This is racy - by the time we're woken up and running, + * the superblock could be released. And the module could + * have been unloaded. So sue me. + * + * Returns 1 if it slept, else zero. 
+ */ + { + struct super_block *sb = inode->i_sb; + DECLARE_WAITQUEUE(wait, current); + int ret = 0; + + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait); + if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) { + schedule(); + ret = 1; + } + remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait); + return ret; + } +#endif + case EXT3_IOC_GETRSVSZ: + if (test_opt(inode->i_sb, RESERVATION) + && S_ISREG(inode->i_mode) + && ei->i_block_alloc_info) { + rsv_window_size = ei->i_block_alloc_info->rsv_window_node.rsv_goal_size; + return put_user(rsv_window_size, (int __user *)arg); + } + return -ENOTTY; + case EXT3_IOC_SETRSVSZ: { + + if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) + return -ENOTTY; + + if (IS_RDONLY(inode)) + return -EROFS; + + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) + return -EACCES; + + if (get_user(rsv_window_size, (int __user *)arg)) + return -EFAULT; + + if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS) + rsv_window_size = EXT3_MAX_RESERVE_BLOCKS; + + /* + * need to allocate reservation structure for this inode + * before set the window size + */ + mutex_lock(&ei->truncate_mutex); + if (!ei->i_block_alloc_info) + ext3_init_block_alloc_info(inode); + + if (ei->i_block_alloc_info){ + struct ext3_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; + rsv->rsv_goal_size = rsv_window_size; + } + mutex_unlock(&ei->truncate_mutex); + return 0; + } + case EXT3_IOC_GROUP_EXTEND: { + ext3_fsblk_t n_blocks_count; + struct super_block *sb = inode->i_sb; + int err; + + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + + if (IS_RDONLY(inode)) + return -EROFS; + + if (get_user(n_blocks_count, (__u32 __user *)arg)) + return -EFAULT; + + err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count); + journal_lock_updates(EXT3_SB(sb)->s_journal); + journal_flush(EXT3_SB(sb)->s_journal); + journal_unlock_updates(EXT3_SB(sb)->s_journal); + + return err; + } + case EXT3_IOC_GROUP_ADD: { + struct ext3_new_group_data input; + struct super_block *sb = inode->i_sb; + int err; + + if (!capable(CAP_SYS_RESOURCE)) + return -EPERM; + + if (IS_RDONLY(inode)) + return -EROFS; + + if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg, + sizeof(input))) + return -EFAULT; + + err = ext3_group_add(sb, &input); + journal_lock_updates(EXT3_SB(sb)->s_journal); + journal_flush(EXT3_SB(sb)->s_journal); + journal_unlock_updates(EXT3_SB(sb)->s_journal); + + return err; + } + + + default: + return -ENOTTY; + } +} + +#ifdef CONFIG_COMPAT +long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +{ + struct inode *inode = file->f_dentry->d_inode; + int ret; + + /* These are just misnamed, they actually get/put from/to user an int */ + switch (cmd) { + case EXT3_IOC32_GETFLAGS: + cmd = EXT3_IOC_GETFLAGS; + break; + case EXT3_IOC32_SETFLAGS: + cmd = EXT3_IOC_SETFLAGS; + break; + case EXT3_IOC32_GETVERSION: + cmd = EXT3_IOC_GETVERSION; + break; + case EXT3_IOC32_SETVERSION: + cmd = EXT3_IOC_SETVERSION; + break; + case EXT3_IOC32_GROUP_EXTEND: + cmd = EXT3_IOC_GROUP_EXTEND; + break; + case EXT3_IOC32_GETVERSION_OLD: + cmd = EXT3_IOC_GETVERSION_OLD; + break; + case EXT3_IOC32_SETVERSION_OLD: + cmd = EXT3_IOC_SETVERSION_OLD; + break; +#ifdef CONFIG_JBD_DEBUG + case EXT3_IOC32_WAIT_FOR_READONLY: + cmd = EXT3_IOC_WAIT_FOR_READONLY; + break; +#endif + case EXT3_IOC32_GETRSVSZ: + cmd = EXT3_IOC_GETRSVSZ; + break; + case EXT3_IOC32_SETRSVSZ: + cmd = EXT3_IOC_SETRSVSZ; + break; + case 
EXT3_IOC_GROUP_ADD: + break; + default: + return -ENOIOCTLCMD; + } + lock_kernel(); + ret = ext3_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); + unlock_kernel(); + return ret; +} +#endif diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c new file mode 100644 index 000000000000..906731a20f1a --- /dev/null +++ b/fs/ext4/namei.c @@ -0,0 +1,2397 @@ +/* + * linux/fs/ext3/namei.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/namei.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. Miller (davem@caip.rutgers.edu), 1995 + * Directory entry file type support and forward compatibility hooks + * for B-tree directories by Theodore Ts'o (tytso@mit.edu), 1998 + * Hash Tree Directory indexing (c) + * Daniel Phillips, 2001 + * Hash Tree Directory indexing porting + * Christopher Li, 2002 + * Hash Tree Directory indexing cleanup + * Theodore Ts'o, 2002 + */ + +#include <linux/fs.h> +#include <linux/pagemap.h> +#include <linux/jbd.h> +#include <linux/time.h> +#include <linux/ext3_fs.h> +#include <linux/ext3_jbd.h> +#include <linux/fcntl.h> +#include <linux/stat.h> +#include <linux/string.h> +#include <linux/quotaops.h> +#include <linux/buffer_head.h> +#include <linux/bio.h> +#include <linux/smp_lock.h> + +#include "namei.h" +#include "xattr.h" +#include "acl.h" + +/* + * define how far ahead to read directories while searching them. + */ +#define NAMEI_RA_CHUNKS 2 +#define NAMEI_RA_BLOCKS 4 +#define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) +#define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) + +static struct buffer_head *ext3_append(handle_t *handle, + struct inode *inode, + u32 *block, int *err) +{ + struct buffer_head *bh; + + *block = inode->i_size >> inode->i_sb->s_blocksize_bits; + + if ((bh = ext3_bread(handle, inode, *block, 1, err))) { + inode->i_size += inode->i_sb->s_blocksize; + EXT3_I(inode)->i_disksize = inode->i_size; + ext3_journal_get_write_access(handle,bh); + } + return bh; +} + +#ifndef assert +#define assert(test) J_ASSERT(test) +#endif + +#ifndef swap +#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0) +#endif + +#ifdef DX_DEBUG +#define dxtrace(command) command +#else +#define dxtrace(command) +#endif + +struct fake_dirent +{ + __le32 inode; + __le16 rec_len; + u8 name_len; + u8 file_type; +}; + +struct dx_countlimit +{ + __le16 limit; + __le16 count; +}; + +struct dx_entry +{ + __le32 hash; + __le32 block; +}; + +/* + * dx_root_info is laid out so that if it should somehow get overlaid by a + * dirent the two low bits of the hash version will be zero. Therefore, the + * hash version mod 4 should never be 0. Sincerely, the paranoia department. 
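The on-disk layout implied by the structures below is easiest to see as byte offsets within the directory's first block (a sketch derived from the struct definitions that follow):

        /*
         *   0x00  fake_dirent for "."   (inode, rec_len, name_len, type)
         *   0x08  "." name + padding    (4 bytes)
         *   0x0c  fake_dirent for ".."  (rec_len spans the rest of the block)
         *   0x14  ".." name + padding   (4 bytes)
         *   0x18  dx_root_info          (reserved_zero, hash_version,
         *                                info_length = 8, indirect_levels,
         *                                unused_flags)
         *   0x20  dx_entry[]            ((hash, block) pairs)
         *
         * If a regular dirent ever started at 0x18, the low byte of its
         * rec_len (always a multiple of 4) would land on hash_version,
         * which is why legal hash versions are never 0 mod 4.
         */
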
+ */ + +struct dx_root +{ + struct fake_dirent dot; + char dot_name[4]; + struct fake_dirent dotdot; + char dotdot_name[4]; + struct dx_root_info + { + __le32 reserved_zero; + u8 hash_version; + u8 info_length; /* 8 */ + u8 indirect_levels; + u8 unused_flags; + } + info; + struct dx_entry entries[0]; +}; + +struct dx_node +{ + struct fake_dirent fake; + struct dx_entry entries[0]; +}; + + +struct dx_frame +{ + struct buffer_head *bh; + struct dx_entry *entries; + struct dx_entry *at; +}; + +struct dx_map_entry +{ + u32 hash; + u32 offs; +}; + +#ifdef CONFIG_EXT3_INDEX +static inline unsigned dx_get_block (struct dx_entry *entry); +static void dx_set_block (struct dx_entry *entry, unsigned value); +static inline unsigned dx_get_hash (struct dx_entry *entry); +static void dx_set_hash (struct dx_entry *entry, unsigned value); +static unsigned dx_get_count (struct dx_entry *entries); +static unsigned dx_get_limit (struct dx_entry *entries); +static void dx_set_count (struct dx_entry *entries, unsigned value); +static void dx_set_limit (struct dx_entry *entries, unsigned value); +static unsigned dx_root_limit (struct inode *dir, unsigned infosize); +static unsigned dx_node_limit (struct inode *dir); +static struct dx_frame *dx_probe(struct dentry *dentry, + struct inode *dir, + struct dx_hash_info *hinfo, + struct dx_frame *frame, + int *err); +static void dx_release (struct dx_frame *frames); +static int dx_make_map (struct ext3_dir_entry_2 *de, int size, + struct dx_hash_info *hinfo, struct dx_map_entry map[]); +static void dx_sort_map(struct dx_map_entry *map, unsigned count); +static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, + struct dx_map_entry *offsets, int count); +static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size); +static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); +static int ext3_htree_next_block(struct inode *dir, __u32 hash, + struct dx_frame *frame, + struct dx_frame *frames, + __u32 *start_hash); +static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, + struct ext3_dir_entry_2 **res_dir, int *err); +static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, + struct inode *inode); + +/* + * Future: use high four bits of block for coalesce-on-delete flags + * Mask them off for now. 
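One small note on the masking: the comment above says four bits, while the 0x00ffffff mask in dx_get_block() below actually strips the top eight. Further down, dx_root_limit() and dx_node_limit() size the entry arrays; a hypothetical example of the resulting fan-out (blocksize = 4096, and EXT3_DIR_REC_LEN(n) = (8 + n + 3) & ~3):

        /*
         *   dx_root_limit = (4096 - REC_LEN(1) - REC_LEN(2) - 8) / 8
         *                 = (4096 - 12 - 12 - 8) / 8   = 508 entries
         *   dx_node_limit = (4096 - REC_LEN(0)) / 8
         *                 = (4096 - 8) / 8             = 511 entries
         *
         * so a root plus one level of index nodes can address roughly
         * 508 * 511 leaf blocks.
         */
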
+ */ + +static inline unsigned dx_get_block (struct dx_entry *entry) +{ + return le32_to_cpu(entry->block) & 0x00ffffff; +} + +static inline void dx_set_block (struct dx_entry *entry, unsigned value) +{ + entry->block = cpu_to_le32(value); +} + +static inline unsigned dx_get_hash (struct dx_entry *entry) +{ + return le32_to_cpu(entry->hash); +} + +static inline void dx_set_hash (struct dx_entry *entry, unsigned value) +{ + entry->hash = cpu_to_le32(value); +} + +static inline unsigned dx_get_count (struct dx_entry *entries) +{ + return le16_to_cpu(((struct dx_countlimit *) entries)->count); +} + +static inline unsigned dx_get_limit (struct dx_entry *entries) +{ + return le16_to_cpu(((struct dx_countlimit *) entries)->limit); +} + +static inline void dx_set_count (struct dx_entry *entries, unsigned value) +{ + ((struct dx_countlimit *) entries)->count = cpu_to_le16(value); +} + +static inline void dx_set_limit (struct dx_entry *entries, unsigned value) +{ + ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value); +} + +static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) +{ + unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - + EXT3_DIR_REC_LEN(2) - infosize; + return 0? 20: entry_space / sizeof(struct dx_entry); +} + +static inline unsigned dx_node_limit (struct inode *dir) +{ + unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); + return 0? 22: entry_space / sizeof(struct dx_entry); +} + +/* + * Debug + */ +#ifdef DX_DEBUG +static void dx_show_index (char * label, struct dx_entry *entries) +{ + int i, n = dx_get_count (entries); + printk("%s index ", label); + for (i = 0; i < n; i++) + { + printk("%x->%u ", i? dx_get_hash(entries + i): 0, dx_get_block(entries + i)); + } + printk("\n"); +} + +struct stats +{ + unsigned names; + unsigned space; + unsigned bcount; +}; + +static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de, + int size, int show_names) +{ + unsigned names = 0, space = 0; + char *base = (char *) de; + struct dx_hash_info h = *hinfo; + + printk("names: "); + while ((char *) de < base + size) + { + if (de->inode) + { + if (show_names) + { + int len = de->name_len; + char *name = de->name; + while (len--) printk("%c", *name++); + ext3fs_dirhash(de->name, de->name_len, &h); + printk(":%x.%u ", h.hash, + ((char *) de - base)); + } + space += EXT3_DIR_REC_LEN(de->name_len); + names++; + } + de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); + } + printk("(%i)\n", names); + return (struct stats) { names, space, 1 }; +} + +struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, + struct dx_entry *entries, int levels) +{ + unsigned blocksize = dir->i_sb->s_blocksize; + unsigned count = dx_get_count (entries), names = 0, space = 0, i; + unsigned bcount = 0; + struct buffer_head *bh; + int err; + printk("%i indexed blocks...\n", count); + for (i = 0; i < count; i++, entries++) + { + u32 block = dx_get_block(entries), hash = i? dx_get_hash(entries): 0; + u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; + struct stats stats; + printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); + if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue; + stats = levels? 
+ dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): + dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0); + names += stats.names; + space += stats.space; + bcount += stats.bcount; + brelse (bh); + } + if (bcount) + printk("%snames %u, fullness %u (%u%%)\n", levels?"":" ", + names, space/bcount,(space/bcount)*100/blocksize); + return (struct stats) { names, space, bcount}; +} +#endif /* DX_DEBUG */ + +/* + * Probe for a directory leaf block to search. + * + * dx_probe can return ERR_BAD_DX_DIR, which means there was a format + * error in the directory index, and the caller should fall back to + * searching the directory normally. The callers of dx_probe **MUST** + * check for this error code, and make sure it never gets reflected + * back to userspace. + */ +static struct dx_frame * +dx_probe(struct dentry *dentry, struct inode *dir, + struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) +{ + unsigned count, indirect; + struct dx_entry *at, *entries, *p, *q, *m; + struct dx_root *root; + struct buffer_head *bh; + struct dx_frame *frame = frame_in; + u32 hash; + + frame->bh = NULL; + if (dentry) + dir = dentry->d_parent->d_inode; + if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) + goto fail; + root = (struct dx_root *) bh->b_data; + if (root->info.hash_version != DX_HASH_TEA && + root->info.hash_version != DX_HASH_HALF_MD4 && + root->info.hash_version != DX_HASH_LEGACY) { + ext3_warning(dir->i_sb, __FUNCTION__, + "Unrecognised inode hash code %d", + root->info.hash_version); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail; + } + hinfo->hash_version = root->info.hash_version; + hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed; + if (dentry) + ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); + hash = hinfo->hash; + + if (root->info.unused_flags & 1) { + ext3_warning(dir->i_sb, __FUNCTION__, + "Unimplemented inode hash flags: %#06x", + root->info.unused_flags); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail; + } + + if ((indirect = root->info.indirect_levels) > 1) { + ext3_warning(dir->i_sb, __FUNCTION__, + "Unimplemented inode hash depth: %#06x", + root->info.indirect_levels); + brelse(bh); + *err = ERR_BAD_DX_DIR; + goto fail; + } + + entries = (struct dx_entry *) (((char *)&root->info) + + root->info.info_length); + assert(dx_get_limit(entries) == dx_root_limit(dir, + root->info.info_length)); + dxtrace (printk("Look up %x", hash)); + while (1) + { + count = dx_get_count(entries); + assert (count && count <= dx_get_limit(entries)); + p = entries + 1; + q = entries + count - 1; + while (p <= q) + { + m = p + (q - p)/2; + dxtrace(printk(".")); + if (dx_get_hash(m) > hash) + q = m - 1; + else + p = m + 1; + } + + if (0) // linear search cross check + { + unsigned n = count - 1; + at = entries; + while (n--) + { + dxtrace(printk(",")); + if (dx_get_hash(++at) > hash) + { + at--; + break; + } + } + assert (at == p - 1); + } + + at = p - 1; + dxtrace(printk(" %x->%u\n", at == entries? 
0: dx_get_hash(at), dx_get_block(at)));
+		frame->bh = bh;
+		frame->entries = entries;
+		frame->at = at;
+		if (!indirect--) return frame;
+		if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
+			goto fail2;
+		at = entries = ((struct dx_node *) bh->b_data)->entries;
+		assert (dx_get_limit(entries) == dx_node_limit (dir));
+		frame++;
+	}
+fail2:
+	while (frame >= frame_in) {
+		brelse(frame->bh);
+		frame--;
+	}
+fail:
+	return NULL;
+}
+
+static void dx_release (struct dx_frame *frames)
+{
+	if (frames[0].bh == NULL)
+		return;
+
+	if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
+		brelse(frames[1].bh);
+	brelse(frames[0].bh);
+}
+
+/*
+ * This function increments the frame pointer to search the next leaf
+ * block, and reads in the necessary intervening nodes if the search
+ * should be necessary. Whether or not the search is necessary is
+ * controlled by the hash parameter. If the hash value is even, then
+ * the search is only continued if the next block starts with that
+ * hash value. This is used if we are searching for a specific file.
+ *
+ * If the hash value is HASH_NB_ALWAYS, then always go to the next block.
+ *
+ * This function returns 1 if the caller should continue to search,
+ * or 0 if it should not. If there is an error reading one of the
+ * index blocks, it will return a negative error code.
+ *
+ * If start_hash is non-null, it will be filled in with the starting
+ * hash of the next page.
+ */
+static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+				 struct dx_frame *frame,
+				 struct dx_frame *frames,
+				 __u32 *start_hash)
+{
+	struct dx_frame *p;
+	struct buffer_head *bh;
+	int err, num_frames = 0;
+	__u32 bhash;
+
+	p = frame;
+	/*
+	 * Find the next leaf page by incrementing the frame pointer.
+	 * If we run out of entries in the interior node, loop around and
+	 * increment pointer in the parent node. When we break out of
+	 * this loop, num_frames indicates the number of interior
+	 * nodes that need to be read.
+	 */
+	while (1) {
+		if (++(p->at) < p->entries + dx_get_count(p->entries))
+			break;
+		if (p == frames)
+			return 0;
+		num_frames++;
+		p--;
+	}
+
+	/*
+	 * If the hash is 1, then continue only if the next page has a
+	 * continuation hash of any value. This is used for readdir
+	 * handling. Otherwise, check to see if the hash matches the
+	 * desired continuation hash. If it doesn't, return since
+	 * there's no point in reading the successive index pages.
+	 */
+	bhash = dx_get_hash(p->at);
+	if (start_hash)
+		*start_hash = bhash;
+	if ((hash & 1) == 0) {
+		if ((bhash & ~1) != hash)
+			return 0;
+	}
+	/*
+	 * If the hash is HASH_NB_ALWAYS, we always go to the next
+	 * block so no check is necessary
+	 */
+	while (num_frames--) {
+		if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
+				      0, &err)))
+			return err; /* Failure */
+		p++;
+		brelse (p->bh);
+		p->bh = bh;
+		p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
+	}
+	return 1;
+}
+
+
+/*
+ * p is at least 6 bytes before the end of page
+ */
+static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p)
+{
+	return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len));
+}
+
+/*
+ * This function fills a red-black tree with information from a
+ * directory block. It returns the number of directory entries loaded
+ * into the tree. If there is an error it is returned in err.
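/*
 * A freestanding sketch of the hash convention ext3_htree_next_block()
 * applies above: the low bit of the search hash selects "exact chain
 * only" (even) versus "always continue" (odd, readdir-style). The
 * demo_* helper and the hash values are invented.
 */
#include <assert.h>
#include <stdint.h>

static int demo_should_continue(uint32_t search_hash, uint32_t next_block_hash)
{
	if ((search_hash & 1) == 0)	/* looking up one specific name */
		return (next_block_hash & ~1u) == search_hash;
	return 1;			/* readdir: visit every block */
}

int main(void)
{
	assert(demo_should_continue(0x1234, 0x1234));	/* same chain */
	assert(demo_should_continue(0x1234, 0x1235));	/* continuation bit set */
	assert(!demo_should_continue(0x1234, 0x1240));	/* different chain */
	return 0;
}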
+ */ +static int htree_dirblock_to_tree(struct file *dir_file, + struct inode *dir, int block, + struct dx_hash_info *hinfo, + __u32 start_hash, __u32 start_minor_hash) +{ + struct buffer_head *bh; + struct ext3_dir_entry_2 *de, *top; + int err, count = 0; + + dxtrace(printk("In htree dirblock_to_tree: block %d\n", block)); + if (!(bh = ext3_bread (NULL, dir, block, 0, &err))) + return err; + + de = (struct ext3_dir_entry_2 *) bh->b_data; + top = (struct ext3_dir_entry_2 *) ((char *) de + + dir->i_sb->s_blocksize - + EXT3_DIR_REC_LEN(0)); + for (; de < top; de = ext3_next_entry(de)) { + ext3fs_dirhash(de->name, de->name_len, hinfo); + if ((hinfo->hash < start_hash) || + ((hinfo->hash == start_hash) && + (hinfo->minor_hash < start_minor_hash))) + continue; + if (de->inode == 0) + continue; + if ((err = ext3_htree_store_dirent(dir_file, + hinfo->hash, hinfo->minor_hash, de)) != 0) { + brelse(bh); + return err; + } + count++; + } + brelse(bh); + return count; +} + + +/* + * This function fills a red-black tree with information from a + * directory. We start scanning the directory in hash order, starting + * at start_hash and start_minor_hash. + * + * This function returns the number of entries inserted into the tree, + * or a negative error code. + */ +int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, + __u32 start_minor_hash, __u32 *next_hash) +{ + struct dx_hash_info hinfo; + struct ext3_dir_entry_2 *de; + struct dx_frame frames[2], *frame; + struct inode *dir; + int block, err; + int count = 0; + int ret; + __u32 hashval; + + dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, + start_minor_hash)); + dir = dir_file->f_dentry->d_inode; + if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) { + hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version; + hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed; + count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo, + start_hash, start_minor_hash); + *next_hash = ~0; + return count; + } + hinfo.hash = start_hash; + hinfo.minor_hash = 0; + frame = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, frames, &err); + if (!frame) + return err; + + /* Add '.' and '..' 
from the htree header */ + if (!start_hash && !start_minor_hash) { + de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data; + if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0) + goto errout; + count++; + } + if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) { + de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data; + de = ext3_next_entry(de); + if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0) + goto errout; + count++; + } + + while (1) { + block = dx_get_block(frame->at); + ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo, + start_hash, start_minor_hash); + if (ret < 0) { + err = ret; + goto errout; + } + count += ret; + hashval = ~0; + ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, + frame, frames, &hashval); + *next_hash = hashval; + if (ret < 0) { + err = ret; + goto errout; + } + /* + * Stop if: (a) there are no more entries, or + * (b) we have inserted at least one entry and the + * next hash value is not a continuation + */ + if ((ret == 0) || + (count && ((hashval & 1) == 0))) + break; + } + dx_release(frames); + dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", + count, *next_hash)); + return count; +errout: + dx_release(frames); + return (err); +} + + +/* + * Directory block splitting, compacting + */ + +static int dx_make_map (struct ext3_dir_entry_2 *de, int size, + struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) +{ + int count = 0; + char *base = (char *) de; + struct dx_hash_info h = *hinfo; + + while ((char *) de < base + size) + { + if (de->name_len && de->inode) { + ext3fs_dirhash(de->name, de->name_len, &h); + map_tail--; + map_tail->hash = h.hash; + map_tail->offs = (u32) ((char *) de - base); + count++; + cond_resched(); + } + /* XXX: do we need to check rec_len == 0 case? -Chris */ + de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); + } + return count; +} + +static void dx_sort_map (struct dx_map_entry *map, unsigned count) +{ + struct dx_map_entry *p, *q, *top = map + count - 1; + int more; + /* Combsort until bubble sort doesn't suck */ + while (count > 2) + { + count = count*10/13; + if (count - 9 < 2) /* 9, 10 -> 11 */ + count = 11; + for (p = top, q = p - count; q >= map; p--, q--) + if (p->hash < q->hash) + swap(*p, *q); + } + /* Garden variety bubble sort */ + do { + more = 0; + q = top; + while (q-- > map) + { + if (q[1].hash >= q[0].hash) + continue; + swap(*(q+1), *q); + more = 1; + } + } while(more); +} + +static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) +{ + struct dx_entry *entries = frame->entries; + struct dx_entry *old = frame->at, *new = old + 1; + int count = dx_get_count(entries); + + assert(count < dx_get_limit(entries)); + assert(old < entries + count); + memmove(new + 1, new, (char *)(entries + count) - (char *)(new)); + dx_set_hash(new, hash); + dx_set_block(new, block); + dx_set_count(entries, count + 1); +} +#endif + + +static void ext3_update_dx_flag(struct inode *inode) +{ + if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb, + EXT3_FEATURE_COMPAT_DIR_INDEX)) + EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; +} + +/* + * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure. + * + * `len <= EXT3_NAME_LEN' is guaranteed by caller. + * `de != NULL' is guaranteed by caller. 
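/*
 * Freestanding model of dx_sort_map()'s comb-sort-then-bubble pass
 * above, on plain ints: the 10/13 gap shrink and the "9, 10 -> 11"
 * fixup follow the original; types and names are invented.
 */
#include <assert.h>

static void demo_swap(int *a, int *b) { int t = *a; *a = *b; *b = t; }

static void demo_comb_sort(int *v, unsigned n)
{
	unsigned gap = n;
	int more;

	while (gap > 2) {		/* comb passes with a shrinking gap */
		gap = gap * 10 / 13;
		if (gap == 9 || gap == 10)
			gap = 11;	/* known-good fixup for this sequence */
		for (unsigned i = n - 1; i >= gap; i--)
			if (v[i] < v[i - gap])
				demo_swap(&v[i], &v[i - gap]);
	}
	do {				/* finish with garden-variety bubble */
		more = 0;
		for (unsigned i = 1; i < n; i++)
			if (v[i] < v[i - 1]) {
				demo_swap(&v[i], &v[i - 1]);
				more = 1;
			}
	} while (more);
}

int main(void)
{
	int v[] = { 5, 1, 4, 1, 5, 9, 2, 6 };

	demo_comb_sort(v, 8);
	for (unsigned i = 1; i < 8; i++)
		assert(v[i - 1] <= v[i]);
	return 0;
}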
+ */ +static inline int ext3_match (int len, const char * const name, + struct ext3_dir_entry_2 * de) +{ + if (len != de->name_len) + return 0; + if (!de->inode) + return 0; + return !memcmp(name, de->name, len); +} + +/* + * Returns 0 if not found, -1 on failure, and 1 on success + */ +static inline int search_dirblock(struct buffer_head * bh, + struct inode *dir, + struct dentry *dentry, + unsigned long offset, + struct ext3_dir_entry_2 ** res_dir) +{ + struct ext3_dir_entry_2 * de; + char * dlimit; + int de_len; + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + + de = (struct ext3_dir_entry_2 *) bh->b_data; + dlimit = bh->b_data + dir->i_sb->s_blocksize; + while ((char *) de < dlimit) { + /* this code is executed quadratically often */ + /* do minimal checking `by hand' */ + + if ((char *) de + namelen <= dlimit && + ext3_match (namelen, name, de)) { + /* found a match - just to be sure, do a full check */ + if (!ext3_check_dir_entry("ext3_find_entry", + dir, de, bh, offset)) + return -1; + *res_dir = de; + return 1; + } + /* prevent looping on a bad block */ + de_len = le16_to_cpu(de->rec_len); + if (de_len <= 0) + return -1; + offset += de_len; + de = (struct ext3_dir_entry_2 *) ((char *) de + de_len); + } + return 0; +} + + +/* + * ext3_find_entry() + * + * finds an entry in the specified directory with the wanted name. It + * returns the cache buffer in which the entry was found, and the entry + * itself (as a parameter - res_dir). It does NOT read the inode of the + * entry - you'll have to do that yourself if you want to. + * + * The returned buffer_head has ->b_count elevated. The caller is expected + * to brelse() it when appropriate. + */ +static struct buffer_head * ext3_find_entry (struct dentry *dentry, + struct ext3_dir_entry_2 ** res_dir) +{ + struct super_block * sb; + struct buffer_head * bh_use[NAMEI_RA_SIZE]; + struct buffer_head * bh, *ret = NULL; + unsigned long start, block, b; + int ra_max = 0; /* Number of bh's in the readahead + buffer, bh_use[] */ + int ra_ptr = 0; /* Current index into readahead + buffer */ + int num = 0; + int nblocks, i, err; + struct inode *dir = dentry->d_parent->d_inode; + int namelen; + const u8 *name; + unsigned blocksize; + + *res_dir = NULL; + sb = dir->i_sb; + blocksize = sb->s_blocksize; + namelen = dentry->d_name.len; + name = dentry->d_name.name; + if (namelen > EXT3_NAME_LEN) + return NULL; +#ifdef CONFIG_EXT3_INDEX + if (is_dx(dir)) { + bh = ext3_dx_find_entry(dentry, res_dir, &err); + /* + * On success, or if the error was file not found, + * return. Otherwise, fall back to doing a search the + * old fashioned way. + */ + if (bh || (err != ERR_BAD_DX_DIR)) + return bh; + dxtrace(printk("ext3_find_entry: dx failed, falling back\n")); + } +#endif + nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); + start = EXT3_I(dir)->i_dir_start_lookup; + if (start >= nblocks) + start = 0; + block = start; +restart: + do { + /* + * We deal with the read-ahead logic here. + */ + if (ra_ptr >= ra_max) { + /* Refill the readahead buffer */ + ra_ptr = 0; + b = block; + for (ra_max = 0; ra_max < NAMEI_RA_SIZE; ra_max++) { + /* + * Terminate if we reach the end of the + * directory and must wrap, or if our + * search has finished at this block. 
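/*
 * The wrap-around walk that the enclosing do/while implements, with
 * all I/O and readahead stripped out: start at the cached lookup hint,
 * wrap at nblocks, stop when the start block comes around again, so
 * every block is visited exactly once. Numbers are invented.
 */
#include <assert.h>

#define DEMO_NBLOCKS 5

int main(void)
{
	int visited[DEMO_NBLOCKS] = { 0 };
	unsigned start = 3;			/* i_dir_start_lookup hint */
	unsigned block = start;

	do {
		visited[block]++;		/* stands in for search_dirblock() */
		if (++block >= DEMO_NBLOCKS)
			block = 0;		/* wrap to the first block */
	} while (block != start);

	for (unsigned i = 0; i < DEMO_NBLOCKS; i++)
		assert(visited[i] == 1);
	return 0;
}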
+ */ + if (b >= nblocks || (num && block == start)) { + bh_use[ra_max] = NULL; + break; + } + num++; + bh = ext3_getblk(NULL, dir, b++, 0, &err); + bh_use[ra_max] = bh; + if (bh) + ll_rw_block(READ_META, 1, &bh); + } + } + if ((bh = bh_use[ra_ptr++]) == NULL) + goto next; + wait_on_buffer(bh); + if (!buffer_uptodate(bh)) { + /* read error, skip block & hope for the best */ + ext3_error(sb, __FUNCTION__, "reading directory #%lu " + "offset %lu", dir->i_ino, block); + brelse(bh); + goto next; + } + i = search_dirblock(bh, dir, dentry, + block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); + if (i == 1) { + EXT3_I(dir)->i_dir_start_lookup = block; + ret = bh; + goto cleanup_and_exit; + } else { + brelse(bh); + if (i < 0) + goto cleanup_and_exit; + } + next: + if (++block >= nblocks) + block = 0; + } while (block != start); + + /* + * If the directory has grown while we were searching, then + * search the last part of the directory before giving up. + */ + block = nblocks; + nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); + if (block < nblocks) { + start = 0; + goto restart; + } + +cleanup_and_exit: + /* Clean up the read-ahead blocks */ + for (; ra_ptr < ra_max; ra_ptr++) + brelse (bh_use[ra_ptr]); + return ret; +} + +#ifdef CONFIG_EXT3_INDEX +static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, + struct ext3_dir_entry_2 **res_dir, int *err) +{ + struct super_block * sb; + struct dx_hash_info hinfo; + u32 hash; + struct dx_frame frames[2], *frame; + struct ext3_dir_entry_2 *de, *top; + struct buffer_head *bh; + unsigned long block; + int retval; + int namelen = dentry->d_name.len; + const u8 *name = dentry->d_name.name; + struct inode *dir = dentry->d_parent->d_inode; + + sb = dir->i_sb; + /* NFS may look up ".." - look at dx_root directory block */ + if (namelen > 2 || name[0] != '.'||(name[1] != '.' 
&& name[1] != '\0')){ + if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err))) + return NULL; + } else { + frame = frames; + frame->bh = NULL; /* for dx_release() */ + frame->at = (struct dx_entry *)frames; /* hack for zero entry*/ + dx_set_block(frame->at, 0); /* dx_root block is 0 */ + } + hash = hinfo.hash; + do { + block = dx_get_block(frame->at); + if (!(bh = ext3_bread (NULL,dir, block, 0, err))) + goto errout; + de = (struct ext3_dir_entry_2 *) bh->b_data; + top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize - + EXT3_DIR_REC_LEN(0)); + for (; de < top; de = ext3_next_entry(de)) + if (ext3_match (namelen, name, de)) { + if (!ext3_check_dir_entry("ext3_find_entry", + dir, de, bh, + (block<<EXT3_BLOCK_SIZE_BITS(sb)) + +((char *)de - bh->b_data))) { + brelse (bh); + goto errout; + } + *res_dir = de; + dx_release (frames); + return bh; + } + brelse (bh); + /* Check to see if we should continue to search */ + retval = ext3_htree_next_block(dir, hash, frame, + frames, NULL); + if (retval < 0) { + ext3_warning(sb, __FUNCTION__, + "error reading index page in directory #%lu", + dir->i_ino); + *err = retval; + goto errout; + } + } while (retval == 1); + + *err = -ENOENT; +errout: + dxtrace(printk("%s not found\n", name)); + dx_release (frames); + return NULL; +} +#endif + +static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +{ + struct inode * inode; + struct ext3_dir_entry_2 * de; + struct buffer_head * bh; + + if (dentry->d_name.len > EXT3_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + + bh = ext3_find_entry(dentry, &de); + inode = NULL; + if (bh) { + unsigned long ino = le32_to_cpu(de->inode); + brelse (bh); + if (!ext3_valid_inum(dir->i_sb, ino)) { + ext3_error(dir->i_sb, "ext3_lookup", + "bad inode number: %lu", ino); + inode = NULL; + } else + inode = iget(dir->i_sb, ino); + + if (!inode) + return ERR_PTR(-EACCES); + } + return d_splice_alias(inode, dentry); +} + + +struct dentry *ext3_get_parent(struct dentry *child) +{ + unsigned long ino; + struct dentry *parent; + struct inode *inode; + struct dentry dotdot; + struct ext3_dir_entry_2 * de; + struct buffer_head *bh; + + dotdot.d_name.name = ".."; + dotdot.d_name.len = 2; + dotdot.d_parent = child; /* confusing, isn't it! 
*/ + + bh = ext3_find_entry(&dotdot, &de); + inode = NULL; + if (!bh) + return ERR_PTR(-ENOENT); + ino = le32_to_cpu(de->inode); + brelse(bh); + + if (!ext3_valid_inum(child->d_inode->i_sb, ino)) { + ext3_error(child->d_inode->i_sb, "ext3_get_parent", + "bad inode number: %lu", ino); + inode = NULL; + } else + inode = iget(child->d_inode->i_sb, ino); + + if (!inode) + return ERR_PTR(-EACCES); + + parent = d_alloc_anon(inode); + if (!parent) { + iput(inode); + parent = ERR_PTR(-ENOMEM); + } + return parent; +} + +#define S_SHIFT 12 +static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = { + [S_IFREG >> S_SHIFT] = EXT3_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = EXT3_FT_DIR, + [S_IFCHR >> S_SHIFT] = EXT3_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = EXT3_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = EXT3_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = EXT3_FT_SOCK, + [S_IFLNK >> S_SHIFT] = EXT3_FT_SYMLINK, +}; + +static inline void ext3_set_de_type(struct super_block *sb, + struct ext3_dir_entry_2 *de, + umode_t mode) { + if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE)) + de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; +} + +#ifdef CONFIG_EXT3_INDEX +static struct ext3_dir_entry_2 * +dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) +{ + unsigned rec_len = 0; + + while (count--) { + struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs); + rec_len = EXT3_DIR_REC_LEN(de->name_len); + memcpy (to, de, rec_len); + ((struct ext3_dir_entry_2 *) to)->rec_len = + cpu_to_le16(rec_len); + de->inode = 0; + map++; + to += rec_len; + } + return (struct ext3_dir_entry_2 *) (to - rec_len); +} + +static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) +{ + struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; + unsigned rec_len = 0; + + prev = to = de; + while ((char*)de < base + size) { + next = (struct ext3_dir_entry_2 *) ((char *) de + + le16_to_cpu(de->rec_len)); + if (de->inode && de->name_len) { + rec_len = EXT3_DIR_REC_LEN(de->name_len); + if (de > to) + memmove(to, de, rec_len); + to->rec_len = cpu_to_le16(rec_len); + prev = to; + to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len); + } + de = next; + } + return prev; +} + +static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, + struct buffer_head **bh,struct dx_frame *frame, + struct dx_hash_info *hinfo, int *error) +{ + unsigned blocksize = dir->i_sb->s_blocksize; + unsigned count, continued; + struct buffer_head *bh2; + u32 newblock; + u32 hash2; + struct dx_map_entry *map; + char *data1 = (*bh)->b_data, *data2; + unsigned split; + struct ext3_dir_entry_2 *de = NULL, *de2; + int err; + + bh2 = ext3_append (handle, dir, &newblock, error); + if (!(bh2)) { + brelse(*bh); + *bh = NULL; + goto errout; + } + + BUFFER_TRACE(*bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, *bh); + if (err) { + journal_error: + brelse(*bh); + brelse(bh2); + *bh = NULL; + ext3_std_error(dir->i_sb, err); + goto errout; + } + BUFFER_TRACE(frame->bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, frame->bh); + if (err) + goto journal_error; + + data2 = bh2->b_data; + + /* create map in the end of data2 block */ + map = (struct dx_map_entry *) (data2 + blocksize); + count = dx_make_map ((struct ext3_dir_entry_2 *) data1, + blocksize, hinfo, map); + map -= count; + split = count/2; // need to adjust to actual middle + dx_sort_map (map, count); + hash2 = map[split].hash; + continued = hash2 == map[split - 1].hash; + 
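/*
 * Freestanding sketch of the split-point choice just made: cut the
 * sorted hash map at the middle, and if both sides share the median
 * hash, tag the boundary with the low "continuation" bit so lookups
 * visit both blocks. Hash values are invented.
 */
#include <assert.h>
#include <stdint.h>

int main(void)
{
	/* what dx_make_map() + dx_sort_map() would leave behind */
	uint32_t hashes[] = { 0x10, 0x20, 0x30, 0x30, 0x40, 0x50 };
	unsigned count = 6;
	unsigned split = count / 2;
	uint32_t hash2 = hashes[split];			/* 0x30 */
	int continued = hash2 == hashes[split - 1];	/* collision at cut */

	assert(continued == 1);
	/* new index entry gets hash2 + continued: odd means "chained" */
	assert(((hash2 + continued) & 1) == 1);
	return 0;
}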
dxtrace(printk("Split block %i at %x, %i/%i\n", + dx_get_block(frame->at), hash2, split, count-split)); + + /* Fancy dance to stay within two buffers */ + de2 = dx_move_dirents(data1, data2, map + split, count - split); + de = dx_pack_dirents(data1,blocksize); + de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); + de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); + dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1)); + dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1)); + + /* Which block gets the new entry? */ + if (hinfo->hash >= hash2) + { + swap(*bh, bh2); + de = de2; + } + dx_insert_block (frame, hash2 + continued, newblock); + err = ext3_journal_dirty_metadata (handle, bh2); + if (err) + goto journal_error; + err = ext3_journal_dirty_metadata (handle, frame->bh); + if (err) + goto journal_error; + brelse (bh2); + dxtrace(dx_show_index ("frame", frame->entries)); +errout: + return de; +} +#endif + + +/* + * Add a new entry into a directory (leaf) block. If de is non-NULL, + * it points to a directory entry which is guaranteed to be large + * enough for new directory entry. If de is NULL, then + * add_dirent_to_buf will attempt search the directory block for + * space. It will return -ENOSPC if no space is available, and -EIO + * and -EEXIST if directory entry already exists. + * + * NOTE! bh is NOT released in the case where ENOSPC is returned. In + * all other cases bh is released. + */ +static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, + struct inode *inode, struct ext3_dir_entry_2 *de, + struct buffer_head * bh) +{ + struct inode *dir = dentry->d_parent->d_inode; + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + unsigned long offset = 0; + unsigned short reclen; + int nlen, rlen, err; + char *top; + + reclen = EXT3_DIR_REC_LEN(namelen); + if (!de) { + de = (struct ext3_dir_entry_2 *)bh->b_data; + top = bh->b_data + dir->i_sb->s_blocksize - reclen; + while ((char *) de <= top) { + if (!ext3_check_dir_entry("ext3_add_entry", dir, de, + bh, offset)) { + brelse (bh); + return -EIO; + } + if (ext3_match (namelen, name, de)) { + brelse (bh); + return -EEXIST; + } + nlen = EXT3_DIR_REC_LEN(de->name_len); + rlen = le16_to_cpu(de->rec_len); + if ((de->inode? rlen - nlen: rlen) >= reclen) + break; + de = (struct ext3_dir_entry_2 *)((char *)de + rlen); + offset += rlen; + } + if ((char *) de > top) + return -ENOSPC; + } + BUFFER_TRACE(bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, bh); + if (err) { + ext3_std_error(dir->i_sb, err); + brelse(bh); + return err; + } + + /* By now the buffer is marked for journaling */ + nlen = EXT3_DIR_REC_LEN(de->name_len); + rlen = le16_to_cpu(de->rec_len); + if (de->inode) { + struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen); + de1->rec_len = cpu_to_le16(rlen - nlen); + de->rec_len = cpu_to_le16(nlen); + de = de1; + } + de->file_type = EXT3_FT_UNKNOWN; + if (inode) { + de->inode = cpu_to_le32(inode->i_ino); + ext3_set_de_type(dir->i_sb, de, inode->i_mode); + } else + de->inode = 0; + de->name_len = namelen; + memcpy (de->name, name, namelen); + /* + * XXX shouldn't update any times until successful + * completion of syscall, but too many callers depend + * on this. 
+ * + * XXX similarly, too many callers depend on + * ext3_new_inode() setting the times, but error + * recovery deletes the inode, so the worst that can + * happen is that the times are slightly out of date + * and/or different from the directory change time. + */ + dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; + ext3_update_dx_flag(dir); + dir->i_version++; + ext3_mark_inode_dirty(handle, dir); + BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, bh); + if (err) + ext3_std_error(dir->i_sb, err); + brelse(bh); + return 0; +} + +#ifdef CONFIG_EXT3_INDEX +/* + * This converts a one block unindexed directory to a 3 block indexed + * directory, and adds the dentry to the indexed directory. + */ +static int make_indexed_dir(handle_t *handle, struct dentry *dentry, + struct inode *inode, struct buffer_head *bh) +{ + struct inode *dir = dentry->d_parent->d_inode; + const char *name = dentry->d_name.name; + int namelen = dentry->d_name.len; + struct buffer_head *bh2; + struct dx_root *root; + struct dx_frame frames[2], *frame; + struct dx_entry *entries; + struct ext3_dir_entry_2 *de, *de2; + char *data1, *top; + unsigned len; + int retval; + unsigned blocksize; + struct dx_hash_info hinfo; + u32 block; + struct fake_dirent *fde; + + blocksize = dir->i_sb->s_blocksize; + dxtrace(printk("Creating index\n")); + retval = ext3_journal_get_write_access(handle, bh); + if (retval) { + ext3_std_error(dir->i_sb, retval); + brelse(bh); + return retval; + } + root = (struct dx_root *) bh->b_data; + + bh2 = ext3_append (handle, dir, &block, &retval); + if (!(bh2)) { + brelse(bh); + return retval; + } + EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; + data1 = bh2->b_data; + + /* The 0th block becomes the root, move the dirents out */ + fde = &root->dotdot; + de = (struct ext3_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len)); + len = ((char *) root) + blocksize - (char *) de; + memcpy (data1, de, len); + de = (struct ext3_dir_entry_2 *) data1; + top = data1 + len; + while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top) + de = de2; + de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); + /* Initialize the root; the dot dirents already exist */ + de = (struct ext3_dir_entry_2 *) (&root->dotdot); + de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2)); + memset (&root->info, 0, sizeof(root->info)); + root->info.info_length = sizeof(root->info); + root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version; + entries = root->entries; + dx_set_block (entries, 1); + dx_set_count (entries, 1); + dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info))); + + /* Initialize as for dx_probe */ + hinfo.hash_version = root->info.hash_version; + hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed; + ext3fs_dirhash(name, namelen, &hinfo); + frame = frames; + frame->entries = entries; + frame->at = entries; + frame->bh = bh; + bh = bh2; + de = do_split(handle,dir, &bh, frame, &hinfo, &retval); + dx_release (frames); + if (!(de)) + return retval; + + return add_dirent_to_buf(handle, dentry, inode, de, bh); +} +#endif + +/* + * ext3_add_entry() + * + * adds a file entry to the specified directory, using the same + * semantics as ext3_find_entry(). It returns NULL if it failed. + * + * NOTE!! The inode part of 'de' is left at 0 - which means you + * may not sleep between calling this and putting something into + * the entry, as someone else might have used it while you slept. 
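/*
 * Rec_len bookkeeping when make_indexed_dir() above turns block 0 into
 * the dx root, with concrete numbers for an assumed 4096-byte block:
 * "." keeps its 12 bytes and ".." is stretched to swallow everything
 * after it, so pre-htree code skips straight past the index data.
 */
#include <assert.h>

#define DEMO_REC_LEN(name_len) (((name_len) + 8 + 3) & ~3u)

int main(void)
{
	unsigned blocksize = 4096;
	unsigned dot_len = DEMO_REC_LEN(1);		   /* "." record: 12 */
	unsigned dotdot_len = blocksize - DEMO_REC_LEN(2); /* rest of block */

	assert(dot_len == 12);
	assert(dotdot_len == 4084);
	assert(dot_len + dotdot_len == blocksize);	/* block fully covered */
	return 0;
}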
+ */ +static int ext3_add_entry (handle_t *handle, struct dentry *dentry, + struct inode *inode) +{ + struct inode *dir = dentry->d_parent->d_inode; + unsigned long offset; + struct buffer_head * bh; + struct ext3_dir_entry_2 *de; + struct super_block * sb; + int retval; +#ifdef CONFIG_EXT3_INDEX + int dx_fallback=0; +#endif + unsigned blocksize; + u32 block, blocks; + + sb = dir->i_sb; + blocksize = sb->s_blocksize; + if (!dentry->d_name.len) + return -EINVAL; +#ifdef CONFIG_EXT3_INDEX + if (is_dx(dir)) { + retval = ext3_dx_add_entry(handle, dentry, inode); + if (!retval || (retval != ERR_BAD_DX_DIR)) + return retval; + EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL; + dx_fallback++; + ext3_mark_inode_dirty(handle, dir); + } +#endif + blocks = dir->i_size >> sb->s_blocksize_bits; + for (block = 0, offset = 0; block < blocks; block++) { + bh = ext3_bread(handle, dir, block, 0, &retval); + if(!bh) + return retval; + retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); + if (retval != -ENOSPC) + return retval; + +#ifdef CONFIG_EXT3_INDEX + if (blocks == 1 && !dx_fallback && + EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) + return make_indexed_dir(handle, dentry, inode, bh); +#endif + brelse(bh); + } + bh = ext3_append(handle, dir, &block, &retval); + if (!bh) + return retval; + de = (struct ext3_dir_entry_2 *) bh->b_data; + de->inode = 0; + de->rec_len = cpu_to_le16(blocksize); + return add_dirent_to_buf(handle, dentry, inode, de, bh); +} + +#ifdef CONFIG_EXT3_INDEX +/* + * Returns 0 for success, or a negative error value + */ +static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, + struct inode *inode) +{ + struct dx_frame frames[2], *frame; + struct dx_entry *entries, *at; + struct dx_hash_info hinfo; + struct buffer_head * bh; + struct inode *dir = dentry->d_parent->d_inode; + struct super_block * sb = dir->i_sb; + struct ext3_dir_entry_2 *de; + int err; + + frame = dx_probe(dentry, NULL, &hinfo, frames, &err); + if (!frame) + return err; + entries = frame->entries; + at = frame->at; + + if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err))) + goto cleanup; + + BUFFER_TRACE(bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, bh); + if (err) + goto journal_error; + + err = add_dirent_to_buf(handle, dentry, inode, NULL, bh); + if (err != -ENOSPC) { + bh = NULL; + goto cleanup; + } + + /* Block full, should compress but for now just split */ + dxtrace(printk("using %u of %u node entries\n", + dx_get_count(entries), dx_get_limit(entries))); + /* Need to split index? 
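/*
 * The arithmetic of the interior-node split coded just below, as a
 * freestanding sketch: half the entries stay, half move to the new
 * node, and the insertion cursor follows its entry to whichever half
 * now holds it. Values are invented.
 */
#include <assert.h>

int main(void)
{
	unsigned icount = 10;		/* entries in the full node */
	unsigned at = 7;		/* index of the insertion cursor */
	unsigned icount1 = icount / 2;		/* [0, 5) stay put */
	unsigned icount2 = icount - icount1;	/* [5, 10) move out */

	assert(icount1 == 5 && icount2 == 5);
	if (at >= icount1)		/* cursor's entry moved with the split */
		at -= icount1;		/* re-aim it at the new node */
	assert(at == 2);
	return 0;
}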
*/ + if (dx_get_count(entries) == dx_get_limit(entries)) { + u32 newblock; + unsigned icount = dx_get_count(entries); + int levels = frame - frames; + struct dx_entry *entries2; + struct dx_node *node2; + struct buffer_head *bh2; + + if (levels && (dx_get_count(frames->entries) == + dx_get_limit(frames->entries))) { + ext3_warning(sb, __FUNCTION__, + "Directory index full!"); + err = -ENOSPC; + goto cleanup; + } + bh2 = ext3_append (handle, dir, &newblock, &err); + if (!(bh2)) + goto cleanup; + node2 = (struct dx_node *)(bh2->b_data); + entries2 = node2->entries; + node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); + node2->fake.inode = 0; + BUFFER_TRACE(frame->bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, frame->bh); + if (err) + goto journal_error; + if (levels) { + unsigned icount1 = icount/2, icount2 = icount - icount1; + unsigned hash2 = dx_get_hash(entries + icount1); + dxtrace(printk("Split index %i/%i\n", icount1, icount2)); + + BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ + err = ext3_journal_get_write_access(handle, + frames[0].bh); + if (err) + goto journal_error; + + memcpy ((char *) entries2, (char *) (entries + icount1), + icount2 * sizeof(struct dx_entry)); + dx_set_count (entries, icount1); + dx_set_count (entries2, icount2); + dx_set_limit (entries2, dx_node_limit(dir)); + + /* Which index block gets the new entry? */ + if (at - entries >= icount1) { + frame->at = at = at - entries - icount1 + entries2; + frame->entries = entries = entries2; + swap(frame->bh, bh2); + } + dx_insert_block (frames + 0, hash2, newblock); + dxtrace(dx_show_index ("node", frames[1].entries)); + dxtrace(dx_show_index ("node", + ((struct dx_node *) bh2->b_data)->entries)); + err = ext3_journal_dirty_metadata(handle, bh2); + if (err) + goto journal_error; + brelse (bh2); + } else { + dxtrace(printk("Creating second level index...\n")); + memcpy((char *) entries2, (char *) entries, + icount * sizeof(struct dx_entry)); + dx_set_limit(entries2, dx_node_limit(dir)); + + /* Set up root */ + dx_set_count(entries, 1); + dx_set_block(entries + 0, newblock); + ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1; + + /* Add new access path frame */ + frame = frames + 1; + frame->at = at = at - entries + entries2; + frame->entries = entries = entries2; + frame->bh = bh2; + err = ext3_journal_get_write_access(handle, + frame->bh); + if (err) + goto journal_error; + } + ext3_journal_dirty_metadata(handle, frames[0].bh); + } + de = do_split(handle, dir, &bh, frame, &hinfo, &err); + if (!de) + goto cleanup; + err = add_dirent_to_buf(handle, dentry, inode, de, bh); + bh = NULL; + goto cleanup; + +journal_error: + ext3_std_error(dir->i_sb, err); +cleanup: + if (bh) + brelse(bh); + dx_release(frames); + return err; +} +#endif + +/* + * ext3_delete_entry deletes a directory entry by merging it with the + * previous entry + */ +static int ext3_delete_entry (handle_t *handle, + struct inode * dir, + struct ext3_dir_entry_2 * de_del, + struct buffer_head * bh) +{ + struct ext3_dir_entry_2 * de, * pde; + int i; + + i = 0; + pde = NULL; + de = (struct ext3_dir_entry_2 *) bh->b_data; + while (i < bh->b_size) { + if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i)) + return -EIO; + if (de == de_del) { + BUFFER_TRACE(bh, "get_write_access"); + ext3_journal_get_write_access(handle, bh); + if (pde) + pde->rec_len = + cpu_to_le16(le16_to_cpu(pde->rec_len) + + le16_to_cpu(de->rec_len)); + else + de->inode = 0; + dir->i_version++; + BUFFER_TRACE(bh, "call 
ext3_journal_dirty_metadata"); + ext3_journal_dirty_metadata(handle, bh); + return 0; + } + i += le16_to_cpu(de->rec_len); + pde = de; + de = (struct ext3_dir_entry_2 *) + ((char *) de + le16_to_cpu(de->rec_len)); + } + return -ENOENT; +} + +/* + * ext3_mark_inode_dirty is somewhat expensive, so unlike ext2 we + * do not perform it in these functions. We perform it at the call site, + * if it is needed. + */ +static inline void ext3_inc_count(handle_t *handle, struct inode *inode) +{ + inc_nlink(inode); +} + +static inline void ext3_dec_count(handle_t *handle, struct inode *inode) +{ + drop_nlink(inode); +} + +static int ext3_add_nondir(handle_t *handle, + struct dentry *dentry, struct inode *inode) +{ + int err = ext3_add_entry(handle, dentry, inode); + if (!err) { + ext3_mark_inode_dirty(handle, inode); + d_instantiate(dentry, inode); + return 0; + } + ext3_dec_count(handle, inode); + iput(inode); + return err; +} + +/* + * By the time this is called, we already have created + * the directory cache entry for the new file, but it + * is so far negative - it has no inode. + * + * If the create succeeds, we fill in the inode information + * with d_instantiate(). + */ +static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, + struct nameidata *nd) +{ + handle_t *handle; + struct inode * inode; + int err, retries = 0; + +retry: + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + + 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + + inode = ext3_new_inode (handle, dir, mode); + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + inode->i_op = &ext3_file_inode_operations; + inode->i_fop = &ext3_file_operations; + ext3_set_aops(inode); + err = ext3_add_nondir(handle, dentry, inode); + } + ext3_journal_stop(handle); + if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) + goto retry; + return err; +} + +static int ext3_mknod (struct inode * dir, struct dentry *dentry, + int mode, dev_t rdev) +{ + handle_t *handle; + struct inode *inode; + int err, retries = 0; + + if (!new_valid_dev(rdev)) + return -EINVAL; + +retry: + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + + 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + + inode = ext3_new_inode (handle, dir, mode); + err = PTR_ERR(inode); + if (!IS_ERR(inode)) { + init_special_inode(inode, inode->i_mode, rdev); +#ifdef CONFIG_EXT3_FS_XATTR + inode->i_op = &ext3_special_inode_operations; +#endif + err = ext3_add_nondir(handle, dentry, inode); + } + ext3_journal_stop(handle); + if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) + goto retry; + return err; +} + +static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode) +{ + handle_t *handle; + struct inode * inode; + struct buffer_head * dir_block; + struct ext3_dir_entry_2 * de; + int err, retries = 0; + + if (dir->i_nlink >= EXT3_LINK_MAX) + return -EMLINK; + +retry: + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + + 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + + inode = ext3_new_inode (handle, dir, S_IFDIR | mode); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_stop; + + inode->i_op = 
&ext3_dir_inode_operations; + inode->i_fop = &ext3_dir_operations; + inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; + dir_block = ext3_bread (handle, inode, 0, 1, &err); + if (!dir_block) { + drop_nlink(inode); /* is this nlink == 0? */ + ext3_mark_inode_dirty(handle, inode); + iput (inode); + goto out_stop; + } + BUFFER_TRACE(dir_block, "get_write_access"); + ext3_journal_get_write_access(handle, dir_block); + de = (struct ext3_dir_entry_2 *) dir_block->b_data; + de->inode = cpu_to_le32(inode->i_ino); + de->name_len = 1; + de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de->name_len)); + strcpy (de->name, "."); + ext3_set_de_type(dir->i_sb, de, S_IFDIR); + de = (struct ext3_dir_entry_2 *) + ((char *) de + le16_to_cpu(de->rec_len)); + de->inode = cpu_to_le32(dir->i_ino); + de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT3_DIR_REC_LEN(1)); + de->name_len = 2; + strcpy (de->name, ".."); + ext3_set_de_type(dir->i_sb, de, S_IFDIR); + inode->i_nlink = 2; + BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); + ext3_journal_dirty_metadata(handle, dir_block); + brelse (dir_block); + ext3_mark_inode_dirty(handle, inode); + err = ext3_add_entry (handle, dentry, inode); + if (err) { + inode->i_nlink = 0; + ext3_mark_inode_dirty(handle, inode); + iput (inode); + goto out_stop; + } + inc_nlink(dir); + ext3_update_dx_flag(dir); + ext3_mark_inode_dirty(handle, dir); + d_instantiate(dentry, inode); +out_stop: + ext3_journal_stop(handle); + if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) + goto retry; + return err; +} + +/* + * routine to check that the specified directory is empty (for rmdir) + */ +static int empty_dir (struct inode * inode) +{ + unsigned long offset; + struct buffer_head * bh; + struct ext3_dir_entry_2 * de, * de1; + struct super_block * sb; + int err = 0; + + sb = inode->i_sb; + if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) || + !(bh = ext3_bread (NULL, inode, 0, 0, &err))) { + if (err) + ext3_error(inode->i_sb, __FUNCTION__, + "error %d reading directory #%lu offset 0", + err, inode->i_ino); + else + ext3_warning(inode->i_sb, __FUNCTION__, + "bad directory (dir #%lu) - no data block", + inode->i_ino); + return 1; + } + de = (struct ext3_dir_entry_2 *) bh->b_data; + de1 = (struct ext3_dir_entry_2 *) + ((char *) de + le16_to_cpu(de->rec_len)); + if (le32_to_cpu(de->inode) != inode->i_ino || + !le32_to_cpu(de1->inode) || + strcmp (".", de->name) || + strcmp ("..", de1->name)) { + ext3_warning (inode->i_sb, "empty_dir", + "bad directory (dir #%lu) - no `.' 
or `..'", + inode->i_ino); + brelse (bh); + return 1; + } + offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len); + de = (struct ext3_dir_entry_2 *) + ((char *) de1 + le16_to_cpu(de1->rec_len)); + while (offset < inode->i_size ) { + if (!bh || + (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { + err = 0; + brelse (bh); + bh = ext3_bread (NULL, inode, + offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err); + if (!bh) { + if (err) + ext3_error(sb, __FUNCTION__, + "error %d reading directory" + " #%lu offset %lu", + err, inode->i_ino, offset); + offset += sb->s_blocksize; + continue; + } + de = (struct ext3_dir_entry_2 *) bh->b_data; + } + if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) { + de = (struct ext3_dir_entry_2 *)(bh->b_data + + sb->s_blocksize); + offset = (offset | (sb->s_blocksize - 1)) + 1; + continue; + } + if (le32_to_cpu(de->inode)) { + brelse (bh); + return 0; + } + offset += le16_to_cpu(de->rec_len); + de = (struct ext3_dir_entry_2 *) + ((char *) de + le16_to_cpu(de->rec_len)); + } + brelse (bh); + return 1; +} + +/* ext3_orphan_add() links an unlinked or truncated inode into a list of + * such inodes, starting at the superblock, in case we crash before the + * file is closed/deleted, or in case the inode truncate spans multiple + * transactions and the last transaction is not recovered after a crash. + * + * At filesystem recovery time, we walk this list deleting unlinked + * inodes and truncating linked inodes in ext3_orphan_cleanup(). + */ +int ext3_orphan_add(handle_t *handle, struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + struct ext3_iloc iloc; + int err = 0, rc; + + lock_super(sb); + if (!list_empty(&EXT3_I(inode)->i_orphan)) + goto out_unlock; + + /* Orphan handling is only valid for files with data blocks + * being truncated, or files being unlinked. */ + + /* @@@ FIXME: Observation from aviro: + * I think I can trigger J_ASSERT in ext3_orphan_add(). We block + * here (on lock_super()), so race with ext3_link() which might bump + * ->i_nlink. For, say it, character device. Not a regular file, + * not a directory, not a symlink and ->i_nlink > 0. + */ + J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || + S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); + + BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); + err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); + if (err) + goto out_unlock; + + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (err) + goto out_unlock; + + /* Insert this inode at the head of the on-disk orphan list... */ + NEXT_ORPHAN(inode) = le32_to_cpu(EXT3_SB(sb)->s_es->s_last_orphan); + EXT3_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); + err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + rc = ext3_mark_iloc_dirty(handle, inode, &iloc); + if (!err) + err = rc; + + /* Only add to the head of the in-memory list if all the + * previous operations succeeded. If the orphan_add is going to + * fail (possibly taking the journal offline), we can't risk + * leaving the inode on the orphan list: stray orphan-list + * entries can cause panics at unmount time. + * + * This is safe: on error we're going to ignore the orphan list + * anyway on the next recovery. 
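/*
 * Shape of the on-disk orphan chain maintained here: the superblock
 * points at the most recently orphaned inode and each orphan's
 * NEXT_ORPHAN field points at the one added before it -- a push-front
 * singly linked list keyed by inode number. Pure simulation; the
 * demo_* arrays stand in for superblock and inode fields.
 */
#include <assert.h>

#define DEMO_MAX_INO 16

static unsigned demo_last_orphan;		/* s_last_orphan */
static unsigned demo_next[DEMO_MAX_INO];	/* NEXT_ORPHAN(ino) */

static void demo_orphan_add(unsigned ino)
{
	demo_next[ino] = demo_last_orphan;	/* old head becomes our next */
	demo_last_orphan = ino;			/* superblock points at us */
}

int main(void)
{
	demo_orphan_add(5);
	demo_orphan_add(9);
	assert(demo_last_orphan == 9);
	assert(demo_next[9] == 5 && demo_next[5] == 0);	/* 0 terminates */
	return 0;
}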
*/ + if (!err) + list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); + + jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); + jbd_debug(4, "orphan inode %lu will point to %d\n", + inode->i_ino, NEXT_ORPHAN(inode)); +out_unlock: + unlock_super(sb); + ext3_std_error(inode->i_sb, err); + return err; +} + +/* + * ext3_orphan_del() removes an unlinked or truncated inode from the list + * of such inodes stored on disk, because it is finally being cleaned up. + */ +int ext3_orphan_del(handle_t *handle, struct inode *inode) +{ + struct list_head *prev; + struct ext3_inode_info *ei = EXT3_I(inode); + struct ext3_sb_info *sbi; + unsigned long ino_next; + struct ext3_iloc iloc; + int err = 0; + + lock_super(inode->i_sb); + if (list_empty(&ei->i_orphan)) { + unlock_super(inode->i_sb); + return 0; + } + + ino_next = NEXT_ORPHAN(inode); + prev = ei->i_orphan.prev; + sbi = EXT3_SB(inode->i_sb); + + jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); + + list_del_init(&ei->i_orphan); + + /* If we're on an error path, we may not have a valid + * transaction handle with which to update the orphan list on + * disk, but we still need to remove the inode from the linked + * list in memory. */ + if (!handle) + goto out; + + err = ext3_reserve_inode_write(handle, inode, &iloc); + if (err) + goto out_err; + + if (prev == &sbi->s_orphan) { + jbd_debug(4, "superblock will point to %lu\n", ino_next); + BUFFER_TRACE(sbi->s_sbh, "get_write_access"); + err = ext3_journal_get_write_access(handle, sbi->s_sbh); + if (err) + goto out_brelse; + sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); + err = ext3_journal_dirty_metadata(handle, sbi->s_sbh); + } else { + struct ext3_iloc iloc2; + struct inode *i_prev = + &list_entry(prev, struct ext3_inode_info, i_orphan)->vfs_inode; + + jbd_debug(4, "orphan inode %lu will point to %lu\n", + i_prev->i_ino, ino_next); + err = ext3_reserve_inode_write(handle, i_prev, &iloc2); + if (err) + goto out_brelse; + NEXT_ORPHAN(i_prev) = ino_next; + err = ext3_mark_iloc_dirty(handle, i_prev, &iloc2); + } + if (err) + goto out_brelse; + NEXT_ORPHAN(inode) = 0; + err = ext3_mark_iloc_dirty(handle, inode, &iloc); + +out_err: + ext3_std_error(inode->i_sb, err); +out: + unlock_super(inode->i_sb); + return err; + +out_brelse: + brelse(iloc.bh); + goto out_err; +} + +static int ext3_rmdir (struct inode * dir, struct dentry *dentry) +{ + int retval; + struct inode * inode; + struct buffer_head * bh; + struct ext3_dir_entry_2 * de; + handle_t *handle; + + /* Initialize quotas before so that eventual writes go in + * separate transaction */ + DQUOT_INIT(dentry->d_inode); + handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + retval = -ENOENT; + bh = ext3_find_entry (dentry, &de); + if (!bh) + goto end_rmdir; + + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + + inode = dentry->d_inode; + + retval = -EIO; + if (le32_to_cpu(de->inode) != inode->i_ino) + goto end_rmdir; + + retval = -ENOTEMPTY; + if (!empty_dir (inode)) + goto end_rmdir; + + retval = ext3_delete_entry(handle, dir, de, bh); + if (retval) + goto end_rmdir; + if (inode->i_nlink != 2) + ext3_warning (inode->i_sb, "ext3_rmdir", + "empty directory has nlink!=2 (%d)", + inode->i_nlink); + inode->i_version++; + clear_nlink(inode); + /* There's no need to set i_disksize: the fact that i_nlink is + * zero will ensure that the right thing happens during any + * recovery. 
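/*
 * Counterpart sketch for ext3_orphan_del() above: removing an orphan
 * splices its successor into whatever pointed at it, superblock head
 * or another orphan's NEXT field. The real code locates the
 * predecessor via the in-memory i_orphan list; this toy version just
 * walks the chain. demo_* names are invented.
 */
#include <assert.h>

#define DEMO_MAX_INO 16

static unsigned demo_last_orphan;
static unsigned demo_next[DEMO_MAX_INO];

static void demo_orphan_add(unsigned ino)
{
	demo_next[ino] = demo_last_orphan;
	demo_last_orphan = ino;
}

static void demo_orphan_del(unsigned ino)
{
	unsigned *link = &demo_last_orphan;

	while (*link && *link != ino)	/* find whoever points at us */
		link = &demo_next[*link];
	if (*link) {
		*link = demo_next[ino];	/* splice ourselves out */
		demo_next[ino] = 0;
	}
}

int main(void)
{
	demo_orphan_add(5);
	demo_orphan_add(9);
	demo_orphan_del(5);		/* remove a non-head entry */
	assert(demo_last_orphan == 9 && demo_next[9] == 0);
	return 0;
}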
*/ + inode->i_size = 0; + ext3_orphan_add(handle, inode); + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; + ext3_mark_inode_dirty(handle, inode); + drop_nlink(dir); + ext3_update_dx_flag(dir); + ext3_mark_inode_dirty(handle, dir); + +end_rmdir: + ext3_journal_stop(handle); + brelse (bh); + return retval; +} + +static int ext3_unlink(struct inode * dir, struct dentry *dentry) +{ + int retval; + struct inode * inode; + struct buffer_head * bh; + struct ext3_dir_entry_2 * de; + handle_t *handle; + + /* Initialize quotas before so that eventual writes go + * in separate transaction */ + DQUOT_INIT(dentry->d_inode); + handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + + retval = -ENOENT; + bh = ext3_find_entry (dentry, &de); + if (!bh) + goto end_unlink; + + inode = dentry->d_inode; + + retval = -EIO; + if (le32_to_cpu(de->inode) != inode->i_ino) + goto end_unlink; + + if (!inode->i_nlink) { + ext3_warning (inode->i_sb, "ext3_unlink", + "Deleting nonexistent file (%lu), %d", + inode->i_ino, inode->i_nlink); + inode->i_nlink = 1; + } + retval = ext3_delete_entry(handle, dir, de, bh); + if (retval) + goto end_unlink; + dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; + ext3_update_dx_flag(dir); + ext3_mark_inode_dirty(handle, dir); + drop_nlink(inode); + if (!inode->i_nlink) + ext3_orphan_add(handle, inode); + inode->i_ctime = dir->i_ctime; + ext3_mark_inode_dirty(handle, inode); + retval = 0; + +end_unlink: + ext3_journal_stop(handle); + brelse (bh); + return retval; +} + +static int ext3_symlink (struct inode * dir, + struct dentry *dentry, const char * symname) +{ + handle_t *handle; + struct inode * inode; + int l, err, retries = 0; + + l = strlen(symname)+1; + if (l > dir->i_sb->s_blocksize) + return -ENAMETOOLONG; + +retry: + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + + 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + + inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); + err = PTR_ERR(inode); + if (IS_ERR(inode)) + goto out_stop; + + if (l > sizeof (EXT3_I(inode)->i_data)) { + inode->i_op = &ext3_symlink_inode_operations; + ext3_set_aops(inode); + /* + * page_symlink() calls into ext3_prepare/commit_write. + * We have a transaction open. All is sweetness. It also sets + * i_size in generic_commit_write(). 
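/*
 * The fast/slow symlink cutoff tested above, in concrete numbers,
 * assuming EXT3_N_BLOCKS == 15 and 4-byte on-disk block pointers:
 * targets that fit in the inode's i_data array (60 bytes, counting
 * the trailing NUL) are stored inline; longer ones go through the
 * page cache via page_symlink().
 */
#include <assert.h>
#include <string.h>

#define DEMO_N_BLOCKS 15	/* assumed EXT3_N_BLOCKS */

int main(void)
{
	unsigned i_data_bytes = DEMO_N_BLOCKS * 4;	/* sizeof(i_data) */
	const char *target = "short-target";
	unsigned l = strlen(target) + 1;		/* NUL included */

	assert(i_data_bytes == 60);
	assert(l <= i_data_bytes);	/* -> fast symlink, stored inline */
	return 0;
}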
+ */ + err = __page_symlink(inode, symname, l, + mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); + if (err) { + ext3_dec_count(handle, inode); + ext3_mark_inode_dirty(handle, inode); + iput (inode); + goto out_stop; + } + } else { + inode->i_op = &ext3_fast_symlink_inode_operations; + memcpy((char*)&EXT3_I(inode)->i_data,symname,l); + inode->i_size = l-1; + } + EXT3_I(inode)->i_disksize = inode->i_size; + err = ext3_add_nondir(handle, dentry, inode); +out_stop: + ext3_journal_stop(handle); + if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) + goto retry; + return err; +} + +static int ext3_link (struct dentry * old_dentry, + struct inode * dir, struct dentry *dentry) +{ + handle_t *handle; + struct inode *inode = old_dentry->d_inode; + int err, retries = 0; + + if (inode->i_nlink >= EXT3_LINK_MAX) + return -EMLINK; + +retry: + handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT3_INDEX_EXTRA_TRANS_BLOCKS); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + if (IS_DIRSYNC(dir)) + handle->h_sync = 1; + + inode->i_ctime = CURRENT_TIME_SEC; + ext3_inc_count(handle, inode); + atomic_inc(&inode->i_count); + + err = ext3_add_nondir(handle, dentry, inode); + ext3_journal_stop(handle); + if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) + goto retry; + return err; +} + +#define PARENT_INO(buffer) \ + ((struct ext3_dir_entry_2 *) ((char *) buffer + \ + le16_to_cpu(((struct ext3_dir_entry_2 *) buffer)->rec_len)))->inode + +/* + * Anybody can rename anything with this: the permission checks are left to the + * higher-level routines. + */ +static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, + struct inode * new_dir,struct dentry *new_dentry) +{ + handle_t *handle; + struct inode * old_inode, * new_inode; + struct buffer_head * old_bh, * new_bh, * dir_bh; + struct ext3_dir_entry_2 * old_de, * new_de; + int retval; + + old_bh = new_bh = dir_bh = NULL; + + /* Initialize quotas before so that eventual writes go + * in separate transaction */ + if (new_dentry->d_inode) + DQUOT_INIT(new_dentry->d_inode); + handle = ext3_journal_start(old_dir, 2 * + EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) + + EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) + handle->h_sync = 1; + + old_bh = ext3_find_entry (old_dentry, &old_de); + /* + * Check for inode number is _not_ due to possible IO errors. + * We might rmdir the source, keep it as pwd of some process + * and merrily kill the link to whatever was created under the + * same name. 
Goodbye sticky bit ;-< + */ + old_inode = old_dentry->d_inode; + retval = -ENOENT; + if (!old_bh || le32_to_cpu(old_de->inode) != old_inode->i_ino) + goto end_rename; + + new_inode = new_dentry->d_inode; + new_bh = ext3_find_entry (new_dentry, &new_de); + if (new_bh) { + if (!new_inode) { + brelse (new_bh); + new_bh = NULL; + } + } + if (S_ISDIR(old_inode->i_mode)) { + if (new_inode) { + retval = -ENOTEMPTY; + if (!empty_dir (new_inode)) + goto end_rename; + } + retval = -EIO; + dir_bh = ext3_bread (handle, old_inode, 0, 0, &retval); + if (!dir_bh) + goto end_rename; + if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) + goto end_rename; + retval = -EMLINK; + if (!new_inode && new_dir!=old_dir && + new_dir->i_nlink >= EXT3_LINK_MAX) + goto end_rename; + } + if (!new_bh) { + retval = ext3_add_entry (handle, new_dentry, old_inode); + if (retval) + goto end_rename; + } else { + BUFFER_TRACE(new_bh, "get write access"); + ext3_journal_get_write_access(handle, new_bh); + new_de->inode = cpu_to_le32(old_inode->i_ino); + if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb, + EXT3_FEATURE_INCOMPAT_FILETYPE)) + new_de->file_type = old_de->file_type; + new_dir->i_version++; + BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata"); + ext3_journal_dirty_metadata(handle, new_bh); + brelse(new_bh); + new_bh = NULL; + } + + /* + * Like most other Unix systems, set the ctime for inodes on a + * rename. + */ + old_inode->i_ctime = CURRENT_TIME_SEC; + ext3_mark_inode_dirty(handle, old_inode); + + /* + * ok, that's it + */ + if (le32_to_cpu(old_de->inode) != old_inode->i_ino || + old_de->name_len != old_dentry->d_name.len || + strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) || + (retval = ext3_delete_entry(handle, old_dir, + old_de, old_bh)) == -ENOENT) { + /* old_de could have moved from under us during htree split, so + * make sure that we are deleting the right entry. We might + * also be pointing to a stale entry in the unused part of + * old_bh so just checking inum and the name isn't enough. */ + struct buffer_head *old_bh2; + struct ext3_dir_entry_2 *old_de2; + + old_bh2 = ext3_find_entry(old_dentry, &old_de2); + if (old_bh2) { + retval = ext3_delete_entry(handle, old_dir, + old_de2, old_bh2); + brelse(old_bh2); + } + } + if (retval) { + ext3_warning(old_dir->i_sb, "ext3_rename", + "Deleting old file (%lu), %d, error=%d", + old_dir->i_ino, old_dir->i_nlink, retval); + } + + if (new_inode) { + drop_nlink(new_inode); + new_inode->i_ctime = CURRENT_TIME_SEC; + } + old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; + ext3_update_dx_flag(old_dir); + if (dir_bh) { + BUFFER_TRACE(dir_bh, "get_write_access"); + ext3_journal_get_write_access(handle, dir_bh); + PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); + BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); + ext3_journal_dirty_metadata(handle, dir_bh); + drop_nlink(old_dir); + if (new_inode) { + drop_nlink(new_inode); + } else { + inc_nlink(new_dir); + ext3_update_dx_flag(new_dir); + ext3_mark_inode_dirty(handle, new_dir); + } + } + ext3_mark_inode_dirty(handle, old_dir); + if (new_inode) { + ext3_mark_inode_dirty(handle, new_inode); + if (!new_inode->i_nlink) + ext3_orphan_add(handle, new_inode); + } + retval = 0; + +end_rename: + brelse (dir_bh); + brelse (old_bh); + brelse (new_bh); + ext3_journal_stop(handle); + return retval; +} + +/* + * directories can handle most operations... 
+ */ +struct inode_operations ext3_dir_inode_operations = { + .create = ext3_create, + .lookup = ext3_lookup, + .link = ext3_link, + .unlink = ext3_unlink, + .symlink = ext3_symlink, + .mkdir = ext3_mkdir, + .rmdir = ext3_rmdir, + .mknod = ext3_mknod, + .rename = ext3_rename, + .setattr = ext3_setattr, +#ifdef CONFIG_EXT3_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ext3_listxattr, + .removexattr = generic_removexattr, +#endif + .permission = ext3_permission, +}; + +struct inode_operations ext3_special_inode_operations = { + .setattr = ext3_setattr, +#ifdef CONFIG_EXT3_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ext3_listxattr, + .removexattr = generic_removexattr, +#endif + .permission = ext3_permission, +}; diff --git a/fs/ext4/namei.h b/fs/ext4/namei.h new file mode 100644 index 000000000000..f2ce2b0065c9 --- /dev/null +++ b/fs/ext4/namei.h @@ -0,0 +1,8 @@ +/* linux/fs/ext3/namei.h + * + * Copyright (C) 2005 Simtec Electronics + * Ben Dooks <ben@simtec.co.uk> + * +*/ + +extern struct dentry *ext3_get_parent(struct dentry *child); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c new file mode 100644 index 000000000000..b73cba12f79c --- /dev/null +++ b/fs/ext4/resize.c @@ -0,0 +1,1042 @@ +/* + * linux/fs/ext3/resize.c + * + * Support for resizing an ext3 filesystem while it is mounted. + * + * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com> + * + * This could probably be made into a module, because it is not often in use. + */ + + +#define EXT3FS_DEBUG + +#include <linux/sched.h> +#include <linux/smp_lock.h> +#include <linux/ext3_jbd.h> + +#include <linux/errno.h> +#include <linux/slab.h> + + +#define outside(b, first, last) ((b) < (first) || (b) >= (last)) +#define inside(b, first, last) ((b) >= (first) && (b) < (last)) + +static int verify_group_input(struct super_block *sb, + struct ext3_new_group_data *input) +{ + struct ext3_sb_info *sbi = EXT3_SB(sb); + struct ext3_super_block *es = sbi->s_es; + ext3_fsblk_t start = le32_to_cpu(es->s_blocks_count); + ext3_fsblk_t end = start + input->blocks_count; + unsigned group = input->group; + ext3_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; + unsigned overhead = ext3_bg_has_super(sb, group) ? + (1 + ext3_bg_num_gdb(sb, group) + + le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; + ext3_fsblk_t metaend = start + overhead; + struct buffer_head *bh = NULL; + ext3_grpblk_t free_blocks_count; + int err = -EINVAL; + + input->free_blocks_count = free_blocks_count = + input->blocks_count - 2 - overhead - sbi->s_itb_per_group; + + if (test_opt(sb, DEBUG)) + printk(KERN_DEBUG "EXT3-fs: adding %s group %u: %u blocks " + "(%d free, %u reserved)\n", + ext3_bg_has_super(sb, input->group) ? 
"normal" : + "no-super", input->group, input->blocks_count, + free_blocks_count, input->reserved_blocks); + + if (group != sbi->s_groups_count) + ext3_warning(sb, __FUNCTION__, + "Cannot add at group %u (only %lu groups)", + input->group, sbi->s_groups_count); + else if ((start - le32_to_cpu(es->s_first_data_block)) % + EXT3_BLOCKS_PER_GROUP(sb)) + ext3_warning(sb, __FUNCTION__, "Last group not full"); + else if (input->reserved_blocks > input->blocks_count / 5) + ext3_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", + input->reserved_blocks); + else if (free_blocks_count < 0) + ext3_warning(sb, __FUNCTION__, "Bad blocks count %u", + input->blocks_count); + else if (!(bh = sb_bread(sb, end - 1))) + ext3_warning(sb, __FUNCTION__, + "Cannot read last block ("E3FSBLK")", + end - 1); + else if (outside(input->block_bitmap, start, end)) + ext3_warning(sb, __FUNCTION__, + "Block bitmap not in group (block %u)", + input->block_bitmap); + else if (outside(input->inode_bitmap, start, end)) + ext3_warning(sb, __FUNCTION__, + "Inode bitmap not in group (block %u)", + input->inode_bitmap); + else if (outside(input->inode_table, start, end) || + outside(itend - 1, start, end)) + ext3_warning(sb, __FUNCTION__, + "Inode table not in group (blocks %u-"E3FSBLK")", + input->inode_table, itend - 1); + else if (input->inode_bitmap == input->block_bitmap) + ext3_warning(sb, __FUNCTION__, + "Block bitmap same as inode bitmap (%u)", + input->block_bitmap); + else if (inside(input->block_bitmap, input->inode_table, itend)) + ext3_warning(sb, __FUNCTION__, + "Block bitmap (%u) in inode table (%u-"E3FSBLK")", + input->block_bitmap, input->inode_table, itend-1); + else if (inside(input->inode_bitmap, input->inode_table, itend)) + ext3_warning(sb, __FUNCTION__, + "Inode bitmap (%u) in inode table (%u-"E3FSBLK")", + input->inode_bitmap, input->inode_table, itend-1); + else if (inside(input->block_bitmap, start, metaend)) + ext3_warning(sb, __FUNCTION__, + "Block bitmap (%u) in GDT table" + " ("E3FSBLK"-"E3FSBLK")", + input->block_bitmap, start, metaend - 1); + else if (inside(input->inode_bitmap, start, metaend)) + ext3_warning(sb, __FUNCTION__, + "Inode bitmap (%u) in GDT table" + " ("E3FSBLK"-"E3FSBLK")", + input->inode_bitmap, start, metaend - 1); + else if (inside(input->inode_table, start, metaend) || + inside(itend - 1, start, metaend)) + ext3_warning(sb, __FUNCTION__, + "Inode table (%u-"E3FSBLK") overlaps" + "GDT table ("E3FSBLK"-"E3FSBLK")", + input->inode_table, itend - 1, start, metaend - 1); + else + err = 0; + brelse(bh); + + return err; +} + +static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, + ext3_fsblk_t blk) +{ + struct buffer_head *bh; + int err; + + bh = sb_getblk(sb, blk); + if (!bh) + return ERR_PTR(-EIO); + if ((err = ext3_journal_get_write_access(handle, bh))) { + brelse(bh); + bh = ERR_PTR(err); + } else { + lock_buffer(bh); + memset(bh->b_data, 0, sb->s_blocksize); + set_buffer_uptodate(bh); + unlock_buffer(bh); + } + + return bh; +} + +/* + * To avoid calling the atomic setbit hundreds or thousands of times, we only + * need to use it within a single byte (to ensure we get endianness right). + * We can use memset for the rest of the bitmap as there are no other users. 
+ */ +static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) +{ + int i; + + if (start_bit >= end_bit) + return; + + ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); + for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) + ext3_set_bit(i, bitmap); + if (i < end_bit) + memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); +} + +/* + * Set up the block and inode bitmaps, and the inode table for the new group. + * This doesn't need to be part of the main transaction, since we are only + * changing blocks outside the actual filesystem. We still do journaling to + * ensure the recovery is correct in case of a failure just after resize. + * If any part of this fails, we simply abort the resize. + */ +static int setup_new_group_blocks(struct super_block *sb, + struct ext3_new_group_data *input) +{ + struct ext3_sb_info *sbi = EXT3_SB(sb); + ext3_fsblk_t start = ext3_group_first_block_no(sb, input->group); + int reserved_gdb = ext3_bg_has_super(sb, input->group) ? + le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; + unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group); + struct buffer_head *bh; + handle_t *handle; + ext3_fsblk_t block; + ext3_grpblk_t bit; + int i; + int err = 0, err2; + + handle = ext3_journal_start_sb(sb, reserved_gdb + gdblocks + + 2 + sbi->s_itb_per_group); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + lock_super(sb); + if (input->group != sbi->s_groups_count) { + err = -EBUSY; + goto exit_journal; + } + + if (IS_ERR(bh = bclean(handle, sb, input->block_bitmap))) { + err = PTR_ERR(bh); + goto exit_journal; + } + + if (ext3_bg_has_super(sb, input->group)) { + ext3_debug("mark backup superblock %#04lx (+0)\n", start); + ext3_set_bit(0, bh->b_data); + } + + /* Copy all of the GDT blocks into the backup in this group */ + for (i = 0, bit = 1, block = start + 1; + i < gdblocks; i++, block++, bit++) { + struct buffer_head *gdb; + + ext3_debug("update backup group %#04lx (+%d)\n", block, bit); + + gdb = sb_getblk(sb, block); + if (!gdb) { + err = -EIO; + goto exit_bh; + } + if ((err = ext3_journal_get_write_access(handle, gdb))) { + brelse(gdb); + goto exit_bh; + } + lock_buffer(bh); + memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size); + set_buffer_uptodate(gdb); + unlock_buffer(bh); + ext3_journal_dirty_metadata(handle, gdb); + ext3_set_bit(bit, bh->b_data); + brelse(gdb); + } + + /* Zero out all of the reserved backup group descriptor table blocks */ + for (i = 0, bit = gdblocks + 1, block = start + bit; + i < reserved_gdb; i++, block++, bit++) { + struct buffer_head *gdb; + + ext3_debug("clear reserved block %#04lx (+%d)\n", block, bit); + + if (IS_ERR(gdb = bclean(handle, sb, block))) { + err = PTR_ERR(bh); + goto exit_bh; + } + ext3_journal_dirty_metadata(handle, gdb); + ext3_set_bit(bit, bh->b_data); + brelse(gdb); + } + ext3_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap, + input->block_bitmap - start); + ext3_set_bit(input->block_bitmap - start, bh->b_data); + ext3_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap, + input->inode_bitmap - start); + ext3_set_bit(input->inode_bitmap - start, bh->b_data); + + /* Zero out all of the inode table blocks */ + for (i = 0, block = input->inode_table, bit = block - start; + i < sbi->s_itb_per_group; i++, bit++, block++) { + struct buffer_head *it; + + ext3_debug("clear inode block %#04lx (+%d)\n", block, bit); + if (IS_ERR(it = bclean(handle, sb, block))) { + err = PTR_ERR(it); + goto exit_bh; + } + ext3_journal_dirty_metadata(handle, it); + 
brelse(it); + ext3_set_bit(bit, bh->b_data); + } + mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb), + bh->b_data); + ext3_journal_dirty_metadata(handle, bh); + brelse(bh); + + /* Mark unused entries in inode bitmap used */ + ext3_debug("clear inode bitmap %#04x (+%ld)\n", + input->inode_bitmap, input->inode_bitmap - start); + if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { + err = PTR_ERR(bh); + goto exit_journal; + } + + mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb), + bh->b_data); + ext3_journal_dirty_metadata(handle, bh); +exit_bh: + brelse(bh); + +exit_journal: + unlock_super(sb); + if ((err2 = ext3_journal_stop(handle)) && !err) + err = err2; + + return err; +} + +/* + * Iterate through the groups which hold BACKUP superblock/GDT copies in an + * ext3 filesystem. The counters should be initialized to 1, 5, and 7 before + * calling this for the first time. In a sparse filesystem it will be the + * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... + * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... + */ +static unsigned ext3_list_backups(struct super_block *sb, unsigned *three, + unsigned *five, unsigned *seven) +{ + unsigned *min = three; + int mult = 3; + unsigned ret; + + if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, + EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { + ret = *min; + *min += 1; + return ret; + } + + if (*five < *min) { + min = five; + mult = 5; + } + if (*seven < *min) { + min = seven; + mult = 7; + } + + ret = *min; + *min *= mult; + + return ret; +} + +/* + * Check that all of the backup GDT blocks are held in the primary GDT block. + * It is assumed that they are stored in group order. Returns the number of + * groups in current filesystem that have BACKUPS, or -ve error code. + */ +static int verify_reserved_gdb(struct super_block *sb, + struct buffer_head *primary) +{ + const ext3_fsblk_t blk = primary->b_blocknr; + const unsigned long end = EXT3_SB(sb)->s_groups_count; + unsigned three = 1; + unsigned five = 5; + unsigned seven = 7; + unsigned grp; + __le32 *p = (__le32 *)primary->b_data; + int gdbackups = 0; + + while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { + if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){ + ext3_warning(sb, __FUNCTION__, + "reserved GDT "E3FSBLK + " missing grp %d ("E3FSBLK")", + blk, grp, + grp * EXT3_BLOCKS_PER_GROUP(sb) + blk); + return -EINVAL; + } + if (++gdbackups > EXT3_ADDR_PER_BLOCK(sb)) + return -EFBIG; + } + + return gdbackups; +} + +/* + * Called when we need to bring a reserved group descriptor table block into + * use from the resize inode. The primary copy of the new GDT block currently + * is an indirect block (under the double indirect block in the resize inode). + * The new backup GDT blocks will be stored as leaf blocks in this indirect + * block, in group order. Even though we know all the block numbers we need, + * we check to ensure that the resize inode has actually reserved these blocks. + * + * Don't need to update the block bitmaps because the blocks are still in use. + * + * We get all of the error cases out of the way, so that we are sure to not + * fail once we start modifying the data on disk, because JBD has no rollback. 
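+ *
+ * (The primary copy being brought into use sits directly after the current
+ * GDT on disk, at block s_sbh->b_blocknr + 1 + gdb_num, as computed below.)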
+ */ +static int add_new_gdb(handle_t *handle, struct inode *inode, + struct ext3_new_group_data *input, + struct buffer_head **primary) +{ + struct super_block *sb = inode->i_sb; + struct ext3_super_block *es = EXT3_SB(sb)->s_es; + unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb); + ext3_fsblk_t gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; + struct buffer_head **o_group_desc, **n_group_desc; + struct buffer_head *dind; + int gdbackups; + struct ext3_iloc iloc; + __le32 *data; + int err; + + if (test_opt(sb, DEBUG)) + printk(KERN_DEBUG + "EXT3-fs: ext3_add_new_gdb: adding group block %lu\n", + gdb_num); + + /* + * If we are not using the primary superblock/GDT copy don't resize, + * because the user tools have no way of handling this. Probably a + * bad time to do it anyways. + */ + if (EXT3_SB(sb)->s_sbh->b_blocknr != + le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) { + ext3_warning(sb, __FUNCTION__, + "won't resize using backup superblock at %llu", + (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr); + return -EPERM; + } + + *primary = sb_bread(sb, gdblock); + if (!*primary) + return -EIO; + + if ((gdbackups = verify_reserved_gdb(sb, *primary)) < 0) { + err = gdbackups; + goto exit_bh; + } + + data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK; + dind = sb_bread(sb, le32_to_cpu(*data)); + if (!dind) { + err = -EIO; + goto exit_bh; + } + + data = (__le32 *)dind->b_data; + if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) { + ext3_warning(sb, __FUNCTION__, + "new group %u GDT block "E3FSBLK" not reserved", + input->group, gdblock); + err = -EINVAL; + goto exit_dind; + } + + if ((err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh))) + goto exit_dind; + + if ((err = ext3_journal_get_write_access(handle, *primary))) + goto exit_sbh; + + if ((err = ext3_journal_get_write_access(handle, dind))) + goto exit_primary; + + /* ext3_reserve_inode_write() gets a reference on the iloc */ + if ((err = ext3_reserve_inode_write(handle, inode, &iloc))) + goto exit_dindj; + + n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), + GFP_KERNEL); + if (!n_group_desc) { + err = -ENOMEM; + ext3_warning (sb, __FUNCTION__, + "not enough memory for %lu groups", gdb_num + 1); + goto exit_inode; + } + + /* + * Finally, we have all of the possible failures behind us... + * + * Remove new GDT block from inode double-indirect block and clear out + * the new GDT block for use (which also "frees" the backup GDT blocks + * from the reserved inode). We don't need to change the bitmaps for + * these blocks, because they are marked as in-use from being in the + * reserved inode, and will become GDT blocks (primary and backup). 
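+	 *
+	 * (Concretely, below: clear this block's slot in the dindirect
+	 * block, shrink the resize inode's i_blocks, zero the new GDT
+	 * block, swap in an enlarged s_group_desc[] array, bump
+	 * s_gdb_count and decrement s_reserved_gdt_blocks, in that order.)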
+ */ + data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0; + ext3_journal_dirty_metadata(handle, dind); + brelse(dind); + inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; + ext3_mark_iloc_dirty(handle, inode, &iloc); + memset((*primary)->b_data, 0, sb->s_blocksize); + ext3_journal_dirty_metadata(handle, *primary); + + o_group_desc = EXT3_SB(sb)->s_group_desc; + memcpy(n_group_desc, o_group_desc, + EXT3_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + n_group_desc[gdb_num] = *primary; + EXT3_SB(sb)->s_group_desc = n_group_desc; + EXT3_SB(sb)->s_gdb_count++; + kfree(o_group_desc); + + es->s_reserved_gdt_blocks = + cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1); + ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + + return 0; + +exit_inode: + //ext3_journal_release_buffer(handle, iloc.bh); + brelse(iloc.bh); +exit_dindj: + //ext3_journal_release_buffer(handle, dind); +exit_primary: + //ext3_journal_release_buffer(handle, *primary); +exit_sbh: + //ext3_journal_release_buffer(handle, *primary); +exit_dind: + brelse(dind); +exit_bh: + brelse(*primary); + + ext3_debug("leaving with error %d\n", err); + return err; +} + +/* + * Called when we are adding a new group which has a backup copy of each of + * the GDT blocks (i.e. sparse group) and there are reserved GDT blocks. + * We need to add these reserved backup GDT blocks to the resize inode, so + * that they are kept for future resizing and not allocated to files. + * + * Each reserved backup GDT block will go into a different indirect block. + * The indirect blocks are actually the primary reserved GDT blocks, + * so we know in advance what their block numbers are. We only get the + * double-indirect block to verify it is pointing to the primary reserved + * GDT blocks so we don't overwrite a data block by accident. The reserved + * backup GDT blocks are stored in their reserved primary GDT block. 
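+ *
+ * (For each reserved primary GDT block, the final loop below appends one
+ * leaf entry at index "gdbackups": the new group's backup copy, at block
+ * input->group * EXT3_BLOCKS_PER_GROUP(sb) + primary->b_blocknr.)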
+ */ +static int reserve_backup_gdb(handle_t *handle, struct inode *inode, + struct ext3_new_group_data *input) +{ + struct super_block *sb = inode->i_sb; + int reserved_gdb =le16_to_cpu(EXT3_SB(sb)->s_es->s_reserved_gdt_blocks); + struct buffer_head **primary; + struct buffer_head *dind; + struct ext3_iloc iloc; + ext3_fsblk_t blk; + __le32 *data, *end; + int gdbackups = 0; + int res, i; + int err; + + primary = kmalloc(reserved_gdb * sizeof(*primary), GFP_KERNEL); + if (!primary) + return -ENOMEM; + + data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK; + dind = sb_bread(sb, le32_to_cpu(*data)); + if (!dind) { + err = -EIO; + goto exit_free; + } + + blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count; + data = (__le32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count; + end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb); + + /* Get each reserved primary GDT block and verify it holds backups */ + for (res = 0; res < reserved_gdb; res++, blk++) { + if (le32_to_cpu(*data) != blk) { + ext3_warning(sb, __FUNCTION__, + "reserved block "E3FSBLK + " not at offset %ld", + blk, + (long)(data - (__le32 *)dind->b_data)); + err = -EINVAL; + goto exit_bh; + } + primary[res] = sb_bread(sb, blk); + if (!primary[res]) { + err = -EIO; + goto exit_bh; + } + if ((gdbackups = verify_reserved_gdb(sb, primary[res])) < 0) { + brelse(primary[res]); + err = gdbackups; + goto exit_bh; + } + if (++data >= end) + data = (__le32 *)dind->b_data; + } + + for (i = 0; i < reserved_gdb; i++) { + if ((err = ext3_journal_get_write_access(handle, primary[i]))) { + /* + int j; + for (j = 0; j < i; j++) + ext3_journal_release_buffer(handle, primary[j]); + */ + goto exit_bh; + } + } + + if ((err = ext3_reserve_inode_write(handle, inode, &iloc))) + goto exit_bh; + + /* + * Finally we can add each of the reserved backup GDT blocks from + * the new group to its reserved primary GDT block. + */ + blk = input->group * EXT3_BLOCKS_PER_GROUP(sb); + for (i = 0; i < reserved_gdb; i++) { + int err2; + data = (__le32 *)primary[i]->b_data; + /* printk("reserving backup %lu[%u] = %lu\n", + primary[i]->b_blocknr, gdbackups, + blk + primary[i]->b_blocknr); */ + data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr); + err2 = ext3_journal_dirty_metadata(handle, primary[i]); + if (!err) + err = err2; + } + inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9; + ext3_mark_iloc_dirty(handle, inode, &iloc); + +exit_bh: + while (--res >= 0) + brelse(primary[res]); + brelse(dind); + +exit_free: + kfree(primary); + + return err; +} + +/* + * Update the backup copies of the ext3 metadata. These don't need to be part + * of the main resize transaction, because e2fsck will re-write them if there + * is a problem (basically only OOM will cause a problem). However, we + * _should_ update the backups if possible, in case the primary gets trashed + * for some reason and we need to run e2fsck from a backup superblock. The + * important part is that the new block and inode counts are in the backup + * superblocks, and the location of the new group metadata in the GDT backups. + * + * We do not need lock_super() for this, because these blocks are not + * otherwise touched by the filesystem code when it is mounted. We don't + * need to worry about last changing from sbi->s_groups_count, because the + * worst that can happen is that we do not copy the full number of backups + * at this time. The resize which changed s_groups_count will backup again. 
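+ *
+ * (With sparse_super the backups live in group 1 and in the groups that
+ * are powers of 3, 5 and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, and so on,
+ * exactly the sequence ext3_list_backups() walks above.)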
+ */ +static void update_backups(struct super_block *sb, + int blk_off, char *data, int size) +{ + struct ext3_sb_info *sbi = EXT3_SB(sb); + const unsigned long last = sbi->s_groups_count; + const int bpg = EXT3_BLOCKS_PER_GROUP(sb); + unsigned three = 1; + unsigned five = 5; + unsigned seven = 7; + unsigned group; + int rest = sb->s_blocksize - size; + handle_t *handle; + int err = 0, err2; + + handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA); + if (IS_ERR(handle)) { + group = 1; + err = PTR_ERR(handle); + goto exit_err; + } + + while ((group = ext3_list_backups(sb, &three, &five, &seven)) < last) { + struct buffer_head *bh; + + /* Out of journal space, and can't get more - abort - so sad */ + if (handle->h_buffer_credits == 0 && + ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA) && + (err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA))) + break; + + bh = sb_getblk(sb, group * bpg + blk_off); + if (!bh) { + err = -EIO; + break; + } + ext3_debug("update metadata backup %#04lx\n", + (unsigned long)bh->b_blocknr); + if ((err = ext3_journal_get_write_access(handle, bh))) + break; + lock_buffer(bh); + memcpy(bh->b_data, data, size); + if (rest) + memset(bh->b_data + size, 0, rest); + set_buffer_uptodate(bh); + unlock_buffer(bh); + ext3_journal_dirty_metadata(handle, bh); + brelse(bh); + } + if ((err2 = ext3_journal_stop(handle)) && !err) + err = err2; + + /* + * Ugh! Need to have e2fsck write the backup copies. It is too + * late to revert the resize, we shouldn't fail just because of + * the backup copies (they are only needed in case of corruption). + * + * However, if we got here we have a journal problem too, so we + * can't really start a transaction to mark the superblock. + * Chicken out and just set the flag on the hope it will be written + * to disk, and if not - we will simply wait until next fsck. + */ +exit_err: + if (err) { + ext3_warning(sb, __FUNCTION__, + "can't update backup for group %d (err %d), " + "forcing fsck on next reboot", group, err); + sbi->s_mount_state &= ~EXT3_VALID_FS; + sbi->s_es->s_state &= cpu_to_le16(~EXT3_VALID_FS); + mark_buffer_dirty(sbi->s_sbh); + } +} + +/* Add group descriptor data to an existing or new group descriptor block. + * Ensure we handle all possible error conditions _before_ we start modifying + * the filesystem, because we cannot abort the transaction and not have it + * write the data to disk. + * + * If we are on a GDT block boundary, we need to get the reserved GDT block. + * Otherwise, we may need to add backup GDT blocks for a sparse group. + * + * We only need to hold the superblock lock while we are actually adding + * in the new group's counts to the superblock. Prior to that we have + * not really "added" the group at all. We re-check that we are still + * adding in the last group in case things have changed since verifying. + */ +int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) +{ + struct ext3_sb_info *sbi = EXT3_SB(sb); + struct ext3_super_block *es = sbi->s_es; + int reserved_gdb = ext3_bg_has_super(sb, input->group) ? 
+ le16_to_cpu(es->s_reserved_gdt_blocks) : 0; + struct buffer_head *primary = NULL; + struct ext3_group_desc *gdp; + struct inode *inode = NULL; + handle_t *handle; + int gdb_off, gdb_num; + int err, err2; + + gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb); + gdb_off = input->group % EXT3_DESC_PER_BLOCK(sb); + + if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb, + EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { + ext3_warning(sb, __FUNCTION__, + "Can't resize non-sparse filesystem further"); + return -EPERM; + } + + if (le32_to_cpu(es->s_blocks_count) + input->blocks_count < + le32_to_cpu(es->s_blocks_count)) { + ext3_warning(sb, __FUNCTION__, "blocks_count overflow\n"); + return -EINVAL; + } + + if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) < + le32_to_cpu(es->s_inodes_count)) { + ext3_warning(sb, __FUNCTION__, "inodes_count overflow\n"); + return -EINVAL; + } + + if (reserved_gdb || gdb_off == 0) { + if (!EXT3_HAS_COMPAT_FEATURE(sb, + EXT3_FEATURE_COMPAT_RESIZE_INODE)){ + ext3_warning(sb, __FUNCTION__, + "No reserved GDT blocks, can't resize"); + return -EPERM; + } + inode = iget(sb, EXT3_RESIZE_INO); + if (!inode || is_bad_inode(inode)) { + ext3_warning(sb, __FUNCTION__, + "Error opening resize inode"); + iput(inode); + return -ENOENT; + } + } + + if ((err = verify_group_input(sb, input))) + goto exit_put; + + if ((err = setup_new_group_blocks(sb, input))) + goto exit_put; + + /* + * We will always be modifying at least the superblock and a GDT + * block. If we are adding a group past the last current GDT block, + * we will also modify the inode and the dindirect block. If we + * are adding a group with superblock/GDT backups we will also + * modify each of the reserved GDT dindirect blocks. + */ + handle = ext3_journal_start_sb(sb, + ext3_bg_has_super(sb, input->group) ? + 3 + reserved_gdb : 4); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto exit_put; + } + + lock_super(sb); + if (input->group != sbi->s_groups_count) { + ext3_warning(sb, __FUNCTION__, + "multiple resizers run on filesystem!"); + err = -EBUSY; + goto exit_journal; + } + + if ((err = ext3_journal_get_write_access(handle, sbi->s_sbh))) + goto exit_journal; + + /* + * We will only either add reserved group blocks to a backup group + * or remove reserved blocks for the first group in a new group block. + * Doing both would be mean more complex code, and sane people don't + * use non-sparse filesystems anymore. This is already checked above. + */ + if (gdb_off) { + primary = sbi->s_group_desc[gdb_num]; + if ((err = ext3_journal_get_write_access(handle, primary))) + goto exit_journal; + + if (reserved_gdb && ext3_bg_num_gdb(sb, input->group) && + (err = reserve_backup_gdb(handle, inode, input))) + goto exit_journal; + } else if ((err = add_new_gdb(handle, inode, input, &primary))) + goto exit_journal; + + /* + * OK, now we've set up the new group. Time to make it active. + * + * Current kernels don't lock all allocations via lock_super(), + * so we have to be safe wrt. concurrent accesses the group + * data. So we need to be careful to set all of the relevant + * group descriptor data etc. *before* we enable the group. + * + * The key field here is sbi->s_groups_count: as long as + * that retains its old value, nobody is going to access the new + * group. 
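+	 *
+	 * (A lockless reader would pair with the smp_wmb() below roughly as
+	 *
+	 *	ngroups = sbi->s_groups_count;
+	 *	smp_rmb();
+	 *	gdp = ext3_get_group_desc(sb, ngroups - 1, NULL);
+	 *
+	 * per the rules spelled out further down.)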
+ * + * So first we update all the descriptor metadata for the new + * group; then we update the total disk blocks count; then we + * update the groups count to enable the group; then finally we + * update the free space counts so that the system can start + * using the new disk blocks. + */ + + /* Update group descriptor block for new group */ + gdp = (struct ext3_group_desc *)primary->b_data + gdb_off; + + gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap); + gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap); + gdp->bg_inode_table = cpu_to_le32(input->inode_table); + gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); + gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb)); + + /* + * Make the new blocks and inodes valid next. We do this before + * increasing the group count so that once the group is enabled, + * all of its blocks and inodes are already valid. + * + * We always allocate group-by-group, then block-by-block or + * inode-by-inode within a group, so enabling these + * blocks/inodes before the group is live won't actually let us + * allocate the new space yet. + */ + es->s_blocks_count = cpu_to_le32(le32_to_cpu(es->s_blocks_count) + + input->blocks_count); + es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + + EXT3_INODES_PER_GROUP(sb)); + + /* + * We need to protect s_groups_count against other CPUs seeing + * inconsistent state in the superblock. + * + * The precise rules we use are: + * + * * Writers of s_groups_count *must* hold lock_super + * AND + * * Writers must perform a smp_wmb() after updating all dependent + * data and before modifying the groups count + * + * * Readers must hold lock_super() over the access + * OR + * * Readers must perform an smp_rmb() after reading the groups count + * and before reading any dependent data. + * + * NB. These rules can be relaxed when checking the group count + * while freeing data, as we can only allocate from a block + * group after serialising against the group count, and we can + * only then free after serialising in turn against that + * allocation. + */ + smp_wmb(); + + /* Update the global fs size fields */ + sbi->s_groups_count++; + + ext3_journal_dirty_metadata(handle, primary); + + /* Update the reserved block counts only once the new group is + * active. */ + es->s_r_blocks_count = cpu_to_le32(le32_to_cpu(es->s_r_blocks_count) + + input->reserved_blocks); + + /* Update the free space counts */ + percpu_counter_mod(&sbi->s_freeblocks_counter, + input->free_blocks_count); + percpu_counter_mod(&sbi->s_freeinodes_counter, + EXT3_INODES_PER_GROUP(sb)); + + ext3_journal_dirty_metadata(handle, sbi->s_sbh); + sb->s_dirt = 1; + +exit_journal: + unlock_super(sb); + if ((err2 = ext3_journal_stop(handle)) && !err) + err = err2; + if (!err) { + update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, + sizeof(struct ext3_super_block)); + update_backups(sb, primary->b_blocknr, primary->b_data, + primary->b_size); + } +exit_put: + iput(inode); + return err; +} /* ext3_group_add */ + +/* Extend the filesystem to the new number of blocks specified. This entry + * point is only used to extend the current filesystem to the end of the last + * existing group. It can be accessed via ioctl, or by "remount,resize=<size>" + * for emergencies (because it has no dependencies on reserved blocks). 
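+ *
+ * (Example: with 32768 blocks per group and a last group that is 1000
+ * blocks short of full, at most add = 1000 blocks can be grafted on here;
+ * growing beyond that needs ext3_group_add().)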
+ *
+ * If we _really_ wanted, we could use default values to call
+ * ext3_group_add() to allow the "remount" trick to work for arbitrary
+ * resizing, assuming enough GDT blocks are reserved to grow to the
+ * desired size.
+ */
+int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es,
+		      ext3_fsblk_t n_blocks_count)
+{
+	ext3_fsblk_t o_blocks_count;
+	unsigned long o_groups_count;
+	ext3_grpblk_t last;
+	ext3_grpblk_t add;
+	struct buffer_head * bh;
+	handle_t *handle;
+	int err;
+	unsigned long freed_blocks;
+
+	/* We don't need to worry about locking wrt other resizers just
+	 * yet: we're going to revalidate es->s_blocks_count after
+	 * taking lock_super() below. */
+	o_blocks_count = le32_to_cpu(es->s_blocks_count);
+	o_groups_count = EXT3_SB(sb)->s_groups_count;
+
+	if (test_opt(sb, DEBUG))
+		printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" to "E3FSBLK" blocks\n",
+		       o_blocks_count, n_blocks_count);
+
+	if (n_blocks_count == 0 || n_blocks_count == o_blocks_count)
+		return 0;
+
+	if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
+		printk(KERN_ERR "EXT3-fs: filesystem on %s:"
+			" too large to resize to %lu blocks safely\n",
+			sb->s_id, n_blocks_count);
+		if (sizeof(sector_t) < 8)
+			ext3_warning(sb, __FUNCTION__,
+				     "CONFIG_LBD not enabled\n");
+		return -EINVAL;
+	}
+
+	if (n_blocks_count < o_blocks_count) {
+		ext3_warning(sb, __FUNCTION__,
+			     "can't shrink FS - resize aborted");
+		return -EBUSY;
+	}
+
+	/* Handle the remaining blocks in the last group only. */
+	last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) %
+		EXT3_BLOCKS_PER_GROUP(sb);
+
+	if (last == 0) {
+		ext3_warning(sb, __FUNCTION__,
+			     "need to use ext2online to resize further");
+		return -EPERM;
+	}
+
+	add = EXT3_BLOCKS_PER_GROUP(sb) - last;
+
+	if (o_blocks_count + add < o_blocks_count) {
+		ext3_warning(sb, __FUNCTION__, "blocks_count overflow");
+		return -EINVAL;
+	}
+
+	if (o_blocks_count + add > n_blocks_count)
+		add = n_blocks_count - o_blocks_count;
+
+	if (o_blocks_count + add < n_blocks_count)
+		ext3_warning(sb, __FUNCTION__,
+			     "will only finish group ("E3FSBLK
+			     " blocks, %u new)",
+			     o_blocks_count + add, add);
+
+	/* See if the device is actually as big as what was requested */
+	bh = sb_bread(sb, o_blocks_count + add - 1);
+	if (!bh) {
+		ext3_warning(sb, __FUNCTION__,
+			     "can't read last block, resize aborted");
+		return -ENOSPC;
+	}
+	brelse(bh);
+
+	/* We will update the superblock, one block bitmap, and
+	 * one group descriptor via ext3_free_blocks().
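+	 * Hence the three journal credits requested from
+	 * ext3_journal_start_sb() below.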
+ */ + handle = ext3_journal_start_sb(sb, 3); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + ext3_warning(sb, __FUNCTION__, "error %d on journal start",err); + goto exit_put; + } + + lock_super(sb); + if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { + ext3_warning(sb, __FUNCTION__, + "multiple resizers run on filesystem!"); + unlock_super(sb); + err = -EBUSY; + goto exit_put; + } + + if ((err = ext3_journal_get_write_access(handle, + EXT3_SB(sb)->s_sbh))) { + ext3_warning(sb, __FUNCTION__, + "error %d on journal write access", err); + unlock_super(sb); + ext3_journal_stop(handle); + goto exit_put; + } + es->s_blocks_count = cpu_to_le32(o_blocks_count + add); + ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + sb->s_dirt = 1; + unlock_super(sb); + ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count, + o_blocks_count + add); + ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); + ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", o_blocks_count, + o_blocks_count + add); + if ((err = ext3_journal_stop(handle))) + goto exit_put; + if (test_opt(sb, DEBUG)) + printk(KERN_DEBUG "EXT3-fs: extended group to %u blocks\n", + le32_to_cpu(es->s_blocks_count)); + update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es, + sizeof(struct ext3_super_block)); +exit_put: + return err; +} /* ext3_group_extend */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c new file mode 100644 index 000000000000..8bfd56ef18ca --- /dev/null +++ b/fs/ext4/super.c @@ -0,0 +1,2754 @@ +/* + * linux/fs/ext3/super.c + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/inode.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * Big-endian to little-endian byte-swapping/bitmaps by + * David S. 
Miller (davem@caip.rutgers.edu), 1995 + */ + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/time.h> +#include <linux/jbd.h> +#include <linux/ext3_fs.h> +#include <linux/ext3_jbd.h> +#include <linux/slab.h> +#include <linux/init.h> +#include <linux/blkdev.h> +#include <linux/parser.h> +#include <linux/smp_lock.h> +#include <linux/buffer_head.h> +#include <linux/vfs.h> +#include <linux/random.h> +#include <linux/mount.h> +#include <linux/namei.h> +#include <linux/quotaops.h> +#include <linux/seq_file.h> + +#include <asm/uaccess.h> + +#include "xattr.h" +#include "acl.h" +#include "namei.h" + +static int ext3_load_journal(struct super_block *, struct ext3_super_block *, + unsigned long journal_devnum); +static int ext3_create_journal(struct super_block *, struct ext3_super_block *, + unsigned int); +static void ext3_commit_super (struct super_block * sb, + struct ext3_super_block * es, + int sync); +static void ext3_mark_recovery_complete(struct super_block * sb, + struct ext3_super_block * es); +static void ext3_clear_journal_err(struct super_block * sb, + struct ext3_super_block * es); +static int ext3_sync_fs(struct super_block *sb, int wait); +static const char *ext3_decode_error(struct super_block * sb, int errno, + char nbuf[16]); +static int ext3_remount (struct super_block * sb, int * flags, char * data); +static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf); +static void ext3_unlockfs(struct super_block *sb); +static void ext3_write_super (struct super_block * sb); +static void ext3_write_super_lockfs(struct super_block *sb); + +/* + * Wrappers for journal_start/end. + * + * The only special thing we need to do here is to make sure that all + * journal_end calls result in the superblock being marked dirty, so + * that sync() will call the filesystem's write_super callback if + * appropriate. + */ +handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) +{ + journal_t *journal; + + if (sb->s_flags & MS_RDONLY) + return ERR_PTR(-EROFS); + + /* Special case here: if the journal has aborted behind our + * backs (eg. EIO in the commit thread), then we still need to + * take the FS itself readonly cleanly. */ + journal = EXT3_SB(sb)->s_journal; + if (is_journal_aborted(journal)) { + ext3_abort(sb, __FUNCTION__, + "Detected aborted journal"); + return ERR_PTR(-EROFS); + } + + return journal_start(journal, nblocks); +} + +/* + * The only special thing we need to do here is to make sure that all + * journal_stop calls result in the superblock being marked dirty, so + * that sync() will call the filesystem's write_super callback if + * appropriate. 
+ */
+int __ext3_journal_stop(const char *where, handle_t *handle)
+{
+	struct super_block *sb;
+	int err;
+	int rc;
+
+	sb = handle->h_transaction->t_journal->j_private;
+	err = handle->h_err;
+	rc = journal_stop(handle);
+
+	if (!err)
+		err = rc;
+	if (err)
+		__ext3_std_error(sb, where, err);
+	return err;
+}
+
+void ext3_journal_abort_handle(const char *caller, const char *err_fn,
+		struct buffer_head *bh, handle_t *handle, int err)
+{
+	char nbuf[16];
+	const char *errstr = ext3_decode_error(NULL, err, nbuf);
+
+	if (bh)
+		BUFFER_TRACE(bh, "abort");
+
+	if (!handle->h_err)
+		handle->h_err = err;
+
+	if (is_handle_aborted(handle))
+		return;
+
+	printk(KERN_ERR "%s: aborting transaction: %s in %s\n",
+	       caller, errstr, err_fn);
+
+	journal_abort_handle(handle);
+}
+
+/* Deal with the reporting of failure conditions on a filesystem such as
+ * inconsistencies detected or read IO failures.
+ *
+ * On ext2, we can store the error state of the filesystem in the
+ * superblock. That is not possible on ext3, because we may have other
+ * write ordering constraints on the superblock which prevent us from
+ * writing it out straight away; and given that the journal is about to
+ * be aborted, we can't rely on the current, or future, transactions to
+ * write out the superblock safely.
+ *
+ * We'll just use the journal_abort() error code to record an error in
+ * the journal instead. On recovery, the journal will complain about
+ * that error until we've noted it down and cleared it.
+ */
+
+static void ext3_handle_error(struct super_block *sb)
+{
+	struct ext3_super_block *es = EXT3_SB(sb)->s_es;
+
+	EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
+	es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
+
+	if (sb->s_flags & MS_RDONLY)
+		return;
+
+	if (!test_opt (sb, ERRORS_CONT)) {
+		journal_t *journal = EXT3_SB(sb)->s_journal;
+
+		EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
+		if (journal)
+			journal_abort(journal, -EIO);
+	}
+	if (test_opt (sb, ERRORS_RO)) {
+		printk (KERN_CRIT "Remounting filesystem read-only\n");
+		sb->s_flags |= MS_RDONLY;
+	}
+	ext3_commit_super(sb, es, 1);
+	if (test_opt(sb, ERRORS_PANIC))
+		panic("EXT3-fs (device %s): panic forced after error\n",
+			sb->s_id);
+}
+
+void ext3_error (struct super_block * sb, const char * function,
+		 const char * fmt, ...)
+{
+	va_list args;
+
+	va_start(args, fmt);
+	printk(KERN_CRIT "EXT3-fs error (device %s): %s: ", sb->s_id, function);
+	vprintk(fmt, args);
+	printk("\n");
+	va_end(args);
+
+	ext3_handle_error(sb);
+}
+
+static const char *ext3_decode_error(struct super_block * sb, int errno,
+				     char nbuf[16])
+{
+	char *errstr = NULL;
+
+	switch (errno) {
+	case -EIO:
+		errstr = "IO failure";
+		break;
+	case -ENOMEM:
+		errstr = "Out of memory";
+		break;
+	case -EROFS:
+		if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT)
+			errstr = "Journal has aborted";
+		else
+			errstr = "Readonly filesystem";
+		break;
+	default:
+		/* If the caller passed in an extra buffer for unknown
+		 * errors, textualise them now. Else we just return
+		 * NULL. */
+		if (nbuf) {
+			/* Check for truncated error codes... */
+			if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
+				errstr = nbuf;
+		}
+		break;
+	}
+
+	return errstr;
+}
+
+/* __ext3_std_error decodes expected errors from journaling functions
+ * automatically and invokes the appropriate error response.
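+ * (Callers normally reach it via the ext3_std_error() macro after a
+ * failed ext3_journal_*() call.)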
*/ + +void __ext3_std_error (struct super_block * sb, const char * function, + int errno) +{ + char nbuf[16]; + const char *errstr; + + /* Special case: if the error is EROFS, and we're not already + * inside a transaction, then there's really no point in logging + * an error. */ + if (errno == -EROFS && journal_current_handle() == NULL && + (sb->s_flags & MS_RDONLY)) + return; + + errstr = ext3_decode_error(sb, errno, nbuf); + printk (KERN_CRIT "EXT3-fs error (device %s) in %s: %s\n", + sb->s_id, function, errstr); + + ext3_handle_error(sb); +} + +/* + * ext3_abort is a much stronger failure handler than ext3_error. The + * abort function may be used to deal with unrecoverable failures such + * as journal IO errors or ENOMEM at a critical moment in log management. + * + * We unconditionally force the filesystem into an ABORT|READONLY state, + * unless the error response on the fs has been set to panic in which + * case we take the easy way out and panic immediately. + */ + +void ext3_abort (struct super_block * sb, const char * function, + const char * fmt, ...) +{ + va_list args; + + printk (KERN_CRIT "ext3_abort called.\n"); + + va_start(args, fmt); + printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function); + vprintk(fmt, args); + printk("\n"); + va_end(args); + + if (test_opt(sb, ERRORS_PANIC)) + panic("EXT3-fs panic from previous error\n"); + + if (sb->s_flags & MS_RDONLY) + return; + + printk(KERN_CRIT "Remounting filesystem read-only\n"); + EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; + sb->s_flags |= MS_RDONLY; + EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; + journal_abort(EXT3_SB(sb)->s_journal, -EIO); +} + +void ext3_warning (struct super_block * sb, const char * function, + const char * fmt, ...) +{ + va_list args; + + va_start(args, fmt); + printk(KERN_WARNING "EXT3-fs warning (device %s): %s: ", + sb->s_id, function); + vprintk(fmt, args); + printk("\n"); + va_end(args); +} + +void ext3_update_dynamic_rev(struct super_block *sb) +{ + struct ext3_super_block *es = EXT3_SB(sb)->s_es; + + if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV) + return; + + ext3_warning(sb, __FUNCTION__, + "updating to rev %d because of new feature flag, " + "running e2fsck is recommended", + EXT3_DYNAMIC_REV); + + es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO); + es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE); + es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV); + /* leave es->s_feature_*compat flags alone */ + /* es->s_uuid will be set by e2fsck if empty */ + + /* + * The rest of the superblock fields should be zero, and if not it + * means they are likely already in use, so leave them alone. We + * can leave it up to e2fsck to clean up any inconsistencies there. 
+ */ +} + +/* + * Open the external journal device + */ +static struct block_device *ext3_blkdev_get(dev_t dev) +{ + struct block_device *bdev; + char b[BDEVNAME_SIZE]; + + bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE); + if (IS_ERR(bdev)) + goto fail; + return bdev; + +fail: + printk(KERN_ERR "EXT3: failed to open journal device %s: %ld\n", + __bdevname(dev, b), PTR_ERR(bdev)); + return NULL; +} + +/* + * Release the journal device + */ +static int ext3_blkdev_put(struct block_device *bdev) +{ + bd_release(bdev); + return blkdev_put(bdev); +} + +static int ext3_blkdev_remove(struct ext3_sb_info *sbi) +{ + struct block_device *bdev; + int ret = -ENODEV; + + bdev = sbi->journal_bdev; + if (bdev) { + ret = ext3_blkdev_put(bdev); + sbi->journal_bdev = NULL; + } + return ret; +} + +static inline struct inode *orphan_list_entry(struct list_head *l) +{ + return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode; +} + +static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi) +{ + struct list_head *l; + + printk(KERN_ERR "sb orphan head is %d\n", + le32_to_cpu(sbi->s_es->s_last_orphan)); + + printk(KERN_ERR "sb_info orphan list:\n"); + list_for_each(l, &sbi->s_orphan) { + struct inode *inode = orphan_list_entry(l); + printk(KERN_ERR " " + "inode %s:%lu at %p: mode %o, nlink %d, next %d\n", + inode->i_sb->s_id, inode->i_ino, inode, + inode->i_mode, inode->i_nlink, + NEXT_ORPHAN(inode)); + } +} + +static void ext3_put_super (struct super_block * sb) +{ + struct ext3_sb_info *sbi = EXT3_SB(sb); + struct ext3_super_block *es = sbi->s_es; + int i; + + ext3_xattr_put_super(sb); + journal_destroy(sbi->s_journal); + if (!(sb->s_flags & MS_RDONLY)) { + EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); + es->s_state = cpu_to_le16(sbi->s_mount_state); + BUFFER_TRACE(sbi->s_sbh, "marking dirty"); + mark_buffer_dirty(sbi->s_sbh); + ext3_commit_super(sb, es, 1); + } + + for (i = 0; i < sbi->s_gdb_count; i++) + brelse(sbi->s_group_desc[i]); + kfree(sbi->s_group_desc); + percpu_counter_destroy(&sbi->s_freeblocks_counter); + percpu_counter_destroy(&sbi->s_freeinodes_counter); + percpu_counter_destroy(&sbi->s_dirs_counter); + brelse(sbi->s_sbh); +#ifdef CONFIG_QUOTA + for (i = 0; i < MAXQUOTAS; i++) + kfree(sbi->s_qf_names[i]); +#endif + + /* Debugging code just in case the in-memory inode orphan list + * isn't empty. The on-disk one can be non-empty if we've + * detected an error and taken the fs readonly, but the + * in-memory list had better be clean by this point. */ + if (!list_empty(&sbi->s_orphan)) + dump_orphan_list(sb, sbi); + J_ASSERT(list_empty(&sbi->s_orphan)); + + invalidate_bdev(sb->s_bdev, 0); + if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) { + /* + * Invalidate the journal device's buffers. We don't want them + * floating about in memory - the physical journal device may + * hotswapped, and it breaks the `ro-after' testing code. 
+ */ + sync_blockdev(sbi->journal_bdev); + invalidate_bdev(sbi->journal_bdev, 0); + ext3_blkdev_remove(sbi); + } + sb->s_fs_info = NULL; + kfree(sbi); + return; +} + +static kmem_cache_t *ext3_inode_cachep; + +/* + * Called inside transaction, so use GFP_NOFS + */ +static struct inode *ext3_alloc_inode(struct super_block *sb) +{ + struct ext3_inode_info *ei; + + ei = kmem_cache_alloc(ext3_inode_cachep, SLAB_NOFS); + if (!ei) + return NULL; +#ifdef CONFIG_EXT3_FS_POSIX_ACL + ei->i_acl = EXT3_ACL_NOT_CACHED; + ei->i_default_acl = EXT3_ACL_NOT_CACHED; +#endif + ei->i_block_alloc_info = NULL; + ei->vfs_inode.i_version = 1; + return &ei->vfs_inode; +} + +static void ext3_destroy_inode(struct inode *inode) +{ + kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); +} + +static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) +{ + struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; + + if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == + SLAB_CTOR_CONSTRUCTOR) { + INIT_LIST_HEAD(&ei->i_orphan); +#ifdef CONFIG_EXT3_FS_XATTR + init_rwsem(&ei->xattr_sem); +#endif + mutex_init(&ei->truncate_mutex); + inode_init_once(&ei->vfs_inode); + } +} + +static int init_inodecache(void) +{ + ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", + sizeof(struct ext3_inode_info), + 0, (SLAB_RECLAIM_ACCOUNT| + SLAB_MEM_SPREAD), + init_once, NULL); + if (ext3_inode_cachep == NULL) + return -ENOMEM; + return 0; +} + +static void destroy_inodecache(void) +{ + kmem_cache_destroy(ext3_inode_cachep); +} + +static void ext3_clear_inode(struct inode *inode) +{ + struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info; +#ifdef CONFIG_EXT3_FS_POSIX_ACL + if (EXT3_I(inode)->i_acl && + EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) { + posix_acl_release(EXT3_I(inode)->i_acl); + EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED; + } + if (EXT3_I(inode)->i_default_acl && + EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) { + posix_acl_release(EXT3_I(inode)->i_default_acl); + EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED; + } +#endif + ext3_discard_reservation(inode); + EXT3_I(inode)->i_block_alloc_info = NULL; + if (unlikely(rsv)) + kfree(rsv); +} + +static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb) +{ +#if defined(CONFIG_QUOTA) + struct ext3_sb_info *sbi = EXT3_SB(sb); + + if (sbi->s_jquota_fmt) + seq_printf(seq, ",jqfmt=%s", + (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? 
"vfsold": "vfsv0"); + + if (sbi->s_qf_names[USRQUOTA]) + seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); + + if (sbi->s_qf_names[GRPQUOTA]) + seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); + + if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) + seq_puts(seq, ",usrquota"); + + if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) + seq_puts(seq, ",grpquota"); +#endif +} + +static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) +{ + struct super_block *sb = vfs->mnt_sb; + + if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) + seq_puts(seq, ",data=journal"); + else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA) + seq_puts(seq, ",data=ordered"); + else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) + seq_puts(seq, ",data=writeback"); + + ext3_show_quota_options(seq, sb); + + return 0; +} + + +static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp) +{ + __u32 *objp = vobjp; + unsigned long ino = objp[0]; + __u32 generation = objp[1]; + struct inode *inode; + struct dentry *result; + + if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO) + return ERR_PTR(-ESTALE); + if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) + return ERR_PTR(-ESTALE); + + /* iget isn't really right if the inode is currently unallocated!! + * + * ext3_read_inode will return a bad_inode if the inode had been + * deleted, so we should be safe. + * + * Currently we don't know the generation for parent directory, so + * a generation of 0 means "accept any" + */ + inode = iget(sb, ino); + if (inode == NULL) + return ERR_PTR(-ENOMEM); + if (is_bad_inode(inode) || + (generation && inode->i_generation != generation)) { + iput(inode); + return ERR_PTR(-ESTALE); + } + /* now to find a dentry. + * If possible, get a well-connected one + */ + result = d_alloc_anon(inode); + if (!result) { + iput(inode); + return ERR_PTR(-ENOMEM); + } + return result; +} + +#ifdef CONFIG_QUOTA +#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") +#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) + +static int ext3_dquot_initialize(struct inode *inode, int type); +static int ext3_dquot_drop(struct inode *inode); +static int ext3_write_dquot(struct dquot *dquot); +static int ext3_acquire_dquot(struct dquot *dquot); +static int ext3_release_dquot(struct dquot *dquot); +static int ext3_mark_dquot_dirty(struct dquot *dquot); +static int ext3_write_info(struct super_block *sb, int type); +static int ext3_quota_on(struct super_block *sb, int type, int format_id, char *path); +static int ext3_quota_on_mount(struct super_block *sb, int type); +static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, + size_t len, loff_t off); +static ssize_t ext3_quota_write(struct super_block *sb, int type, + const char *data, size_t len, loff_t off); + +static struct dquot_operations ext3_quota_operations = { + .initialize = ext3_dquot_initialize, + .drop = ext3_dquot_drop, + .alloc_space = dquot_alloc_space, + .alloc_inode = dquot_alloc_inode, + .free_space = dquot_free_space, + .free_inode = dquot_free_inode, + .transfer = dquot_transfer, + .write_dquot = ext3_write_dquot, + .acquire_dquot = ext3_acquire_dquot, + .release_dquot = ext3_release_dquot, + .mark_dirty = ext3_mark_dquot_dirty, + .write_info = ext3_write_info +}; + +static struct quotactl_ops ext3_qctl_operations = { + .quota_on = ext3_quota_on, + .quota_off = vfs_quota_off, + .quota_sync = vfs_quota_sync, + .get_info = vfs_get_dqinfo, + .set_info = vfs_set_dqinfo, + .get_dqblk = 
vfs_get_dqblk, + .set_dqblk = vfs_set_dqblk +}; +#endif + +static struct super_operations ext3_sops = { + .alloc_inode = ext3_alloc_inode, + .destroy_inode = ext3_destroy_inode, + .read_inode = ext3_read_inode, + .write_inode = ext3_write_inode, + .dirty_inode = ext3_dirty_inode, + .delete_inode = ext3_delete_inode, + .put_super = ext3_put_super, + .write_super = ext3_write_super, + .sync_fs = ext3_sync_fs, + .write_super_lockfs = ext3_write_super_lockfs, + .unlockfs = ext3_unlockfs, + .statfs = ext3_statfs, + .remount_fs = ext3_remount, + .clear_inode = ext3_clear_inode, + .show_options = ext3_show_options, +#ifdef CONFIG_QUOTA + .quota_read = ext3_quota_read, + .quota_write = ext3_quota_write, +#endif +}; + +static struct export_operations ext3_export_ops = { + .get_parent = ext3_get_parent, + .get_dentry = ext3_get_dentry, +}; + +enum { + Opt_bsd_df, Opt_minix_df, Opt_grpid, Opt_nogrpid, + Opt_resgid, Opt_resuid, Opt_sb, Opt_err_cont, Opt_err_panic, Opt_err_ro, + Opt_nouid32, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov, + Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl, + Opt_reservation, Opt_noreservation, Opt_noload, Opt_nobh, Opt_bh, + Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev, + Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, + Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, + Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, + Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, + Opt_grpquota +}; + +static match_table_t tokens = { + {Opt_bsd_df, "bsddf"}, + {Opt_minix_df, "minixdf"}, + {Opt_grpid, "grpid"}, + {Opt_grpid, "bsdgroups"}, + {Opt_nogrpid, "nogrpid"}, + {Opt_nogrpid, "sysvgroups"}, + {Opt_resgid, "resgid=%u"}, + {Opt_resuid, "resuid=%u"}, + {Opt_sb, "sb=%u"}, + {Opt_err_cont, "errors=continue"}, + {Opt_err_panic, "errors=panic"}, + {Opt_err_ro, "errors=remount-ro"}, + {Opt_nouid32, "nouid32"}, + {Opt_nocheck, "nocheck"}, + {Opt_nocheck, "check=none"}, + {Opt_debug, "debug"}, + {Opt_oldalloc, "oldalloc"}, + {Opt_orlov, "orlov"}, + {Opt_user_xattr, "user_xattr"}, + {Opt_nouser_xattr, "nouser_xattr"}, + {Opt_acl, "acl"}, + {Opt_noacl, "noacl"}, + {Opt_reservation, "reservation"}, + {Opt_noreservation, "noreservation"}, + {Opt_noload, "noload"}, + {Opt_nobh, "nobh"}, + {Opt_bh, "bh"}, + {Opt_commit, "commit=%u"}, + {Opt_journal_update, "journal=update"}, + {Opt_journal_inum, "journal=%u"}, + {Opt_journal_dev, "journal_dev=%u"}, + {Opt_abort, "abort"}, + {Opt_data_journal, "data=journal"}, + {Opt_data_ordered, "data=ordered"}, + {Opt_data_writeback, "data=writeback"}, + {Opt_offusrjquota, "usrjquota="}, + {Opt_usrjquota, "usrjquota=%s"}, + {Opt_offgrpjquota, "grpjquota="}, + {Opt_grpjquota, "grpjquota=%s"}, + {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, + {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, + {Opt_grpquota, "grpquota"}, + {Opt_noquota, "noquota"}, + {Opt_quota, "quota"}, + {Opt_usrquota, "usrquota"}, + {Opt_barrier, "barrier=%u"}, + {Opt_err, NULL}, + {Opt_resize, "resize"}, +}; + +static ext3_fsblk_t get_sb_block(void **data) +{ + ext3_fsblk_t sb_block; + char *options = (char *) *data; + + if (!options || strncmp(options, "sb=", 3) != 0) + return 1; /* Default location */ + options += 3; + /*todo: use simple_strtoll with >32bit ext3 */ + sb_block = simple_strtoul(options, &options, 0); + if (*options && *options != ',') { + printk("EXT3-fs: Invalid sb specification: %s\n", + (char *) *data); + return 1; + } + if (*options == ',') + options++; + *data = (void *) options; + return sb_block; +} + 
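+/*
+ * For example (illustrative numbers): a filesystem created with 1KB
+ * blocks normally keeps its first backup superblock in group 1 at block
+ * 8193, so a damaged primary superblock can be worked around with
+ *
+ *	mount -t ext3 -o sb=8193 /dev/sdXN /mnt
+ *
+ * get_sb_block() above peels the leading "sb=" option off before the
+ * rest of the string reaches parse_options().
+ */
+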
+static int parse_options (char *options, struct super_block *sb, + unsigned int *inum, unsigned long *journal_devnum, + ext3_fsblk_t *n_blocks_count, int is_remount) +{ + struct ext3_sb_info *sbi = EXT3_SB(sb); + char * p; + substring_t args[MAX_OPT_ARGS]; + int data_opt = 0; + int option; +#ifdef CONFIG_QUOTA + int qtype; + char *qname; +#endif + + if (!options) + return 1; + + while ((p = strsep (&options, ",")) != NULL) { + int token; + if (!*p) + continue; + + token = match_token(p, tokens, args); + switch (token) { + case Opt_bsd_df: + clear_opt (sbi->s_mount_opt, MINIX_DF); + break; + case Opt_minix_df: + set_opt (sbi->s_mount_opt, MINIX_DF); + break; + case Opt_grpid: + set_opt (sbi->s_mount_opt, GRPID); + break; + case Opt_nogrpid: + clear_opt (sbi->s_mount_opt, GRPID); + break; + case Opt_resuid: + if (match_int(&args[0], &option)) + return 0; + sbi->s_resuid = option; + break; + case Opt_resgid: + if (match_int(&args[0], &option)) + return 0; + sbi->s_resgid = option; + break; + case Opt_sb: + /* handled by get_sb_block() instead of here */ + /* *sb_block = match_int(&args[0]); */ + break; + case Opt_err_panic: + clear_opt (sbi->s_mount_opt, ERRORS_CONT); + clear_opt (sbi->s_mount_opt, ERRORS_RO); + set_opt (sbi->s_mount_opt, ERRORS_PANIC); + break; + case Opt_err_ro: + clear_opt (sbi->s_mount_opt, ERRORS_CONT); + clear_opt (sbi->s_mount_opt, ERRORS_PANIC); + set_opt (sbi->s_mount_opt, ERRORS_RO); + break; + case Opt_err_cont: + clear_opt (sbi->s_mount_opt, ERRORS_RO); + clear_opt (sbi->s_mount_opt, ERRORS_PANIC); + set_opt (sbi->s_mount_opt, ERRORS_CONT); + break; + case Opt_nouid32: + set_opt (sbi->s_mount_opt, NO_UID32); + break; + case Opt_nocheck: + clear_opt (sbi->s_mount_opt, CHECK); + break; + case Opt_debug: + set_opt (sbi->s_mount_opt, DEBUG); + break; + case Opt_oldalloc: + set_opt (sbi->s_mount_opt, OLDALLOC); + break; + case Opt_orlov: + clear_opt (sbi->s_mount_opt, OLDALLOC); + break; +#ifdef CONFIG_EXT3_FS_XATTR + case Opt_user_xattr: + set_opt (sbi->s_mount_opt, XATTR_USER); + break; + case Opt_nouser_xattr: + clear_opt (sbi->s_mount_opt, XATTR_USER); + break; +#else + case Opt_user_xattr: + case Opt_nouser_xattr: + printk("EXT3 (no)user_xattr options not supported\n"); + break; +#endif +#ifdef CONFIG_EXT3_FS_POSIX_ACL + case Opt_acl: + set_opt(sbi->s_mount_opt, POSIX_ACL); + break; + case Opt_noacl: + clear_opt(sbi->s_mount_opt, POSIX_ACL); + break; +#else + case Opt_acl: + case Opt_noacl: + printk("EXT3 (no)acl options not supported\n"); + break; +#endif + case Opt_reservation: + set_opt(sbi->s_mount_opt, RESERVATION); + break; + case Opt_noreservation: + clear_opt(sbi->s_mount_opt, RESERVATION); + break; + case Opt_journal_update: + /* @@@ FIXME */ + /* Eventually we will want to be able to create + a journal file here. For now, only allow the + user to specify an existing inode to be the + journal file. 
*/ + if (is_remount) { + printk(KERN_ERR "EXT3-fs: cannot specify " + "journal on remount\n"); + return 0; + } + set_opt (sbi->s_mount_opt, UPDATE_JOURNAL); + break; + case Opt_journal_inum: + if (is_remount) { + printk(KERN_ERR "EXT3-fs: cannot specify " + "journal on remount\n"); + return 0; + } + if (match_int(&args[0], &option)) + return 0; + *inum = option; + break; + case Opt_journal_dev: + if (is_remount) { + printk(KERN_ERR "EXT3-fs: cannot specify " + "journal on remount\n"); + return 0; + } + if (match_int(&args[0], &option)) + return 0; + *journal_devnum = option; + break; + case Opt_noload: + set_opt (sbi->s_mount_opt, NOLOAD); + break; + case Opt_commit: + if (match_int(&args[0], &option)) + return 0; + if (option < 0) + return 0; + if (option == 0) + option = JBD_DEFAULT_MAX_COMMIT_AGE; + sbi->s_commit_interval = HZ * option; + break; + case Opt_data_journal: + data_opt = EXT3_MOUNT_JOURNAL_DATA; + goto datacheck; + case Opt_data_ordered: + data_opt = EXT3_MOUNT_ORDERED_DATA; + goto datacheck; + case Opt_data_writeback: + data_opt = EXT3_MOUNT_WRITEBACK_DATA; + datacheck: + if (is_remount) { + if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS) + != data_opt) { + printk(KERN_ERR + "EXT3-fs: cannot change data " + "mode on remount\n"); + return 0; + } + } else { + sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS; + sbi->s_mount_opt |= data_opt; + } + break; +#ifdef CONFIG_QUOTA + case Opt_usrjquota: + qtype = USRQUOTA; + goto set_qf_name; + case Opt_grpjquota: + qtype = GRPQUOTA; +set_qf_name: + if (sb_any_quota_enabled(sb)) { + printk(KERN_ERR + "EXT3-fs: Cannot change journalled " + "quota options when quota turned on.\n"); + return 0; + } + qname = match_strdup(&args[0]); + if (!qname) { + printk(KERN_ERR + "EXT3-fs: not enough memory for " + "storing quotafile name.\n"); + return 0; + } + if (sbi->s_qf_names[qtype] && + strcmp(sbi->s_qf_names[qtype], qname)) { + printk(KERN_ERR + "EXT3-fs: %s quota file already " + "specified.\n", QTYPE2NAME(qtype)); + kfree(qname); + return 0; + } + sbi->s_qf_names[qtype] = qname; + if (strchr(sbi->s_qf_names[qtype], '/')) { + printk(KERN_ERR + "EXT3-fs: quotafile must be on " + "filesystem root.\n"); + kfree(sbi->s_qf_names[qtype]); + sbi->s_qf_names[qtype] = NULL; + return 0; + } + set_opt(sbi->s_mount_opt, QUOTA); + break; + case Opt_offusrjquota: + qtype = USRQUOTA; + goto clear_qf_name; + case Opt_offgrpjquota: + qtype = GRPQUOTA; +clear_qf_name: + if (sb_any_quota_enabled(sb)) { + printk(KERN_ERR "EXT3-fs: Cannot change " + "journalled quota options when " + "quota turned on.\n"); + return 0; + } + /* + * The space will be released later when all options + * are confirmed to be correct + */ + sbi->s_qf_names[qtype] = NULL; + break; + case Opt_jqfmt_vfsold: + sbi->s_jquota_fmt = QFMT_VFS_OLD; + break; + case Opt_jqfmt_vfsv0: + sbi->s_jquota_fmt = QFMT_VFS_V0; + break; + case Opt_quota: + case Opt_usrquota: + set_opt(sbi->s_mount_opt, QUOTA); + set_opt(sbi->s_mount_opt, USRQUOTA); + break; + case Opt_grpquota: + set_opt(sbi->s_mount_opt, QUOTA); + set_opt(sbi->s_mount_opt, GRPQUOTA); + break; + case Opt_noquota: + if (sb_any_quota_enabled(sb)) { + printk(KERN_ERR "EXT3-fs: Cannot change quota " + "options when quota turned on.\n"); + return 0; + } + clear_opt(sbi->s_mount_opt, QUOTA); + clear_opt(sbi->s_mount_opt, USRQUOTA); + clear_opt(sbi->s_mount_opt, GRPQUOTA); + break; +#else + case Opt_quota: + case Opt_usrquota: + case Opt_grpquota: + case Opt_usrjquota: + case Opt_grpjquota: + case Opt_offusrjquota: + case Opt_offgrpjquota: + case 
Opt_jqfmt_vfsold: + case Opt_jqfmt_vfsv0: + printk(KERN_ERR + "EXT3-fs: journalled quota options not " + "supported.\n"); + break; + case Opt_noquota: + break; +#endif + case Opt_abort: + set_opt(sbi->s_mount_opt, ABORT); + break; + case Opt_barrier: + if (match_int(&args[0], &option)) + return 0; + if (option) + set_opt(sbi->s_mount_opt, BARRIER); + else + clear_opt(sbi->s_mount_opt, BARRIER); + break; + case Opt_ignore: + break; + case Opt_resize: + if (!is_remount) { + printk("EXT3-fs: resize option only available " + "for remount\n"); + return 0; + } + if (match_int(&args[0], &option) != 0) + return 0; + *n_blocks_count = option; + break; + case Opt_nobh: + set_opt(sbi->s_mount_opt, NOBH); + break; + case Opt_bh: + clear_opt(sbi->s_mount_opt, NOBH); + break; + default: + printk (KERN_ERR + "EXT3-fs: Unrecognized mount option \"%s\" " + "or missing value\n", p); + return 0; + } + } +#ifdef CONFIG_QUOTA + if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { + if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) && + sbi->s_qf_names[USRQUOTA]) + clear_opt(sbi->s_mount_opt, USRQUOTA); + + if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) && + sbi->s_qf_names[GRPQUOTA]) + clear_opt(sbi->s_mount_opt, GRPQUOTA); + + if ((sbi->s_qf_names[USRQUOTA] && + (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) || + (sbi->s_qf_names[GRPQUOTA] && + (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) { + printk(KERN_ERR "EXT3-fs: old and new quota " + "format mixing.\n"); + return 0; + } + + if (!sbi->s_jquota_fmt) { + printk(KERN_ERR "EXT3-fs: journalled quota format " + "not specified.\n"); + return 0; + } + } else { + if (sbi->s_jquota_fmt) { + printk(KERN_ERR "EXT3-fs: journalled quota format " + "specified with no journalling " + "enabled.\n"); + return 0; + } + } +#endif + return 1; +} + +static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es, + int read_only) +{ + struct ext3_sb_info *sbi = EXT3_SB(sb); + int res = 0; + + if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) { + printk (KERN_ERR "EXT3-fs warning: revision level too high, " + "forcing read-only mode\n"); + res = MS_RDONLY; + } + if (read_only) + return res; + if (!(sbi->s_mount_state & EXT3_VALID_FS)) + printk (KERN_WARNING "EXT3-fs warning: mounting unchecked fs, " + "running e2fsck is recommended\n"); + else if ((sbi->s_mount_state & EXT3_ERROR_FS)) + printk (KERN_WARNING + "EXT3-fs warning: mounting fs with errors, " + "running e2fsck is recommended\n"); + else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && + le16_to_cpu(es->s_mnt_count) >= + (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) + printk (KERN_WARNING + "EXT3-fs warning: maximal mount count reached, " + "running e2fsck is recommended\n"); + else if (le32_to_cpu(es->s_checkinterval) && + (le32_to_cpu(es->s_lastcheck) + + le32_to_cpu(es->s_checkinterval) <= get_seconds())) + printk (KERN_WARNING + "EXT3-fs warning: checktime reached, " + "running e2fsck is recommended\n"); +#if 0 + /* @@@ We _will_ want to clear the valid bit if we find + inconsistencies, to force a fsck at reboot. But for + a plain journaled filesystem we can keep it set as + valid forever! 
:) */ + es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT3_VALID_FS); +#endif + if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) + es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT); + es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); + es->s_mtime = cpu_to_le32(get_seconds()); + ext3_update_dynamic_rev(sb); + EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); + + ext3_commit_super(sb, es, 1); + if (test_opt(sb, DEBUG)) + printk(KERN_INFO "[EXT3 FS bs=%lu, gc=%lu, " + "bpg=%lu, ipg=%lu, mo=%04lx]\n", + sb->s_blocksize, + sbi->s_groups_count, + EXT3_BLOCKS_PER_GROUP(sb), + EXT3_INODES_PER_GROUP(sb), + sbi->s_mount_opt); + + printk(KERN_INFO "EXT3 FS on %s, ", sb->s_id); + if (EXT3_SB(sb)->s_journal->j_inode == NULL) { + char b[BDEVNAME_SIZE]; + + printk("external journal on %s\n", + bdevname(EXT3_SB(sb)->s_journal->j_dev, b)); + } else { + printk("internal journal\n"); + } + return res; +} + +/* Called at mount-time, super-block is locked */ +static int ext3_check_descriptors (struct super_block * sb) +{ + struct ext3_sb_info *sbi = EXT3_SB(sb); + ext3_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); + ext3_fsblk_t last_block; + struct ext3_group_desc * gdp = NULL; + int desc_block = 0; + int i; + + ext3_debug ("Checking group descriptors"); + + for (i = 0; i < sbi->s_groups_count; i++) + { + if (i == sbi->s_groups_count - 1) + last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; + else + last_block = first_block + + (EXT3_BLOCKS_PER_GROUP(sb) - 1); + + if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0) + gdp = (struct ext3_group_desc *) + sbi->s_group_desc[desc_block++]->b_data; + if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || + le32_to_cpu(gdp->bg_block_bitmap) > last_block) + { + ext3_error (sb, "ext3_check_descriptors", + "Block bitmap for group %d" + " not in group (block %lu)!", + i, (unsigned long) + le32_to_cpu(gdp->bg_block_bitmap)); + return 0; + } + if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block || + le32_to_cpu(gdp->bg_inode_bitmap) > last_block) + { + ext3_error (sb, "ext3_check_descriptors", + "Inode bitmap for group %d" + " not in group (block %lu)!", + i, (unsigned long) + le32_to_cpu(gdp->bg_inode_bitmap)); + return 0; + } + if (le32_to_cpu(gdp->bg_inode_table) < first_block || + le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group > + last_block) + { + ext3_error (sb, "ext3_check_descriptors", + "Inode table for group %d" + " not in group (block %lu)!", + i, (unsigned long) + le32_to_cpu(gdp->bg_inode_table)); + return 0; + } + first_block += EXT3_BLOCKS_PER_GROUP(sb); + gdp++; + } + + sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb)); + sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb)); + return 1; +} + + +/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at + * the superblock) which were deleted from all directories, but held open by + * a process at the time of a crash. We walk the list and try to delete these + * inodes at recovery time (only with a read-write filesystem). + * + * In order to keep the orphan inode chain consistent during traversal (in + * case of crash during recovery), we link each inode into the superblock + * orphan list_head and handle it the same way as an inode deletion during + * normal operation (which journals the operations for us). + * + * We only do an iget() and an iput() on each inode, which is very safe if we + * accidentally point at an in-use or already deleted inode. 
The worst that + * can happen in this case is that we get a "bit already cleared" message from + * ext3_free_inode(). The only reason we would point at a wrong inode is if + * e2fsck was run on this filesystem, and it must have already done the orphan + * inode cleanup for us, so we can safely abort without any further action. + */ +static void ext3_orphan_cleanup (struct super_block * sb, + struct ext3_super_block * es) +{ + unsigned int s_flags = sb->s_flags; + int nr_orphans = 0, nr_truncates = 0; +#ifdef CONFIG_QUOTA + int i; +#endif + if (!es->s_last_orphan) { + jbd_debug(4, "no orphan inodes to clean up\n"); + return; + } + + if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { + if (es->s_last_orphan) + jbd_debug(1, "Errors on filesystem, " + "clearing orphan list.\n"); + es->s_last_orphan = 0; + jbd_debug(1, "Skipping orphan recovery on fs with errors.\n"); + return; + } + + if (s_flags & MS_RDONLY) { + printk(KERN_INFO "EXT3-fs: %s: orphan cleanup on readonly fs\n", + sb->s_id); + sb->s_flags &= ~MS_RDONLY; + } +#ifdef CONFIG_QUOTA + /* Needed for iput() to work correctly and not trash data */ + sb->s_flags |= MS_ACTIVE; + /* Turn on quotas so that they are updated correctly */ + for (i = 0; i < MAXQUOTAS; i++) { + if (EXT3_SB(sb)->s_qf_names[i]) { + int ret = ext3_quota_on_mount(sb, i); + if (ret < 0) + printk(KERN_ERR + "EXT3-fs: Cannot turn on journalled " + "quota: error %d\n", ret); + } + } +#endif + + while (es->s_last_orphan) { + struct inode *inode; + + if (!(inode = + ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { + es->s_last_orphan = 0; + break; + } + + list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); + DQUOT_INIT(inode); + if (inode->i_nlink) { + printk(KERN_DEBUG + "%s: truncating inode %lu to %Ld bytes\n", + __FUNCTION__, inode->i_ino, inode->i_size); + jbd_debug(2, "truncating inode %lu to %Ld bytes\n", + inode->i_ino, inode->i_size); + ext3_truncate(inode); + nr_truncates++; + } else { + printk(KERN_DEBUG + "%s: deleting unreferenced inode %lu\n", + __FUNCTION__, inode->i_ino); + jbd_debug(2, "deleting unreferenced inode %lu\n", + inode->i_ino); + nr_orphans++; + } + iput(inode); /* The delete magic happens here! */ + } + +#define PLURAL(x) (x), ((x)==1) ? "" : "s" + + if (nr_orphans) + printk(KERN_INFO "EXT3-fs: %s: %d orphan inode%s deleted\n", + sb->s_id, PLURAL(nr_orphans)); + if (nr_truncates) + printk(KERN_INFO "EXT3-fs: %s: %d truncate%s cleaned up\n", + sb->s_id, PLURAL(nr_truncates)); +#ifdef CONFIG_QUOTA + /* Turn quotas off */ + for (i = 0; i < MAXQUOTAS; i++) { + if (sb_dqopt(sb)->files[i]) + vfs_quota_off(sb, i); + } +#endif + sb->s_flags = s_flags; /* Restore MS_RDONLY status */ +} + +#define log2(n) ffz(~(n)) + +/* + * Maximal file size. There is a direct, and {,double-,triple-}indirect + * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. + * We need to be 1 filesystem block less than the 2^32 sector limit. + */ +static loff_t ext3_max_size(int bits) +{ + loff_t res = EXT3_NDIR_BLOCKS; + /* This constant is calculated to be the largest file size for a + * dense, 4k-blocksize file such that the total number of + * sectors in the file, including data and all indirect blocks, + * does not exceed 2^32. 
*/ + const loff_t upper_limit = 0x1ff7fffd000LL; + + res += 1LL << (bits-2); + res += 1LL << (2*(bits-2)); + res += 1LL << (3*(bits-2)); + res <<= bits; + if (res > upper_limit) + res = upper_limit; + return res; +} + +static ext3_fsblk_t descriptor_loc(struct super_block *sb, + ext3_fsblk_t logic_sb_block, + int nr) +{ + struct ext3_sb_info *sbi = EXT3_SB(sb); + unsigned long bg, first_meta_bg; + int has_super = 0; + + first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); + + if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || + nr < first_meta_bg) + return (logic_sb_block + nr + 1); + bg = sbi->s_desc_per_block * nr; + if (ext3_bg_has_super(sb, bg)) + has_super = 1; + return (has_super + ext3_group_first_block_no(sb, bg)); +} + + +static int ext3_fill_super (struct super_block *sb, void *data, int silent) +{ + struct buffer_head * bh; + struct ext3_super_block *es = NULL; + struct ext3_sb_info *sbi; + ext3_fsblk_t block; + ext3_fsblk_t sb_block = get_sb_block(&data); + ext3_fsblk_t logic_sb_block; + unsigned long offset = 0; + unsigned int journal_inum = 0; + unsigned long journal_devnum = 0; + unsigned long def_mount_opts; + struct inode *root; + int blocksize; + int hblock; + int db_count; + int i; + int needs_recovery; + __le32 features; + + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) + return -ENOMEM; + sb->s_fs_info = sbi; + sbi->s_mount_opt = 0; + sbi->s_resuid = EXT3_DEF_RESUID; + sbi->s_resgid = EXT3_DEF_RESGID; + + unlock_kernel(); + + blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE); + if (!blocksize) { + printk(KERN_ERR "EXT3-fs: unable to set blocksize\n"); + goto out_fail; + } + + /* + * The ext3 superblock will not be buffer aligned for other than 1kB + * block sizes. We need to calculate the offset from buffer start. 
+ */ + if (blocksize != EXT3_MIN_BLOCK_SIZE) { + logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; + offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; + } else { + logic_sb_block = sb_block; + } + + if (!(bh = sb_bread(sb, logic_sb_block))) { + printk (KERN_ERR "EXT3-fs: unable to read superblock\n"); + goto out_fail; + } + /* + * Note: s_es must be initialized as soon as possible because + * some ext3 macro-instructions depend on its value + */ + es = (struct ext3_super_block *) (((char *)bh->b_data) + offset); + sbi->s_es = es; + sb->s_magic = le16_to_cpu(es->s_magic); + if (sb->s_magic != EXT3_SUPER_MAGIC) + goto cantfind_ext3; + + /* Set defaults before we parse the mount options */ + def_mount_opts = le32_to_cpu(es->s_default_mount_opts); + if (def_mount_opts & EXT3_DEFM_DEBUG) + set_opt(sbi->s_mount_opt, DEBUG); + if (def_mount_opts & EXT3_DEFM_BSDGROUPS) + set_opt(sbi->s_mount_opt, GRPID); + if (def_mount_opts & EXT3_DEFM_UID16) + set_opt(sbi->s_mount_opt, NO_UID32); + if (def_mount_opts & EXT3_DEFM_XATTR_USER) + set_opt(sbi->s_mount_opt, XATTR_USER); + if (def_mount_opts & EXT3_DEFM_ACL) + set_opt(sbi->s_mount_opt, POSIX_ACL); + if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA) + sbi->s_mount_opt |= EXT3_MOUNT_JOURNAL_DATA; + else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED) + sbi->s_mount_opt |= EXT3_MOUNT_ORDERED_DATA; + else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK) + sbi->s_mount_opt |= EXT3_MOUNT_WRITEBACK_DATA; + + if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC) + set_opt(sbi->s_mount_opt, ERRORS_PANIC); + else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO) + set_opt(sbi->s_mount_opt, ERRORS_RO); + + sbi->s_resuid = le16_to_cpu(es->s_def_resuid); + sbi->s_resgid = le16_to_cpu(es->s_def_resgid); + + set_opt(sbi->s_mount_opt, RESERVATION); + + if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, + NULL, 0)) + goto failed_mount; + + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + + if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV && + (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) || + EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) || + EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U))) + printk(KERN_WARNING + "EXT3-fs warning: feature flags set on rev 0 fs, " + "running e2fsck is recommended\n"); + /* + * Check feature flags regardless of the revision level, since we + * previously didn't change the revision level when setting the flags, + * so there is a chance incompat flags are set on a rev 0 filesystem. 
+ */ + features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP); + if (features) { + printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of " + "unsupported optional features (%x).\n", + sb->s_id, le32_to_cpu(features)); + goto failed_mount; + } + features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP); + if (!(sb->s_flags & MS_RDONLY) && features) { + printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of " + "unsupported optional features (%x).\n", + sb->s_id, le32_to_cpu(features)); + goto failed_mount; + } + blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); + + if (blocksize < EXT3_MIN_BLOCK_SIZE || + blocksize > EXT3_MAX_BLOCK_SIZE) { + printk(KERN_ERR + "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n", + blocksize, sb->s_id); + goto failed_mount; + } + + hblock = bdev_hardsect_size(sb->s_bdev); + if (sb->s_blocksize != blocksize) { + /* + * Make sure the blocksize for the filesystem is larger + * than the hardware sectorsize for the machine. + */ + if (blocksize < hblock) { + printk(KERN_ERR "EXT3-fs: blocksize %d too small for " + "device blocksize %d.\n", blocksize, hblock); + goto failed_mount; + } + + brelse (bh); + sb_set_blocksize(sb, blocksize); + logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; + offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; + bh = sb_bread(sb, logic_sb_block); + if (!bh) { + printk(KERN_ERR + "EXT3-fs: Can't read superblock on 2nd try.\n"); + goto failed_mount; + } + es = (struct ext3_super_block *)(((char *)bh->b_data) + offset); + sbi->s_es = es; + if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) { + printk (KERN_ERR + "EXT3-fs: Magic mismatch, very weird !\n"); + goto failed_mount; + } + } + + sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits); + + if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) { + sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE; + sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO; + } else { + sbi->s_inode_size = le16_to_cpu(es->s_inode_size); + sbi->s_first_ino = le32_to_cpu(es->s_first_ino); + if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || + (sbi->s_inode_size & (sbi->s_inode_size - 1)) || + (sbi->s_inode_size > blocksize)) { + printk (KERN_ERR + "EXT3-fs: unsupported inode size: %d\n", + sbi->s_inode_size); + goto failed_mount; + } + } + sbi->s_frag_size = EXT3_MIN_FRAG_SIZE << + le32_to_cpu(es->s_log_frag_size); + if (blocksize != sbi->s_frag_size) { + printk(KERN_ERR + "EXT3-fs: fragsize %lu != blocksize %u (unsupported)\n", + sbi->s_frag_size, blocksize); + goto failed_mount; + } + sbi->s_frags_per_block = 1; + sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); + sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); + sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); + if (EXT3_INODE_SIZE(sb) == 0) + goto cantfind_ext3; + sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb); + if (sbi->s_inodes_per_block == 0) + goto cantfind_ext3; + sbi->s_itb_per_group = sbi->s_inodes_per_group / + sbi->s_inodes_per_block; + sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc); + sbi->s_sbh = bh; + sbi->s_mount_state = le16_to_cpu(es->s_state); + sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb)); + sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb)); + for (i=0; i < 4; i++) + sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); + sbi->s_def_hash_version = es->s_def_hash_version; + + if (sbi->s_blocks_per_group > blocksize * 8) { + printk (KERN_ERR + "EXT3-fs: #blocks per group too big: %lu\n", + 
sbi->s_blocks_per_group); + goto failed_mount; + } + if (sbi->s_frags_per_group > blocksize * 8) { + printk (KERN_ERR + "EXT3-fs: #fragments per group too big: %lu\n", + sbi->s_frags_per_group); + goto failed_mount; + } + if (sbi->s_inodes_per_group > blocksize * 8) { + printk (KERN_ERR + "EXT3-fs: #inodes per group too big: %lu\n", + sbi->s_inodes_per_group); + goto failed_mount; + } + + if (le32_to_cpu(es->s_blocks_count) > + (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { + printk(KERN_ERR "EXT3-fs: filesystem on %s:" + " too large to mount safely\n", sb->s_id); + if (sizeof(sector_t) < 8) + printk(KERN_WARNING "EXT3-fs: CONFIG_LBD not " + "enabled\n"); + goto failed_mount; + } + + if (EXT3_BLOCKS_PER_GROUP(sb) == 0) + goto cantfind_ext3; + sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - + le32_to_cpu(es->s_first_data_block) - 1) + / EXT3_BLOCKS_PER_GROUP(sb)) + 1; + db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) / + EXT3_DESC_PER_BLOCK(sb); + sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), + GFP_KERNEL); + if (sbi->s_group_desc == NULL) { + printk (KERN_ERR "EXT3-fs: not enough memory\n"); + goto failed_mount; + } + + bgl_lock_init(&sbi->s_blockgroup_lock); + + for (i = 0; i < db_count; i++) { + block = descriptor_loc(sb, logic_sb_block, i); + sbi->s_group_desc[i] = sb_bread(sb, block); + if (!sbi->s_group_desc[i]) { + printk (KERN_ERR "EXT3-fs: " + "can't read group descriptor %d\n", i); + db_count = i; + goto failed_mount2; + } + } + if (!ext3_check_descriptors (sb)) { + printk(KERN_ERR "EXT3-fs: group descriptors corrupted!\n"); + goto failed_mount2; + } + sbi->s_gdb_count = db_count; + get_random_bytes(&sbi->s_next_generation, sizeof(u32)); + spin_lock_init(&sbi->s_next_gen_lock); + + percpu_counter_init(&sbi->s_freeblocks_counter, + ext3_count_free_blocks(sb)); + percpu_counter_init(&sbi->s_freeinodes_counter, + ext3_count_free_inodes(sb)); + percpu_counter_init(&sbi->s_dirs_counter, + ext3_count_dirs(sb)); + + /* per filesystem reservation list head & lock */ + spin_lock_init(&sbi->s_rsv_window_lock); + sbi->s_rsv_window_root = RB_ROOT; + /* Add a single, static dummy reservation to the start of the + * reservation window list --- it gives us a placeholder for + * append-at-start-of-list which makes the allocation logic + * _much_ simpler. */ + sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; + sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; + sbi->s_rsv_window_head.rsv_alloc_hit = 0; + sbi->s_rsv_window_head.rsv_goal_size = 0; + ext3_rsv_window_add(sb, &sbi->s_rsv_window_head); + + /* + * set up enough so that it can read an inode + */ + sb->s_op = &ext3_sops; + sb->s_export_op = &ext3_export_ops; + sb->s_xattr = ext3_xattr_handlers; +#ifdef CONFIG_QUOTA + sb->s_qcop = &ext3_qctl_operations; + sb->dq_op = &ext3_quota_operations; +#endif + INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ + + sb->s_root = NULL; + + needs_recovery = (es->s_last_orphan != 0 || + EXT3_HAS_INCOMPAT_FEATURE(sb, + EXT3_FEATURE_INCOMPAT_RECOVER)); + + /* + * The first inode we look at is the journal inode. Don't try + * root first: it may be modified in the journal! 
+ */ + if (!test_opt(sb, NOLOAD) && + EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { + if (ext3_load_journal(sb, es, journal_devnum)) + goto failed_mount3; + } else if (journal_inum) { + if (ext3_create_journal(sb, es, journal_inum)) + goto failed_mount3; + } else { + if (!silent) + printk (KERN_ERR + "ext3: No journal on filesystem on %s\n", + sb->s_id); + goto failed_mount3; + } + + /* We have now updated the journal if required, so we can + * validate the data journaling mode. */ + switch (test_opt(sb, DATA_FLAGS)) { + case 0: + /* No mode set, assume a default based on the journal + capabilities: ORDERED_DATA if the journal can + cope, else JOURNAL_DATA */ + if (journal_check_available_features + (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) + set_opt(sbi->s_mount_opt, ORDERED_DATA); + else + set_opt(sbi->s_mount_opt, JOURNAL_DATA); + break; + + case EXT3_MOUNT_ORDERED_DATA: + case EXT3_MOUNT_WRITEBACK_DATA: + if (!journal_check_available_features + (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) { + printk(KERN_ERR "EXT3-fs: Journal does not support " + "requested data journaling mode\n"); + goto failed_mount4; + } + default: + break; + } + + if (test_opt(sb, NOBH)) { + if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) { + printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - " + "it's supported only with writeback mode\n"); + clear_opt(sbi->s_mount_opt, NOBH); + } + } + /* + * The journal_load will have done any necessary log recovery, + * so we can safely mount the rest of the filesystem now. + */ + + root = iget(sb, EXT3_ROOT_INO); + sb->s_root = d_alloc_root(root); + if (!sb->s_root) { + printk(KERN_ERR "EXT3-fs: get root inode failed\n"); + iput(root); + goto failed_mount4; + } + if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { + dput(sb->s_root); + sb->s_root = NULL; + printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n"); + goto failed_mount4; + } + + ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); + /* + * akpm: core read_super() calls in here with the superblock locked. + * That deadlocks, because orphan cleanup needs to lock the superblock + * in numerous places. Here we just pop the lock - it's relatively + * harmless, because we are now ready to accept write_super() requests, + * and aviro says that's the only reason for hanging onto the + * superblock lock. + */ + EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; + ext3_orphan_cleanup(sb, es); + EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; + if (needs_recovery) + printk (KERN_INFO "EXT3-fs: recovery complete.\n"); + ext3_mark_recovery_complete(sb, es); + printk (KERN_INFO "EXT3-fs: mounted filesystem with %s data mode.\n", + test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": + test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? 
"ordered": + "writeback"); + + lock_kernel(); + return 0; + +cantfind_ext3: + if (!silent) + printk(KERN_ERR "VFS: Can't find ext3 filesystem on dev %s.\n", + sb->s_id); + goto failed_mount; + +failed_mount4: + journal_destroy(sbi->s_journal); +failed_mount3: + percpu_counter_destroy(&sbi->s_freeblocks_counter); + percpu_counter_destroy(&sbi->s_freeinodes_counter); + percpu_counter_destroy(&sbi->s_dirs_counter); +failed_mount2: + for (i = 0; i < db_count; i++) + brelse(sbi->s_group_desc[i]); + kfree(sbi->s_group_desc); +failed_mount: +#ifdef CONFIG_QUOTA + for (i = 0; i < MAXQUOTAS; i++) + kfree(sbi->s_qf_names[i]); +#endif + ext3_blkdev_remove(sbi); + brelse(bh); +out_fail: + sb->s_fs_info = NULL; + kfree(sbi); + lock_kernel(); + return -EINVAL; +} + +/* + * Setup any per-fs journal parameters now. We'll do this both on + * initial mount, once the journal has been initialised but before we've + * done any recovery; and again on any subsequent remount. + */ +static void ext3_init_journal_params(struct super_block *sb, journal_t *journal) +{ + struct ext3_sb_info *sbi = EXT3_SB(sb); + + if (sbi->s_commit_interval) + journal->j_commit_interval = sbi->s_commit_interval; + /* We could also set up an ext3-specific default for the commit + * interval here, but for now we'll just fall back to the jbd + * default. */ + + spin_lock(&journal->j_state_lock); + if (test_opt(sb, BARRIER)) + journal->j_flags |= JFS_BARRIER; + else + journal->j_flags &= ~JFS_BARRIER; + spin_unlock(&journal->j_state_lock); +} + +static journal_t *ext3_get_journal(struct super_block *sb, + unsigned int journal_inum) +{ + struct inode *journal_inode; + journal_t *journal; + + /* First, test for the existence of a valid inode on disk. Bad + * things happen if we iget() an unused inode, as the subsequent + * iput() will try to delete it. 
*/ + + journal_inode = iget(sb, journal_inum); + if (!journal_inode) { + printk(KERN_ERR "EXT3-fs: no journal found.\n"); + return NULL; + } + if (!journal_inode->i_nlink) { + make_bad_inode(journal_inode); + iput(journal_inode); + printk(KERN_ERR "EXT3-fs: journal inode is deleted.\n"); + return NULL; + } + + jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", + journal_inode, journal_inode->i_size); + if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) { + printk(KERN_ERR "EXT3-fs: invalid journal inode.\n"); + iput(journal_inode); + return NULL; + } + + journal = journal_init_inode(journal_inode); + if (!journal) { + printk(KERN_ERR "EXT3-fs: Could not load journal inode\n"); + iput(journal_inode); + return NULL; + } + journal->j_private = sb; + ext3_init_journal_params(sb, journal); + return journal; +} + +static journal_t *ext3_get_dev_journal(struct super_block *sb, + dev_t j_dev) +{ + struct buffer_head * bh; + journal_t *journal; + ext3_fsblk_t start; + ext3_fsblk_t len; + int hblock, blocksize; + ext3_fsblk_t sb_block; + unsigned long offset; + struct ext3_super_block * es; + struct block_device *bdev; + + bdev = ext3_blkdev_get(j_dev); + if (bdev == NULL) + return NULL; + + if (bd_claim(bdev, sb)) { + printk(KERN_ERR + "EXT3: failed to claim external journal device.\n"); + blkdev_put(bdev); + return NULL; + } + + blocksize = sb->s_blocksize; + hblock = bdev_hardsect_size(bdev); + if (blocksize < hblock) { + printk(KERN_ERR + "EXT3-fs: blocksize too small for journal device.\n"); + goto out_bdev; + } + + sb_block = EXT3_MIN_BLOCK_SIZE / blocksize; + offset = EXT3_MIN_BLOCK_SIZE % blocksize; + set_blocksize(bdev, blocksize); + if (!(bh = __bread(bdev, sb_block, blocksize))) { + printk(KERN_ERR "EXT3-fs: couldn't read superblock of " + "external journal\n"); + goto out_bdev; + } + + es = (struct ext3_super_block *) (((char *)bh->b_data) + offset); + if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) || + !(le32_to_cpu(es->s_feature_incompat) & + EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) { + printk(KERN_ERR "EXT3-fs: external journal has " + "bad superblock\n"); + brelse(bh); + goto out_bdev; + } + + if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { + printk(KERN_ERR "EXT3-fs: journal UUID does not match\n"); + brelse(bh); + goto out_bdev; + } + + len = le32_to_cpu(es->s_blocks_count); + start = sb_block + 1; + brelse(bh); /* we're done with the superblock */ + + journal = journal_init_dev(bdev, sb->s_bdev, + start, len, blocksize); + if (!journal) { + printk(KERN_ERR "EXT3-fs: failed to create device journal\n"); + goto out_bdev; + } + journal->j_private = sb; + ll_rw_block(READ, 1, &journal->j_sb_buffer); + wait_on_buffer(journal->j_sb_buffer); + if (!buffer_uptodate(journal->j_sb_buffer)) { + printk(KERN_ERR "EXT3-fs: I/O error on journal device\n"); + goto out_journal; + } + if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { + printk(KERN_ERR "EXT3-fs: External journal has more than one " + "user (unsupported) - %d\n", + be32_to_cpu(journal->j_superblock->s_nr_users)); + goto out_journal; + } + EXT3_SB(sb)->journal_bdev = bdev; + ext3_init_journal_params(sb, journal); + return journal; +out_journal: + journal_destroy(journal); +out_bdev: + ext3_blkdev_put(bdev); + return NULL; +} + +static int ext3_load_journal(struct super_block *sb, + struct ext3_super_block *es, + unsigned long journal_devnum) +{ + journal_t *journal; + unsigned int journal_inum = le32_to_cpu(es->s_journal_inum); + dev_t journal_dev; + int err = 0; + int really_read_only; + 
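/*
 * Illustrative aside (not part of this patch): the journal_devnum
 * comparison just below operates on the kernel's "new" encoded dev_t.
 * A standalone sketch of that decode, mirroring new_decode_dev();
 * the 12-bit major / split minor bit layout is stated here as an
 * assumption for illustration.
 */
#include <stdio.h>

static void decode_dev_demo(unsigned int dev,
			    unsigned int *major, unsigned int *minor)
{
	*major = (dev & 0xfff00) >> 8;			 /* bits 8..19 */
	*minor = (dev & 0xff) | ((dev >> 12) & 0xfff00); /* bits 0..7 and 20+ */
}

int main(void)
{
	unsigned int major, minor;

	decode_dev_demo(0x811, &major, &minor);	/* 0x811 encodes 8:17, e.g. sdb1 */
	printf("external journal on device %u:%u\n", major, minor);
	return 0;
}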
+ if (journal_devnum && + journal_devnum != le32_to_cpu(es->s_journal_dev)) { + printk(KERN_INFO "EXT3-fs: external journal device major/minor " + "numbers have changed\n"); + journal_dev = new_decode_dev(journal_devnum); + } else + journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev)); + + really_read_only = bdev_read_only(sb->s_bdev); + + /* + * Are we loading a blank journal or performing recovery after a + * crash? For recovery, we need to check in advance whether we + * can get read-write access to the device. + */ + + if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) { + if (sb->s_flags & MS_RDONLY) { + printk(KERN_INFO "EXT3-fs: INFO: recovery " + "required on readonly filesystem.\n"); + if (really_read_only) { + printk(KERN_ERR "EXT3-fs: write access " + "unavailable, cannot proceed.\n"); + return -EROFS; + } + printk (KERN_INFO "EXT3-fs: write access will " + "be enabled during recovery.\n"); + } + } + + if (journal_inum && journal_dev) { + printk(KERN_ERR "EXT3-fs: filesystem has both journal " + "and inode journals!\n"); + return -EINVAL; + } + + if (journal_inum) { + if (!(journal = ext3_get_journal(sb, journal_inum))) + return -EINVAL; + } else { + if (!(journal = ext3_get_dev_journal(sb, journal_dev))) + return -EINVAL; + } + + if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { + err = journal_update_format(journal); + if (err) { + printk(KERN_ERR "EXT3-fs: error updating journal.\n"); + journal_destroy(journal); + return err; + } + } + + if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) + err = journal_wipe(journal, !really_read_only); + if (!err) + err = journal_load(journal); + + if (err) { + printk(KERN_ERR "EXT3-fs: error loading journal.\n"); + journal_destroy(journal); + return err; + } + + EXT3_SB(sb)->s_journal = journal; + ext3_clear_journal_err(sb, es); + + if (journal_devnum && + journal_devnum != le32_to_cpu(es->s_journal_dev)) { + es->s_journal_dev = cpu_to_le32(journal_devnum); + sb->s_dirt = 1; + + /* Make sure we flush the recovery flag to disk. */ + ext3_commit_super(sb, es, 1); + } + + return 0; +} + +static int ext3_create_journal(struct super_block * sb, + struct ext3_super_block * es, + unsigned int journal_inum) +{ + journal_t *journal; + + if (sb->s_flags & MS_RDONLY) { + printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to " + "create journal.\n"); + return -EROFS; + } + + if (!(journal = ext3_get_journal(sb, journal_inum))) + return -EINVAL; + + printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n", + journal_inum); + + if (journal_create(journal)) { + printk(KERN_ERR "EXT3-fs: error creating journal.\n"); + journal_destroy(journal); + return -EIO; + } + + EXT3_SB(sb)->s_journal = journal; + + ext3_update_dynamic_rev(sb); + EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); + EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL); + + es->s_journal_inum = cpu_to_le32(journal_inum); + sb->s_dirt = 1; + + /* Make sure we flush the recovery flag to disk. 
*/ + ext3_commit_super(sb, es, 1); + + return 0; +} + +static void ext3_commit_super (struct super_block * sb, + struct ext3_super_block * es, + int sync) +{ + struct buffer_head *sbh = EXT3_SB(sb)->s_sbh; + + if (!sbh) + return; + es->s_wtime = cpu_to_le32(get_seconds()); + es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb)); + es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb)); + BUFFER_TRACE(sbh, "marking dirty"); + mark_buffer_dirty(sbh); + if (sync) + sync_dirty_buffer(sbh); +} + + +/* + * Have we just finished recovery? If so, and if we are mounting (or + * remounting) the filesystem readonly, then we will end up with a + * consistent fs on disk. Record that fact. + */ +static void ext3_mark_recovery_complete(struct super_block * sb, + struct ext3_super_block * es) +{ + journal_t *journal = EXT3_SB(sb)->s_journal; + + journal_lock_updates(journal); + journal_flush(journal); + if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && + sb->s_flags & MS_RDONLY) { + EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); + sb->s_dirt = 0; + ext3_commit_super(sb, es, 1); + } + journal_unlock_updates(journal); +} + +/* + * If we are mounting (or read-write remounting) a filesystem whose journal + * has recorded an error from a previous lifetime, move that error to the + * main filesystem now. + */ +static void ext3_clear_journal_err(struct super_block * sb, + struct ext3_super_block * es) +{ + journal_t *journal; + int j_errno; + const char *errstr; + + journal = EXT3_SB(sb)->s_journal; + + /* + * Now check for any error status which may have been recorded in the + * journal by a prior ext3_error() or ext3_abort() + */ + + j_errno = journal_errno(journal); + if (j_errno) { + char nbuf[16]; + + errstr = ext3_decode_error(sb, j_errno, nbuf); + ext3_warning(sb, __FUNCTION__, "Filesystem error recorded " + "from previous mount: %s", errstr); + ext3_warning(sb, __FUNCTION__, "Marking fs in need of " + "filesystem check."); + + EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; + es->s_state |= cpu_to_le16(EXT3_ERROR_FS); + ext3_commit_super (sb, es, 1); + + journal_clear_err(journal); + } +} + +/* + * Force the running and committing transactions to commit, + * and wait on the commit. + */ +int ext3_force_commit(struct super_block *sb) +{ + journal_t *journal; + int ret; + + if (sb->s_flags & MS_RDONLY) + return 0; + + journal = EXT3_SB(sb)->s_journal; + sb->s_dirt = 0; + ret = ext3_journal_force_commit(journal); + return ret; +} + +/* + * Ext3 always journals updates to the superblock itself, so we don't + * have to propagate any other updates to the superblock on disk at this + * point. Just start an async writeback to get the buffers on their way + * to the disk. + * + * This implicitly triggers the writebehind on sync(). + */ + +static void ext3_write_super (struct super_block * sb) +{ + if (mutex_trylock(&sb->s_lock) != 0) + BUG(); + sb->s_dirt = 0; +} + +static int ext3_sync_fs(struct super_block *sb, int wait) +{ + tid_t target; + + sb->s_dirt = 0; + if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { + if (wait) + log_wait_commit(EXT3_SB(sb)->s_journal, target); + } + return 0; +} + +/* + * LVM calls this function before a (read-only) snapshot is created. This + * gives us a chance to flush the journal completely and mark the fs clean. 
+ */ +static void ext3_write_super_lockfs(struct super_block *sb) +{ + sb->s_dirt = 0; + + if (!(sb->s_flags & MS_RDONLY)) { + journal_t *journal = EXT3_SB(sb)->s_journal; + + /* Now we set up the journal barrier. */ + journal_lock_updates(journal); + journal_flush(journal); + + /* Journal blocked and flushed, clear needs_recovery flag. */ + EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); + ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); + } +} + +/* + * Called by LVM after the snapshot is done. We need to reset the RECOVER + * flag here, even though the filesystem is not technically dirty yet. + */ +static void ext3_unlockfs(struct super_block *sb) +{ + if (!(sb->s_flags & MS_RDONLY)) { + lock_super(sb); + /* Reset the needs_recovery flag before the fs is unlocked. */ + EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); + ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); + unlock_super(sb); + journal_unlock_updates(EXT3_SB(sb)->s_journal); + } +} + +static int ext3_remount (struct super_block * sb, int * flags, char * data) +{ + struct ext3_super_block * es; + struct ext3_sb_info *sbi = EXT3_SB(sb); + ext3_fsblk_t n_blocks_count = 0; + unsigned long old_sb_flags; + struct ext3_mount_options old_opts; + int err; +#ifdef CONFIG_QUOTA + int i; +#endif + + /* Store the original options */ + old_sb_flags = sb->s_flags; + old_opts.s_mount_opt = sbi->s_mount_opt; + old_opts.s_resuid = sbi->s_resuid; + old_opts.s_resgid = sbi->s_resgid; + old_opts.s_commit_interval = sbi->s_commit_interval; +#ifdef CONFIG_QUOTA + old_opts.s_jquota_fmt = sbi->s_jquota_fmt; + for (i = 0; i < MAXQUOTAS; i++) + old_opts.s_qf_names[i] = sbi->s_qf_names[i]; +#endif + + /* + * Allow the "check" option to be passed as a remount option. + */ + if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) { + err = -EINVAL; + goto restore_opts; + } + + if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) + ext3_abort(sb, __FUNCTION__, "Abort forced by user"); + + sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | + ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + + es = sbi->s_es; + + ext3_init_journal_params(sb, sbi->s_journal); + + if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || + n_blocks_count > le32_to_cpu(es->s_blocks_count)) { + if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) { + err = -EROFS; + goto restore_opts; + } + + if (*flags & MS_RDONLY) { + /* + * First of all, the unconditional stuff we have to do + * to disable replay of the journal when we next remount + */ + sb->s_flags |= MS_RDONLY; + + /* + * OK, test if we are remounting a valid rw partition + * readonly, and if so set the rdonly flag and then + * mark the partition as valid again. + */ + if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) && + (sbi->s_mount_state & EXT3_VALID_FS)) + es->s_state = cpu_to_le16(sbi->s_mount_state); + + ext3_mark_recovery_complete(sb, es); + } else { + __le32 ret; + if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, + ~EXT3_FEATURE_RO_COMPAT_SUPP))) { + printk(KERN_WARNING "EXT3-fs: %s: couldn't " + "remount RDWR because of unsupported " + "optional features (%x).\n", + sb->s_id, le32_to_cpu(ret)); + err = -EROFS; + goto restore_opts; + } + /* + * Mounting a RDONLY partition read-write, so reread + * and store the current valid flag. (It may have + * been changed by e2fsck since we originally mounted + * the partition.) 
+ */ + ext3_clear_journal_err(sb, es); + sbi->s_mount_state = le16_to_cpu(es->s_state); + if ((err = ext3_group_extend(sb, es, n_blocks_count))) + goto restore_opts; + if (!ext3_setup_super (sb, es, 0)) + sb->s_flags &= ~MS_RDONLY; + } + } +#ifdef CONFIG_QUOTA + /* Release old quota file names */ + for (i = 0; i < MAXQUOTAS; i++) + if (old_opts.s_qf_names[i] && + old_opts.s_qf_names[i] != sbi->s_qf_names[i]) + kfree(old_opts.s_qf_names[i]); +#endif + return 0; +restore_opts: + sb->s_flags = old_sb_flags; + sbi->s_mount_opt = old_opts.s_mount_opt; + sbi->s_resuid = old_opts.s_resuid; + sbi->s_resgid = old_opts.s_resgid; + sbi->s_commit_interval = old_opts.s_commit_interval; +#ifdef CONFIG_QUOTA + sbi->s_jquota_fmt = old_opts.s_jquota_fmt; + for (i = 0; i < MAXQUOTAS; i++) { + if (sbi->s_qf_names[i] && + old_opts.s_qf_names[i] != sbi->s_qf_names[i]) + kfree(sbi->s_qf_names[i]); + sbi->s_qf_names[i] = old_opts.s_qf_names[i]; + } +#endif + return err; +} + +static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) +{ + struct super_block *sb = dentry->d_sb; + struct ext3_sb_info *sbi = EXT3_SB(sb); + struct ext3_super_block *es = sbi->s_es; + ext3_fsblk_t overhead; + int i; + + if (test_opt (sb, MINIX_DF)) + overhead = 0; + else { + unsigned long ngroups; + ngroups = EXT3_SB(sb)->s_groups_count; + smp_rmb(); + + /* + * Compute the overhead (FS structures) + */ + + /* + * All of the blocks before first_data_block are + * overhead + */ + overhead = le32_to_cpu(es->s_first_data_block); + + /* + * Add the overhead attributed to the superblock and + * block group descriptors. If the sparse superblocks + * feature is turned on, then not all groups have this. + */ + for (i = 0; i < ngroups; i++) { + overhead += ext3_bg_has_super(sb, i) + + ext3_bg_num_gdb(sb, i); + cond_resched(); + } + + /* + * Every block group has an inode bitmap, a block + * bitmap, and an inode table. + */ + overhead += (ngroups * (2 + EXT3_SB(sb)->s_itb_per_group)); + } + + buf->f_type = EXT3_SUPER_MAGIC; + buf->f_bsize = sb->s_blocksize; + buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; + buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); + buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); + if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) + buf->f_bavail = 0; + buf->f_files = le32_to_cpu(es->s_inodes_count); + buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); + buf->f_namelen = EXT3_NAME_LEN; + return 0; +} + +/* Helper function for writing quotas on sync - we need to start transaction before quota file + * is locked for write. 
Otherwise there are possible deadlocks: + * Process 1 Process 2 + * ext3_create() quota_sync() + * journal_start() write_dquot() + * DQUOT_INIT() down(dqio_mutex) + * down(dqio_mutex) journal_start() + * + */ + +#ifdef CONFIG_QUOTA + +static inline struct inode *dquot_to_inode(struct dquot *dquot) +{ + return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; +} + +static int ext3_dquot_initialize(struct inode *inode, int type) +{ + handle_t *handle; + int ret, err; + + /* We may create quota structure so we need to reserve enough blocks */ + handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_initialize(inode, type); + err = ext3_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +static int ext3_dquot_drop(struct inode *inode) +{ + handle_t *handle; + int ret, err; + + /* We may delete quota structure so we need to reserve enough blocks */ + handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_drop(inode); + err = ext3_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +static int ext3_write_dquot(struct dquot *dquot) +{ + int ret, err; + handle_t *handle; + struct inode *inode; + + inode = dquot_to_inode(dquot); + handle = ext3_journal_start(inode, + EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_commit(dquot); + err = ext3_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +static int ext3_acquire_dquot(struct dquot *dquot) +{ + int ret, err; + handle_t *handle; + + handle = ext3_journal_start(dquot_to_inode(dquot), + EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_acquire(dquot); + err = ext3_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +static int ext3_release_dquot(struct dquot *dquot) +{ + int ret, err; + handle_t *handle; + + handle = ext3_journal_start(dquot_to_inode(dquot), + EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_release(dquot); + err = ext3_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +static int ext3_mark_dquot_dirty(struct dquot *dquot) +{ + /* Are we journalling quotas? */ + if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || + EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { + dquot_mark_dquot_dirty(dquot); + return ext3_write_dquot(dquot); + } else { + return dquot_mark_dquot_dirty(dquot); + } +} + +static int ext3_write_info(struct super_block *sb, int type) +{ + int ret, err; + handle_t *handle; + + /* Data block + inode block */ + handle = ext3_journal_start(sb->s_root->d_inode, 2); + if (IS_ERR(handle)) + return PTR_ERR(handle); + ret = dquot_commit_info(sb, type); + err = ext3_journal_stop(handle); + if (!ret) + ret = err; + return ret; +} + +/* + * Turn on quotas during mount time - we need to find + * the quota file and such... + */ +static int ext3_quota_on_mount(struct super_block *sb, int type) +{ + return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type], + EXT3_SB(sb)->s_jquota_fmt, type); +} + +/* + * Standard function to be called on quota_on + */ +static int ext3_quota_on(struct super_block *sb, int type, int format_id, + char *path) +{ + int err; + struct nameidata nd; + + if (!test_opt(sb, QUOTA)) + return -EINVAL; + /* Not journalling quota? 
*/ + if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] && + !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) + return vfs_quota_on(sb, type, format_id, path); + err = path_lookup(path, LOOKUP_FOLLOW, &nd); + if (err) + return err; + /* Quotafile not on the same filesystem? */ + if (nd.mnt->mnt_sb != sb) { + path_release(&nd); + return -EXDEV; + } + /* Quotafile not in fs root? */ + if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) + printk(KERN_WARNING + "EXT3-fs: Quota file not on filesystem root. " + "Journalled quota will not work.\n"); + path_release(&nd); + return vfs_quota_on(sb, type, format_id, path); +} + +/* Read data from quotafile - avoid pagecache and such because we cannot afford + * acquiring the locks... As quota files are never truncated and quota code + * itself serializes the operations (and no one else should touch the files) + * we don't have to be afraid of races */ +static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, + size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); + int err = 0; + int offset = off & (sb->s_blocksize - 1); + int tocopy; + size_t toread; + struct buffer_head *bh; + loff_t i_size = i_size_read(inode); + + if (off > i_size) + return 0; + if (off+len > i_size) + len = i_size-off; + toread = len; + while (toread > 0) { + tocopy = sb->s_blocksize - offset < toread ? + sb->s_blocksize - offset : toread; + bh = ext3_bread(NULL, inode, blk, 0, &err); + if (err) + return err; + if (!bh) /* A hole? */ + memset(data, 0, tocopy); + else + memcpy(data, bh->b_data+offset, tocopy); + brelse(bh); + offset = 0; + toread -= tocopy; + data += tocopy; + blk++; + } + return len; +} + +/* Write to quotafile (we know the transaction is already started and has + * enough credits) */ +static ssize_t ext3_quota_write(struct super_block *sb, int type, + const char *data, size_t len, loff_t off) +{ + struct inode *inode = sb_dqopt(sb)->files[type]; + sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); + int err = 0; + int offset = off & (sb->s_blocksize - 1); + int tocopy; + int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL; + size_t towrite = len; + struct buffer_head *bh; + handle_t *handle = journal_current_handle(); + + mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA); + while (towrite > 0) { + tocopy = sb->s_blocksize - offset < towrite ? 
+ sb->s_blocksize - offset : towrite; + bh = ext3_bread(handle, inode, blk, 1, &err); + if (!bh) + goto out; + if (journal_quota) { + err = ext3_journal_get_write_access(handle, bh); + if (err) { + brelse(bh); + goto out; + } + } + lock_buffer(bh); + memcpy(bh->b_data+offset, data, tocopy); + flush_dcache_page(bh->b_page); + unlock_buffer(bh); + if (journal_quota) + err = ext3_journal_dirty_metadata(handle, bh); + else { + /* Always do at least ordered writes for quotas */ + err = ext3_journal_dirty_data(handle, bh); + mark_buffer_dirty(bh); + } + brelse(bh); + if (err) + goto out; + offset = 0; + towrite -= tocopy; + data += tocopy; + blk++; + } +out: + if (len == towrite) + return err; + if (inode->i_size < off+len-towrite) { + i_size_write(inode, off+len-towrite); + EXT3_I(inode)->i_disksize = inode->i_size; + } + inode->i_version++; + inode->i_mtime = inode->i_ctime = CURRENT_TIME; + ext3_mark_inode_dirty(handle, inode); + mutex_unlock(&inode->i_mutex); + return len - towrite; +} + +#endif + +static int ext3_get_sb(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, struct vfsmount *mnt) +{ + return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt); +} + +static struct file_system_type ext3_fs_type = { + .owner = THIS_MODULE, + .name = "ext3", + .get_sb = ext3_get_sb, + .kill_sb = kill_block_super, + .fs_flags = FS_REQUIRES_DEV, +}; + +static int __init init_ext3_fs(void) +{ + int err = init_ext3_xattr(); + if (err) + return err; + err = init_inodecache(); + if (err) + goto out1; + err = register_filesystem(&ext3_fs_type); + if (err) + goto out; + return 0; +out: + destroy_inodecache(); +out1: + exit_ext3_xattr(); + return err; +} + +static void __exit exit_ext3_fs(void) +{ + unregister_filesystem(&ext3_fs_type); + destroy_inodecache(); + exit_ext3_xattr(); +} + +MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); +MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); +MODULE_LICENSE("GPL"); +module_init(init_ext3_fs) +module_exit(exit_ext3_fs) diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c new file mode 100644 index 000000000000..4f79122cde67 --- /dev/null +++ b/fs/ext4/symlink.c @@ -0,0 +1,54 @@ +/* + * linux/fs/ext3/symlink.c + * + * Only fast symlinks left here - the rest is done by generic code. 
AV, 1999 + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/fs/minix/symlink.c + * + * Copyright (C) 1991, 1992 Linus Torvalds + * + * ext3 symlink handling code + */ + +#include <linux/fs.h> +#include <linux/jbd.h> +#include <linux/ext3_fs.h> +#include <linux/namei.h> +#include "xattr.h" + +static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct ext3_inode_info *ei = EXT3_I(dentry->d_inode); + nd_set_link(nd, (char*)ei->i_data); + return NULL; +} + +struct inode_operations ext3_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = page_follow_link_light, + .put_link = page_put_link, +#ifdef CONFIG_EXT3_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ext3_listxattr, + .removexattr = generic_removexattr, +#endif +}; + +struct inode_operations ext3_fast_symlink_inode_operations = { + .readlink = generic_readlink, + .follow_link = ext3_follow_link, +#ifdef CONFIG_EXT3_FS_XATTR + .setxattr = generic_setxattr, + .getxattr = generic_getxattr, + .listxattr = ext3_listxattr, + .removexattr = generic_removexattr, +#endif +}; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c new file mode 100644 index 000000000000..f86f2482f01d --- /dev/null +++ b/fs/ext4/xattr.c @@ -0,0 +1,1317 @@ +/* + * linux/fs/ext3/xattr.c + * + * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> + * + * Fix by Harrison Xing <harrison@mountainviewdata.com>. + * Ext3 code with a lot of help from Eric Jarman <ejarman@acm.org>. + * Extended attributes for symlinks and special files added per + * suggestion of Luka Renko <luka.renko@hermes.si>. + * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>, + * Red Hat Inc. + * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz + * and Andreas Gruenbacher <agruen@suse.de>. + */ + +/* + * Extended attributes are stored directly in inodes (on file systems with + * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl + * field contains the block number if an inode uses an additional block. All + * attributes must fit in the inode and one additional block. Blocks that + * contain the identical set of attributes may be shared among several inodes. + * Identical blocks are detected by keeping a cache of blocks that have + * recently been accessed. + * + * The attributes in inodes and on blocks have a different header; the entries + * are stored in the same format: + * + * +------------------+ + * | header | + * | entry 1 | | + * | entry 2 | | growing downwards + * | entry 3 | v + * | four null bytes | + * | . . . | + * | value 1 | ^ + * | value 3 | | growing upwards + * | value 2 | | + * +------------------+ + * + * The header is followed by multiple entry descriptors. In disk blocks, the + * entry descriptors are kept sorted. In inodes, they are unsorted. The + * attribute values are aligned to the end of the block in no specific order. + * + * Locking strategy + * ---------------- + * EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem. + * EA blocks are only changed if they are exclusive to an inode, so + * holding xattr_sem also means that nothing but the EA block's reference + * count can change. Multiple writers to the same block are synchronized + * by the buffer lock. 
+ */ + +#include <linux/init.h> +#include <linux/fs.h> +#include <linux/slab.h> +#include <linux/ext3_jbd.h> +#include <linux/ext3_fs.h> +#include <linux/mbcache.h> +#include <linux/quotaops.h> +#include <linux/rwsem.h> +#include "xattr.h" +#include "acl.h" + +#define BHDR(bh) ((struct ext3_xattr_header *)((bh)->b_data)) +#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr)) +#define BFIRST(bh) ENTRY(BHDR(bh)+1) +#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) + +#define IHDR(inode, raw_inode) \ + ((struct ext3_xattr_ibody_header *) \ + ((void *)raw_inode + \ + EXT3_GOOD_OLD_INODE_SIZE + \ + EXT3_I(inode)->i_extra_isize)) +#define IFIRST(hdr) ((struct ext3_xattr_entry *)((hdr)+1)) + +#ifdef EXT3_XATTR_DEBUG +# define ea_idebug(inode, f...) do { \ + printk(KERN_DEBUG "inode %s:%lu: ", \ + inode->i_sb->s_id, inode->i_ino); \ + printk(f); \ + printk("\n"); \ + } while (0) +# define ea_bdebug(bh, f...) do { \ + char b[BDEVNAME_SIZE]; \ + printk(KERN_DEBUG "block %s:%lu: ", \ + bdevname(bh->b_bdev, b), \ + (unsigned long) bh->b_blocknr); \ + printk(f); \ + printk("\n"); \ + } while (0) +#else +# define ea_idebug(f...) +# define ea_bdebug(f...) +#endif + +static void ext3_xattr_cache_insert(struct buffer_head *); +static struct buffer_head *ext3_xattr_cache_find(struct inode *, + struct ext3_xattr_header *, + struct mb_cache_entry **); +static void ext3_xattr_rehash(struct ext3_xattr_header *, + struct ext3_xattr_entry *); + +static struct mb_cache *ext3_xattr_cache; + +static struct xattr_handler *ext3_xattr_handler_map[] = { + [EXT3_XATTR_INDEX_USER] = &ext3_xattr_user_handler, +#ifdef CONFIG_EXT3_FS_POSIX_ACL + [EXT3_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext3_xattr_acl_access_handler, + [EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext3_xattr_acl_default_handler, +#endif + [EXT3_XATTR_INDEX_TRUSTED] = &ext3_xattr_trusted_handler, +#ifdef CONFIG_EXT3_FS_SECURITY + [EXT3_XATTR_INDEX_SECURITY] = &ext3_xattr_security_handler, +#endif +}; + +struct xattr_handler *ext3_xattr_handlers[] = { + &ext3_xattr_user_handler, + &ext3_xattr_trusted_handler, +#ifdef CONFIG_EXT3_FS_POSIX_ACL + &ext3_xattr_acl_access_handler, + &ext3_xattr_acl_default_handler, +#endif +#ifdef CONFIG_EXT3_FS_SECURITY + &ext3_xattr_security_handler, +#endif + NULL +}; + +static inline struct xattr_handler * +ext3_xattr_handler(int name_index) +{ + struct xattr_handler *handler = NULL; + + if (name_index > 0 && name_index < ARRAY_SIZE(ext3_xattr_handler_map)) + handler = ext3_xattr_handler_map[name_index]; + return handler; +} + +/* + * Inode operation listxattr() + * + * dentry->d_inode->i_mutex: don't care + */ +ssize_t +ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) +{ + return ext3_xattr_list(dentry->d_inode, buffer, size); +} + +static int +ext3_xattr_check_names(struct ext3_xattr_entry *entry, void *end) +{ + while (!IS_LAST_ENTRY(entry)) { + struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(entry); + if ((void *)next >= end) + return -EIO; + entry = next; + } + return 0; +} + +static inline int +ext3_xattr_check_block(struct buffer_head *bh) +{ + int error; + + if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || + BHDR(bh)->h_blocks != cpu_to_le32(1)) + return -EIO; + error = ext3_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); + return error; +} + +static inline int +ext3_xattr_check_entry(struct ext3_xattr_entry *entry, size_t size) +{ + size_t value_size = le32_to_cpu(entry->e_value_size); + + if (entry->e_value_block != 0 || value_size > size || + le16_to_cpu(entry->e_value_offs) + value_size > 
size) + return -EIO; + return 0; +} + +static int +ext3_xattr_find_entry(struct ext3_xattr_entry **pentry, int name_index, + const char *name, size_t size, int sorted) +{ + struct ext3_xattr_entry *entry; + size_t name_len; + int cmp = 1; + + if (name == NULL) + return -EINVAL; + name_len = strlen(name); + entry = *pentry; + for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) { + cmp = name_index - entry->e_name_index; + if (!cmp) + cmp = name_len - entry->e_name_len; + if (!cmp) + cmp = memcmp(name, entry->e_name, name_len); + if (cmp <= 0 && (sorted || cmp == 0)) + break; + } + *pentry = entry; + if (!cmp && ext3_xattr_check_entry(entry, size)) + return -EIO; + return cmp ? -ENODATA : 0; +} + +static int +ext3_xattr_block_get(struct inode *inode, int name_index, const char *name, + void *buffer, size_t buffer_size) +{ + struct buffer_head *bh = NULL; + struct ext3_xattr_entry *entry; + size_t size; + int error; + + ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld", + name_index, name, buffer, (long)buffer_size); + + error = -ENODATA; + if (!EXT3_I(inode)->i_file_acl) + goto cleanup; + ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl); + bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); + if (!bh) + goto cleanup; + ea_bdebug(bh, "b_count=%d, refcount=%d", + atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); + if (ext3_xattr_check_block(bh)) { +bad_block: ext3_error(inode->i_sb, __FUNCTION__, + "inode %lu: bad block "E3FSBLK, inode->i_ino, + EXT3_I(inode)->i_file_acl); + error = -EIO; + goto cleanup; + } + ext3_xattr_cache_insert(bh); + entry = BFIRST(bh); + error = ext3_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); + if (error == -EIO) + goto bad_block; + if (error) + goto cleanup; + size = le32_to_cpu(entry->e_value_size); + if (buffer) { + error = -ERANGE; + if (size > buffer_size) + goto cleanup; + memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs), + size); + } + error = size; + +cleanup: + brelse(bh); + return error; +} + +static int +ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name, + void *buffer, size_t buffer_size) +{ + struct ext3_xattr_ibody_header *header; + struct ext3_xattr_entry *entry; + struct ext3_inode *raw_inode; + struct ext3_iloc iloc; + size_t size; + void *end; + int error; + + if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR)) + return -ENODATA; + error = ext3_get_inode_loc(inode, &iloc); + if (error) + return error; + raw_inode = ext3_raw_inode(&iloc); + header = IHDR(inode, raw_inode); + entry = IFIRST(header); + end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; + error = ext3_xattr_check_names(entry, end); + if (error) + goto cleanup; + error = ext3_xattr_find_entry(&entry, name_index, name, + end - (void *)entry, 0); + if (error) + goto cleanup; + size = le32_to_cpu(entry->e_value_size); + if (buffer) { + error = -ERANGE; + if (size > buffer_size) + goto cleanup; + memcpy(buffer, (void *)IFIRST(header) + + le16_to_cpu(entry->e_value_offs), size); + } + error = size; + +cleanup: + brelse(iloc.bh); + return error; +} + +/* + * ext3_xattr_get() + * + * Copy an extended attribute into the buffer + * provided, or compute the buffer size required. + * Buffer is NULL to compute the size of the buffer required. + * + * Returns a negative error number on failure, or the number of bytes + * used / required on success. 
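+ *
+ * For illustration, a hypothetical caller would query the required
+ * size with a NULL buffer first, then fetch the value (buf is assumed
+ * to be a caller-supplied buffer of at least n bytes):
+ *
+ *	int n = ext3_xattr_get(inode, name_index, name, NULL, 0);
+ *	if (n >= 0)
+ *		n = ext3_xattr_get(inode, name_index, name, buf, n);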
+ */ +int +ext3_xattr_get(struct inode *inode, int name_index, const char *name, + void *buffer, size_t buffer_size) +{ + int error; + + down_read(&EXT3_I(inode)->xattr_sem); + error = ext3_xattr_ibody_get(inode, name_index, name, buffer, + buffer_size); + if (error == -ENODATA) + error = ext3_xattr_block_get(inode, name_index, name, buffer, + buffer_size); + up_read(&EXT3_I(inode)->xattr_sem); + return error; +} + +static int +ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry, + char *buffer, size_t buffer_size) +{ + size_t rest = buffer_size; + + for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) { + struct xattr_handler *handler = + ext3_xattr_handler(entry->e_name_index); + + if (handler) { + size_t size = handler->list(inode, buffer, rest, + entry->e_name, + entry->e_name_len); + if (buffer) { + if (size > rest) + return -ERANGE; + buffer += size; + } + rest -= size; + } + } + return buffer_size - rest; +} + +static int +ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) +{ + struct buffer_head *bh = NULL; + int error; + + ea_idebug(inode, "buffer=%p, buffer_size=%ld", + buffer, (long)buffer_size); + + error = 0; + if (!EXT3_I(inode)->i_file_acl) + goto cleanup; + ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl); + bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); + error = -EIO; + if (!bh) + goto cleanup; + ea_bdebug(bh, "b_count=%d, refcount=%d", + atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); + if (ext3_xattr_check_block(bh)) { + ext3_error(inode->i_sb, __FUNCTION__, + "inode %lu: bad block "E3FSBLK, inode->i_ino, + EXT3_I(inode)->i_file_acl); + error = -EIO; + goto cleanup; + } + ext3_xattr_cache_insert(bh); + error = ext3_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); + +cleanup: + brelse(bh); + + return error; +} + +static int +ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) +{ + struct ext3_xattr_ibody_header *header; + struct ext3_inode *raw_inode; + struct ext3_iloc iloc; + void *end; + int error; + + if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR)) + return 0; + error = ext3_get_inode_loc(inode, &iloc); + if (error) + return error; + raw_inode = ext3_raw_inode(&iloc); + header = IHDR(inode, raw_inode); + end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; + error = ext3_xattr_check_names(IFIRST(header), end); + if (error) + goto cleanup; + error = ext3_xattr_list_entries(inode, IFIRST(header), + buffer, buffer_size); + +cleanup: + brelse(iloc.bh); + return error; +} + +/* + * ext3_xattr_list() + * + * Copy a list of attribute names into the buffer + * provided, or compute the buffer size required. + * Buffer is NULL to compute the size of the buffer required. + * + * Returns a negative error number on failure, or the number of bytes + * used / required on success. + */ +int +ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) +{ + int i_error, b_error; + + down_read(&EXT3_I(inode)->xattr_sem); + i_error = ext3_xattr_ibody_list(inode, buffer, buffer_size); + if (i_error < 0) { + b_error = 0; + } else { + if (buffer) { + buffer += i_error; + buffer_size -= i_error; + } + b_error = ext3_xattr_block_list(inode, buffer, buffer_size); + if (b_error < 0) + i_error = 0; + } + up_read(&EXT3_I(inode)->xattr_sem); + return i_error + b_error; +} + +/* + * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is + * not set, set it. 
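+ * This is called from ext3_xattr_set_handle() after a successful
+ * update, so file systems that never use extended attributes keep an
+ * unchanged compatible feature set.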
+ */ +static void ext3_xattr_update_super_block(handle_t *handle, + struct super_block *sb) +{ + if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR)) + return; + + lock_super(sb); + if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) { + EXT3_SB(sb)->s_es->s_feature_compat |= + cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); + sb->s_dirt = 1; + ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + } + unlock_super(sb); +} + +/* + * Release the xattr block BH: If the reference count is > 1, decrement + * it; otherwise free the block. + */ +static void +ext3_xattr_release_block(handle_t *handle, struct inode *inode, + struct buffer_head *bh) +{ + struct mb_cache_entry *ce = NULL; + + ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr); + if (BHDR(bh)->h_refcount == cpu_to_le32(1)) { + ea_bdebug(bh, "refcount now=0; freeing"); + if (ce) + mb_cache_entry_free(ce); + ext3_free_blocks(handle, inode, bh->b_blocknr, 1); + get_bh(bh); + ext3_forget(handle, 1, inode, bh, bh->b_blocknr); + } else { + if (ext3_journal_get_write_access(handle, bh) == 0) { + lock_buffer(bh); + BHDR(bh)->h_refcount = cpu_to_le32( + le32_to_cpu(BHDR(bh)->h_refcount) - 1); + ext3_journal_dirty_metadata(handle, bh); + if (IS_SYNC(inode)) + handle->h_sync = 1; + DQUOT_FREE_BLOCK(inode, 1); + unlock_buffer(bh); + ea_bdebug(bh, "refcount now=%d; releasing", + le32_to_cpu(BHDR(bh)->h_refcount)); + } + if (ce) + mb_cache_entry_release(ce); + } +} + +struct ext3_xattr_info { + int name_index; + const char *name; + const void *value; + size_t value_len; +}; + +struct ext3_xattr_search { + struct ext3_xattr_entry *first; + void *base; + void *end; + struct ext3_xattr_entry *here; + int not_found; +}; + +static int +ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s) +{ + struct ext3_xattr_entry *last; + size_t free, min_offs = s->end - s->base, name_len = strlen(i->name); + + /* Compute min_offs and last. */ + last = s->first; + for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) { + if (!last->e_value_block && last->e_value_size) { + size_t offs = le16_to_cpu(last->e_value_offs); + if (offs < min_offs) + min_offs = offs; + } + } + free = min_offs - ((void *)last - s->base) - sizeof(__u32); + if (!s->not_found) { + if (!s->here->e_value_block && s->here->e_value_size) { + size_t size = le32_to_cpu(s->here->e_value_size); + free += EXT3_XATTR_SIZE(size); + } + free += EXT3_XATTR_LEN(name_len); + } + if (i->value) { + if (free < EXT3_XATTR_SIZE(i->value_len) || + free < EXT3_XATTR_LEN(name_len) + + EXT3_XATTR_SIZE(i->value_len)) + return -ENOSPC; + } + + if (i->value && s->not_found) { + /* Insert the new name. */ + size_t size = EXT3_XATTR_LEN(name_len); + size_t rest = (void *)last - (void *)s->here + sizeof(__u32); + memmove((void *)s->here + size, s->here, rest); + memset(s->here, 0, size); + s->here->e_name_index = i->name_index; + s->here->e_name_len = name_len; + memcpy(s->here->e_name, i->name, name_len); + } else { + if (!s->here->e_value_block && s->here->e_value_size) { + void *first_val = s->base + min_offs; + size_t offs = le16_to_cpu(s->here->e_value_offs); + void *val = s->base + offs; + size_t size = EXT3_XATTR_SIZE( + le32_to_cpu(s->here->e_value_size)); + + if (i->value && size == EXT3_XATTR_SIZE(i->value_len)) { + /* The old and the new value have the same + size. Just replace. */ + s->here->e_value_size = + cpu_to_le32(i->value_len); + memset(val + size - EXT3_XATTR_PAD, 0, + EXT3_XATTR_PAD); /* Clear pad bytes. 
*/ + memcpy(val, i->value, i->value_len); + return 0; + } + + /* Remove the old value. */ + memmove(first_val + size, first_val, val - first_val); + memset(first_val, 0, size); + s->here->e_value_size = 0; + s->here->e_value_offs = 0; + min_offs += size; + + /* Adjust all value offsets. */ + last = s->first; + while (!IS_LAST_ENTRY(last)) { + size_t o = le16_to_cpu(last->e_value_offs); + if (!last->e_value_block && + last->e_value_size && o < offs) + last->e_value_offs = + cpu_to_le16(o + size); + last = EXT3_XATTR_NEXT(last); + } + } + if (!i->value) { + /* Remove the old name. */ + size_t size = EXT3_XATTR_LEN(name_len); + last = ENTRY((void *)last - size); + memmove(s->here, (void *)s->here + size, + (void *)last - (void *)s->here + sizeof(__u32)); + memset(last, 0, size); + } + } + + if (i->value) { + /* Insert the new value. */ + s->here->e_value_size = cpu_to_le32(i->value_len); + if (i->value_len) { + size_t size = EXT3_XATTR_SIZE(i->value_len); + void *val = s->base + min_offs - size; + s->here->e_value_offs = cpu_to_le16(min_offs - size); + memset(val + size - EXT3_XATTR_PAD, 0, + EXT3_XATTR_PAD); /* Clear the pad bytes. */ + memcpy(val, i->value, i->value_len); + } + } + return 0; +} + +struct ext3_xattr_block_find { + struct ext3_xattr_search s; + struct buffer_head *bh; +}; + +static int +ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i, + struct ext3_xattr_block_find *bs) +{ + struct super_block *sb = inode->i_sb; + int error; + + ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", + i->name_index, i->name, i->value, (long)i->value_len); + + if (EXT3_I(inode)->i_file_acl) { + /* The inode already has an extended attribute block. */ + bs->bh = sb_bread(sb, EXT3_I(inode)->i_file_acl); + error = -EIO; + if (!bs->bh) + goto cleanup; + ea_bdebug(bs->bh, "b_count=%d, refcount=%d", + atomic_read(&(bs->bh->b_count)), + le32_to_cpu(BHDR(bs->bh)->h_refcount)); + if (ext3_xattr_check_block(bs->bh)) { + ext3_error(sb, __FUNCTION__, + "inode %lu: bad block "E3FSBLK, inode->i_ino, + EXT3_I(inode)->i_file_acl); + error = -EIO; + goto cleanup; + } + /* Find the named attribute. 
*/ + bs->s.base = BHDR(bs->bh); + bs->s.first = BFIRST(bs->bh); + bs->s.end = bs->bh->b_data + bs->bh->b_size; + bs->s.here = bs->s.first; + error = ext3_xattr_find_entry(&bs->s.here, i->name_index, + i->name, bs->bh->b_size, 1); + if (error && error != -ENODATA) + goto cleanup; + bs->s.not_found = error; + } + error = 0; + +cleanup: + return error; +} + +static int +ext3_xattr_block_set(handle_t *handle, struct inode *inode, + struct ext3_xattr_info *i, + struct ext3_xattr_block_find *bs) +{ + struct super_block *sb = inode->i_sb; + struct buffer_head *new_bh = NULL; + struct ext3_xattr_search *s = &bs->s; + struct mb_cache_entry *ce = NULL; + int error; + +#define header(x) ((struct ext3_xattr_header *)(x)) + + if (i->value && i->value_len > sb->s_blocksize) + return -ENOSPC; + if (s->base) { + ce = mb_cache_entry_get(ext3_xattr_cache, bs->bh->b_bdev, + bs->bh->b_blocknr); + if (header(s->base)->h_refcount == cpu_to_le32(1)) { + if (ce) { + mb_cache_entry_free(ce); + ce = NULL; + } + ea_bdebug(bs->bh, "modifying in-place"); + error = ext3_journal_get_write_access(handle, bs->bh); + if (error) + goto cleanup; + lock_buffer(bs->bh); + error = ext3_xattr_set_entry(i, s); + if (!error) { + if (!IS_LAST_ENTRY(s->first)) + ext3_xattr_rehash(header(s->base), + s->here); + ext3_xattr_cache_insert(bs->bh); + } + unlock_buffer(bs->bh); + if (error == -EIO) + goto bad_block; + if (!error) + error = ext3_journal_dirty_metadata(handle, + bs->bh); + if (error) + goto cleanup; + goto inserted; + } else { + int offset = (char *)s->here - bs->bh->b_data; + + if (ce) { + mb_cache_entry_release(ce); + ce = NULL; + } + ea_bdebug(bs->bh, "cloning"); + s->base = kmalloc(bs->bh->b_size, GFP_KERNEL); + error = -ENOMEM; + if (s->base == NULL) + goto cleanup; + memcpy(s->base, BHDR(bs->bh), bs->bh->b_size); + s->first = ENTRY(header(s->base)+1); + header(s->base)->h_refcount = cpu_to_le32(1); + s->here = ENTRY(s->base + offset); + s->end = s->base + bs->bh->b_size; + } + } else { + /* Allocate a buffer where we construct the new block. */ + s->base = kmalloc(sb->s_blocksize, GFP_KERNEL); + /* assert(header == s->base) */ + error = -ENOMEM; + if (s->base == NULL) + goto cleanup; + memset(s->base, 0, sb->s_blocksize); + header(s->base)->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); + header(s->base)->h_blocks = cpu_to_le32(1); + header(s->base)->h_refcount = cpu_to_le32(1); + s->first = ENTRY(header(s->base)+1); + s->here = ENTRY(header(s->base)+1); + s->end = s->base + sb->s_blocksize; + } + + error = ext3_xattr_set_entry(i, s); + if (error == -EIO) + goto bad_block; + if (error) + goto cleanup; + if (!IS_LAST_ENTRY(s->first)) + ext3_xattr_rehash(header(s->base), s->here); + +inserted: + if (!IS_LAST_ENTRY(s->first)) { + new_bh = ext3_xattr_cache_find(inode, header(s->base), &ce); + if (new_bh) { + /* We found an identical block in the cache. */ + if (new_bh == bs->bh) + ea_bdebug(new_bh, "keeping"); + else { + /* The old block is released after updating + the inode. 
*/ + error = -EDQUOT; + if (DQUOT_ALLOC_BLOCK(inode, 1)) + goto cleanup; + error = ext3_journal_get_write_access(handle, + new_bh); + if (error) + goto cleanup_dquot; + lock_buffer(new_bh); + BHDR(new_bh)->h_refcount = cpu_to_le32(1 + + le32_to_cpu(BHDR(new_bh)->h_refcount)); + ea_bdebug(new_bh, "reusing; refcount now=%d", + le32_to_cpu(BHDR(new_bh)->h_refcount)); + unlock_buffer(new_bh); + error = ext3_journal_dirty_metadata(handle, + new_bh); + if (error) + goto cleanup_dquot; + } + mb_cache_entry_release(ce); + ce = NULL; + } else if (bs->bh && s->base == bs->bh->b_data) { + /* We were modifying this block in-place. */ + ea_bdebug(bs->bh, "keeping this block"); + new_bh = bs->bh; + get_bh(new_bh); + } else { + /* We need to allocate a new block */ + ext3_fsblk_t goal = le32_to_cpu( + EXT3_SB(sb)->s_es->s_first_data_block) + + (ext3_fsblk_t)EXT3_I(inode)->i_block_group * + EXT3_BLOCKS_PER_GROUP(sb); + ext3_fsblk_t block = ext3_new_block(handle, inode, + goal, &error); + if (error) + goto cleanup; + ea_idebug(inode, "creating block %d", block); + + new_bh = sb_getblk(sb, block); + if (!new_bh) { +getblk_failed: + ext3_free_blocks(handle, inode, block, 1); + error = -EIO; + goto cleanup; + } + lock_buffer(new_bh); + error = ext3_journal_get_create_access(handle, new_bh); + if (error) { + unlock_buffer(new_bh); + goto getblk_failed; + } + memcpy(new_bh->b_data, s->base, new_bh->b_size); + set_buffer_uptodate(new_bh); + unlock_buffer(new_bh); + ext3_xattr_cache_insert(new_bh); + error = ext3_journal_dirty_metadata(handle, new_bh); + if (error) + goto cleanup; + } + } + + /* Update the inode. */ + EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; + + /* Drop the previous xattr block. */ + if (bs->bh && bs->bh != new_bh) + ext3_xattr_release_block(handle, inode, bs->bh); + error = 0; + +cleanup: + if (ce) + mb_cache_entry_release(ce); + brelse(new_bh); + if (!(bs->bh && s->base == bs->bh->b_data)) + kfree(s->base); + + return error; + +cleanup_dquot: + DQUOT_FREE_BLOCK(inode, 1); + goto cleanup; + +bad_block: + ext3_error(inode->i_sb, __FUNCTION__, + "inode %lu: bad block "E3FSBLK, inode->i_ino, + EXT3_I(inode)->i_file_acl); + goto cleanup; + +#undef header +} + +struct ext3_xattr_ibody_find { + struct ext3_xattr_search s; + struct ext3_iloc iloc; +}; + +static int +ext3_xattr_ibody_find(struct inode *inode, struct ext3_xattr_info *i, + struct ext3_xattr_ibody_find *is) +{ + struct ext3_xattr_ibody_header *header; + struct ext3_inode *raw_inode; + int error; + + if (EXT3_I(inode)->i_extra_isize == 0) + return 0; + raw_inode = ext3_raw_inode(&is->iloc); + header = IHDR(inode, raw_inode); + is->s.base = is->s.first = IFIRST(header); + is->s.here = is->s.first; + is->s.end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; + if (EXT3_I(inode)->i_state & EXT3_STATE_XATTR) { + error = ext3_xattr_check_names(IFIRST(header), is->s.end); + if (error) + return error; + /* Find the named attribute. 
*/ + error = ext3_xattr_find_entry(&is->s.here, i->name_index, + i->name, is->s.end - + (void *)is->s.base, 0); + if (error && error != -ENODATA) + return error; + is->s.not_found = error; + } + return 0; +} + +static int +ext3_xattr_ibody_set(handle_t *handle, struct inode *inode, + struct ext3_xattr_info *i, + struct ext3_xattr_ibody_find *is) +{ + struct ext3_xattr_ibody_header *header; + struct ext3_xattr_search *s = &is->s; + int error; + + if (EXT3_I(inode)->i_extra_isize == 0) + return -ENOSPC; + error = ext3_xattr_set_entry(i, s); + if (error) + return error; + header = IHDR(inode, ext3_raw_inode(&is->iloc)); + if (!IS_LAST_ENTRY(s->first)) { + header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); + EXT3_I(inode)->i_state |= EXT3_STATE_XATTR; + } else { + header->h_magic = cpu_to_le32(0); + EXT3_I(inode)->i_state &= ~EXT3_STATE_XATTR; + } + return 0; +} + +/* + * ext3_xattr_set_handle() + * + * Create, replace or remove an extended attribute for this inode. Buffer + * is NULL to remove an existing extended attribute, and non-NULL to + * either replace an existing extended attribute, or create a new extended + * attribute. The flags XATTR_REPLACE and XATTR_CREATE + * specify that an extended attribute must exist and must not exist + * previous to the call, respectively. + * + * Returns 0, or a negative error number on failure. + */ +int +ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, + const char *name, const void *value, size_t value_len, + int flags) +{ + struct ext3_xattr_info i = { + .name_index = name_index, + .name = name, + .value = value, + .value_len = value_len, + + }; + struct ext3_xattr_ibody_find is = { + .s = { .not_found = -ENODATA, }, + }; + struct ext3_xattr_block_find bs = { + .s = { .not_found = -ENODATA, }, + }; + int error; + + if (!name) + return -EINVAL; + if (strlen(name) > 255) + return -ERANGE; + down_write(&EXT3_I(inode)->xattr_sem); + error = ext3_get_inode_loc(inode, &is.iloc); + if (error) + goto cleanup; + + if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) { + struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc); + memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); + EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW; + } + + error = ext3_xattr_ibody_find(inode, &i, &is); + if (error) + goto cleanup; + if (is.s.not_found) + error = ext3_xattr_block_find(inode, &i, &bs); + if (error) + goto cleanup; + if (is.s.not_found && bs.s.not_found) { + error = -ENODATA; + if (flags & XATTR_REPLACE) + goto cleanup; + error = 0; + if (!value) + goto cleanup; + } else { + error = -EEXIST; + if (flags & XATTR_CREATE) + goto cleanup; + } + error = ext3_journal_get_write_access(handle, is.iloc.bh); + if (error) + goto cleanup; + if (!value) { + if (!is.s.not_found) + error = ext3_xattr_ibody_set(handle, inode, &i, &is); + else if (!bs.s.not_found) + error = ext3_xattr_block_set(handle, inode, &i, &bs); + } else { + error = ext3_xattr_ibody_set(handle, inode, &i, &is); + if (!error && !bs.s.not_found) { + i.value = NULL; + error = ext3_xattr_block_set(handle, inode, &i, &bs); + } else if (error == -ENOSPC) { + error = ext3_xattr_block_set(handle, inode, &i, &bs); + if (error) + goto cleanup; + if (!is.s.not_found) { + i.value = NULL; + error = ext3_xattr_ibody_set(handle, inode, &i, + &is); + } + } + } + if (!error) { + ext3_xattr_update_super_block(handle, inode->i_sb); + inode->i_ctime = CURRENT_TIME_SEC; + error = ext3_mark_iloc_dirty(handle, inode, &is.iloc); + /* + * The bh is consumed by ext3_mark_iloc_dirty, even with + * error != 0. 
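+		 * Clearing is.iloc.bh below keeps the brelse() in the
+		 * cleanup path from dropping a reference this function
+		 * no longer owns.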
+ */ + is.iloc.bh = NULL; + if (IS_SYNC(inode)) + handle->h_sync = 1; + } + +cleanup: + brelse(is.iloc.bh); + brelse(bs.bh); + up_write(&EXT3_I(inode)->xattr_sem); + return error; +} + +/* + * ext3_xattr_set() + * + * Like ext3_xattr_set_handle, but start from an inode. This extended + * attribute modification is a filesystem transaction by itself. + * + * Returns 0, or a negative error number on failure. + */ +int +ext3_xattr_set(struct inode *inode, int name_index, const char *name, + const void *value, size_t value_len, int flags) +{ + handle_t *handle; + int error, retries = 0; + +retry: + handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); + if (IS_ERR(handle)) { + error = PTR_ERR(handle); + } else { + int error2; + + error = ext3_xattr_set_handle(handle, inode, name_index, name, + value, value_len, flags); + error2 = ext3_journal_stop(handle); + if (error == -ENOSPC && + ext3_should_retry_alloc(inode->i_sb, &retries)) + goto retry; + if (error == 0) + error = error2; + } + + return error; +} + +/* + * ext3_xattr_delete_inode() + * + * Free extended attribute resources associated with this inode. This + * is called immediately before an inode is freed. We have exclusive + * access to the inode. + */ +void +ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) +{ + struct buffer_head *bh = NULL; + + if (!EXT3_I(inode)->i_file_acl) + goto cleanup; + bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); + if (!bh) { + ext3_error(inode->i_sb, __FUNCTION__, + "inode %lu: block "E3FSBLK" read error", inode->i_ino, + EXT3_I(inode)->i_file_acl); + goto cleanup; + } + if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || + BHDR(bh)->h_blocks != cpu_to_le32(1)) { + ext3_error(inode->i_sb, __FUNCTION__, + "inode %lu: bad block "E3FSBLK, inode->i_ino, + EXT3_I(inode)->i_file_acl); + goto cleanup; + } + ext3_xattr_release_block(handle, inode, bh); + EXT3_I(inode)->i_file_acl = 0; + +cleanup: + brelse(bh); +} + +/* + * ext3_xattr_put_super() + * + * This is called when a file system is unmounted. + */ +void +ext3_xattr_put_super(struct super_block *sb) +{ + mb_cache_shrink(sb->s_bdev); +} + +/* + * ext3_xattr_cache_insert() + * + * Create a new entry in the extended attribute cache, and insert + * it unless such an entry is already in the cache. + * + * Returns 0, or a negative error number on failure. + */ +static void +ext3_xattr_cache_insert(struct buffer_head *bh) +{ + __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); + struct mb_cache_entry *ce; + int error; + + ce = mb_cache_entry_alloc(ext3_xattr_cache); + if (!ce) { + ea_bdebug(bh, "out of memory"); + return; + } + error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash); + if (error) { + mb_cache_entry_free(ce); + if (error == -EBUSY) { + ea_bdebug(bh, "already in cache"); + error = 0; + } + } else { + ea_bdebug(bh, "inserting [%x]", (int)hash); + mb_cache_entry_release(ce); + } +} + +/* + * ext3_xattr_cmp() + * + * Compare two extended attribute blocks for equality. + * + * Returns 0 if the blocks are equal, 1 if they differ, and + * a negative error number on errors. 
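+ *
+ * The two entry lists are walked in lockstep, so blocks only compare
+ * equal when their (on-disk sorted) descriptors appear in the same
+ * order.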
+ */ +static int +ext3_xattr_cmp(struct ext3_xattr_header *header1, + struct ext3_xattr_header *header2) +{ + struct ext3_xattr_entry *entry1, *entry2; + + entry1 = ENTRY(header1+1); + entry2 = ENTRY(header2+1); + while (!IS_LAST_ENTRY(entry1)) { + if (IS_LAST_ENTRY(entry2)) + return 1; + if (entry1->e_hash != entry2->e_hash || + entry1->e_name_index != entry2->e_name_index || + entry1->e_name_len != entry2->e_name_len || + entry1->e_value_size != entry2->e_value_size || + memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len)) + return 1; + if (entry1->e_value_block != 0 || entry2->e_value_block != 0) + return -EIO; + if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs), + (char *)header2 + le16_to_cpu(entry2->e_value_offs), + le32_to_cpu(entry1->e_value_size))) + return 1; + + entry1 = EXT3_XATTR_NEXT(entry1); + entry2 = EXT3_XATTR_NEXT(entry2); + } + if (!IS_LAST_ENTRY(entry2)) + return 1; + return 0; +} + +/* + * ext3_xattr_cache_find() + * + * Find an identical extended attribute block. + * + * Returns a pointer to the block found, or NULL if such a block was + * not found or an error occurred. + */ +static struct buffer_head * +ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header, + struct mb_cache_entry **pce) +{ + __u32 hash = le32_to_cpu(header->h_hash); + struct mb_cache_entry *ce; + + if (!header->h_hash) + return NULL; /* never share */ + ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); +again: + ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, + inode->i_sb->s_bdev, hash); + while (ce) { + struct buffer_head *bh; + + if (IS_ERR(ce)) { + if (PTR_ERR(ce) == -EAGAIN) + goto again; + break; + } + bh = sb_bread(inode->i_sb, ce->e_block); + if (!bh) { + ext3_error(inode->i_sb, __FUNCTION__, + "inode %lu: block %lu read error", + inode->i_ino, (unsigned long) ce->e_block); + } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= + EXT3_XATTR_REFCOUNT_MAX) { + ea_idebug(inode, "block %lu refcount %d>=%d", + (unsigned long) ce->e_block, + le32_to_cpu(BHDR(bh)->h_refcount), + EXT3_XATTR_REFCOUNT_MAX); + } else if (ext3_xattr_cmp(header, BHDR(bh)) == 0) { + *pce = ce; + return bh; + } + brelse(bh); + ce = mb_cache_entry_find_next(ce, 0, inode->i_sb->s_bdev, hash); + } + return NULL; +} + +#define NAME_HASH_SHIFT 5 +#define VALUE_HASH_SHIFT 16 + +/* + * ext3_xattr_hash_entry() + * + * Compute the hash of an extended attribute. + */ +static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header, + struct ext3_xattr_entry *entry) +{ + __u32 hash = 0; + char *name = entry->e_name; + int n; + + for (n=0; n < entry->e_name_len; n++) { + hash = (hash << NAME_HASH_SHIFT) ^ + (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^ + *name++; + } + + if (entry->e_value_block == 0 && entry->e_value_size != 0) { + __le32 *value = (__le32 *)((char *)header + + le16_to_cpu(entry->e_value_offs)); + for (n = (le32_to_cpu(entry->e_value_size) + + EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) { + hash = (hash << VALUE_HASH_SHIFT) ^ + (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ + le32_to_cpu(*value++); + } + } + entry->e_hash = cpu_to_le32(hash); +} + +#undef NAME_HASH_SHIFT +#undef VALUE_HASH_SHIFT + +#define BLOCK_HASH_SHIFT 16 + +/* + * ext3_xattr_rehash() + * + * Re-compute the extended attribute hash value after an entry has changed. 
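+ *
+ * The per-entry hashes are folded with a 16-bit rotate and xor,
+ * roughly:
+ *
+ *	hash = (hash << 16) ^ (hash >> 16) ^ le32_to_cpu(e->e_hash);
+ *
+ * A zero entry hash forces the block hash to zero, which marks the
+ * block as never shareable via the cache.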
+ */ +static void ext3_xattr_rehash(struct ext3_xattr_header *header, + struct ext3_xattr_entry *entry) +{ + struct ext3_xattr_entry *here; + __u32 hash = 0; + + ext3_xattr_hash_entry(header, entry); + here = ENTRY(header+1); + while (!IS_LAST_ENTRY(here)) { + if (!here->e_hash) { + /* Block is not shared if an entry's hash value == 0 */ + hash = 0; + break; + } + hash = (hash << BLOCK_HASH_SHIFT) ^ + (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ + le32_to_cpu(here->e_hash); + here = EXT3_XATTR_NEXT(here); + } + header->h_hash = cpu_to_le32(hash); +} + +#undef BLOCK_HASH_SHIFT + +int __init +init_ext3_xattr(void) +{ + ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, + sizeof(struct mb_cache_entry) + + sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6); + if (!ext3_xattr_cache) + return -ENOMEM; + return 0; +} + +void +exit_ext3_xattr(void) +{ + if (ext3_xattr_cache) + mb_cache_destroy(ext3_xattr_cache); + ext3_xattr_cache = NULL; +} diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h new file mode 100644 index 000000000000..6b1ae1c6182c --- /dev/null +++ b/fs/ext4/xattr.h @@ -0,0 +1,145 @@ +/* + File: fs/ext3/xattr.h + + On-disk format of extended attributes for the ext3 filesystem. + + (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> +*/ + +#include <linux/xattr.h> + +/* Magic value in attribute blocks */ +#define EXT3_XATTR_MAGIC 0xEA020000 + +/* Maximum number of references to one attribute block */ +#define EXT3_XATTR_REFCOUNT_MAX 1024 + +/* Name indexes */ +#define EXT3_XATTR_INDEX_USER 1 +#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2 +#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3 +#define EXT3_XATTR_INDEX_TRUSTED 4 +#define EXT3_XATTR_INDEX_LUSTRE 5 +#define EXT3_XATTR_INDEX_SECURITY 6 + +struct ext3_xattr_header { + __le32 h_magic; /* magic number for identification */ + __le32 h_refcount; /* reference count */ + __le32 h_blocks; /* number of disk blocks used */ + __le32 h_hash; /* hash value of all attributes */ + __u32 h_reserved[4]; /* zero right now */ +}; + +struct ext3_xattr_ibody_header { + __le32 h_magic; /* magic number for identification */ +}; + +struct ext3_xattr_entry { + __u8 e_name_len; /* length of name */ + __u8 e_name_index; /* attribute name index */ + __le16 e_value_offs; /* offset in disk block of value */ + __le32 e_value_block; /* disk block attribute is stored on (n/i) */ + __le32 e_value_size; /* size of attribute value */ + __le32 e_hash; /* hash value of name and value */ + char e_name[0]; /* attribute name */ +}; + +#define EXT3_XATTR_PAD_BITS 2 +#define EXT3_XATTR_PAD (1<<EXT3_XATTR_PAD_BITS) +#define EXT3_XATTR_ROUND (EXT3_XATTR_PAD-1) +#define EXT3_XATTR_LEN(name_len) \ + (((name_len) + EXT3_XATTR_ROUND + \ + sizeof(struct ext3_xattr_entry)) & ~EXT3_XATTR_ROUND) +#define EXT3_XATTR_NEXT(entry) \ + ( (struct ext3_xattr_entry *)( \ + (char *)(entry) + EXT3_XATTR_LEN((entry)->e_name_len)) ) +#define EXT3_XATTR_SIZE(size) \ + (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND) + +# ifdef CONFIG_EXT3_FS_XATTR + +extern struct xattr_handler ext3_xattr_user_handler; +extern struct xattr_handler ext3_xattr_trusted_handler; +extern struct xattr_handler ext3_xattr_acl_access_handler; +extern struct xattr_handler ext3_xattr_acl_default_handler; +extern struct xattr_handler ext3_xattr_security_handler; + +extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); + +extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); +extern int ext3_xattr_list(struct inode *, char *, size_t); +extern int ext3_xattr_set(struct inode 
*, int, const char *, const void *, size_t, int); +extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); + +extern void ext3_xattr_delete_inode(handle_t *, struct inode *); +extern void ext3_xattr_put_super(struct super_block *); + +extern int init_ext3_xattr(void); +extern void exit_ext3_xattr(void); + +extern struct xattr_handler *ext3_xattr_handlers[]; + +# else /* CONFIG_EXT3_FS_XATTR */ + +static inline int +ext3_xattr_get(struct inode *inode, int name_index, const char *name, + void *buffer, size_t size, int flags) +{ + return -EOPNOTSUPP; +} + +static inline int +ext3_xattr_list(struct inode *inode, void *buffer, size_t size) +{ + return -EOPNOTSUPP; +} + +static inline int +ext3_xattr_set(struct inode *inode, int name_index, const char *name, + const void *value, size_t size, int flags) +{ + return -EOPNOTSUPP; +} + +static inline int +ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, + const char *name, const void *value, size_t size, int flags) +{ + return -EOPNOTSUPP; +} + +static inline void +ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) +{ +} + +static inline void +ext3_xattr_put_super(struct super_block *sb) +{ +} + +static inline int +init_ext3_xattr(void) +{ + return 0; +} + +static inline void +exit_ext3_xattr(void) +{ +} + +#define ext3_xattr_handlers NULL + +# endif /* CONFIG_EXT3_FS_XATTR */ + +#ifdef CONFIG_EXT3_FS_SECURITY +extern int ext3_init_security(handle_t *handle, struct inode *inode, + struct inode *dir); +#else +static inline int ext3_init_security(handle_t *handle, struct inode *inode, + struct inode *dir) +{ + return 0; +} +#endif diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c new file mode 100644 index 000000000000..b9c40c15647b --- /dev/null +++ b/fs/ext4/xattr_security.c @@ -0,0 +1,77 @@ +/* + * linux/fs/ext3/xattr_security.c + * Handler for storing security labels as extended attributes. 
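+ *
+ * ext3_init_security() below stores the initial label obtained from
+ * security_inode_init_security() as a "security." attribute on a
+ * newly created inode.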
+ */ + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/smp_lock.h> +#include <linux/ext3_jbd.h> +#include <linux/ext3_fs.h> +#include <linux/security.h> +#include "xattr.h" + +static size_t +ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; + const size_t total_len = prefix_len + name_len + 1; + + + if (list && total_len <= list_size) { + memcpy(list, XATTR_SECURITY_PREFIX, prefix_len); + memcpy(list+prefix_len, name, name_len); + list[prefix_len + name_len] = '\0'; + } + return total_len; +} + +static int +ext3_xattr_security_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + return ext3_xattr_get(inode, EXT3_XATTR_INDEX_SECURITY, name, + buffer, size); +} + +static int +ext3_xattr_security_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + return ext3_xattr_set(inode, EXT3_XATTR_INDEX_SECURITY, name, + value, size, flags); +} + +int +ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir) +{ + int err; + size_t len; + void *value; + char *name; + + err = security_inode_init_security(inode, dir, &name, &value, &len); + if (err) { + if (err == -EOPNOTSUPP) + return 0; + return err; + } + err = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_SECURITY, + name, value, len, 0); + kfree(name); + kfree(value); + return err; +} + +struct xattr_handler ext3_xattr_security_handler = { + .prefix = XATTR_SECURITY_PREFIX, + .list = ext3_xattr_security_list, + .get = ext3_xattr_security_get, + .set = ext3_xattr_security_set, +}; diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c new file mode 100644 index 000000000000..86d91f1186dc --- /dev/null +++ b/fs/ext4/xattr_trusted.c @@ -0,0 +1,62 @@ +/* + * linux/fs/ext3/xattr_trusted.c + * Handler for trusted extended attributes. + * + * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> + */ + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/capability.h> +#include <linux/fs.h> +#include <linux/smp_lock.h> +#include <linux/ext3_jbd.h> +#include <linux/ext3_fs.h> +#include "xattr.h" + +#define XATTR_TRUSTED_PREFIX "trusted." 
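+
+/*
+ * Trusted attributes are reserved to CAP_SYS_ADMIN: the list callback
+ * below returns 0 for other callers, so the names are not even
+ * reported.
+ */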
+ +static size_t +ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; + const size_t total_len = prefix_len + name_len + 1; + + if (!capable(CAP_SYS_ADMIN)) + return 0; + + if (list && total_len <= list_size) { + memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len); + memcpy(list+prefix_len, name, name_len); + list[prefix_len + name_len] = '\0'; + } + return total_len; +} + +static int +ext3_xattr_trusted_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name, + buffer, size); +} + +static int +ext3_xattr_trusted_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name, + value, size, flags); +} + +struct xattr_handler ext3_xattr_trusted_handler = { + .prefix = XATTR_TRUSTED_PREFIX, + .list = ext3_xattr_trusted_list, + .get = ext3_xattr_trusted_get, + .set = ext3_xattr_trusted_set, +}; diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c new file mode 100644 index 000000000000..a85a0a17c4fd --- /dev/null +++ b/fs/ext4/xattr_user.c @@ -0,0 +1,64 @@ +/* + * linux/fs/ext3/xattr_user.c + * Handler for extended user attributes. + * + * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> + */ + +#include <linux/module.h> +#include <linux/string.h> +#include <linux/fs.h> +#include <linux/smp_lock.h> +#include <linux/ext3_jbd.h> +#include <linux/ext3_fs.h> +#include "xattr.h" + +#define XATTR_USER_PREFIX "user." + +static size_t +ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size, + const char *name, size_t name_len) +{ + const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; + const size_t total_len = prefix_len + name_len + 1; + + if (!test_opt(inode->i_sb, XATTR_USER)) + return 0; + + if (list && total_len <= list_size) { + memcpy(list, XATTR_USER_PREFIX, prefix_len); + memcpy(list+prefix_len, name, name_len); + list[prefix_len + name_len] = '\0'; + } + return total_len; +} + +static int +ext3_xattr_user_get(struct inode *inode, const char *name, + void *buffer, size_t size) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + if (!test_opt(inode->i_sb, XATTR_USER)) + return -EOPNOTSUPP; + return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, buffer, size); +} + +static int +ext3_xattr_user_set(struct inode *inode, const char *name, + const void *value, size_t size, int flags) +{ + if (strcmp(name, "") == 0) + return -EINVAL; + if (!test_opt(inode->i_sb, XATTR_USER)) + return -EOPNOTSUPP; + return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name, + value, size, flags); +} + +struct xattr_handler ext3_xattr_user_handler = { + .prefix = XATTR_USER_PREFIX, + .list = ext3_xattr_user_list, + .get = ext3_xattr_user_get, + .set = ext3_xattr_user_set, +}; diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h new file mode 100644 index 000000000000..11cca1bdc0c7 --- /dev/null +++ b/include/linux/ext4_fs.h @@ -0,0 +1,885 @@ +/* + * linux/include/linux/ext3_fs.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _LINUX_EXT3_FS_H 
+#define _LINUX_EXT3_FS_H + +#include <linux/types.h> +#include <linux/magic.h> + +/* + * The second extended filesystem constants/structures + */ + +/* + * Define EXT3FS_DEBUG to produce debug messages + */ +#undef EXT3FS_DEBUG + +/* + * Define EXT3_RESERVATION to reserve data blocks for expanding files + */ +#define EXT3_DEFAULT_RESERVE_BLOCKS 8 +/*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */ +#define EXT3_MAX_RESERVE_BLOCKS 1027 +#define EXT3_RESERVE_WINDOW_NOT_ALLOCATED 0 +/* + * Always enable hashed directories + */ +#define CONFIG_EXT3_INDEX + +/* + * Debug code + */ +#ifdef EXT3FS_DEBUG +#define ext3_debug(f, a...) \ + do { \ + printk (KERN_DEBUG "EXT3-fs DEBUG (%s, %d): %s:", \ + __FILE__, __LINE__, __FUNCTION__); \ + printk (KERN_DEBUG f, ## a); \ + } while (0) +#else +#define ext3_debug(f, a...) do {} while (0) +#endif + +/* + * Special inodes numbers + */ +#define EXT3_BAD_INO 1 /* Bad blocks inode */ +#define EXT3_ROOT_INO 2 /* Root inode */ +#define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */ +#define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */ +#define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */ +#define EXT3_JOURNAL_INO 8 /* Journal inode */ + +/* First non-reserved inode for old ext3 filesystems */ +#define EXT3_GOOD_OLD_FIRST_INO 11 + +/* + * Maximal count of links to a file + */ +#define EXT3_LINK_MAX 32000 + +/* + * Macro-instructions used to manage several block sizes + */ +#define EXT3_MIN_BLOCK_SIZE 1024 +#define EXT3_MAX_BLOCK_SIZE 4096 +#define EXT3_MIN_BLOCK_LOG_SIZE 10 +#ifdef __KERNEL__ +# define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize) +#else +# define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) +#endif +#define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) +#ifdef __KERNEL__ +# define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) +#else +# define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) +#endif +#ifdef __KERNEL__ +#define EXT3_ADDR_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_addr_per_block_bits) +#define EXT3_INODE_SIZE(s) (EXT3_SB(s)->s_inode_size) +#define EXT3_FIRST_INO(s) (EXT3_SB(s)->s_first_ino) +#else +#define EXT3_INODE_SIZE(s) (((s)->s_rev_level == EXT3_GOOD_OLD_REV) ? \ + EXT3_GOOD_OLD_INODE_SIZE : \ + (s)->s_inode_size) +#define EXT3_FIRST_INO(s) (((s)->s_rev_level == EXT3_GOOD_OLD_REV) ? 
\ + EXT3_GOOD_OLD_FIRST_INO : \ + (s)->s_first_ino) +#endif + +/* + * Macro-instructions used to manage fragments + */ +#define EXT3_MIN_FRAG_SIZE 1024 +#define EXT3_MAX_FRAG_SIZE 4096 +#define EXT3_MIN_FRAG_LOG_SIZE 10 +#ifdef __KERNEL__ +# define EXT3_FRAG_SIZE(s) (EXT3_SB(s)->s_frag_size) +# define EXT3_FRAGS_PER_BLOCK(s) (EXT3_SB(s)->s_frags_per_block) +#else +# define EXT3_FRAG_SIZE(s) (EXT3_MIN_FRAG_SIZE << (s)->s_log_frag_size) +# define EXT3_FRAGS_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / EXT3_FRAG_SIZE(s)) +#endif + +/* + * Structure of a blocks group descriptor + */ +struct ext3_group_desc +{ + __le32 bg_block_bitmap; /* Blocks bitmap block */ + __le32 bg_inode_bitmap; /* Inodes bitmap block */ + __le32 bg_inode_table; /* Inodes table block */ + __le16 bg_free_blocks_count; /* Free blocks count */ + __le16 bg_free_inodes_count; /* Free inodes count */ + __le16 bg_used_dirs_count; /* Directories count */ + __u16 bg_pad; + __le32 bg_reserved[3]; +}; + +/* + * Macro-instructions used to manage group descriptors + */ +#ifdef __KERNEL__ +# define EXT3_BLOCKS_PER_GROUP(s) (EXT3_SB(s)->s_blocks_per_group) +# define EXT3_DESC_PER_BLOCK(s) (EXT3_SB(s)->s_desc_per_block) +# define EXT3_INODES_PER_GROUP(s) (EXT3_SB(s)->s_inodes_per_group) +# define EXT3_DESC_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_desc_per_block_bits) +#else +# define EXT3_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) +# define EXT3_DESC_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_group_desc)) +# define EXT3_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) +#endif + +/* + * Constants relative to the data blocks + */ +#define EXT3_NDIR_BLOCKS 12 +#define EXT3_IND_BLOCK EXT3_NDIR_BLOCKS +#define EXT3_DIND_BLOCK (EXT3_IND_BLOCK + 1) +#define EXT3_TIND_BLOCK (EXT3_DIND_BLOCK + 1) +#define EXT3_N_BLOCKS (EXT3_TIND_BLOCK + 1) + +/* + * Inode flags + */ +#define EXT3_SECRM_FL 0x00000001 /* Secure deletion */ +#define EXT3_UNRM_FL 0x00000002 /* Undelete */ +#define EXT3_COMPR_FL 0x00000004 /* Compress file */ +#define EXT3_SYNC_FL 0x00000008 /* Synchronous updates */ +#define EXT3_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define EXT3_APPEND_FL 0x00000020 /* writes to file may only append */ +#define EXT3_NODUMP_FL 0x00000040 /* do not dump file */ +#define EXT3_NOATIME_FL 0x00000080 /* do not update atime */ +/* Reserved for compression usage... 
*/ +#define EXT3_DIRTY_FL 0x00000100 +#define EXT3_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ +#define EXT3_NOCOMPR_FL 0x00000400 /* Don't compress */ +#define EXT3_ECOMPR_FL 0x00000800 /* Compression error */ +/* End compression flags --- maybe not all used */ +#define EXT3_INDEX_FL 0x00001000 /* hash-indexed directory */ +#define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ +#define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ +#define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */ +#define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ +#define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ + +#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ +#define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ + +/* + * Inode dynamic state flags + */ +#define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ +#define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ +#define EXT3_STATE_XATTR 0x00000004 /* has in-inode xattrs */ + +/* Used to pass group descriptor data when online resize is done */ +struct ext3_new_group_input { + __u32 group; /* Group number for this data */ + __u32 block_bitmap; /* Absolute block number of block bitmap */ + __u32 inode_bitmap; /* Absolute block number of inode bitmap */ + __u32 inode_table; /* Absolute block number of inode table start */ + __u32 blocks_count; /* Total number of blocks in this group */ + __u16 reserved_blocks; /* Number of reserved blocks in this group */ + __u16 unused; +}; + +/* The struct ext3_new_group_input in kernel space, with free_blocks_count */ +struct ext3_new_group_data { + __u32 group; + __u32 block_bitmap; + __u32 inode_bitmap; + __u32 inode_table; + __u32 blocks_count; + __u16 reserved_blocks; + __u16 unused; + __u32 free_blocks_count; +}; + + +/* + * ioctl commands + */ +#define EXT3_IOC_GETFLAGS FS_IOC_GETFLAGS +#define EXT3_IOC_SETFLAGS FS_IOC_SETFLAGS +#define EXT3_IOC_GETVERSION _IOR('f', 3, long) +#define EXT3_IOC_SETVERSION _IOW('f', 4, long) +#define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) +#define EXT3_IOC_GROUP_ADD _IOW('f', 8,struct ext3_new_group_input) +#define EXT3_IOC_GETVERSION_OLD FS_IOC_GETVERSION +#define EXT3_IOC_SETVERSION_OLD FS_IOC_SETVERSION +#ifdef CONFIG_JBD_DEBUG +#define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) +#endif +#define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) +#define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) + +/* + * ioctl commands in 32 bit emulation + */ +#define EXT3_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define EXT3_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define EXT3_IOC32_GETVERSION _IOR('f', 3, int) +#define EXT3_IOC32_SETVERSION _IOW('f', 4, int) +#define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int) +#define EXT3_IOC32_SETRSVSZ _IOW('f', 6, int) +#define EXT3_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) +#ifdef CONFIG_JBD_DEBUG +#define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) +#endif +#define EXT3_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION +#define EXT3_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION + + +/* + * Mount options + */ +struct ext3_mount_options { + unsigned long s_mount_opt; + uid_t s_resuid; + gid_t s_resgid; + unsigned long s_commit_interval; +#ifdef CONFIG_QUOTA + int s_jquota_fmt; + char *s_qf_names[MAXQUOTAS]; +#endif +}; + +/* + * Structure of an inode on the disk + */ +struct ext3_inode { + __le16 i_mode; /* File mode */ + __le16 i_uid; /* Low 16 bits of Owner Uid */ + __le32 
i_size; /* Size in bytes */ + __le32 i_atime; /* Access time */ + __le32 i_ctime; /* Creation time */ + __le32 i_mtime; /* Modification time */ + __le32 i_dtime; /* Deletion Time */ + __le16 i_gid; /* Low 16 bits of Group Id */ + __le16 i_links_count; /* Links count */ + __le32 i_blocks; /* Blocks count */ + __le32 i_flags; /* File flags */ + union { + struct { + __u32 l_i_reserved1; + } linux1; + struct { + __u32 h_i_translator; + } hurd1; + struct { + __u32 m_i_reserved1; + } masix1; + } osd1; /* OS dependent 1 */ + __le32 i_block[EXT3_N_BLOCKS];/* Pointers to blocks */ + __le32 i_generation; /* File version (for NFS) */ + __le32 i_file_acl; /* File ACL */ + __le32 i_dir_acl; /* Directory ACL */ + __le32 i_faddr; /* Fragment address */ + union { + struct { + __u8 l_i_frag; /* Fragment number */ + __u8 l_i_fsize; /* Fragment size */ + __u16 i_pad1; + __le16 l_i_uid_high; /* these 2 fields */ + __le16 l_i_gid_high; /* were reserved2[0] */ + __u32 l_i_reserved2; + } linux2; + struct { + __u8 h_i_frag; /* Fragment number */ + __u8 h_i_fsize; /* Fragment size */ + __u16 h_i_mode_high; + __u16 h_i_uid_high; + __u16 h_i_gid_high; + __u32 h_i_author; + } hurd2; + struct { + __u8 m_i_frag; /* Fragment number */ + __u8 m_i_fsize; /* Fragment size */ + __u16 m_pad1; + __u32 m_i_reserved2[2]; + } masix2; + } osd2; /* OS dependent 2 */ + __le16 i_extra_isize; + __le16 i_pad1; +}; + +#define i_size_high i_dir_acl + +#if defined(__KERNEL__) || defined(__linux__) +#define i_reserved1 osd1.linux1.l_i_reserved1 +#define i_frag osd2.linux2.l_i_frag +#define i_fsize osd2.linux2.l_i_fsize +#define i_uid_low i_uid +#define i_gid_low i_gid +#define i_uid_high osd2.linux2.l_i_uid_high +#define i_gid_high osd2.linux2.l_i_gid_high +#define i_reserved2 osd2.linux2.l_i_reserved2 + +#elif defined(__GNU__) + +#define i_translator osd1.hurd1.h_i_translator +#define i_frag osd2.hurd2.h_i_frag; +#define i_fsize osd2.hurd2.h_i_fsize; +#define i_uid_high osd2.hurd2.h_i_uid_high +#define i_gid_high osd2.hurd2.h_i_gid_high +#define i_author osd2.hurd2.h_i_author + +#elif defined(__masix__) + +#define i_reserved1 osd1.masix1.m_i_reserved1 +#define i_frag osd2.masix2.m_i_frag +#define i_fsize osd2.masix2.m_i_fsize +#define i_reserved2 osd2.masix2.m_i_reserved2 + +#endif /* defined(__KERNEL__) || defined(__linux__) */ + +/* + * File system states + */ +#define EXT3_VALID_FS 0x0001 /* Unmounted cleanly */ +#define EXT3_ERROR_FS 0x0002 /* Errors detected */ +#define EXT3_ORPHAN_FS 0x0004 /* Orphans being recovered */ + +/* + * Mount flags + */ +#define EXT3_MOUNT_CHECK 0x00001 /* Do mount-time checks */ +#define EXT3_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ +#define EXT3_MOUNT_GRPID 0x00004 /* Create files with directory's group */ +#define EXT3_MOUNT_DEBUG 0x00008 /* Some debugging messages */ +#define EXT3_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ +#define EXT3_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ +#define EXT3_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ +#define EXT3_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ +#define EXT3_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ +#define EXT3_MOUNT_ABORT 0x00200 /* Fatal error detected */ +#define EXT3_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ +#define EXT3_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ +#define EXT3_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ +#define EXT3_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ +#define EXT3_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the 
journal format */ +#define EXT3_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ +#define EXT3_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ +#define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ +#define EXT3_MOUNT_RESERVATION 0x10000 /* Preallocation */ +#define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */ +#define EXT3_MOUNT_NOBH 0x40000 /* No bufferheads */ +#define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */ +#define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ +#define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ + +/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ +#ifndef _LINUX_EXT2_FS_H +#define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt +#define set_opt(o, opt) o |= EXT3_MOUNT_##opt +#define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \ + EXT3_MOUNT_##opt) +#else +#define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD +#define EXT2_MOUNT_ABORT EXT3_MOUNT_ABORT +#define EXT2_MOUNT_DATA_FLAGS EXT3_MOUNT_DATA_FLAGS +#endif + +#define ext3_set_bit ext2_set_bit +#define ext3_set_bit_atomic ext2_set_bit_atomic +#define ext3_clear_bit ext2_clear_bit +#define ext3_clear_bit_atomic ext2_clear_bit_atomic +#define ext3_test_bit ext2_test_bit +#define ext3_find_first_zero_bit ext2_find_first_zero_bit +#define ext3_find_next_zero_bit ext2_find_next_zero_bit + +/* + * Maximal mount counts between two filesystem checks + */ +#define EXT3_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ +#define EXT3_DFL_CHECKINTERVAL 0 /* Don't use interval check */ + +/* + * Behaviour when detecting errors + */ +#define EXT3_ERRORS_CONTINUE 1 /* Continue execution */ +#define EXT3_ERRORS_RO 2 /* Remount fs read-only */ +#define EXT3_ERRORS_PANIC 3 /* Panic */ +#define EXT3_ERRORS_DEFAULT EXT3_ERRORS_CONTINUE + +/* + * Structure of the super block + */ +struct ext3_super_block { +/*00*/ __le32 s_inodes_count; /* Inodes count */ + __le32 s_blocks_count; /* Blocks count */ + __le32 s_r_blocks_count; /* Reserved blocks count */ + __le32 s_free_blocks_count; /* Free blocks count */ +/*10*/ __le32 s_free_inodes_count; /* Free inodes count */ + __le32 s_first_data_block; /* First Data Block */ + __le32 s_log_block_size; /* Block size */ + __le32 s_log_frag_size; /* Fragment size */ +/*20*/ __le32 s_blocks_per_group; /* # Blocks per group */ + __le32 s_frags_per_group; /* # Fragments per group */ + __le32 s_inodes_per_group; /* # Inodes per group */ + __le32 s_mtime; /* Mount time */ +/*30*/ __le32 s_wtime; /* Write time */ + __le16 s_mnt_count; /* Mount count */ + __le16 s_max_mnt_count; /* Maximal mount count */ + __le16 s_magic; /* Magic signature */ + __le16 s_state; /* File system state */ + __le16 s_errors; /* Behaviour when detecting errors */ + __le16 s_minor_rev_level; /* minor revision level */ +/*40*/ __le32 s_lastcheck; /* time of last check */ + __le32 s_checkinterval; /* max. time between checks */ + __le32 s_creator_os; /* OS */ + __le32 s_rev_level; /* Revision level */ +/*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */ + __le16 s_def_resgid; /* Default gid for reserved blocks */ + /* + * These fields are for EXT3_DYNAMIC_REV superblocks only. + * + * Note: the difference between the compatible feature set and + * the incompatible feature set is that if there is a bit set + * in the incompatible feature set that the kernel doesn't + * know about, it should refuse to mount the filesystem. 
+ * + * e2fsck's requirements are more strict; if it doesn't know + * about a feature in either the compatible or incompatible + * feature set, it must abort and not try to meddle with + * things it doesn't understand... + */ + __le32 s_first_ino; /* First non-reserved inode */ + __le16 s_inode_size; /* size of inode structure */ + __le16 s_block_group_nr; /* block group # of this superblock */ + __le32 s_feature_compat; /* compatible feature set */ +/*60*/ __le32 s_feature_incompat; /* incompatible feature set */ + __le32 s_feature_ro_compat; /* readonly-compatible feature set */ +/*68*/ __u8 s_uuid[16]; /* 128-bit uuid for volume */ +/*78*/ char s_volume_name[16]; /* volume name */ +/*88*/ char s_last_mounted[64]; /* directory where last mounted */ +/*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ + /* + * Performance hints. Directory preallocation should only + * happen if the EXT3_FEATURE_COMPAT_DIR_PREALLOC flag is on. + */ + __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ + __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ + __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */ + /* + * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set. + */ +/*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ +/*E0*/ __le32 s_journal_inum; /* inode number of journal file */ + __le32 s_journal_dev; /* device number of journal file */ + __le32 s_last_orphan; /* start of list of inodes to delete */ + __le32 s_hash_seed[4]; /* HTREE hash seed */ + __u8 s_def_hash_version; /* Default hash version to use */ + __u8 s_reserved_char_pad; + __u16 s_reserved_word_pad; + __le32 s_default_mount_opts; + __le32 s_first_meta_bg; /* First metablock block group */ + __u32 s_reserved[190]; /* Padding to the end of the block */ +}; + +#ifdef __KERNEL__ +#include <linux/ext3_fs_i.h> +#include <linux/ext3_fs_sb.h> +static inline struct ext3_sb_info * EXT3_SB(struct super_block *sb) +{ + return sb->s_fs_info; +} +static inline struct ext3_inode_info *EXT3_I(struct inode *inode) +{ + return container_of(inode, struct ext3_inode_info, vfs_inode); +} + +static inline int ext3_valid_inum(struct super_block *sb, unsigned long ino) +{ + return ino == EXT3_ROOT_INO || + ino == EXT3_JOURNAL_INO || + ino == EXT3_RESIZE_INO || + (ino >= EXT3_FIRST_INO(sb) && + ino <= le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)); +} +#else +/* Assume that user mode programs are passing in an ext3fs superblock, not + * a kernel struct super_block. This will allow us to call the feature-test + * macros from user land. 
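EXT3_I() above is the usual container_of() pattern: struct inode is embedded inside ext3_inode_info, so subtracting the member offset from the vfs_inode pointer recovers the containing in-memory inode. A freestanding illustration of the same pointer arithmetic, with toy structures standing in for the kernel's:

#include <assert.h>
#include <stddef.h>

struct toy_inode { int dummy; };

struct toy_inode_info {
	int i_flags;
	struct toy_inode vfs_inode;	/* embedded, as in ext3_inode_info */
};

/* map a pointer to an embedded member back to its containing object */
#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

int main(void)
{
	struct toy_inode_info info = { .i_flags = 42 };
	struct toy_inode *vfs = &info.vfs_inode;
	struct toy_inode_info *back =
		container_of(vfs, struct toy_inode_info, vfs_inode);

	assert(back == &info && back->i_flags == 42);
	return 0;
}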
*/ +#define EXT3_SB(sb) (sb) +#endif + +#define NEXT_ORPHAN(inode) EXT3_I(inode)->i_dtime + +/* + * Codes for operating systems + */ +#define EXT3_OS_LINUX 0 +#define EXT3_OS_HURD 1 +#define EXT3_OS_MASIX 2 +#define EXT3_OS_FREEBSD 3 +#define EXT3_OS_LITES 4 + +/* + * Revision levels + */ +#define EXT3_GOOD_OLD_REV 0 /* The good old (original) format */ +#define EXT3_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ + +#define EXT3_CURRENT_REV EXT3_GOOD_OLD_REV +#define EXT3_MAX_SUPP_REV EXT3_DYNAMIC_REV + +#define EXT3_GOOD_OLD_INODE_SIZE 128 + +/* + * Feature set definitions + */ + +#define EXT3_HAS_COMPAT_FEATURE(sb,mask) \ + ( EXT3_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) +#define EXT3_HAS_RO_COMPAT_FEATURE(sb,mask) \ + ( EXT3_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) +#define EXT3_HAS_INCOMPAT_FEATURE(sb,mask) \ + ( EXT3_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) +#define EXT3_SET_COMPAT_FEATURE(sb,mask) \ + EXT3_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) +#define EXT3_SET_RO_COMPAT_FEATURE(sb,mask) \ + EXT3_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) +#define EXT3_SET_INCOMPAT_FEATURE(sb,mask) \ + EXT3_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) +#define EXT3_CLEAR_COMPAT_FEATURE(sb,mask) \ + EXT3_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask) +#define EXT3_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ + EXT3_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) +#define EXT3_CLEAR_INCOMPAT_FEATURE(sb,mask) \ + EXT3_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) + +#define EXT3_FEATURE_COMPAT_DIR_PREALLOC 0x0001 +#define EXT3_FEATURE_COMPAT_IMAGIC_INODES 0x0002 +#define EXT3_FEATURE_COMPAT_HAS_JOURNAL 0x0004 +#define EXT3_FEATURE_COMPAT_EXT_ATTR 0x0008 +#define EXT3_FEATURE_COMPAT_RESIZE_INODE 0x0010 +#define EXT3_FEATURE_COMPAT_DIR_INDEX 0x0020 + +#define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 +#define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 +#define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 + +#define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 +#define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002 +#define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ +#define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ +#define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 + +#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR +#define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ + EXT3_FEATURE_INCOMPAT_RECOVER| \ + EXT3_FEATURE_INCOMPAT_META_BG) +#define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT3_FEATURE_RO_COMPAT_BTREE_DIR) + +/* + * Default values for user and/or group using reserved blocks + */ +#define EXT3_DEF_RESUID 0 +#define EXT3_DEF_RESGID 0 + +/* + * Default mount options + */ +#define EXT3_DEFM_DEBUG 0x0001 +#define EXT3_DEFM_BSDGROUPS 0x0002 +#define EXT3_DEFM_XATTR_USER 0x0004 +#define EXT3_DEFM_ACL 0x0008 +#define EXT3_DEFM_UID16 0x0010 +#define EXT3_DEFM_JMODE 0x0060 +#define EXT3_DEFM_JMODE_DATA 0x0020 +#define EXT3_DEFM_JMODE_ORDERED 0x0040 +#define EXT3_DEFM_JMODE_WBACK 0x0060 + +/* + * Structure of a directory entry + */ +#define EXT3_NAME_LEN 255 + +struct ext3_dir_entry { + __le32 inode; /* Inode number */ + __le16 rec_len; /* Directory entry length */ + __le16 name_len; /* Name length */ + char name[EXT3_NAME_LEN]; /* File name */ +}; + +/* + * The new version of the directory entry. 
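The feature-set comment earlier and the EXT3_HAS_*_FEATURE macros above together encode the classic ext2/ext3 mount policy: any unknown incompatible bit makes the filesystem unmountable, an unknown read-only-compatible bit forces a read-only mount, and unknown compatible bits are simply ignored. A hedged sketch of that decision (function and enum names are mine, not kernel API; the *_SUPP masks above would be passed as the supp arguments):

enum mountability { MOUNT_RW, MOUNT_RO_ONLY, MOUNT_REFUSE };

enum mountability check_features(unsigned int incompat,
				 unsigned int ro_compat,
				 unsigned int incompat_supp,
				 unsigned int ro_compat_supp)
{
	if (incompat & ~incompat_supp)
		return MOUNT_REFUSE;	/* cannot even parse the fs safely */
	if (ro_compat & ~ro_compat_supp)
		return MOUNT_RO_ONLY;	/* reading is safe, writing is not */
	return MOUNT_RW;		/* unknown compat bits are harmless */
}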
Since EXT3 structures are + * stored in intel byte order, and the name_len field could never be + * bigger than 255 chars, it's safe to reclaim the extra byte for the + * file_type field. + */ +struct ext3_dir_entry_2 { + __le32 inode; /* Inode number */ + __le16 rec_len; /* Directory entry length */ + __u8 name_len; /* Name length */ + __u8 file_type; + char name[EXT3_NAME_LEN]; /* File name */ +}; + +/* + * Ext3 directory file types. Only the low 3 bits are used. The + * other bits are reserved for now. + */ +#define EXT3_FT_UNKNOWN 0 +#define EXT3_FT_REG_FILE 1 +#define EXT3_FT_DIR 2 +#define EXT3_FT_CHRDEV 3 +#define EXT3_FT_BLKDEV 4 +#define EXT3_FT_FIFO 5 +#define EXT3_FT_SOCK 6 +#define EXT3_FT_SYMLINK 7 + +#define EXT3_FT_MAX 8 + +/* + * EXT3_DIR_PAD defines the directory entries boundaries + * + * NOTE: It must be a multiple of 4 + */ +#define EXT3_DIR_PAD 4 +#define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1) +#define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \ + ~EXT3_DIR_ROUND) +/* + * Hash Tree Directory indexing + * (c) Daniel Phillips, 2001 + */ + +#ifdef CONFIG_EXT3_INDEX + #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ + EXT3_FEATURE_COMPAT_DIR_INDEX) && \ + (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) +#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX) +#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) +#else + #define is_dx(dir) 0 +#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) +#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) +#endif + +/* Legal values for the dx_root hash_version field: */ + +#define DX_HASH_LEGACY 0 +#define DX_HASH_HALF_MD4 1 +#define DX_HASH_TEA 2 + +#ifdef __KERNEL__ + +/* hash info structure used by the directory hash */ +struct dx_hash_info +{ + u32 hash; + u32 minor_hash; + int hash_version; + u32 *seed; +}; + +#define EXT3_HTREE_EOF 0x7fffffff + +/* + * Control parameters used by ext3_htree_next_block + */ +#define HASH_NB_ALWAYS 1 + + +/* + * Describe an inode's exact location on disk and in memory + */ +struct ext3_iloc +{ + struct buffer_head *bh; + unsigned long offset; + unsigned long block_group; +}; + +static inline struct ext3_inode *ext3_raw_inode(struct ext3_iloc *iloc) +{ + return (struct ext3_inode *) (iloc->bh->b_data + iloc->offset); +} + +/* + * This structure is stuffed into the struct file's private_data field + * for directories. It is where we put information so that we can do + * readdir operations in hash tree order. + */ +struct dir_private_info { + struct rb_root root; + struct rb_node *curr_node; + struct fname *extra_fname; + loff_t last_pos; + __u32 curr_hash; + __u32 curr_minor_hash; + __u32 next_hash; +}; + +/* calculate the first block number of the group */ +static inline ext3_fsblk_t +ext3_group_first_block_no(struct super_block *sb, unsigned long group_no) +{ + return group_no * (ext3_fsblk_t)EXT3_BLOCKS_PER_GROUP(sb) + + le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block); +} + +/* + * Special error return code only used by dx_probe() and its callers. + */ +#define ERR_BAD_DX_DIR -75000 + +/* + * Function prototypes + */ + +/* + * Ok, these declarations are also in <linux/kernel.h> but none of the + * ext3 source programs needs to include it so they are duplicated here. 
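EXT3_DIR_REC_LEN above adds the 8 fixed bytes of a directory entry header (inode, rec_len, name_len, file_type) to the name length and rounds up to the 4-byte EXT3_DIR_PAD boundary, so a 1-byte name takes (1 + 8 + 3) & ~3 = 12 bytes and a 5-byte name (5 + 8 + 3) & ~3 = 16. A self-contained check of that arithmetic:

#include <assert.h>

#define EXT3_DIR_PAD	4
#define EXT3_DIR_ROUND	(EXT3_DIR_PAD - 1)
#define EXT3_DIR_REC_LEN(name_len) \
	(((name_len) + 8 + EXT3_DIR_ROUND) & ~EXT3_DIR_ROUND)

int main(void)
{
	assert(EXT3_DIR_REC_LEN(1) == 12);	/* "a":     9 -> 12 */
	assert(EXT3_DIR_REC_LEN(3) == 12);	/* "foo":  11 -> 12 */
	assert(EXT3_DIR_REC_LEN(5) == 16);	/* "hello": 13 -> 16 */
	return 0;
}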
+ */ +# define NORET_TYPE /**/ +# define ATTRIB_NORET __attribute__((noreturn)) +# define NORET_AND noreturn, + +/* balloc.c */ +extern int ext3_bg_has_super(struct super_block *sb, int group); +extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); +extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode, + ext3_fsblk_t goal, int *errp); +extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode, + ext3_fsblk_t goal, unsigned long *count, int *errp); +extern void ext3_free_blocks (handle_t *handle, struct inode *inode, + ext3_fsblk_t block, unsigned long count); +extern void ext3_free_blocks_sb (handle_t *handle, struct super_block *sb, + ext3_fsblk_t block, unsigned long count, + unsigned long *pdquot_freed_blocks); +extern ext3_fsblk_t ext3_count_free_blocks (struct super_block *); +extern void ext3_check_blocks_bitmap (struct super_block *); +extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, + unsigned int block_group, + struct buffer_head ** bh); +extern int ext3_should_retry_alloc(struct super_block *sb, int *retries); +extern void ext3_init_block_alloc_info(struct inode *); +extern void ext3_rsv_window_add(struct super_block *sb, struct ext3_reserve_window_node *rsv); + +/* dir.c */ +extern int ext3_check_dir_entry(const char *, struct inode *, + struct ext3_dir_entry_2 *, + struct buffer_head *, unsigned long); +extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, + __u32 minor_hash, + struct ext3_dir_entry_2 *dirent); +extern void ext3_htree_free_dir_info(struct dir_private_info *p); + +/* fsync.c */ +extern int ext3_sync_file (struct file *, struct dentry *, int); + +/* hash.c */ +extern int ext3fs_dirhash(const char *name, int len, struct + dx_hash_info *hinfo); + +/* ialloc.c */ +extern struct inode * ext3_new_inode (handle_t *, struct inode *, int); +extern void ext3_free_inode (handle_t *, struct inode *); +extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); +extern unsigned long ext3_count_free_inodes (struct super_block *); +extern unsigned long ext3_count_dirs (struct super_block *); +extern void ext3_check_inodes_bitmap (struct super_block *); +extern unsigned long ext3_count_free (struct buffer_head *, unsigned); + + +/* inode.c */ +int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, + struct buffer_head *bh, ext3_fsblk_t blocknr); +struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); +struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); +int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, + sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, + int create, int extend_disksize); + +extern void ext3_read_inode (struct inode *); +extern int ext3_write_inode (struct inode *, int); +extern int ext3_setattr (struct dentry *, struct iattr *); +extern void ext3_delete_inode (struct inode *); +extern int ext3_sync_inode (handle_t *, struct inode *); +extern void ext3_discard_reservation (struct inode *); +extern void ext3_dirty_inode(struct inode *); +extern int ext3_change_inode_journal_flag(struct inode *, int); +extern int ext3_get_inode_loc(struct inode *, struct ext3_iloc *); +extern void ext3_truncate (struct inode *); +extern void ext3_set_inode_flags(struct inode *); +extern void ext3_set_aops(struct inode *inode); + +/* ioctl.c */ +extern int ext3_ioctl (struct inode *, struct file *, unsigned int, + unsigned long); +extern long 
ext3_compat_ioctl (struct file *, unsigned int, unsigned long); + +/* namei.c */ +extern int ext3_orphan_add(handle_t *, struct inode *); +extern int ext3_orphan_del(handle_t *, struct inode *); +extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, + __u32 start_minor_hash, __u32 *next_hash); + +/* resize.c */ +extern int ext3_group_add(struct super_block *sb, + struct ext3_new_group_data *input); +extern int ext3_group_extend(struct super_block *sb, + struct ext3_super_block *es, + ext3_fsblk_t n_blocks_count); + +/* super.c */ +extern void ext3_error (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void __ext3_std_error (struct super_block *, const char *, int); +extern void ext3_abort (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void ext3_warning (struct super_block *, const char *, const char *, ...) + __attribute__ ((format (printf, 3, 4))); +extern void ext3_update_dynamic_rev (struct super_block *sb); + +#define ext3_std_error(sb, errno) \ +do { \ + if ((errno)) \ + __ext3_std_error((sb), __FUNCTION__, (errno)); \ +} while (0) + +/* + * Inodes and files operations + */ + +/* dir.c */ +extern const struct file_operations ext3_dir_operations; + +/* file.c */ +extern struct inode_operations ext3_file_inode_operations; +extern const struct file_operations ext3_file_operations; + +/* namei.c */ +extern struct inode_operations ext3_dir_inode_operations; +extern struct inode_operations ext3_special_inode_operations; + +/* symlink.c */ +extern struct inode_operations ext3_symlink_inode_operations; +extern struct inode_operations ext3_fast_symlink_inode_operations; + + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_EXT3_FS_H */ diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h new file mode 100644 index 000000000000..4395e5206746 --- /dev/null +++ b/include/linux/ext4_fs_i.h @@ -0,0 +1,147 @@ +/* + * linux/include/linux/ext3_fs_i.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs_i.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _LINUX_EXT3_FS_I +#define _LINUX_EXT3_FS_I + +#include <linux/rwsem.h> +#include <linux/rbtree.h> +#include <linux/seqlock.h> +#include <linux/mutex.h> + +/* data type for block offset of block group */ +typedef int ext3_grpblk_t; + +/* data type for filesystem-wide blocks number */ +typedef unsigned long ext3_fsblk_t; + +#define E3FSBLK "%lu" + +struct ext3_reserve_window { + ext3_fsblk_t _rsv_start; /* First byte reserved */ + ext3_fsblk_t _rsv_end; /* Last byte reserved or 0 */ +}; + +struct ext3_reserve_window_node { + struct rb_node rsv_node; + __u32 rsv_goal_size; + __u32 rsv_alloc_hit; + struct ext3_reserve_window rsv_window; +}; + +struct ext3_block_alloc_info { + /* information about reservation window */ + struct ext3_reserve_window_node rsv_window_node; + /* + * was i_next_alloc_block in ext3_inode_info + * is the logical (file-relative) number of the + * most-recently-allocated block in this file. + * We use this for detecting linearly ascending allocation requests. + */ + __u32 last_alloc_logical_block; + /* + * Was i_next_alloc_goal in ext3_inode_info + * is the *physical* companion to i_next_alloc_block. + * It is the physical block number of the block which was most recently + * allocated to this file.
This gives us the goal (target) for the next + * allocation when we detect linearly ascending requests. + */ + ext3_fsblk_t last_alloc_physical_block; +}; + +#define rsv_start rsv_window._rsv_start +#define rsv_end rsv_window._rsv_end + +/* + * third extended file system inode data in memory + */ +struct ext3_inode_info { + __le32 i_data[15]; /* unconverted */ + __u32 i_flags; +#ifdef EXT3_FRAGMENTS + __u32 i_faddr; + __u8 i_frag_no; + __u8 i_frag_size; +#endif + ext3_fsblk_t i_file_acl; + __u32 i_dir_acl; + __u32 i_dtime; + + /* + * i_block_group is the number of the block group which contains + * this file's inode. Constant across the lifetime of the inode, + * it is used for making block allocation decisions - we try to + * place a file's data blocks near its inode block, and new inodes + * near to their parent directory's inode. + */ + __u32 i_block_group; + __u32 i_state; /* Dynamic state flags for ext3 */ + + /* block reservation info */ + struct ext3_block_alloc_info *i_block_alloc_info; + + __u32 i_dir_start_lookup; +#ifdef CONFIG_EXT3_FS_XATTR + /* + * Extended attributes can be read independently of the main file + * data. Taking i_mutex even when reading would cause contention + * between readers of EAs and writers of regular file data, so + * instead we synchronize on xattr_sem when reading or changing + * EAs. + */ + struct rw_semaphore xattr_sem; +#endif +#ifdef CONFIG_EXT3_FS_POSIX_ACL + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; +#endif + + struct list_head i_orphan; /* unlinked but open inodes */ + + /* + * i_disksize keeps track of what the inode size is ON DISK, not + * in memory. During truncate, i_size is set to the new size by + * the VFS prior to calling ext3_truncate(), but the filesystem won't + * set i_disksize to 0 until the truncate is actually under way. + * + * The intent is that i_disksize always represents the blocks which + * are used by this file. This allows recovery to restart truncate + * on orphans if we crash during truncate. We actually write i_disksize + * into the on-disk inode when writing inodes out, instead of i_size. + * + * The only time when i_disksize and i_size may be different is when + * a truncate is in progress. The only things which change i_disksize + * are ext3_get_block (growth) and ext3_truncate (shrinkth). + */ + loff_t i_disksize; + + /* on-disk additional length */ + __u16 i_extra_isize; + + /* + * truncate_mutex is for serialising ext3_truncate() against + * ext3_getblock(). In the 2.4 ext2 design, great chunks of the inode's + * data tree are chopped off during truncate. We can't do that in + * ext3 because whenever we perform intermediate commits during + * truncate, the inode and all the metadata blocks *must* be in a + * consistent state which allows truncation of the orphans to restart + * during recovery. Hence we must fix the get_block-vs-truncate race + * by other means, so we have truncate_mutex.
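The last_alloc_* pair above lets the allocator spot a file being written sequentially: when the logical block now requested is exactly one past the last one allocated, one past the last physical block becomes the allocation goal, keeping the file contiguous on disk. A hedged sketch of that heuristic (helper name invented here; the real decision sits in the get_blocks path):

typedef unsigned long sketch_fsblk_t;	/* stand-in for ext3_fsblk_t */

/* Return a goal block for the allocator, or 0 for "no hint". */
sketch_fsblk_t linear_alloc_goal(unsigned long last_logical,
				 sketch_fsblk_t last_physical,
				 unsigned long requested_logical)
{
	if (last_physical && requested_logical == last_logical + 1)
		return last_physical + 1;	/* extend the current run */
	return 0;
}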
+ */ + struct mutex truncate_mutex; + struct inode vfs_inode; +}; + +#endif /* _LINUX_EXT3_FS_I */ diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h new file mode 100644 index 000000000000..f61309c81cc4 --- /dev/null +++ b/include/linux/ext4_fs_sb.h @@ -0,0 +1,83 @@ +/* + * linux/include/linux/ext3_fs_sb.h + * + * Copyright (C) 1992, 1993, 1994, 1995 + * Remy Card (card@masi.ibp.fr) + * Laboratoire MASI - Institut Blaise Pascal + * Universite Pierre et Marie Curie (Paris VI) + * + * from + * + * linux/include/linux/minix_fs_sb.h + * + * Copyright (C) 1991, 1992 Linus Torvalds + */ + +#ifndef _LINUX_EXT3_FS_SB +#define _LINUX_EXT3_FS_SB + +#ifdef __KERNEL__ +#include <linux/timer.h> +#include <linux/wait.h> +#include <linux/blockgroup_lock.h> +#include <linux/percpu_counter.h> +#endif +#include <linux/rbtree.h> + +/* + * third extended-fs super-block data in memory + */ +struct ext3_sb_info { + unsigned long s_frag_size; /* Size of a fragment in bytes */ + unsigned long s_frags_per_block;/* Number of fragments per block */ + unsigned long s_inodes_per_block;/* Number of inodes per block */ + unsigned long s_frags_per_group;/* Number of fragments in a group */ + unsigned long s_blocks_per_group;/* Number of blocks in a group */ + unsigned long s_inodes_per_group;/* Number of inodes in a group */ + unsigned long s_itb_per_group; /* Number of inode table blocks per group */ + unsigned long s_gdb_count; /* Number of group descriptor blocks */ + unsigned long s_desc_per_block; /* Number of group descriptors per block */ + unsigned long s_groups_count; /* Number of groups in the fs */ + struct buffer_head * s_sbh; /* Buffer containing the super block */ + struct ext3_super_block * s_es; /* Pointer to the super block in the buffer */ + struct buffer_head ** s_group_desc; + unsigned long s_mount_opt; + uid_t s_resuid; + gid_t s_resgid; + unsigned short s_mount_state; + unsigned short s_pad; + int s_addr_per_block_bits; + int s_desc_per_block_bits; + int s_inode_size; + int s_first_ino; + spinlock_t s_next_gen_lock; + u32 s_next_generation; + u32 s_hash_seed[4]; + int s_def_hash_version; + struct percpu_counter s_freeblocks_counter; + struct percpu_counter s_freeinodes_counter; + struct percpu_counter s_dirs_counter; + struct blockgroup_lock s_blockgroup_lock; + + /* root of the per fs reservation window tree */ + spinlock_t s_rsv_window_lock; + struct rb_root s_rsv_window_root; + struct ext3_reserve_window_node s_rsv_window_head; + + /* Journaling */ + struct inode * s_journal_inode; + struct journal_s * s_journal; + struct list_head s_orphan; + unsigned long s_commit_interval; + struct block_device *journal_bdev; +#ifdef CONFIG_JBD_DEBUG + struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ + wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ +#endif +#ifdef CONFIG_QUOTA + char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ + int s_jquota_fmt; /* Format of quota to use */ +#endif +}; + +#endif /* _LINUX_EXT3_FS_SB */ diff --git a/include/linux/ext4_jbd.h b/include/linux/ext4_jbd.h new file mode 100644 index 000000000000..ce0e6109aff0 --- /dev/null +++ b/include/linux/ext4_jbd.h @@ -0,0 +1,268 @@ +/* + * linux/include/linux/ext3_jbd.h + * + * Written by Stephen C. 
Tweedie <sct@redhat.com>, 1999 + * + * Copyright 1998--1999 Red Hat corp --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Ext3-specific journaling extensions. + */ + +#ifndef _LINUX_EXT3_JBD_H +#define _LINUX_EXT3_JBD_H + +#include <linux/fs.h> +#include <linux/jbd.h> +#include <linux/ext3_fs.h> + +#define EXT3_JOURNAL(inode) (EXT3_SB((inode)->i_sb)->s_journal) + +/* Define the number of blocks we need to account to a transaction to + * modify one block of data. + * + * We may have to touch one inode, one bitmap buffer, up to three + * indirection blocks, the group and superblock summaries, and the data + * block to complete the transaction. */ + +#define EXT3_SINGLEDATA_TRANS_BLOCKS 8U + +/* Extended attribute operations touch at most two data buffers, + * two bitmap buffers, and two group summaries, in addition to the inode + * and the superblock, which are already accounted for. */ + +#define EXT3_XATTR_TRANS_BLOCKS 6U + +/* Define the minimum size for a transaction which modifies data. This + * needs to take into account the fact that we may end up modifying two + * quota files too (one for the group, one for the user quota). The + * superblock only gets updated once, of course, so don't bother + * counting that again for the quota updates. */ + +#define EXT3_DATA_TRANS_BLOCKS(sb) (EXT3_SINGLEDATA_TRANS_BLOCKS + \ + EXT3_XATTR_TRANS_BLOCKS - 2 + \ + 2*EXT3_QUOTA_TRANS_BLOCKS(sb)) + +/* Delete operations potentially hit one directory's namespace plus an + * entire inode, plus arbitrary amounts of bitmap/indirection data. Be + * generous. We can grow the delete transaction later if necessary. */ + +#define EXT3_DELETE_TRANS_BLOCKS(sb) (2 * EXT3_DATA_TRANS_BLOCKS(sb) + 64) + +/* Define an arbitrary limit for the amount of data we will anticipate + * writing to any given transaction. For unbounded transactions such as + * write(2) and truncate(2) we can write more than this, but we always + * start off at the maximum transaction size and grow the transaction + * optimistically as we go. */ + +#define EXT3_MAX_TRANS_DATA 64U + +/* We break up a large truncate or write transaction once the handle's + * buffer credits gets this low, we need either to extend the + * transaction or to start a new one. Reserve enough space here for + * inode, bitmap, superblock, group and indirection updates for at least + * one block, plus two quota updates. Quota allocations are not + * needed. */ + +#define EXT3_RESERVE_TRANS_BLOCKS 12U + +#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 + +#ifdef CONFIG_QUOTA +/* Amount of blocks needed for quota update - we know that the structure was + * allocated so we need to update only inode+data */ +#define EXT3_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) +/* Amount of blocks needed for quota insert/delete - we do some block writes + * but inode, sb and group updates are done only once */ +#define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ + (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0) +#define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? 
(DQUOT_DEL_ALLOC*\ + (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0) +#else +#define EXT3_QUOTA_TRANS_BLOCKS(sb) 0 +#define EXT3_QUOTA_INIT_BLOCKS(sb) 0 +#define EXT3_QUOTA_DEL_BLOCKS(sb) 0 +#endif + +int +ext3_mark_iloc_dirty(handle_t *handle, + struct inode *inode, + struct ext3_iloc *iloc); + +/* + * On success, we end up with an outstanding reference count against + * iloc->bh. This _must_ be cleaned up later. + */ + +int ext3_reserve_inode_write(handle_t *handle, struct inode *inode, + struct ext3_iloc *iloc); + +int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode); + +/* + * Wrapper functions with which ext3 calls into JBD. The intent here is + * to allow these to be turned into appropriate stubs so ext3 can control + * ext2 filesystems, so ext2+ext3 systems only need one fs. This work hasn't + * been done yet. + */ + +void ext3_journal_abort_handle(const char *caller, const char *err_fn, + struct buffer_head *bh, handle_t *handle, int err); + +static inline int +__ext3_journal_get_undo_access(const char *where, handle_t *handle, + struct buffer_head *bh) +{ + int err = journal_get_undo_access(handle, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +static inline int +__ext3_journal_get_write_access(const char *where, handle_t *handle, + struct buffer_head *bh) +{ + int err = journal_get_write_access(handle, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +static inline void +ext3_journal_release_buffer(handle_t *handle, struct buffer_head *bh) +{ + journal_release_buffer(handle, bh); +} + +static inline int +__ext3_journal_forget(const char *where, handle_t *handle, struct buffer_head *bh) +{ + int err = journal_forget(handle, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +static inline int +__ext3_journal_revoke(const char *where, handle_t *handle, + unsigned long blocknr, struct buffer_head *bh) +{ + int err = journal_revoke(handle, blocknr, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +static inline int +__ext3_journal_get_create_access(const char *where, + handle_t *handle, struct buffer_head *bh) +{ + int err = journal_get_create_access(handle, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + +static inline int +__ext3_journal_dirty_metadata(const char *where, + handle_t *handle, struct buffer_head *bh) +{ + int err = journal_dirty_metadata(handle, bh); + if (err) + ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + return err; +} + + +#define ext3_journal_get_undo_access(handle, bh) \ + __ext3_journal_get_undo_access(__FUNCTION__, (handle), (bh)) +#define ext3_journal_get_write_access(handle, bh) \ + __ext3_journal_get_write_access(__FUNCTION__, (handle), (bh)) +#define ext3_journal_revoke(handle, blocknr, bh) \ + __ext3_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh)) +#define ext3_journal_get_create_access(handle, bh) \ + __ext3_journal_get_create_access(__FUNCTION__, (handle), (bh)) +#define ext3_journal_dirty_metadata(handle, bh) \ + __ext3_journal_dirty_metadata(__FUNCTION__, (handle), (bh)) +#define ext3_journal_forget(handle, bh) \ + __ext3_journal_forget(__FUNCTION__, (handle), (bh)) + +int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh); + +handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks); +int __ext3_journal_stop(const
char *where, handle_t *handle); + +static inline handle_t *ext3_journal_start(struct inode *inode, int nblocks) +{ + return ext3_journal_start_sb(inode->i_sb, nblocks); +} + +#define ext3_journal_stop(handle) \ + __ext3_journal_stop(__FUNCTION__, (handle)) + +static inline handle_t *ext3_journal_current_handle(void) +{ + return journal_current_handle(); +} + +static inline int ext3_journal_extend(handle_t *handle, int nblocks) +{ + return journal_extend(handle, nblocks); +} + +static inline int ext3_journal_restart(handle_t *handle, int nblocks) +{ + return journal_restart(handle, nblocks); +} + +static inline int ext3_journal_blocks_per_page(struct inode *inode) +{ + return journal_blocks_per_page(inode); +} + +static inline int ext3_journal_force_commit(journal_t *journal) +{ + return journal_force_commit(journal); +} + +/* super.c */ +int ext3_force_commit(struct super_block *sb); + +static inline int ext3_should_journal_data(struct inode *inode) +{ + if (!S_ISREG(inode->i_mode)) + return 1; + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) + return 1; + if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL) + return 1; + return 0; +} + +static inline int ext3_should_order_data(struct inode *inode) +{ + if (!S_ISREG(inode->i_mode)) + return 0; + if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL) + return 0; + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA) + return 1; + return 0; +} + +static inline int ext3_should_writeback_data(struct inode *inode) +{ + if (!S_ISREG(inode->i_mode)) + return 0; + if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL) + return 0; + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) + return 1; + return 0; +} + +#endif /* _LINUX_EXT3_JBD_H */ -- cgit v1.2.3 From 617ba13b31fbf505cc21799826639ef24ed94af0 Mon Sep 17 00:00:00 2001 From: Mingming Cao <cmm@us.ibm.com> Date: Wed, 11 Oct 2006 01:20:53 -0700 Subject: [PATCH] ext4: rename ext4 symbols to avoid duplication of ext3 symbols Mingming Cao originally did this work, and Shaggy reproduced it using some scripts from her. Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/ext4/Makefile | 12 +- fs/ext4/acl.c | 188 ++++---- fs/ext4/acl.h | 58 +-- fs/ext4/balloc.c | 536 ++++++++++----------- fs/ext4/bitmap.c | 10 +- fs/ext4/dir.c | 102 ++-- fs/ext4/file.c | 50 +- fs/ext4/fsync.c | 20 +- fs/ext4/hash.c | 10 +- fs/ext4/ialloc.c | 230 ++++----- fs/ext4/inode.c | 1020 ++++++++++++++++++++-------------------- fs/ext4/ioctl.c | 156 +++---- fs/ext4/namei.c | 830 ++++++++++++++++----------------- fs/ext4/namei.h | 4 +- fs/ext4/resize.c | 412 ++++++++-------- fs/ext4/super.c | 1114 ++++++++++++++++++++++---------------------- fs/ext4/symlink.c | 24 +- fs/ext4/xattr.c | 560 +++++++++++----------- fs/ext4/xattr.h | 110 ++--- fs/ext4/xattr_security.c | 28 +- fs/ext4/xattr_trusted.c | 24 +- fs/ext4/xattr_user.c | 24 +- include/linux/ext4_fs.h | 702 ++++++++++++++-------------- include/linux/ext4_fs_i.h | 56 +-- include/linux/ext4_fs_sb.h | 14 +- include/linux/ext4_jbd.h | 160 +++---- 26 files changed, 3227 insertions(+), 3227 deletions(-) (limited to 'include') diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 704cd44a40c2..09c487893e4a 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -1,12 +1,12 @@ # -# Makefile for the linux ext3-filesystem routines. +# Makefile for the linux ext4-filesystem routines. 
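The ext3_should_journal_data()/..._order_data()/..._writeback_data() predicates a little further up resolve the effective data mode with a fixed precedence: anything that is not a regular file is always journalled, an inode carrying EXT3_JOURNAL_DATA_FL overrides the mount option, and only then does the mount-wide DATA_FLAGS field choose between ordered and writeback. The same resolution collapsed into one hedged sketch (enum and function are illustrative, not kernel API):

enum data_mode { DATA_JOURNAL, DATA_ORDERED, DATA_WRITEBACK };

/* is_reg: S_ISREG(inode->i_mode); journal_flag: EXT3_JOURNAL_DATA_FL
 * set on the inode; mount_mode: decoded from EXT3_MOUNT_DATA_FLAGS. */
enum data_mode effective_data_mode(int is_reg, int journal_flag,
				   enum data_mode mount_mode)
{
	if (!is_reg || journal_flag)
		return DATA_JOURNAL;	/* strongest guarantee wins */
	return mount_mode;		/* ordered or writeback */
}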
# -obj-$(CONFIG_EXT3_FS) += ext3.o +obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o -ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ +ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o -ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o -ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -ext3-$(CONFIG_EXT3_FS_SECURITY) += xattr_security.o +ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o +ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o +ext4dev-$(CONFIG_EXT4DEV_FS_SECURITY) += xattr_security.o diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c index 1e5038d9a01b..d143489aeb4c 100644 --- a/fs/ext4/acl.c +++ b/fs/ext4/acl.c @@ -1,5 +1,5 @@ /* - * linux/fs/ext3/acl.c + * linux/fs/ext4/acl.c * * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> */ @@ -9,8 +9,8 @@ #include <linux/slab.h> #include <linux/capability.h> #include <linux/fs.h> -#include <linux/ext3_jbd.h> -#include <linux/ext3_fs.h> +#include <linux/ext4_jbd.h> +#include <linux/ext4_fs.h> #include "xattr.h" #include "acl.h" @@ -18,7 +18,7 @@ * Convert from filesystem to in-memory representation. */ static struct posix_acl * -ext3_acl_from_disk(const void *value, size_t size) +ext4_acl_from_disk(const void *value, size_t size) { const char *end = (char *)value + size; int n, count; @@ -26,13 +26,13 @@ ext3_acl_from_disk(const void *value, size_t size) if (!value) return NULL; - if (size < sizeof(ext3_acl_header)) + if (size < sizeof(ext4_acl_header)) return ERR_PTR(-EINVAL); - if (((ext3_acl_header *)value)->a_version != - cpu_to_le32(EXT3_ACL_VERSION)) + if (((ext4_acl_header *)value)->a_version != + cpu_to_le32(EXT4_ACL_VERSION)) return ERR_PTR(-EINVAL); - value = (char *)value + sizeof(ext3_acl_header); - count = ext3_acl_count(size); + value = (char *)value + sizeof(ext4_acl_header); + count = ext4_acl_count(size); if (count < 0) return ERR_PTR(-EINVAL); if (count == 0) @@ -41,9 +41,9 @@ ext3_acl_from_disk(const void *value, size_t size) if (!acl) return ERR_PTR(-ENOMEM); for (n=0; n < count; n++) { - ext3_acl_entry *entry = - (ext3_acl_entry *)value; - if ((char *)value + sizeof(ext3_acl_entry_short) > end) + ext4_acl_entry *entry = + (ext4_acl_entry *)value; + if ((char *)value + sizeof(ext4_acl_entry_short) > end) goto fail; acl->a_entries[n].e_tag = le16_to_cpu(entry->e_tag); acl->a_entries[n].e_perm = le16_to_cpu(entry->e_perm); @@ -53,13 +53,13 @@ ext3_acl_from_disk(const void *value, size_t size) case ACL_MASK: case ACL_OTHER: value = (char *)value + - sizeof(ext3_acl_entry_short); + sizeof(ext4_acl_entry_short); acl->a_entries[n].e_id = ACL_UNDEFINED_ID; break; case ACL_USER: case ACL_GROUP: - value = (char *)value + sizeof(ext3_acl_entry); + value = (char *)value + sizeof(ext4_acl_entry); if ((char *)value > end) goto fail; acl->a_entries[n].e_id = @@ -83,21 +83,21 @@ fail: * Convert from in-memory to filesystem representation. 
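The from-disk converter in the hunk above walks a variable-length array: ACL_USER and ACL_GROUP entries carry a 32-bit e_id and occupy a full ext4_acl_entry, while the *_OBJ, MASK and OTHER tags omit it and use ext4_acl_entry_short, which is why every step re-checks the buffer bound. A stripped-down userspace sketch of that walk (byte-order conversion and the version header elided; tag values match the Linux posix_acl ones):

#include <stddef.h>
#include <stdint.h>

#define ACL_USER	0x02
#define ACL_GROUP	0x08

struct acl_entry	{ uint16_t e_tag, e_perm; uint32_t e_id; };
struct acl_entry_short	{ uint16_t e_tag, e_perm; };

/* Count the entries in 'size' bytes of on-disk ACL body, or -1 if the
 * buffer is malformed. Mirrors the bound checks in ext4_acl_from_disk. */
int count_acl_entries(const unsigned char *p, size_t size)
{
	const unsigned char *end = p + size;
	int n = 0;

	while (p < end) {
		const struct acl_entry_short *e = (const void *)p;

		if (p + sizeof(*e) > end)
			return -1;
		if (e->e_tag == ACL_USER || e->e_tag == ACL_GROUP)
			p += sizeof(struct acl_entry);	/* qualified: has e_id */
		else
			p += sizeof(struct acl_entry_short);
		if (p > end)
			return -1;
		n++;
	}
	return n;
}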
*/ static void * -ext3_acl_to_disk(const struct posix_acl *acl, size_t *size) +ext4_acl_to_disk(const struct posix_acl *acl, size_t *size) { - ext3_acl_header *ext_acl; + ext4_acl_header *ext_acl; char *e; size_t n; - *size = ext3_acl_size(acl->a_count); - ext_acl = kmalloc(sizeof(ext3_acl_header) + acl->a_count * - sizeof(ext3_acl_entry), GFP_KERNEL); + *size = ext4_acl_size(acl->a_count); + ext_acl = kmalloc(sizeof(ext4_acl_header) + acl->a_count * + sizeof(ext4_acl_entry), GFP_KERNEL); if (!ext_acl) return ERR_PTR(-ENOMEM); - ext_acl->a_version = cpu_to_le32(EXT3_ACL_VERSION); - e = (char *)ext_acl + sizeof(ext3_acl_header); + ext_acl->a_version = cpu_to_le32(EXT4_ACL_VERSION); + e = (char *)ext_acl + sizeof(ext4_acl_header); for (n=0; n < acl->a_count; n++) { - ext3_acl_entry *entry = (ext3_acl_entry *)e; + ext4_acl_entry *entry = (ext4_acl_entry *)e; entry->e_tag = cpu_to_le16(acl->a_entries[n].e_tag); entry->e_perm = cpu_to_le16(acl->a_entries[n].e_perm); switch(acl->a_entries[n].e_tag) { @@ -105,14 +105,14 @@ ext3_acl_to_disk(const struct posix_acl *acl, size_t *size) case ACL_GROUP: entry->e_id = cpu_to_le32(acl->a_entries[n].e_id); - e += sizeof(ext3_acl_entry); + e += sizeof(ext4_acl_entry); break; case ACL_USER_OBJ: case ACL_GROUP_OBJ: case ACL_MASK: case ACL_OTHER: - e += sizeof(ext3_acl_entry_short); + e += sizeof(ext4_acl_entry_short); break; default: @@ -127,12 +127,12 @@ fail: } static inline struct posix_acl * -ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl) +ext4_iget_acl(struct inode *inode, struct posix_acl **i_acl) { - struct posix_acl *acl = EXT3_ACL_NOT_CACHED; + struct posix_acl *acl = EXT4_ACL_NOT_CACHED; spin_lock(&inode->i_lock); - if (*i_acl != EXT3_ACL_NOT_CACHED) + if (*i_acl != EXT4_ACL_NOT_CACHED) acl = posix_acl_dup(*i_acl); spin_unlock(&inode->i_lock); @@ -140,11 +140,11 @@ ext3_iget_acl(struct inode *inode, struct posix_acl **i_acl) } static inline void -ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl, +ext4_iset_acl(struct inode *inode, struct posix_acl **i_acl, struct posix_acl *acl) { spin_lock(&inode->i_lock); - if (*i_acl != EXT3_ACL_NOT_CACHED) + if (*i_acl != EXT4_ACL_NOT_CACHED) posix_acl_release(*i_acl); *i_acl = posix_acl_dup(acl); spin_unlock(&inode->i_lock); @@ -156,9 +156,9 @@ ext3_iset_acl(struct inode *inode, struct posix_acl **i_acl, * inode->i_mutex: don't care */ static struct posix_acl * -ext3_get_acl(struct inode *inode, int type) +ext4_get_acl(struct inode *inode, int type) { - struct ext3_inode_info *ei = EXT3_I(inode); + struct ext4_inode_info *ei = EXT4_I(inode); int name_index; char *value = NULL; struct posix_acl *acl; @@ -169,31 +169,31 @@ ext3_get_acl(struct inode *inode, int type) switch(type) { case ACL_TYPE_ACCESS: - acl = ext3_iget_acl(inode, &ei->i_acl); - if (acl != EXT3_ACL_NOT_CACHED) + acl = ext4_iget_acl(inode, &ei->i_acl); + if (acl != EXT4_ACL_NOT_CACHED) return acl; - name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; + name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; break; case ACL_TYPE_DEFAULT: - acl = ext3_iget_acl(inode, &ei->i_default_acl); - if (acl != EXT3_ACL_NOT_CACHED) + acl = ext4_iget_acl(inode, &ei->i_default_acl); + if (acl != EXT4_ACL_NOT_CACHED) return acl; - name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; + name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; break; default: return ERR_PTR(-EINVAL); } - retval = ext3_xattr_get(inode, name_index, "", NULL, 0); + retval = ext4_xattr_get(inode, name_index, "", NULL, 0); if (retval > 0) { value = kmalloc(retval, GFP_KERNEL); if 
(!value) return ERR_PTR(-ENOMEM); - retval = ext3_xattr_get(inode, name_index, "", value, retval); + retval = ext4_xattr_get(inode, name_index, "", value, retval); } if (retval > 0) - acl = ext3_acl_from_disk(value, retval); + acl = ext4_acl_from_disk(value, retval); else if (retval == -ENODATA || retval == -ENOSYS) acl = NULL; else @@ -203,11 +203,11 @@ ext3_get_acl(struct inode *inode, int type) if (!IS_ERR(acl)) { switch(type) { case ACL_TYPE_ACCESS: - ext3_iset_acl(inode, &ei->i_acl, acl); + ext4_iset_acl(inode, &ei->i_acl, acl); break; case ACL_TYPE_DEFAULT: - ext3_iset_acl(inode, &ei->i_default_acl, acl); + ext4_iset_acl(inode, &ei->i_default_acl, acl); break; } } @@ -217,13 +217,13 @@ ext3_get_acl(struct inode *inode, int type) /* * Set the access or default ACL of an inode. * - * inode->i_mutex: down unless called from ext3_new_inode + * inode->i_mutex: down unless called from ext4_new_inode */ static int -ext3_set_acl(handle_t *handle, struct inode *inode, int type, +ext4_set_acl(handle_t *handle, struct inode *inode, int type, struct posix_acl *acl) { - struct ext3_inode_info *ei = EXT3_I(inode); + struct ext4_inode_info *ei = EXT4_I(inode); int name_index; void *value = NULL; size_t size = 0; @@ -234,7 +234,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, switch(type) { case ACL_TYPE_ACCESS: - name_index = EXT3_XATTR_INDEX_POSIX_ACL_ACCESS; + name_index = EXT4_XATTR_INDEX_POSIX_ACL_ACCESS; if (acl) { mode_t mode = inode->i_mode; error = posix_acl_equiv_mode(acl, &mode); @@ -242,7 +242,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, return error; else { inode->i_mode = mode; - ext3_mark_inode_dirty(handle, inode); + ext4_mark_inode_dirty(handle, inode); if (error == 0) acl = NULL; } @@ -250,7 +250,7 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, break; case ACL_TYPE_DEFAULT: - name_index = EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT; + name_index = EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT; if (!S_ISDIR(inode->i_mode)) return acl ? -EACCES : 0; break; @@ -259,23 +259,23 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, return -EINVAL; } if (acl) { - value = ext3_acl_to_disk(acl, &size); + value = ext4_acl_to_disk(acl, &size); if (IS_ERR(value)) return (int)PTR_ERR(value); } - error = ext3_xattr_set_handle(handle, inode, name_index, "", + error = ext4_xattr_set_handle(handle, inode, name_index, "", value, size, 0); kfree(value); if (!error) { switch(type) { case ACL_TYPE_ACCESS: - ext3_iset_acl(inode, &ei->i_acl, acl); + ext4_iset_acl(inode, &ei->i_acl, acl); break; case ACL_TYPE_DEFAULT: - ext3_iset_acl(inode, &ei->i_default_acl, acl); + ext4_iset_acl(inode, &ei->i_default_acl, acl); break; } } @@ -283,9 +283,9 @@ ext3_set_acl(handle_t *handle, struct inode *inode, int type, } static int -ext3_check_acl(struct inode *inode, int mask) +ext4_check_acl(struct inode *inode, int mask) { - struct posix_acl *acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); + struct posix_acl *acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl)) return PTR_ERR(acl); @@ -299,26 +299,26 @@ ext3_check_acl(struct inode *inode, int mask) } int -ext3_permission(struct inode *inode, int mask, struct nameidata *nd) +ext4_permission(struct inode *inode, int mask, struct nameidata *nd) { - return generic_permission(inode, mask, ext3_check_acl); + return generic_permission(inode, mask, ext4_check_acl); } /* - * Initialize the ACLs of a new inode. Called from ext3_new_inode. + * Initialize the ACLs of a new inode. Called from ext4_new_inode. 
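The ext4_iget_acl()/ext4_iset_acl() pair in the hunk above is a small cache built around a sentinel: i_acl starts life as EXT4_ACL_NOT_CACHED ((void *)-1), which lets the code tell "never read from disk" apart from a legitimately absent (NULL) ACL. The shape of the pattern, reduced to userspace with a pthread mutex standing in for inode->i_lock and reference counting elided:

#include <pthread.h>

#define ACL_NOT_CACHED ((void *)-1)	/* sentinel: not read yet */

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static void *cached_acl = ACL_NOT_CACHED;

static void *acl_from_disk(void)
{
	return NULL;	/* pretend the inode carries no ACL */
}

static void *get_acl(void)
{
	void *acl;

	pthread_mutex_lock(&lock);
	acl = cached_acl;
	pthread_mutex_unlock(&lock);
	if (acl != ACL_NOT_CACHED)
		return acl;		/* cached, possibly NULL */

	acl = acl_from_disk();		/* NULL is a valid, cacheable answer */
	pthread_mutex_lock(&lock);
	cached_acl = acl;
	pthread_mutex_unlock(&lock);
	return acl;
}

int main(void)
{
	return get_acl() == ACL_NOT_CACHED;	/* 0 once cached */
}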
* * dir->i_mutex: down * inode->i_mutex: up (access to inode is still exclusive) */ int -ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) +ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) { struct posix_acl *acl = NULL; int error = 0; if (!S_ISLNK(inode->i_mode)) { if (test_opt(dir->i_sb, POSIX_ACL)) { - acl = ext3_get_acl(dir, ACL_TYPE_DEFAULT); + acl = ext4_get_acl(dir, ACL_TYPE_DEFAULT); if (IS_ERR(acl)) return PTR_ERR(acl); } @@ -330,7 +330,7 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) mode_t mode; if (S_ISDIR(inode->i_mode)) { - error = ext3_set_acl(handle, inode, + error = ext4_set_acl(handle, inode, ACL_TYPE_DEFAULT, acl); if (error) goto cleanup; @@ -346,7 +346,7 @@ ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) inode->i_mode = mode; if (error > 0) { /* This is an extended ACL */ - error = ext3_set_acl(handle, inode, + error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, clone); } } @@ -372,7 +372,7 @@ cleanup: * inode->i_mutex: down */ int -ext3_acl_chmod(struct inode *inode) +ext4_acl_chmod(struct inode *inode) { struct posix_acl *acl, *clone; int error; @@ -381,7 +381,7 @@ ext3_acl_chmod(struct inode *inode) return -EOPNOTSUPP; if (!test_opt(inode->i_sb, POSIX_ACL)) return 0; - acl = ext3_get_acl(inode, ACL_TYPE_ACCESS); + acl = ext4_get_acl(inode, ACL_TYPE_ACCESS); if (IS_ERR(acl) || !acl) return PTR_ERR(acl); clone = posix_acl_clone(acl, GFP_KERNEL); @@ -394,17 +394,17 @@ ext3_acl_chmod(struct inode *inode) int retries = 0; retry: - handle = ext3_journal_start(inode, - EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); + handle = ext4_journal_start(inode, + EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); if (IS_ERR(handle)) { error = PTR_ERR(handle); - ext3_std_error(inode->i_sb, error); + ext4_std_error(inode->i_sb, error); goto out; } - error = ext3_set_acl(handle, inode, ACL_TYPE_ACCESS, clone); - ext3_journal_stop(handle); + error = ext4_set_acl(handle, inode, ACL_TYPE_ACCESS, clone); + ext4_journal_stop(handle); if (error == -ENOSPC && - ext3_should_retry_alloc(inode->i_sb, &retries)) + ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; } out: @@ -416,7 +416,7 @@ out: * Extended attribute handlers */ static size_t -ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, +ext4_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, const char *name, size_t name_len) { const size_t size = sizeof(POSIX_ACL_XATTR_ACCESS); @@ -429,7 +429,7 @@ ext3_xattr_list_acl_access(struct inode *inode, char *list, size_t list_len, } static size_t -ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, +ext4_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, const char *name, size_t name_len) { const size_t size = sizeof(POSIX_ACL_XATTR_DEFAULT); @@ -442,7 +442,7 @@ ext3_xattr_list_acl_default(struct inode *inode, char *list, size_t list_len, } static int -ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) +ext4_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) { struct posix_acl *acl; int error; @@ -450,7 +450,7 @@ ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) if (!test_opt(inode->i_sb, POSIX_ACL)) return -EOPNOTSUPP; - acl = ext3_get_acl(inode, type); + acl = ext4_get_acl(inode, type); if (IS_ERR(acl)) return PTR_ERR(acl); if (acl == NULL) @@ -462,25 +462,25 @@ ext3_xattr_get_acl(struct inode *inode, int type, void *buffer, size_t size) } 
static int -ext3_xattr_get_acl_access(struct inode *inode, const char *name, +ext4_xattr_get_acl_access(struct inode *inode, const char *name, void *buffer, size_t size) { if (strcmp(name, "") != 0) return -EINVAL; - return ext3_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); + return ext4_xattr_get_acl(inode, ACL_TYPE_ACCESS, buffer, size); } static int -ext3_xattr_get_acl_default(struct inode *inode, const char *name, +ext4_xattr_get_acl_default(struct inode *inode, const char *name, void *buffer, size_t size) { if (strcmp(name, "") != 0) return -EINVAL; - return ext3_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); + return ext4_xattr_get_acl(inode, ACL_TYPE_DEFAULT, buffer, size); } static int -ext3_xattr_set_acl(struct inode *inode, int type, const void *value, +ext4_xattr_set_acl(struct inode *inode, int type, const void *value, size_t size) { handle_t *handle; @@ -505,12 +505,12 @@ ext3_xattr_set_acl(struct inode *inode, int type, const void *value, acl = NULL; retry: - handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); + handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); - error = ext3_set_acl(handle, inode, type, acl); - ext3_journal_stop(handle); - if (error == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) + error = ext4_set_acl(handle, inode, type, acl); + ext4_journal_stop(handle); + if (error == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; release_and_out: @@ -519,33 +519,33 @@ release_and_out: } static int -ext3_xattr_set_acl_access(struct inode *inode, const char *name, +ext4_xattr_set_acl_access(struct inode *inode, const char *name, const void *value, size_t size, int flags) { if (strcmp(name, "") != 0) return -EINVAL; - return ext3_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); + return ext4_xattr_set_acl(inode, ACL_TYPE_ACCESS, value, size); } static int -ext3_xattr_set_acl_default(struct inode *inode, const char *name, +ext4_xattr_set_acl_default(struct inode *inode, const char *name, const void *value, size_t size, int flags) { if (strcmp(name, "") != 0) return -EINVAL; - return ext3_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); + return ext4_xattr_set_acl(inode, ACL_TYPE_DEFAULT, value, size); } -struct xattr_handler ext3_xattr_acl_access_handler = { +struct xattr_handler ext4_xattr_acl_access_handler = { .prefix = POSIX_ACL_XATTR_ACCESS, - .list = ext3_xattr_list_acl_access, - .get = ext3_xattr_get_acl_access, - .set = ext3_xattr_set_acl_access, + .list = ext4_xattr_list_acl_access, + .get = ext4_xattr_get_acl_access, + .set = ext4_xattr_set_acl_access, }; -struct xattr_handler ext3_xattr_acl_default_handler = { +struct xattr_handler ext4_xattr_acl_default_handler = { .prefix = POSIX_ACL_XATTR_DEFAULT, - .list = ext3_xattr_list_acl_default, - .get = ext3_xattr_get_acl_default, - .set = ext3_xattr_set_acl_default, + .list = ext4_xattr_list_acl_default, + .get = ext4_xattr_get_acl_default, + .set = ext4_xattr_set_acl_default, }; diff --git a/fs/ext4/acl.h b/fs/ext4/acl.h index 0d1e6279cbfd..26a5c1abf147 100644 --- a/fs/ext4/acl.h +++ b/fs/ext4/acl.h @@ -1,81 +1,81 @@ /* - File: fs/ext3/acl.h + File: fs/ext4/acl.h (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> */ #include <linux/posix_acl_xattr.h> -#define EXT3_ACL_VERSION 0x0001 +#define EXT4_ACL_VERSION 0x0001 typedef struct { __le16 e_tag; __le16 e_perm; __le32 e_id; -} ext3_acl_entry; +} ext4_acl_entry; typedef struct { __le16 e_tag; __le16 e_perm; -} 
ext3_acl_entry_short; +} ext4_acl_entry_short; typedef struct { __le32 a_version; -} ext3_acl_header; +} ext4_acl_header; -static inline size_t ext3_acl_size(int count) +static inline size_t ext4_acl_size(int count) { if (count <= 4) { - return sizeof(ext3_acl_header) + - count * sizeof(ext3_acl_entry_short); + return sizeof(ext4_acl_header) + + count * sizeof(ext4_acl_entry_short); } else { - return sizeof(ext3_acl_header) + - 4 * sizeof(ext3_acl_entry_short) + - (count - 4) * sizeof(ext3_acl_entry); + return sizeof(ext4_acl_header) + + 4 * sizeof(ext4_acl_entry_short) + + (count - 4) * sizeof(ext4_acl_entry); } } -static inline int ext3_acl_count(size_t size) +static inline int ext4_acl_count(size_t size) { ssize_t s; - size -= sizeof(ext3_acl_header); - s = size - 4 * sizeof(ext3_acl_entry_short); + size -= sizeof(ext4_acl_header); + s = size - 4 * sizeof(ext4_acl_entry_short); if (s < 0) { - if (size % sizeof(ext3_acl_entry_short)) + if (size % sizeof(ext4_acl_entry_short)) return -1; - return size / sizeof(ext3_acl_entry_short); + return size / sizeof(ext4_acl_entry_short); } else { - if (s % sizeof(ext3_acl_entry)) + if (s % sizeof(ext4_acl_entry)) return -1; - return s / sizeof(ext3_acl_entry) + 4; + return s / sizeof(ext4_acl_entry) + 4; } } -#ifdef CONFIG_EXT3_FS_POSIX_ACL +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL -/* Value for inode->u.ext3_i.i_acl and inode->u.ext3_i.i_default_acl +/* Value for inode->u.ext4_i.i_acl and inode->u.ext4_i.i_default_acl if the ACL has not been cached */ -#define EXT3_ACL_NOT_CACHED ((void *)-1) +#define EXT4_ACL_NOT_CACHED ((void *)-1) /* acl.c */ -extern int ext3_permission (struct inode *, int, struct nameidata *); -extern int ext3_acl_chmod (struct inode *); -extern int ext3_init_acl (handle_t *, struct inode *, struct inode *); +extern int ext4_permission (struct inode *, int, struct nameidata *); +extern int ext4_acl_chmod (struct inode *); +extern int ext4_init_acl (handle_t *, struct inode *, struct inode *); -#else /* CONFIG_EXT3_FS_POSIX_ACL */ +#else /* CONFIG_EXT4DEV_FS_POSIX_ACL */ #include <linux/sched.h> -#define ext3_permission NULL +#define ext4_permission NULL static inline int -ext3_acl_chmod(struct inode *inode) +ext4_acl_chmod(struct inode *inode) { return 0; } static inline int -ext3_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) +ext4_init_acl(handle_t *handle, struct inode *inode, struct inode *dir) { return 0; } -#endif /* CONFIG_EXT3_FS_POSIX_ACL */ +#endif /* CONFIG_EXT4DEV_FS_POSIX_ACL */ diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index b41a7d7e20f0..357e4e50374a 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -1,5 +1,5 @@ /* - * linux/fs/ext3/balloc.c + * linux/fs/ext4/balloc.c * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) @@ -15,8 +15,8 @@ #include <linux/capability.h> #include <linux/fs.h> #include <linux/jbd.h> -#include <linux/ext3_fs.h> -#include <linux/ext3_jbd.h> +#include <linux/ext4_fs.h> +#include <linux/ext4_jbd.h> #include <linux/quotaops.h> #include <linux/buffer_head.h> @@ -32,30 +32,30 @@ * The file system contains group descriptors which are located after the * super block. Each descriptor contains the number of the bitmap block and * the free blocks count in the block. The descriptors are loaded in memory - * when a file system is mounted (see ext3_read_super). + * when a file system is mounted (see ext4_read_super). 
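ext4_acl_size()/ext4_acl_count() above lean on the fact that a valid POSIX ACL contains at most four unqualified entries (USER_OBJ, GROUP_OBJ, MASK, OTHER), which need no e_id and are stored in the 4-byte short form; only entries past the fourth pay for the full 8-byte form. Worked through with the structure sizes implied above (4-byte header): a 6-entry ACL takes 4 + 4*4 + 2*8 = 36 bytes, and feeding 36 back through the count function recovers 6. A quick self-contained check of the size half:

#include <assert.h>
#include <stddef.h>

#define HDR	4	/* sizeof(ext4_acl_header): one __le32 */
#define SHORT	4	/* sizeof(ext4_acl_entry_short): e_tag + e_perm */
#define FULL	8	/* sizeof(ext4_acl_entry): adds the __le32 e_id */

static size_t acl_size(int count)
{
	if (count <= 4)
		return HDR + count * SHORT;
	return HDR + 4 * SHORT + (count - 4) * FULL;
}

int main(void)
{
	assert(acl_size(3) == 16);	/* USER_OBJ, GROUP_OBJ, OTHER */
	assert(acl_size(6) == 36);	/* + MASK and two qualified entries */
	return 0;
}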
*/ #define in_range(b, first, len) ((b) >= (first) && (b) <= (first) + (len) - 1) /** - * ext3_get_group_desc() -- load group descriptor from disk + * ext4_get_group_desc() -- load group descriptor from disk * @sb: super block * @block_group: given block group * @bh: pointer to the buffer head to store the block * group descriptor */ -struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, +struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, unsigned int block_group, struct buffer_head ** bh) { unsigned long group_desc; unsigned long offset; - struct ext3_group_desc * desc; - struct ext3_sb_info *sbi = EXT3_SB(sb); + struct ext4_group_desc * desc; + struct ext4_sb_info *sbi = EXT4_SB(sb); if (block_group >= sbi->s_groups_count) { - ext3_error (sb, "ext3_get_group_desc", + ext4_error (sb, "ext4_get_group_desc", "block_group >= groups_count - " "block_group = %d, groups_count = %lu", block_group, sbi->s_groups_count); @@ -64,17 +64,17 @@ struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, } smp_rmb(); - group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb); - offset = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1); + group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); + offset = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); if (!sbi->s_group_desc[group_desc]) { - ext3_error (sb, "ext3_get_group_desc", + ext4_error (sb, "ext4_get_group_desc", "Group descriptor not loaded - " "block_group = %d, group_desc = %lu, desc = %lu", block_group, group_desc, offset); return NULL; } - desc = (struct ext3_group_desc *) sbi->s_group_desc[group_desc]->b_data; + desc = (struct ext4_group_desc *) sbi->s_group_desc[group_desc]->b_data; if (bh) *bh = sbi->s_group_desc[group_desc]; return desc + offset; @@ -93,15 +93,15 @@ struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, static struct buffer_head * read_block_bitmap(struct super_block *sb, unsigned int block_group) { - struct ext3_group_desc * desc; + struct ext4_group_desc * desc; struct buffer_head * bh = NULL; - desc = ext3_get_group_desc (sb, block_group, NULL); + desc = ext4_get_group_desc (sb, block_group, NULL); if (!desc) goto error_out; bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); if (!bh) - ext3_error (sb, "read_block_bitmap", + ext4_error (sb, "read_block_bitmap", "Cannot read block bitmap - " "block_group = %d, block_bitmap = %u", block_group, le32_to_cpu(desc->bg_block_bitmap)); @@ -134,7 +134,7 @@ static void __rsv_window_dump(struct rb_root *root, int verbose, const char *fn) { struct rb_node *n; - struct ext3_reserve_window_node *rsv, *prev; + struct ext4_reserve_window_node *rsv, *prev; int bad; restart: @@ -144,7 +144,7 @@ restart: printk("Block Allocation Reservation Windows Map (%s):\n", fn); while (n) { - rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node); + rsv = list_entry(n, struct ext4_reserve_window_node, rsv_node); if (verbose) printk("reservation window 0x%p " "start: %lu, end: %lu\n", @@ -196,13 +196,13 @@ restart: * otherwise, return 0; */ static int -goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal, +goal_in_my_reservation(struct ext4_reserve_window *rsv, ext4_grpblk_t grp_goal, unsigned int group, struct super_block * sb) { - ext3_fsblk_t group_first_block, group_last_block; + ext4_fsblk_t group_first_block, group_last_block; - group_first_block = ext3_group_first_block_no(sb, group); - group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); + group_first_block = 
ext4_group_first_block_no(sb, group);
+	group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);

	if ((rsv->_rsv_start > group_last_block) ||
	    (rsv->_rsv_end < group_first_block))
@@ -222,17 +222,17 @@ goal_in_my_reservation(struct ext3_reserve_window *rsv, ext3_grpblk_t grp_goal,
 * if the goal is not in any window.
 * Returns NULL if there are no windows or if all windows start after the goal.
 */
-static struct ext3_reserve_window_node *
-search_reserve_window(struct rb_root *root, ext3_fsblk_t goal)
+static struct ext4_reserve_window_node *
+search_reserve_window(struct rb_root *root, ext4_fsblk_t goal)
 {
	struct rb_node *n = root->rb_node;
-	struct ext3_reserve_window_node *rsv;
+	struct ext4_reserve_window_node *rsv;

	if (!n)
		return NULL;

	do {
-		rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
+		rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);

		if (goal < rsv->rsv_start)
			n = n->rb_left;
@@ -249,33 +249,33 @@ search_reserve_window(struct rb_root *root, ext3_fsblk_t goal)
	 */
	if (rsv->rsv_start > goal) {
		n = rb_prev(&rsv->rsv_node);
-		rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node);
+		rsv = rb_entry(n, struct ext4_reserve_window_node, rsv_node);
	}
	return rsv;
}

/**
- * ext3_rsv_window_add() -- Insert a window to the block reservation rb tree.
+ * ext4_rsv_window_add() -- Insert a window to the block reservation rb tree.
 * @sb: super block
 * @rsv: reservation window to add
 *
 * Must be called with rsv_lock held.
 */
-void ext3_rsv_window_add(struct super_block *sb,
-		    struct ext3_reserve_window_node *rsv)
+void ext4_rsv_window_add(struct super_block *sb,
+		    struct ext4_reserve_window_node *rsv)
{
-	struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root;
+	struct rb_root *root = &EXT4_SB(sb)->s_rsv_window_root;
	struct rb_node *node = &rsv->rsv_node;
-	ext3_fsblk_t start = rsv->rsv_start;
+	ext4_fsblk_t start = rsv->rsv_start;

	struct rb_node ** p = &root->rb_node;
	struct rb_node * parent = NULL;
-	struct ext3_reserve_window_node *this;
+	struct ext4_reserve_window_node *this;

	while (*p)
	{
		parent = *p;
-		this = rb_entry(parent, struct ext3_reserve_window_node, rsv_node);
+		this = rb_entry(parent, struct ext4_reserve_window_node, rsv_node);

		if (start < this->rsv_start)
			p = &(*p)->rb_left;
@@ -292,7 +292,7 @@ void ext3_rsv_window_add(struct super_block *sb,
}

/**
- * ext3_rsv_window_remove() -- unlink a window from the reservation rb tree
+ * ext4_rsv_window_remove() -- unlink a window from the reservation rb tree
 * @sb: super block
 * @rsv: reservation window to remove
 *
@@ -301,59 +301,59 @@ void ext3_rsv_window_add(struct super_block *sb,
 * rsv_lock held.
 */
static void rsv_window_remove(struct super_block *sb,
-			      struct ext3_reserve_window_node *rsv)
+			      struct ext4_reserve_window_node *rsv)
{
-	rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
-	rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+	rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
+	rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
	rsv->rsv_alloc_hit = 0;
-	rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root);
+	rb_erase(&rsv->rsv_node, &EXT4_SB(sb)->s_rsv_window_root);
}

/*
 * rsv_is_empty() -- Check if the reservation window is allocated.
 * @rsv: given reservation window to check
 *
- * returns 1 if the end block is EXT3_RESERVE_WINDOW_NOT_ALLOCATED.
+ * returns 1 if the end block is EXT4_RESERVE_WINDOW_NOT_ALLOCATED.
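 *
 * (Block 0 is always boot-block/superblock territory and is never
 * handed out by the allocator, so a stored end block of 0 can safely
 * double as the "window not allocated" sentinel.)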
 */
-static inline int rsv_is_empty(struct ext3_reserve_window *rsv)
+static inline int rsv_is_empty(struct ext4_reserve_window *rsv)
{
	/* a valid reservation end block could not be 0 */
-	return rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+	return rsv->_rsv_end == EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
}

/**
- * ext3_init_block_alloc_info()
+ * ext4_init_block_alloc_info()
 * @inode:		file inode structure
 *
 * Allocate and initialize the	reservation window structure, and
- * link the window to the ext3 inode structure at last
+ * finally link the window to the ext4 inode structure
 *
 * The reservation window structure is only dynamically allocated
- * and linked to ext3 inode the first time the open file
- * needs a new block. So, before every ext3_new_block(s) call, for
+ * and linked to the ext4 inode the first time the open file
+ * needs a new block. So, before every ext4_new_block(s) call, for
 * regular files, we should check whether the reservation window
 * structure exists or not. In the latter case, this function is called.
 * Failure to do so will result in block reservation being turned off for that
 * open file.
 *
- * This function is called from ext3_get_blocks_handle(), also called
+ * This function is called from ext4_get_blocks_handle(), also called
 * when setting the reservation window size through ioctl before the file
 * is opened for write (needs block allocation).
 *
 * Needs truncate_mutex protection prior to calling this function.
 */
-void ext3_init_block_alloc_info(struct inode *inode)
+void ext4_init_block_alloc_info(struct inode *inode)
{
-	struct ext3_inode_info *ei = EXT3_I(inode);
-	struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
	struct super_block *sb = inode->i_sb;

	block_i = kmalloc(sizeof(*block_i), GFP_NOFS);
	if (block_i) {
-		struct ext3_reserve_window_node *rsv = &block_i->rsv_window_node;
+		struct ext4_reserve_window_node *rsv = &block_i->rsv_window_node;

-		rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
-		rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
+		rsv->rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;
+		rsv->rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED;

		/*
		 * if filesystem is mounted with NORESERVATION, the goal
@@ -363,7 +363,7 @@ void ext3_init_block_alloc_info(struct inode *inode)
		if (!test_opt(sb, RESERVATION))
			rsv->rsv_goal_size = 0;
		else
-			rsv->rsv_goal_size = EXT3_DEFAULT_RESERVE_BLOCKS;
+			rsv->rsv_goal_size = EXT4_DEFAULT_RESERVE_BLOCKS;
		rsv->rsv_alloc_hit = 0;
		block_i->last_alloc_logical_block = 0;
		block_i->last_alloc_physical_block = 0;
@@ -372,24 +372,24 @@
}

/**
- * ext3_discard_reservation()
+ * ext4_discard_reservation()
 * @inode:		inode
 *
 * Discard(free) block reservation window on last file close, or truncate
 * or at last iput().
 *
 * It is being called in three cases:
- *	ext3_release_file(): last writer close the file
- *	ext3_clear_inode(): last iput(), when nobody link to this file.
- *	ext3_truncate(): when the block indirect map is about to change.
+ *	ext4_release_file(): last writer closes the file
+ *	ext4_clear_inode(): last iput(), when nobody links to this file.
+ *	ext4_truncate(): when the block indirect map is about to change.
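 *
 * A minimal calling sketch, mirroring what ext4_release_file() in
 * fs/ext4/file.c does; the caller serializes against block
 * allocation with truncate_mutex:
 *
 *	mutex_lock(&EXT4_I(inode)->truncate_mutex);
 *	ext4_discard_reservation(inode);
 *	mutex_unlock(&EXT4_I(inode)->truncate_mutex);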
 *
 */
-void ext3_discard_reservation(struct inode *inode)
+void ext4_discard_reservation(struct inode *inode)
{
-	struct ext3_inode_info *ei = EXT3_I(inode);
-	struct ext3_block_alloc_info *block_i = ei->i_block_alloc_info;
-	struct ext3_reserve_window_node *rsv;
-	spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock;
+	struct ext4_inode_info *ei = EXT4_I(inode);
+	struct ext4_block_alloc_info *block_i = ei->i_block_alloc_info;
+	struct ext4_reserve_window_node *rsv;
+	spinlock_t *rsv_lock = &EXT4_SB(inode->i_sb)->s_rsv_window_lock;

	if (!block_i)
		return;
@@ -404,62 +404,62 @@
}

/**
- * ext3_free_blocks_sb() -- Free given blocks and update quota
+ * ext4_free_blocks_sb() -- Free given blocks and update quota
 * @handle:			handle to this transaction
 * @sb:				super block
 * @block:			start physical block to free
 * @count:			number of blocks to free
 * @pdquot_freed_blocks:	pointer to quota
 */
-void ext3_free_blocks_sb(handle_t *handle, struct super_block *sb,
-			 ext3_fsblk_t block, unsigned long count,
+void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb,
			 ext4_fsblk_t block, unsigned long count,
			 unsigned long *pdquot_freed_blocks)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gd_bh;
	unsigned long block_group;
-	ext3_grpblk_t bit;
+	ext4_grpblk_t bit;
	unsigned long i;
	unsigned long overflow;
-	struct ext3_group_desc * desc;
-	struct ext3_super_block * es;
-	struct ext3_sb_info *sbi;
+	struct ext4_group_desc * desc;
+	struct ext4_super_block * es;
+	struct ext4_sb_info *sbi;
	int err = 0, ret;
-	ext3_grpblk_t group_freed;
+	ext4_grpblk_t group_freed;

	*pdquot_freed_blocks = 0;
-	sbi = EXT3_SB(sb);
+	sbi = EXT4_SB(sb);
	es = sbi->s_es;
	if (block < le32_to_cpu(es->s_first_data_block) ||
	    block + count < block ||
	    block + count > le32_to_cpu(es->s_blocks_count)) {
-		ext3_error (sb, "ext3_free_blocks",
+		ext4_error (sb, "ext4_free_blocks",
			    "Freeing blocks not in datazone - "
			    "block = "E3FSBLK", count = %lu", block, count);
		goto error_return;
	}

-	ext3_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);
+	ext4_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1);

do_more:
	overflow = 0;
	block_group = (block - le32_to_cpu(es->s_first_data_block)) /
-		      EXT3_BLOCKS_PER_GROUP(sb);
+		      EXT4_BLOCKS_PER_GROUP(sb);
	bit = (block - le32_to_cpu(es->s_first_data_block)) %
-		      EXT3_BLOCKS_PER_GROUP(sb);
+		      EXT4_BLOCKS_PER_GROUP(sb);
	/*
	 * Check to see if we are freeing blocks across a group
	 * boundary.
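	 *
	 * Worked example, assuming 32768 blocks per group: freeing
	 * count = 5 blocks starting at bit 32766 gives
	 * overflow = 32766 + 5 - 32768 = 3, so this pass frees only the
	 * last two blocks of the group, and the do_more loop comes
	 * round again for the three blocks that live in the next group.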
*/ - if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) { - overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb); + if (bit + count > EXT4_BLOCKS_PER_GROUP(sb)) { + overflow = bit + count - EXT4_BLOCKS_PER_GROUP(sb); count -= overflow; } brelse(bitmap_bh); bitmap_bh = read_block_bitmap(sb, block_group); if (!bitmap_bh) goto error_return; - desc = ext3_get_group_desc (sb, block_group, &gd_bh); + desc = ext4_get_group_desc (sb, block_group, &gd_bh); if (!desc) goto error_return; @@ -469,7 +469,7 @@ do_more: sbi->s_itb_per_group) || in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table), sbi->s_itb_per_group)) - ext3_error (sb, "ext3_free_blocks", + ext4_error (sb, "ext4_free_blocks", "Freeing blocks in system zones - " "Block = "E3FSBLK", count = %lu", block, count); @@ -480,7 +480,7 @@ do_more: */ /* @@@ check errors */ BUFFER_TRACE(bitmap_bh, "getting undo access"); - err = ext3_journal_get_undo_access(handle, bitmap_bh); + err = ext4_journal_get_undo_access(handle, bitmap_bh); if (err) goto error_return; @@ -490,7 +490,7 @@ do_more: * using it */ BUFFER_TRACE(gd_bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, gd_bh); + err = ext4_journal_get_write_access(handle, gd_bh); if (err) goto error_return; @@ -542,7 +542,7 @@ do_more: BUFFER_TRACE(bitmap_bh, "set in b_committed_data"); J_ASSERT_BH(bitmap_bh, bh2jh(bitmap_bh)->b_committed_data != NULL); - ext3_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i, + ext4_set_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i, bh2jh(bitmap_bh)->b_committed_data); /* @@ -551,10 +551,10 @@ do_more: * the allocator uses. */ BUFFER_TRACE(bitmap_bh, "clear bit"); - if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group), + if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), bit + i, bitmap_bh->b_data)) { jbd_unlock_bh_state(bitmap_bh); - ext3_error(sb, __FUNCTION__, + ext4_error(sb, __FUNCTION__, "bit already cleared for block "E3FSBLK, block + i); jbd_lock_bh_state(bitmap_bh); @@ -574,11 +574,11 @@ do_more: /* We dirtied the bitmap block */ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); - err = ext3_journal_dirty_metadata(handle, bitmap_bh); + err = ext4_journal_dirty_metadata(handle, bitmap_bh); /* And the group descriptor block */ BUFFER_TRACE(gd_bh, "dirtied group descriptor block"); - ret = ext3_journal_dirty_metadata(handle, gd_bh); + ret = ext4_journal_dirty_metadata(handle, gd_bh); if (!err) err = ret; *pdquot_freed_blocks += group_freed; @@ -590,40 +590,40 @@ do_more: sb->s_dirt = 1; error_return: brelse(bitmap_bh); - ext3_std_error(sb, err); + ext4_std_error(sb, err); return; } /** - * ext3_free_blocks() -- Free given blocks and update quota + * ext4_free_blocks() -- Free given blocks and update quota * @handle: handle for this transaction * @inode: inode * @block: start physical block to free * @count: number of blocks to count */ -void ext3_free_blocks(handle_t *handle, struct inode *inode, - ext3_fsblk_t block, unsigned long count) +void ext4_free_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t block, unsigned long count) { struct super_block * sb; unsigned long dquot_freed_blocks; sb = inode->i_sb; if (!sb) { - printk ("ext3_free_blocks: nonexistent device"); + printk ("ext4_free_blocks: nonexistent device"); return; } - ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); + ext4_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks); if (dquot_freed_blocks) DQUOT_FREE_BLOCK(inode, dquot_freed_blocks); return; } /** - * ext3_test_allocatable() + * ext4_test_allocatable() * 
@nr: given allocation block group * @bh: bufferhead contains the bitmap of the given block group * - * For ext3 allocations, we must not reuse any blocks which are + * For ext4 allocations, we must not reuse any blocks which are * allocated in the bitmap buffer's "last committed data" copy. This * prevents deletes from freeing up the page for reuse until we have * committed the delete transaction. @@ -638,19 +638,19 @@ void ext3_free_blocks(handle_t *handle, struct inode *inode, * data-writes at some point, and disable it for metadata allocations or * sync-data inodes. */ -static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh) +static int ext4_test_allocatable(ext4_grpblk_t nr, struct buffer_head *bh) { int ret; struct journal_head *jh = bh2jh(bh); - if (ext3_test_bit(nr, bh->b_data)) + if (ext4_test_bit(nr, bh->b_data)) return 0; jbd_lock_bh_state(bh); if (!jh->b_committed_data) ret = 1; else - ret = !ext3_test_bit(nr, jh->b_committed_data); + ret = !ext4_test_bit(nr, jh->b_committed_data); jbd_unlock_bh_state(bh); return ret; } @@ -665,22 +665,22 @@ static int ext3_test_allocatable(ext3_grpblk_t nr, struct buffer_head *bh) * bitmap on disk and the last-committed copy in journal, until we find a * bit free in both bitmaps. */ -static ext3_grpblk_t -bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, - ext3_grpblk_t maxblocks) +static ext4_grpblk_t +bitmap_search_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, + ext4_grpblk_t maxblocks) { - ext3_grpblk_t next; + ext4_grpblk_t next; struct journal_head *jh = bh2jh(bh); while (start < maxblocks) { - next = ext3_find_next_zero_bit(bh->b_data, maxblocks, start); + next = ext4_find_next_zero_bit(bh->b_data, maxblocks, start); if (next >= maxblocks) return -1; - if (ext3_test_allocatable(next, bh)) + if (ext4_test_allocatable(next, bh)) return next; jbd_lock_bh_state(bh); if (jh->b_committed_data) - start = ext3_find_next_zero_bit(jh->b_committed_data, + start = ext4_find_next_zero_bit(jh->b_committed_data, maxblocks, next); jbd_unlock_bh_state(bh); } @@ -700,11 +700,11 @@ bitmap_search_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, * the initial goal; then for a free byte somewhere in the bitmap; then * for any free bit in the bitmap. */ -static ext3_grpblk_t -find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, - ext3_grpblk_t maxblocks) +static ext4_grpblk_t +find_next_usable_block(ext4_grpblk_t start, struct buffer_head *bh, + ext4_grpblk_t maxblocks) { - ext3_grpblk_t here, next; + ext4_grpblk_t here, next; char *p, *r; if (start > 0) { @@ -713,16 +713,16 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, * block within the next XX blocks. * * end_goal is more or less random, but it has to be - * less than EXT3_BLOCKS_PER_GROUP. Aligning up to the + * less than EXT4_BLOCKS_PER_GROUP. Aligning up to the * next 64-bit boundary is simple.. 
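	 *
	 * e.g. start = 100: end_goal = (100 + 63) & ~63 = 128, so only
	 * bits 100..127 are probed before the code falls back to the
	 * byte-wise memscan() further down.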
*/ - ext3_grpblk_t end_goal = (start + 63) & ~63; + ext4_grpblk_t end_goal = (start + 63) & ~63; if (end_goal > maxblocks) end_goal = maxblocks; - here = ext3_find_next_zero_bit(bh->b_data, end_goal, start); - if (here < end_goal && ext3_test_allocatable(here, bh)) + here = ext4_find_next_zero_bit(bh->b_data, end_goal, start); + if (here < end_goal && ext4_test_allocatable(here, bh)) return here; - ext3_debug("Bit not found near goal\n"); + ext4_debug("Bit not found near goal\n"); } here = start; @@ -733,7 +733,7 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, r = memscan(p, 0, (maxblocks - here + 7) >> 3); next = (r - ((char *)bh->b_data)) << 3; - if (next < maxblocks && next >= start && ext3_test_allocatable(next, bh)) + if (next < maxblocks && next >= start && ext4_test_allocatable(next, bh)) return next; /* @@ -757,16 +757,16 @@ find_next_usable_block(ext3_grpblk_t start, struct buffer_head *bh, * zero (failure). */ static inline int -claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh) +claim_block(spinlock_t *lock, ext4_grpblk_t block, struct buffer_head *bh) { struct journal_head *jh = bh2jh(bh); int ret; - if (ext3_set_bit_atomic(lock, block, bh->b_data)) + if (ext4_set_bit_atomic(lock, block, bh->b_data)) return 0; jbd_lock_bh_state(bh); - if (jh->b_committed_data && ext3_test_bit(block,jh->b_committed_data)) { - ext3_clear_bit_atomic(lock, block, bh->b_data); + if (jh->b_committed_data && ext4_test_bit(block,jh->b_committed_data)) { + ext4_clear_bit_atomic(lock, block, bh->b_data); ret = 0; } else { ret = 1; @@ -776,7 +776,7 @@ claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh) } /** - * ext3_try_to_allocate() + * ext4_try_to_allocate() * @sb: superblock * @handle: handle to this transaction * @group: given allocation block group @@ -797,29 +797,29 @@ claim_block(spinlock_t *lock, ext3_grpblk_t block, struct buffer_head *bh) * * If we failed to allocate the desired block then we may end up crossing to a * new bitmap. In that case we must release write access to the old one via - * ext3_journal_release_buffer(), else we'll run out of credits. + * ext4_journal_release_buffer(), else we'll run out of credits. 
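 *
 * To illustrate the window clamping done below (assuming 32768 blocks
 * per group): a reservation covering filesystem blocks 32760..32775,
 * intersected with group 1 (first block 32768), yields start = 0 and
 * end = 8; only the part of the window inside this group is searched.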
*/ -static ext3_grpblk_t -ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group, - struct buffer_head *bitmap_bh, ext3_grpblk_t grp_goal, - unsigned long *count, struct ext3_reserve_window *my_rsv) +static ext4_grpblk_t +ext4_try_to_allocate(struct super_block *sb, handle_t *handle, int group, + struct buffer_head *bitmap_bh, ext4_grpblk_t grp_goal, + unsigned long *count, struct ext4_reserve_window *my_rsv) { - ext3_fsblk_t group_first_block; - ext3_grpblk_t start, end; + ext4_fsblk_t group_first_block; + ext4_grpblk_t start, end; unsigned long num = 0; /* we do allocation within the reservation window if we have a window */ if (my_rsv) { - group_first_block = ext3_group_first_block_no(sb, group); + group_first_block = ext4_group_first_block_no(sb, group); if (my_rsv->_rsv_start >= group_first_block) start = my_rsv->_rsv_start - group_first_block; else /* reservation window cross group boundary */ start = 0; end = my_rsv->_rsv_end - group_first_block + 1; - if (end > EXT3_BLOCKS_PER_GROUP(sb)) + if (end > EXT4_BLOCKS_PER_GROUP(sb)) /* reservation window crosses group boundary */ - end = EXT3_BLOCKS_PER_GROUP(sb); + end = EXT4_BLOCKS_PER_GROUP(sb); if ((start <= grp_goal) && (grp_goal < end)) start = grp_goal; else @@ -829,13 +829,13 @@ ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group, start = grp_goal; else start = 0; - end = EXT3_BLOCKS_PER_GROUP(sb); + end = EXT4_BLOCKS_PER_GROUP(sb); } - BUG_ON(start > EXT3_BLOCKS_PER_GROUP(sb)); + BUG_ON(start > EXT4_BLOCKS_PER_GROUP(sb)); repeat: - if (grp_goal < 0 || !ext3_test_allocatable(grp_goal, bitmap_bh)) { + if (grp_goal < 0 || !ext4_test_allocatable(grp_goal, bitmap_bh)) { grp_goal = find_next_usable_block(start, bitmap_bh, end); if (grp_goal < 0) goto fail_access; @@ -843,7 +843,7 @@ repeat: int i; for (i = 0; i < 7 && grp_goal > start && - ext3_test_allocatable(grp_goal - 1, + ext4_test_allocatable(grp_goal - 1, bitmap_bh); i++, grp_goal--) ; @@ -851,7 +851,7 @@ repeat: } start = grp_goal; - if (!claim_block(sb_bgl_lock(EXT3_SB(sb), group), + if (!claim_block(sb_bgl_lock(EXT4_SB(sb), group), grp_goal, bitmap_bh)) { /* * The block was allocated by another thread, or it was @@ -866,8 +866,8 @@ repeat: num++; grp_goal++; while (num < *count && grp_goal < end - && ext3_test_allocatable(grp_goal, bitmap_bh) - && claim_block(sb_bgl_lock(EXT3_SB(sb), group), + && ext4_test_allocatable(grp_goal, bitmap_bh) + && claim_block(sb_bgl_lock(EXT4_SB(sb), group), grp_goal, bitmap_bh)) { num++; grp_goal++; @@ -913,15 +913,15 @@ fail_access: * */ static int find_next_reservable_window( - struct ext3_reserve_window_node *search_head, - struct ext3_reserve_window_node *my_rsv, + struct ext4_reserve_window_node *search_head, + struct ext4_reserve_window_node *my_rsv, struct super_block * sb, - ext3_fsblk_t start_block, - ext3_fsblk_t last_block) + ext4_fsblk_t start_block, + ext4_fsblk_t last_block) { struct rb_node *next; - struct ext3_reserve_window_node *rsv, *prev; - ext3_fsblk_t cur; + struct ext4_reserve_window_node *rsv, *prev; + ext4_fsblk_t cur; int size = my_rsv->rsv_goal_size; /* TODO: make the start of the reservation window byte-aligned */ @@ -949,7 +949,7 @@ static int find_next_reservable_window( prev = rsv; next = rb_next(&rsv->rsv_node); - rsv = list_entry(next,struct ext3_reserve_window_node,rsv_node); + rsv = list_entry(next,struct ext4_reserve_window_node,rsv_node); /* * Reached the last reservation, we can just append to the @@ -992,7 +992,7 @@ static int find_next_reservable_window( 
my_rsv->rsv_alloc_hit = 0; if (prev != my_rsv) - ext3_rsv_window_add(sb, my_rsv); + ext4_rsv_window_add(sb, my_rsv); return 0; } @@ -1034,20 +1034,20 @@ static int find_next_reservable_window( * @bitmap_bh: the block group block bitmap * */ -static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, - ext3_grpblk_t grp_goal, struct super_block *sb, +static int alloc_new_reservation(struct ext4_reserve_window_node *my_rsv, + ext4_grpblk_t grp_goal, struct super_block *sb, unsigned int group, struct buffer_head *bitmap_bh) { - struct ext3_reserve_window_node *search_head; - ext3_fsblk_t group_first_block, group_end_block, start_block; - ext3_grpblk_t first_free_block; - struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root; + struct ext4_reserve_window_node *search_head; + ext4_fsblk_t group_first_block, group_end_block, start_block; + ext4_grpblk_t first_free_block; + struct rb_root *fs_rsv_root = &EXT4_SB(sb)->s_rsv_window_root; unsigned long size; int ret; - spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; + spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; - group_first_block = ext3_group_first_block_no(sb, group); - group_end_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1); + group_first_block = ext4_group_first_block_no(sb, group); + group_end_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); if (grp_goal < 0) start_block = group_first_block; @@ -1085,8 +1085,8 @@ static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, * otherwise we keep the same size window */ size = size * 2; - if (size > EXT3_MAX_RESERVE_BLOCKS) - size = EXT3_MAX_RESERVE_BLOCKS; + if (size > EXT4_MAX_RESERVE_BLOCKS) + size = EXT4_MAX_RESERVE_BLOCKS; my_rsv->rsv_goal_size= size; } } @@ -1170,20 +1170,20 @@ retry: * Attempt to expand the reservation window large enough to have * required number of free blocks * - * Since ext3_try_to_allocate() will always allocate blocks within + * Since ext4_try_to_allocate() will always allocate blocks within * the reservation window range, if the window size is too small, * multiple blocks allocation has to stop at the end of the reservation * window. 
To make this more efficient, given the total number of * blocks needed and the current size of the window, we try to * expand the reservation window size if necessary on a best-effort - * basis before ext3_new_blocks() tries to allocate blocks, + * basis before ext4_new_blocks() tries to allocate blocks, */ -static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv, +static void try_to_extend_reservation(struct ext4_reserve_window_node *my_rsv, struct super_block *sb, int size) { - struct ext3_reserve_window_node *next_rsv; + struct ext4_reserve_window_node *next_rsv; struct rb_node *next; - spinlock_t *rsv_lock = &EXT3_SB(sb)->s_rsv_window_lock; + spinlock_t *rsv_lock = &EXT4_SB(sb)->s_rsv_window_lock; if (!spin_trylock(rsv_lock)) return; @@ -1193,7 +1193,7 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv, if (!next) my_rsv->rsv_end += size; else { - next_rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node); + next_rsv = list_entry(next, struct ext4_reserve_window_node, rsv_node); if ((next_rsv->rsv_start - my_rsv->rsv_end - 1) >= size) my_rsv->rsv_end += size; @@ -1204,7 +1204,7 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv, } /** - * ext3_try_to_allocate_with_rsv() + * ext4_try_to_allocate_with_rsv() * @sb: superblock * @handle: handle to this transaction * @group: given allocation block group @@ -1232,15 +1232,15 @@ static void try_to_extend_reservation(struct ext3_reserve_window_node *my_rsv, * We use a red-black tree for the per-filesystem reservation list. * */ -static ext3_grpblk_t -ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, +static ext4_grpblk_t +ext4_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, unsigned int group, struct buffer_head *bitmap_bh, - ext3_grpblk_t grp_goal, - struct ext3_reserve_window_node * my_rsv, + ext4_grpblk_t grp_goal, + struct ext4_reserve_window_node * my_rsv, unsigned long *count, int *errp) { - ext3_fsblk_t group_first_block, group_last_block; - ext3_grpblk_t ret = 0; + ext4_fsblk_t group_first_block, group_last_block; + ext4_grpblk_t ret = 0; int fatal; unsigned long num = *count; @@ -1252,7 +1252,7 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, * if the buffer is in BJ_Forget state in the committing transaction. 
	 */
	BUFFER_TRACE(bitmap_bh, "get undo access for new block");
-	fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
+	fatal = ext4_journal_get_undo_access(handle, bitmap_bh);
	if (fatal) {
		*errp = fatal;
		return -1;
@@ -1265,18 +1265,18 @@
	 * or last attempt to allocate a block with reservation turned on failed
	 */
	if (my_rsv == NULL ) {
-		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
+		ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
						grp_goal, count, NULL);
		goto out;
	}
	/*
	 * grp_goal is a group relative block number (if there is a goal)
-	 * 0 < grp_goal < EXT3_BLOCKS_PER_GROUP(sb)
+	 * 0 < grp_goal < EXT4_BLOCKS_PER_GROUP(sb)
	 * first block is a filesystem wide block number
	 * first block is the block number of the first block in this group
	 */
-	group_first_block = ext3_group_first_block_no(sb, group);
-	group_last_block = group_first_block + (EXT3_BLOCKS_PER_GROUP(sb) - 1);
+	group_first_block = ext4_group_first_block_no(sb, group);
+	group_last_block = group_first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1);

	/*
	 * Basically we will allocate a new block from inode's reservation
@@ -1314,10 +1314,10 @@ ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle,

		if ((my_rsv->rsv_start > group_last_block) ||
			(my_rsv->rsv_end < group_first_block)) {
-			rsv_window_dump(&EXT3_SB(sb)->s_rsv_window_root, 1);
+			rsv_window_dump(&EXT4_SB(sb)->s_rsv_window_root, 1);
			BUG();
		}
-		ret = ext3_try_to_allocate(sb, handle, group, bitmap_bh,
+		ret = ext4_try_to_allocate(sb, handle, group, bitmap_bh,
					   grp_goal, &num, &my_rsv->rsv_window);
		if (ret >= 0) {
			my_rsv->rsv_alloc_hit += num;
@@ -1330,7 +1330,7 @@ out:
	if (ret >= 0) {
		BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for "
					"bitmap block");
-		fatal = ext3_journal_dirty_metadata(handle, bitmap_bh);
+		fatal = ext4_journal_dirty_metadata(handle, bitmap_bh);
		if (fatal) {
			*errp = fatal;
			return -1;
@@ -1339,19 +1339,19 @@ out:
	}

	BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
-	ext3_journal_release_buffer(handle, bitmap_bh);
+	ext4_journal_release_buffer(handle, bitmap_bh);
	return ret;
}

/**
- * ext3_has_free_blocks()
+ * ext4_has_free_blocks()
 * @sbi:	in-core super block structure.
 *
 * Check if filesystem has at least 1 free block available for allocation.
 */
-static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
+static int ext4_has_free_blocks(struct ext4_sb_info *sbi)
{
-	ext3_fsblk_t free_blocks, root_blocks;
+	ext4_fsblk_t free_blocks, root_blocks;

	free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
	root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count);
@@ -1364,63 +1364,63 @@ static int ext3_has_free_blocks(struct ext3_sb_info *sbi)
}

/**
- * ext3_should_retry_alloc()
+ * ext4_should_retry_alloc()
 * @sb:			super block
 * @retries:		number of attempts already made
 *
- * ext3_should_retry_alloc() is called when ENOSPC is returned, and if
+ * ext4_should_retry_alloc() is called when ENOSPC is returned, and if
 * it is profitable to retry the operation, this function will wait
 * for the current or committing transaction to complete, and then
 * return TRUE.
 *
 * if the total number of retries exceeds three, return FALSE.
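 *
 * Typical caller pattern, sketched with the allocator interface from
 * this file:
 *
 *	retries = 0;
 * retry:
 *	new = ext4_new_block(handle, inode, goal, &err);
 *	if (!new && err == -ENOSPC &&
 *	    ext4_should_retry_alloc(inode->i_sb, &retries))
 *		goto retry;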
 */
-int ext3_should_retry_alloc(struct super_block *sb, int *retries)
+int ext4_should_retry_alloc(struct super_block *sb, int *retries)
{
-	if (!ext3_has_free_blocks(EXT3_SB(sb)) || (*retries)++ > 3)
+	if (!ext4_has_free_blocks(EXT4_SB(sb)) || (*retries)++ > 3)
		return 0;

	jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);

-	return journal_force_commit_nested(EXT3_SB(sb)->s_journal);
+	return journal_force_commit_nested(EXT4_SB(sb)->s_journal);
}

/**
- * ext3_new_blocks() -- core block(s) allocation function
+ * ext4_new_blocks() -- core block(s) allocation function
 * @handle:		handle to this transaction
 * @inode:		file inode
 * @goal:		given target block (filesystem wide)
 * @count:		target number of blocks to allocate
 * @errp:		error code
 *
- * ext3_new_blocks uses a goal block to assist allocation.  It tries to
+ * ext4_new_blocks uses a goal block to assist allocation.  It tries to
 * allocate block(s) from the block group that contains the goal block first. If that
 * fails, it will try to allocate block(s) from other block groups without
 * any specific goal block.
 *
 */
-ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
-			ext3_fsblk_t goal, unsigned long *count, int *errp)
+ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode,
+			ext4_fsblk_t goal, unsigned long *count, int *errp)
{
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *gdp_bh;
	int group_no;
	int goal_group;
-	ext3_grpblk_t grp_target_blk;	/* blockgroup relative goal block */
-	ext3_grpblk_t grp_alloc_blk;	/* blockgroup-relative allocated block*/
-	ext3_fsblk_t ret_block;		/* filesyetem-wide allocated block */
+	ext4_grpblk_t grp_target_blk;	/* blockgroup relative goal block */
+	ext4_grpblk_t grp_alloc_blk;	/* blockgroup-relative allocated block*/
+	ext4_fsblk_t ret_block;		/* filesystem-wide allocated block */
	int bgi;			/* blockgroup iteration index */
	int fatal = 0, err;
	int performed_allocation = 0;
-	ext3_grpblk_t free_blocks;	/* number of free blocks in a group */
+	ext4_grpblk_t free_blocks;	/* number of free blocks in a group */
	struct super_block *sb;
-	struct ext3_group_desc *gdp;
-	struct ext3_super_block *es;
-	struct ext3_sb_info *sbi;
-	struct ext3_reserve_window_node *my_rsv = NULL;
-	struct ext3_block_alloc_info *block_i;
+	struct ext4_group_desc *gdp;
+	struct ext4_super_block *es;
+	struct ext4_sb_info *sbi;
+	struct ext4_reserve_window_node *my_rsv = NULL;
+	struct ext4_block_alloc_info *block_i;
	unsigned short windowsz = 0;
-#ifdef EXT3FS_DEBUG
+#ifdef EXT4FS_DEBUG
	static int goal_hits, goal_attempts;
#endif
	unsigned long ngroups;
@@ -1429,7 +1429,7 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
	*errp = -ENOSPC;
	sb = inode->i_sb;
	if (!sb) {
-		printk("ext3_new_block: nonexistent device");
+		printk("ext4_new_block: nonexistent device");
		return 0;
	}
@@ -1441,22 +1441,22 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode,
		return 0;
	}

-	sbi = EXT3_SB(sb);
-	es = EXT3_SB(sb)->s_es;
-	ext3_debug("goal=%lu.\n", goal);
+	sbi = EXT4_SB(sb);
+	es = EXT4_SB(sb)->s_es;
+	ext4_debug("goal=%lu.\n", goal);
	/*
	 * Allocate a block from reservation only when
	 * filesystem is mounted with reservation (default, -o reservation), and
	 * it's a regular file, and
	 * the desired window size is greater than 0 (One could use ioctl
-	 * command EXT3_IOC_SETRSVSZ to set the window size to 0 to turn off
+	 * command EXT4_IOC_SETRSVSZ to set the window size to 0 to turn off
	 * reservation on that particular file)
	 */
-	block_i = EXT3_I(inode)->i_block_alloc_info;
+
block_i = EXT4_I(inode)->i_block_alloc_info; if (block_i && ((windowsz = block_i->rsv_window_node.rsv_goal_size) > 0)) my_rsv = &block_i->rsv_window_node; - if (!ext3_has_free_blocks(sbi)) { + if (!ext4_has_free_blocks(sbi)) { *errp = -ENOSPC; goto out; } @@ -1468,10 +1468,10 @@ ext3_fsblk_t ext3_new_blocks(handle_t *handle, struct inode *inode, goal >= le32_to_cpu(es->s_blocks_count)) goal = le32_to_cpu(es->s_first_data_block); group_no = (goal - le32_to_cpu(es->s_first_data_block)) / - EXT3_BLOCKS_PER_GROUP(sb); + EXT4_BLOCKS_PER_GROUP(sb); goal_group = group_no; retry_alloc: - gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); + gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); if (!gdp) goto io_error; @@ -1486,11 +1486,11 @@ retry_alloc: if (free_blocks > 0) { grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb)); + EXT4_BLOCKS_PER_GROUP(sb)); bitmap_bh = read_block_bitmap(sb, group_no); if (!bitmap_bh) goto io_error; - grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle, + grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, group_no, bitmap_bh, grp_target_blk, my_rsv, &num, &fatal); if (fatal) @@ -1499,7 +1499,7 @@ retry_alloc: goto allocated; } - ngroups = EXT3_SB(sb)->s_groups_count; + ngroups = EXT4_SB(sb)->s_groups_count; smp_rmb(); /* @@ -1510,7 +1510,7 @@ retry_alloc: group_no++; if (group_no >= ngroups) group_no = 0; - gdp = ext3_get_group_desc(sb, group_no, &gdp_bh); + gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); if (!gdp) { *errp = -EIO; goto out; @@ -1531,7 +1531,7 @@ retry_alloc: /* * try to allocate block(s) from this group, without a goal(-1). */ - grp_alloc_blk = ext3_try_to_allocate_with_rsv(sb, handle, + grp_alloc_blk = ext4_try_to_allocate_with_rsv(sb, handle, group_no, bitmap_bh, -1, my_rsv, &num, &fatal); if (fatal) @@ -1557,23 +1557,23 @@ retry_alloc: allocated: - ext3_debug("using block group %d(%d)\n", + ext4_debug("using block group %d(%d)\n", group_no, gdp->bg_free_blocks_count); BUFFER_TRACE(gdp_bh, "get_write_access"); - fatal = ext3_journal_get_write_access(handle, gdp_bh); + fatal = ext4_journal_get_write_access(handle, gdp_bh); if (fatal) goto out; - ret_block = grp_alloc_blk + ext3_group_first_block_no(sb, group_no); + ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) || in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) || in_range(ret_block, le32_to_cpu(gdp->bg_inode_table), - EXT3_SB(sb)->s_itb_per_group) || + EXT4_SB(sb)->s_itb_per_group) || in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table), - EXT3_SB(sb)->s_itb_per_group)) - ext3_error(sb, "ext3_new_block", + EXT4_SB(sb)->s_itb_per_group)) + ext4_error(sb, "ext4_new_block", "Allocating block in system zone - " "blocks from "E3FSBLK", length %lu", ret_block, num); @@ -1598,20 +1598,20 @@ allocated: int i; for (i = 0; i < num; i++) { - if (ext3_test_bit(grp_alloc_blk+i, + if (ext4_test_bit(grp_alloc_blk+i, bh2jh(bitmap_bh)->b_committed_data)) { printk("%s: block was unexpectedly set in " "b_committed_data\n", __FUNCTION__); } } } - ext3_debug("found bit %d\n", grp_alloc_blk); + ext4_debug("found bit %d\n", grp_alloc_blk); spin_unlock(sb_bgl_lock(sbi, group_no)); jbd_unlock_bh_state(bitmap_bh); #endif if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) { - ext3_error(sb, "ext3_new_block", + ext4_error(sb, "ext4_new_block", "block("E3FSBLK") >= blocks count(%d) - " "block_group = %d, es == %p ", ret_block, le32_to_cpu(es->s_blocks_count), 
group_no, es); @@ -1623,7 +1623,7 @@ allocated: * list of some description. We don't know in advance whether * the caller wants to use it as metadata or data. */ - ext3_debug("allocating block %lu. Goal hits %d of %d.\n", + ext4_debug("allocating block %lu. Goal hits %d of %d.\n", ret_block, goal_hits, goal_attempts); spin_lock(sb_bgl_lock(sbi, group_no)); @@ -1633,7 +1633,7 @@ allocated: percpu_counter_mod(&sbi->s_freeblocks_counter, -num); BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); - err = ext3_journal_dirty_metadata(handle, gdp_bh); + err = ext4_journal_dirty_metadata(handle, gdp_bh); if (!fatal) fatal = err; @@ -1652,7 +1652,7 @@ io_error: out: if (fatal) { *errp = fatal; - ext3_std_error(sb, fatal); + ext4_std_error(sb, fatal); } /* * Undo the block allocation @@ -1663,40 +1663,40 @@ out: return 0; } -ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode, - ext3_fsblk_t goal, int *errp) +ext4_fsblk_t ext4_new_block(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, int *errp) { unsigned long count = 1; - return ext3_new_blocks(handle, inode, goal, &count, errp); + return ext4_new_blocks(handle, inode, goal, &count, errp); } /** - * ext3_count_free_blocks() -- count filesystem free blocks + * ext4_count_free_blocks() -- count filesystem free blocks * @sb: superblock * * Adds up the number of free blocks from each block group. */ -ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb) +ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) { - ext3_fsblk_t desc_count; - struct ext3_group_desc *gdp; + ext4_fsblk_t desc_count; + struct ext4_group_desc *gdp; int i; - unsigned long ngroups = EXT3_SB(sb)->s_groups_count; -#ifdef EXT3FS_DEBUG - struct ext3_super_block *es; - ext3_fsblk_t bitmap_count; + unsigned long ngroups = EXT4_SB(sb)->s_groups_count; +#ifdef EXT4FS_DEBUG + struct ext4_super_block *es; + ext4_fsblk_t bitmap_count; unsigned long x; struct buffer_head *bitmap_bh = NULL; - es = EXT3_SB(sb)->s_es; + es = EXT4_SB(sb)->s_es; desc_count = 0; bitmap_count = 0; gdp = NULL; smp_rmb(); for (i = 0; i < ngroups; i++) { - gdp = ext3_get_group_desc(sb, i, NULL); + gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; desc_count += le16_to_cpu(gdp->bg_free_blocks_count); @@ -1705,13 +1705,13 @@ ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb) if (bitmap_bh == NULL) continue; - x = ext3_count_free(bitmap_bh, sb->s_blocksize); + x = ext4_count_free(bitmap_bh, sb->s_blocksize); printk("group %d: stored = %d, counted = %lu\n", i, le16_to_cpu(gdp->bg_free_blocks_count), x); bitmap_count += x; } brelse(bitmap_bh); - printk("ext3_count_free_blocks: stored = "E3FSBLK + printk("ext4_count_free_blocks: stored = "E3FSBLK ", computed = "E3FSBLK", "E3FSBLK"\n", le32_to_cpu(es->s_free_blocks_count), desc_count, bitmap_count); @@ -1720,7 +1720,7 @@ ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb) desc_count = 0; smp_rmb(); for (i = 0; i < ngroups; i++) { - gdp = ext3_get_group_desc(sb, i, NULL); + gdp = ext4_get_group_desc(sb, i, NULL); if (!gdp) continue; desc_count += le16_to_cpu(gdp->bg_free_blocks_count); @@ -1731,11 +1731,11 @@ ext3_fsblk_t ext3_count_free_blocks(struct super_block *sb) } static inline int -block_in_use(ext3_fsblk_t block, struct super_block *sb, unsigned char *map) +block_in_use(ext4_fsblk_t block, struct super_block *sb, unsigned char *map) { - return ext3_test_bit ((block - - le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb), map); + return ext4_test_bit ((block 
- + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) % + EXT4_BLOCKS_PER_GROUP(sb), map); } static inline int test_root(int a, int b) @@ -1747,7 +1747,7 @@ static inline int test_root(int a, int b) return num == a; } -static int ext3_group_sparse(int group) +static int ext4_group_sparse(int group) { if (group <= 1) return 1; @@ -1758,44 +1758,44 @@ static int ext3_group_sparse(int group) } /** - * ext3_bg_has_super - number of blocks used by the superblock in group + * ext4_bg_has_super - number of blocks used by the superblock in group * @sb: superblock for filesystem * @group: group number to check * * Return the number of blocks used by the superblock (primary or backup) * in this group. Currently this will be only 0 or 1. */ -int ext3_bg_has_super(struct super_block *sb, int group) +int ext4_bg_has_super(struct super_block *sb, int group) { - if (EXT3_HAS_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) && - !ext3_group_sparse(group)) + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && + !ext4_group_sparse(group)) return 0; return 1; } -static unsigned long ext3_bg_num_gdb_meta(struct super_block *sb, int group) +static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb, int group) { - unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb); - unsigned long first = metagroup * EXT3_DESC_PER_BLOCK(sb); - unsigned long last = first + EXT3_DESC_PER_BLOCK(sb) - 1; + unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb); + unsigned long first = metagroup * EXT4_DESC_PER_BLOCK(sb); + unsigned long last = first + EXT4_DESC_PER_BLOCK(sb) - 1; if (group == first || group == first + 1 || group == last) return 1; return 0; } -static unsigned long ext3_bg_num_gdb_nometa(struct super_block *sb, int group) +static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, int group) { - if (EXT3_HAS_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER) && - !ext3_group_sparse(group)) + if (EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) && + !ext4_group_sparse(group)) return 0; - return EXT3_SB(sb)->s_gdb_count; + return EXT4_SB(sb)->s_gdb_count; } /** - * ext3_bg_num_gdb - number of blocks used by the group table in group + * ext4_bg_num_gdb - number of blocks used by the group table in group * @sb: superblock for filesystem * @group: group number to check * @@ -1803,16 +1803,16 @@ static unsigned long ext3_bg_num_gdb_nometa(struct super_block *sb, int group) * (primary or backup) in this group. In the future there may be a * different number of descriptor blocks in each group. 
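 *
 * With the sparse_super feature (see ext4_group_sparse() above),
 * backup superblocks and descriptors live only in groups 0, 1 and
 * the powers of 3, 5 and 7: groups 0, 1, 3, 5, 7, 9, 25, 27, 49,
 * 81, ... rather than in every group.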
*/ -unsigned long ext3_bg_num_gdb(struct super_block *sb, int group) +unsigned long ext4_bg_num_gdb(struct super_block *sb, int group) { unsigned long first_meta_bg = - le32_to_cpu(EXT3_SB(sb)->s_es->s_first_meta_bg); - unsigned long metagroup = group / EXT3_DESC_PER_BLOCK(sb); + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg); + unsigned long metagroup = group / EXT4_DESC_PER_BLOCK(sb); - if (!EXT3_HAS_INCOMPAT_FEATURE(sb,EXT3_FEATURE_INCOMPAT_META_BG) || + if (!EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG) || metagroup < first_meta_bg) - return ext3_bg_num_gdb_nometa(sb,group); + return ext4_bg_num_gdb_nometa(sb,group); - return ext3_bg_num_gdb_meta(sb,group); + return ext4_bg_num_gdb_meta(sb,group); } diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c index b9176eed98d1..f4b35706f39c 100644 --- a/fs/ext4/bitmap.c +++ b/fs/ext4/bitmap.c @@ -1,5 +1,5 @@ /* - * linux/fs/ext3/bitmap.c + * linux/fs/ext4/bitmap.c * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) @@ -9,13 +9,13 @@ #include <linux/buffer_head.h> #include <linux/jbd.h> -#include <linux/ext3_fs.h> +#include <linux/ext4_fs.h> -#ifdef EXT3FS_DEBUG +#ifdef EXT4FS_DEBUG static int nibblemap[] = {4, 3, 3, 2, 3, 2, 2, 1, 3, 2, 2, 1, 2, 1, 1, 0}; -unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars) +unsigned long ext4_count_free (struct buffer_head * map, unsigned int numchars) { unsigned int i; unsigned long sum = 0; @@ -28,5 +28,5 @@ unsigned long ext3_count_free (struct buffer_head * map, unsigned int numchars) return (sum); } -#endif /* EXT3FS_DEBUG */ +#endif /* EXT4FS_DEBUG */ diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index d0b54f30b914..ec114d7886cc 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -1,5 +1,5 @@ /* - * linux/fs/ext3/dir.c + * linux/fs/ext4/dir.c * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) @@ -12,7 +12,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * - * ext3 directory handling functions + * ext4 directory handling functions * * Big-endian to little-endian byte-swapping/bitmaps by * David S. Miller (davem@caip.rutgers.edu), 1995 @@ -23,69 +23,69 @@ #include <linux/fs.h> #include <linux/jbd.h> -#include <linux/ext3_fs.h> +#include <linux/ext4_fs.h> #include <linux/buffer_head.h> #include <linux/smp_lock.h> #include <linux/slab.h> #include <linux/rbtree.h> -static unsigned char ext3_filetype_table[] = { +static unsigned char ext4_filetype_table[] = { DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK }; -static int ext3_readdir(struct file *, void *, filldir_t); -static int ext3_dx_readdir(struct file * filp, +static int ext4_readdir(struct file *, void *, filldir_t); +static int ext4_dx_readdir(struct file * filp, void * dirent, filldir_t filldir); -static int ext3_release_dir (struct inode * inode, +static int ext4_release_dir (struct inode * inode, struct file * filp); -const struct file_operations ext3_dir_operations = { +const struct file_operations ext4_dir_operations = { .llseek = generic_file_llseek, .read = generic_read_dir, - .readdir = ext3_readdir, /* we take BKL. needed?*/ - .ioctl = ext3_ioctl, /* BKL held */ + .readdir = ext4_readdir, /* we take BKL. 
needed?*/ + .ioctl = ext4_ioctl, /* BKL held */ #ifdef CONFIG_COMPAT - .compat_ioctl = ext3_compat_ioctl, + .compat_ioctl = ext4_compat_ioctl, #endif - .fsync = ext3_sync_file, /* BKL held */ -#ifdef CONFIG_EXT3_INDEX - .release = ext3_release_dir, + .fsync = ext4_sync_file, /* BKL held */ +#ifdef CONFIG_EXT4_INDEX + .release = ext4_release_dir, #endif }; static unsigned char get_dtype(struct super_block *sb, int filetype) { - if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE) || - (filetype >= EXT3_FT_MAX)) + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE) || + (filetype >= EXT4_FT_MAX)) return DT_UNKNOWN; - return (ext3_filetype_table[filetype]); + return (ext4_filetype_table[filetype]); } -int ext3_check_dir_entry (const char * function, struct inode * dir, - struct ext3_dir_entry_2 * de, +int ext4_check_dir_entry (const char * function, struct inode * dir, + struct ext4_dir_entry_2 * de, struct buffer_head * bh, unsigned long offset) { const char * error_msg = NULL; const int rlen = le16_to_cpu(de->rec_len); - if (rlen < EXT3_DIR_REC_LEN(1)) + if (rlen < EXT4_DIR_REC_LEN(1)) error_msg = "rec_len is smaller than minimal"; else if (rlen % 4 != 0) error_msg = "rec_len % 4 != 0"; - else if (rlen < EXT3_DIR_REC_LEN(de->name_len)) + else if (rlen < EXT4_DIR_REC_LEN(de->name_len)) error_msg = "rec_len is too small for name_len"; else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize) error_msg = "directory entry across blocks"; else if (le32_to_cpu(de->inode) > - le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) + le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)) error_msg = "inode out of bounds"; if (error_msg != NULL) - ext3_error (dir->i_sb, function, + ext4_error (dir->i_sb, function, "bad entry in directory #%lu: %s - " "offset=%lu, inode=%lu, rec_len=%d, name_len=%d", dir->i_ino, error_msg, offset, @@ -94,13 +94,13 @@ int ext3_check_dir_entry (const char * function, struct inode * dir, return error_msg == NULL ? 1 : 0; } -static int ext3_readdir(struct file * filp, +static int ext4_readdir(struct file * filp, void * dirent, filldir_t filldir) { int error = 0; unsigned long offset; int i, stored; - struct ext3_dir_entry_2 *de; + struct ext4_dir_entry_2 *de; struct super_block *sb; int err; struct inode *inode = filp->f_dentry->d_inode; @@ -108,12 +108,12 @@ static int ext3_readdir(struct file * filp, sb = inode->i_sb; -#ifdef CONFIG_EXT3_INDEX - if (EXT3_HAS_COMPAT_FEATURE(inode->i_sb, - EXT3_FEATURE_COMPAT_DIR_INDEX) && - ((EXT3_I(inode)->i_flags & EXT3_INDEX_FL) || +#ifdef CONFIG_EXT4_INDEX + if (EXT4_HAS_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_COMPAT_DIR_INDEX) && + ((EXT4_I(inode)->i_flags & EXT4_INDEX_FL) || ((inode->i_size >> sb->s_blocksize_bits) == 1))) { - err = ext3_dx_readdir(filp, dirent, filldir); + err = ext4_dx_readdir(filp, dirent, filldir); if (err != ERR_BAD_DX_DIR) { ret = err; goto out; @@ -122,19 +122,19 @@ static int ext3_readdir(struct file * filp, * We don't set the inode dirty flag since it's not * critical that it get flushed back to the disk. 
		 */
-		EXT3_I(filp->f_dentry->d_inode)->i_flags &= ~EXT3_INDEX_FL;
+		EXT4_I(filp->f_dentry->d_inode)->i_flags &= ~EXT4_INDEX_FL;
	}
#endif
	stored = 0;
	offset = filp->f_pos & (sb->s_blocksize - 1);

	while (!error && !stored && filp->f_pos < inode->i_size) {
-		unsigned long blk = filp->f_pos >> EXT3_BLOCK_SIZE_BITS(sb);
+		unsigned long blk = filp->f_pos >> EXT4_BLOCK_SIZE_BITS(sb);
		struct buffer_head map_bh;
		struct buffer_head *bh = NULL;

		map_bh.b_state = 0;
-		err = ext3_get_blocks_handle(NULL, inode, blk, 1,
+		err = ext4_get_blocks_handle(NULL, inode, blk, 1,
						&map_bh, 0, 0);
		if (err > 0) {
			page_cache_readahead(sb->s_bdev->bd_inode->i_mapping,
@@ -143,7 +143,7 @@ static int ext3_readdir(struct file * filp,
				&filp->f_ra,
				filp,
				map_bh.b_blocknr >>
					(PAGE_CACHE_SHIFT - inode->i_blkbits),
				1);
-			bh = ext3_bread(NULL, inode, blk, 0, &err);
+			bh = ext4_bread(NULL, inode, blk, 0, &err);
		}

		/*
@@ -151,7 +151,7 @@ static int ext3_readdir(struct file * filp,
		 * of recovering data when there's a bad sector
		 */
		if (!bh) {
-			ext3_error (sb, "ext3_readdir",
+			ext4_error (sb, "ext4_readdir",
				"directory #%lu contains a hole at offset %lu",
				inode->i_ino, (unsigned long)filp->f_pos);
			filp->f_pos += sb->s_blocksize - offset;
@@ -165,7 +165,7 @@ revalidate:
		 * to make sure. */
		if (filp->f_version != inode->i_version) {
			for (i = 0; i < sb->s_blocksize && i < offset; ) {
-				de = (struct ext3_dir_entry_2 *)
+				de = (struct ext4_dir_entry_2 *)
					(bh->b_data + i);
				/* It's too expensive to do a full
				 * dirent test each time round this
				 * loop, but we do have to test at
				 * least that it is non-zero. A
				 * failure will be detected in the
				 * dirent test below. */
				if (le16_to_cpu(de->rec_len) <
-						EXT3_DIR_REC_LEN(1)
+						EXT4_DIR_REC_LEN(1))
					break;
				i += le16_to_cpu(de->rec_len);
			}
@@ -186,8 +186,8 @@ revalidate:

		while (!error && filp->f_pos < inode->i_size
		       && offset < sb->s_blocksize) {
-			de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
-			if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
+			de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
+			if (!ext4_check_dir_entry ("ext4_readdir", inode, de,
						   bh, offset)) {
				/* On error, skip the f_pos to the
				   next block. */
@@ -228,7 +228,7 @@ out:
	return ret;
}

-#ifdef CONFIG_EXT3_INDEX
+#ifdef CONFIG_EXT4_INDEX
/*
 * These functions convert from the major/minor hash to an f_pos
 * value.
@@ -323,7 +323,7 @@ static struct dir_private_info *create_dir_info(loff_t pos)
	return p;
}

-void ext3_htree_free_dir_info(struct dir_private_info *p)
+void ext4_htree_free_dir_info(struct dir_private_info *p)
{
	free_rb_tree_fname(&p->root);
	kfree(p);
@@ -332,9 +332,9 @@ void ext3_htree_free_dir_info(struct dir_private_info *p)
/*
 * Given a directory entry, enter it into the fname rb tree.
 */
-int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
+int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
			    __u32 minor_hash,
-			    struct ext3_dir_entry_2 *dirent)
+			    struct ext4_dir_entry_2 *dirent)
{
	struct rb_node **p, *parent = NULL;
	struct fname * fname, *new_fn;
@@ -390,7 +390,7 @@ int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,

/*
- * This is a helper function for ext3_dx_readdir. It calls filldir
+ * This is a helper function for ext4_dx_readdir. It calls filldir
 * for all entries on the fname linked list. (Normally there is only
 * one entry on the linked list, unless there are 62 bit hash collisions.)
*/ @@ -425,7 +425,7 @@ static int call_filldir(struct file * filp, void * dirent, return 0; } -static int ext3_dx_readdir(struct file * filp, +static int ext4_dx_readdir(struct file * filp, void * dirent, filldir_t filldir) { struct dir_private_info *info = filp->private_data; @@ -440,7 +440,7 @@ static int ext3_dx_readdir(struct file * filp, filp->private_data = info; } - if (filp->f_pos == EXT3_HTREE_EOF) + if (filp->f_pos == EXT4_HTREE_EOF) return 0; /* EOF */ /* Some one has messed with f_pos; reset the world */ @@ -474,13 +474,13 @@ static int ext3_dx_readdir(struct file * filp, info->curr_node = NULL; free_rb_tree_fname(&info->root); filp->f_version = inode->i_version; - ret = ext3_htree_fill_tree(filp, info->curr_hash, + ret = ext4_htree_fill_tree(filp, info->curr_hash, info->curr_minor_hash, &info->next_hash); if (ret < 0) return ret; if (ret == 0) { - filp->f_pos = EXT3_HTREE_EOF; + filp->f_pos = EXT4_HTREE_EOF; break; } info->curr_node = rb_first(&info->root); @@ -495,7 +495,7 @@ static int ext3_dx_readdir(struct file * filp, info->curr_node = rb_next(info->curr_node); if (!info->curr_node) { if (info->next_hash == ~0) { - filp->f_pos = EXT3_HTREE_EOF; + filp->f_pos = EXT4_HTREE_EOF; break; } info->curr_hash = info->next_hash; @@ -507,10 +507,10 @@ finished: return 0; } -static int ext3_release_dir (struct inode * inode, struct file * filp) +static int ext4_release_dir (struct inode * inode, struct file * filp) { if (filp->private_data) - ext3_htree_free_dir_info(filp->private_data); + ext4_htree_free_dir_info(filp->private_data); return 0; } diff --git a/fs/ext4/file.c b/fs/ext4/file.c index e96c388047e0..d938fbe1e08b 100644 --- a/fs/ext4/file.c +++ b/fs/ext4/file.c @@ -1,5 +1,5 @@ /* - * linux/fs/ext3/file.c + * linux/fs/ext4/file.c * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) @@ -12,7 +12,7 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * - * ext3 fs regular file handling primitives + * ext4 fs regular file handling primitives * * 64-bit file support on 64-bit platforms by Jakub Jelinek * (jj@sunsite.ms.mff.cuni.cz) @@ -21,34 +21,34 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/jbd.h> -#include <linux/ext3_fs.h> -#include <linux/ext3_jbd.h> +#include <linux/ext4_fs.h> +#include <linux/ext4_jbd.h> #include "xattr.h" #include "acl.h" /* * Called when an inode is released. Note that this is different - * from ext3_file_open: open gets called at every open, but release + * from ext4_file_open: open gets called at every open, but release * gets called only when /all/ the files are closed. 
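 *
 * (Descriptors shared via dup() or fork() reference the same
 * struct file, so release runs once, when the last reference
 * is dropped.)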
*/ -static int ext3_release_file (struct inode * inode, struct file * filp) +static int ext4_release_file (struct inode * inode, struct file * filp) { /* if we are the last writer on the inode, drop the block reservation */ if ((filp->f_mode & FMODE_WRITE) && (atomic_read(&inode->i_writecount) == 1)) { - mutex_lock(&EXT3_I(inode)->truncate_mutex); - ext3_discard_reservation(inode); - mutex_unlock(&EXT3_I(inode)->truncate_mutex); + mutex_lock(&EXT4_I(inode)->truncate_mutex); + ext4_discard_reservation(inode); + mutex_unlock(&EXT4_I(inode)->truncate_mutex); } if (is_dx(inode) && filp->private_data) - ext3_htree_free_dir_info(filp->private_data); + ext4_htree_free_dir_info(filp->private_data); return 0; } static ssize_t -ext3_file_write(struct kiocb *iocb, const struct iovec *iov, +ext4_file_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; @@ -79,7 +79,7 @@ ext3_file_write(struct kiocb *iocb, const struct iovec *iov, * Open question --- do we care about flushing timestamps too * if the inode is IS_SYNC? */ - if (!ext3_should_journal_data(inode)) + if (!ext4_should_journal_data(inode)) return ret; goto force_commit; @@ -100,40 +100,40 @@ ext3_file_write(struct kiocb *iocb, const struct iovec *iov, */ force_commit: - err = ext3_force_commit(inode->i_sb); + err = ext4_force_commit(inode->i_sb); if (err) return err; return ret; } -const struct file_operations ext3_file_operations = { +const struct file_operations ext4_file_operations = { .llseek = generic_file_llseek, .read = do_sync_read, .write = do_sync_write, .aio_read = generic_file_aio_read, - .aio_write = ext3_file_write, - .ioctl = ext3_ioctl, + .aio_write = ext4_file_write, + .ioctl = ext4_ioctl, #ifdef CONFIG_COMPAT - .compat_ioctl = ext3_compat_ioctl, + .compat_ioctl = ext4_compat_ioctl, #endif .mmap = generic_file_mmap, .open = generic_file_open, - .release = ext3_release_file, - .fsync = ext3_sync_file, + .release = ext4_release_file, + .fsync = ext4_sync_file, .sendfile = generic_file_sendfile, .splice_read = generic_file_splice_read, .splice_write = generic_file_splice_write, }; -struct inode_operations ext3_file_inode_operations = { - .truncate = ext3_truncate, - .setattr = ext3_setattr, -#ifdef CONFIG_EXT3_FS_XATTR +struct inode_operations ext4_file_inode_operations = { + .truncate = ext4_truncate, + .setattr = ext4_setattr, +#ifdef CONFIG_EXT4DEV_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, - .listxattr = ext3_listxattr, + .listxattr = ext4_listxattr, .removexattr = generic_removexattr, #endif - .permission = ext3_permission, + .permission = ext4_permission, }; diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index dd1fd3c0fc05..272faa27761d 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -1,5 +1,5 @@ /* - * linux/fs/ext3/fsync.c + * linux/fs/ext4/fsync.c * * Copyright (C) 1993 Stephen Tweedie (sct@redhat.com) * from @@ -9,7 +9,7 @@ * from * linux/fs/minix/truncate.c Copyright (C) 1991, 1992 Linus Torvalds * - * ext3fs fsync primitive + * ext4fs fsync primitive * * Big-endian to little-endian byte-swapping/bitmaps by * David S. Miller (davem@caip.rutgers.edu), 1995 @@ -27,11 +27,11 @@ #include <linux/sched.h> #include <linux/writeback.h> #include <linux/jbd.h> -#include <linux/ext3_fs.h> -#include <linux/ext3_jbd.h> +#include <linux/ext4_fs.h> +#include <linux/ext4_jbd.h> /* - * akpm: A new design for ext3_sync_file(). + * akpm: A new design for ext4_sync_file(). 
* * This is only called from sys_fsync(), sys_fdatasync() and sys_msync(). * There cannot be a transaction open by this task. @@ -42,12 +42,12 @@ * inode to disk. */ -int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) +int ext4_sync_file(struct file * file, struct dentry *dentry, int datasync) { struct inode *inode = dentry->d_inode; int ret = 0; - J_ASSERT(ext3_journal_current_handle() == 0); + J_ASSERT(ext4_journal_current_handle() == 0); /* * data=writeback: @@ -61,14 +61,14 @@ int ext3_sync_file(struct file * file, struct dentry *dentry, int datasync) * * data=journal: * filemap_fdatawrite won't do anything (the buffers are clean). - * ext3_force_commit will write the file data into the journal and + * ext4_force_commit will write the file data into the journal and * will wait on that. * filemap_fdatawait() will encounter a ton of newly-dirtied pages * (they were dirtied by commit). But that's OK - the blocks are * safe in-journal, which is all fsync() needs to ensure. */ - if (ext3_should_journal_data(inode)) { - ret = ext3_force_commit(inode->i_sb); + if (ext4_should_journal_data(inode)) { + ret = ext4_force_commit(inode->i_sb); goto out; } diff --git a/fs/ext4/hash.c b/fs/ext4/hash.c index deeb27b5ba83..d15bb4274428 100644 --- a/fs/ext4/hash.c +++ b/fs/ext4/hash.c @@ -1,5 +1,5 @@ /* - * linux/fs/ext3/hash.c + * linux/fs/ext4/hash.c * * Copyright (C) 2002 by Theodore Ts'o * @@ -12,7 +12,7 @@ #include <linux/fs.h> #include <linux/jbd.h> #include <linux/sched.h> -#include <linux/ext3_fs.h> +#include <linux/ext4_fs.h> #include <linux/cryptohash.h> #define DELTA 0x9E3779B9 @@ -89,7 +89,7 @@ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num) * represented, and whether or not the returned hash is 32 bits or 64 * bits. 32 bit hashes will return 0 for the minor hash. 
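
The hash.c hunks above pull in DELTA (0x9E3779B9) for the TEA-based directory hash that str2hashbuf() feeds. For orientation, a sketch of the classic 16-round TEA mixing pass such a hash builds on; the key schedule and buffer preparation are elided, and this is illustrative rather than the file's exact TEA_transform():

    #include <stdint.h>

    #define DELTA 0x9E3779B9u

    /* One classic TEA pass: mix in[0..3] into the two hash words in buf. */
    static void tea_mix(uint32_t buf[2], const uint32_t in[4])
    {
        uint32_t sum = 0, b0 = buf[0], b1 = buf[1];
        int n = 16;

        while (n-- > 0) {
            sum += DELTA;
            b0  += ((b1 << 4) + in[0]) ^ (b1 + sum) ^ ((b1 >> 5) + in[1]);
            b1  += ((b0 << 4) + in[2]) ^ (b0 + sum) ^ ((b0 >> 5) + in[3]);
        }
        buf[0] += b0;
        buf[1] += b1;
    }
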
*/ -int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) +int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) { __u32 hash; __u32 minor_hash = 0; @@ -144,8 +144,8 @@ int ext3fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo) return -1; } hash = hash & ~1; - if (hash == (EXT3_HTREE_EOF << 1)) - hash = (EXT3_HTREE_EOF-1) << 1; + if (hash == (EXT4_HTREE_EOF << 1)) + hash = (EXT4_HTREE_EOF-1) << 1; hinfo->hash = hash; hinfo->minor_hash = minor_hash; return 0; diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index e45dbd651736..4b92066ca08f 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -1,5 +1,5 @@ /* - * linux/fs/ext3/ialloc.c + * linux/fs/ext4/ialloc.c * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) @@ -15,8 +15,8 @@ #include <linux/time.h> #include <linux/fs.h> #include <linux/jbd.h> -#include <linux/ext3_fs.h> -#include <linux/ext3_jbd.h> +#include <linux/ext4_fs.h> +#include <linux/ext4_jbd.h> #include <linux/stat.h> #include <linux/string.h> #include <linux/quotaops.h> @@ -53,16 +53,16 @@ static struct buffer_head * read_inode_bitmap(struct super_block * sb, unsigned long block_group) { - struct ext3_group_desc *desc; + struct ext4_group_desc *desc; struct buffer_head *bh = NULL; - desc = ext3_get_group_desc(sb, block_group, NULL); + desc = ext4_get_group_desc(sb, block_group, NULL); if (!desc) goto error_out; bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); if (!bh) - ext3_error(sb, "read_inode_bitmap", + ext4_error(sb, "read_inode_bitmap", "Cannot read inode bitmap - " "block_group = %lu, inode_bitmap = %u", block_group, le32_to_cpu(desc->bg_inode_bitmap)); @@ -86,7 +86,7 @@ error_out: * though), and then we'd have two inodes sharing the * same inode number and space on the harddisk. */ -void ext3_free_inode (handle_t *handle, struct inode * inode) +void ext4_free_inode (handle_t *handle, struct inode * inode) { struct super_block * sb = inode->i_sb; int is_directory; @@ -95,36 +95,36 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) struct buffer_head *bh2; unsigned long block_group; unsigned long bit; - struct ext3_group_desc * gdp; - struct ext3_super_block * es; - struct ext3_sb_info *sbi; + struct ext4_group_desc * gdp; + struct ext4_super_block * es; + struct ext4_sb_info *sbi; int fatal = 0, err; if (atomic_read(&inode->i_count) > 1) { - printk ("ext3_free_inode: inode has count=%d\n", + printk ("ext4_free_inode: inode has count=%d\n", atomic_read(&inode->i_count)); return; } if (inode->i_nlink) { - printk ("ext3_free_inode: inode has nlink=%d\n", + printk ("ext4_free_inode: inode has nlink=%d\n", inode->i_nlink); return; } if (!sb) { - printk("ext3_free_inode: inode on nonexistent device\n"); + printk("ext4_free_inode: inode on nonexistent device\n"); return; } - sbi = EXT3_SB(sb); + sbi = EXT4_SB(sb); ino = inode->i_ino; - ext3_debug ("freeing inode %lu\n", ino); + ext4_debug ("freeing inode %lu\n", ino); /* * Note: we must free any quota before locking the superblock, * as writing the quota to disk may need the lock as well. 
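
ext4_free_inode() just below (and ext4_orphan_get() later in this file) locate an inode's bitmap slot with the same two-line mapping; here it is isolated with a worked example, since inode numbers are 1-based:

    /* Map a 1-based inode number to its (block group, bitmap bit) pair. */
    static void ino_to_slot(unsigned long ino, unsigned long inodes_per_group,
                            unsigned long *group, unsigned long *bit)
    {
        *group = (ino - 1) / inodes_per_group;
        *bit   = (ino - 1) % inodes_per_group;
    }
    /* e.g. with 8192 inodes per group: ino 1 -> (0, 0), ino 8193 -> (1, 0). */
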
*/ DQUOT_INIT(inode); - ext3_xattr_delete_inode(handle, inode); + ext4_xattr_delete_inode(handle, inode); DQUOT_FREE_INODE(inode); DQUOT_DROP(inode); @@ -133,33 +133,33 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) /* Do this BEFORE marking the inode not in use or returning an error */ clear_inode (inode); - es = EXT3_SB(sb)->s_es; - if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { - ext3_error (sb, "ext3_free_inode", + es = EXT4_SB(sb)->s_es; + if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { + ext4_error (sb, "ext4_free_inode", "reserved or nonexistent inode %lu", ino); goto error_return; } - block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); - bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb); + block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); + bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); bitmap_bh = read_inode_bitmap(sb, block_group); if (!bitmap_bh) goto error_return; BUFFER_TRACE(bitmap_bh, "get_write_access"); - fatal = ext3_journal_get_write_access(handle, bitmap_bh); + fatal = ext4_journal_get_write_access(handle, bitmap_bh); if (fatal) goto error_return; /* Ok, now we can actually update the inode bitmaps.. */ - if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group), + if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group), bit, bitmap_bh->b_data)) - ext3_error (sb, "ext3_free_inode", + ext4_error (sb, "ext4_free_inode", "bit already cleared for inode %lu", ino); else { - gdp = ext3_get_group_desc (sb, block_group, &bh2); + gdp = ext4_get_group_desc (sb, block_group, &bh2); BUFFER_TRACE(bh2, "get_write_access"); - fatal = ext3_journal_get_write_access(handle, bh2); + fatal = ext4_journal_get_write_access(handle, bh2); if (fatal) goto error_return; if (gdp) { @@ -175,18 +175,18 @@ void ext3_free_inode (handle_t *handle, struct inode * inode) percpu_counter_dec(&sbi->s_dirs_counter); } - BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bh2); + BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, bh2); if (!fatal) fatal = err; } - BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bitmap_bh); + BUFFER_TRACE(bitmap_bh, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, bitmap_bh); if (!fatal) fatal = err; sb->s_dirt = 1; error_return: brelse(bitmap_bh); - ext3_std_error(sb, fatal); + ext4_std_error(sb, fatal); } /* @@ -201,17 +201,17 @@ error_return: */ static int find_group_dir(struct super_block *sb, struct inode *parent) { - int ngroups = EXT3_SB(sb)->s_groups_count; + int ngroups = EXT4_SB(sb)->s_groups_count; unsigned int freei, avefreei; - struct ext3_group_desc *desc, *best_desc = NULL; + struct ext4_group_desc *desc, *best_desc = NULL; struct buffer_head *bh; int group, best_group = -1; - freei = percpu_counter_read_positive(&EXT3_SB(sb)->s_freeinodes_counter); + freei = percpu_counter_read_positive(&EXT4_SB(sb)->s_freeinodes_counter); avefreei = freei / ngroups; for (group = 0; group < ngroups; group++) { - desc = ext3_get_group_desc (sb, group, &bh); + desc = ext4_get_group_desc (sb, group, &bh); if (!desc || !desc->bg_free_inodes_count) continue; if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) @@ -256,19 +256,19 @@ static int find_group_dir(struct super_block *sb, struct inode *parent) static int find_group_orlov(struct super_block *sb, struct inode *parent) { - int parent_group = EXT3_I(parent)->i_block_group; - 
struct ext3_sb_info *sbi = EXT3_SB(sb); - struct ext3_super_block *es = sbi->s_es; + int parent_group = EXT4_I(parent)->i_block_group; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; int ngroups = sbi->s_groups_count; - int inodes_per_group = EXT3_INODES_PER_GROUP(sb); + int inodes_per_group = EXT4_INODES_PER_GROUP(sb); unsigned int freei, avefreei; - ext3_fsblk_t freeb, avefreeb; - ext3_fsblk_t blocks_per_dir; + ext4_fsblk_t freeb, avefreeb; + ext4_fsblk_t blocks_per_dir; unsigned int ndirs; int max_debt, max_dirs, min_inodes; - ext3_grpblk_t min_blocks; + ext4_grpblk_t min_blocks; int group = -1, i; - struct ext3_group_desc *desc; + struct ext4_group_desc *desc; struct buffer_head *bh; freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); @@ -278,7 +278,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); if ((parent == sb->s_root->d_inode) || - (EXT3_I(parent)->i_flags & EXT3_TOPDIR_FL)) { + (EXT4_I(parent)->i_flags & EXT4_TOPDIR_FL)) { int best_ndir = inodes_per_group; int best_group = -1; @@ -286,7 +286,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) parent_group = (unsigned)group % ngroups; for (i = 0; i < ngroups; i++) { group = (parent_group + i) % ngroups; - desc = ext3_get_group_desc (sb, group, &bh); + desc = ext4_get_group_desc (sb, group, &bh); if (!desc || !desc->bg_free_inodes_count) continue; if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) @@ -307,9 +307,9 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) max_dirs = ndirs / ngroups + inodes_per_group / 16; min_inodes = avefreei - inodes_per_group / 4; - min_blocks = avefreeb - EXT3_BLOCKS_PER_GROUP(sb) / 4; + min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4; - max_debt = EXT3_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext3_fsblk_t)BLOCK_COST); + max_debt = EXT4_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext4_fsblk_t)BLOCK_COST); if (max_debt * INODE_COST > inodes_per_group) max_debt = inodes_per_group / INODE_COST; if (max_debt > 255) @@ -319,7 +319,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) for (i = 0; i < ngroups; i++) { group = (parent_group + i) % ngroups; - desc = ext3_get_group_desc (sb, group, &bh); + desc = ext4_get_group_desc (sb, group, &bh); if (!desc || !desc->bg_free_inodes_count) continue; if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) @@ -334,7 +334,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) fallback: for (i = 0; i < ngroups; i++) { group = (parent_group + i) % ngroups; - desc = ext3_get_group_desc (sb, group, &bh); + desc = ext4_get_group_desc (sb, group, &bh); if (!desc || !desc->bg_free_inodes_count) continue; if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) @@ -355,9 +355,9 @@ fallback: static int find_group_other(struct super_block *sb, struct inode *parent) { - int parent_group = EXT3_I(parent)->i_block_group; - int ngroups = EXT3_SB(sb)->s_groups_count; - struct ext3_group_desc *desc; + int parent_group = EXT4_I(parent)->i_block_group; + int ngroups = EXT4_SB(sb)->s_groups_count; + struct ext4_group_desc *desc; struct buffer_head *bh; int group, i; @@ -365,7 +365,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent) * Try to place the inode in its parent directory */ group = parent_group; - desc = ext3_get_group_desc (sb, group, &bh); + desc = ext4_get_group_desc (sb, group, &bh); if 
(desc && le16_to_cpu(desc->bg_free_inodes_count) && le16_to_cpu(desc->bg_free_blocks_count)) return group; @@ -389,7 +389,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent) group += i; if (group >= ngroups) group -= ngroups; - desc = ext3_get_group_desc (sb, group, &bh); + desc = ext4_get_group_desc (sb, group, &bh); if (desc && le16_to_cpu(desc->bg_free_inodes_count) && le16_to_cpu(desc->bg_free_blocks_count)) return group; @@ -403,7 +403,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent) for (i = 0; i < ngroups; i++) { if (++group >= ngroups) group = 0; - desc = ext3_get_group_desc (sb, group, &bh); + desc = ext4_get_group_desc (sb, group, &bh); if (desc && le16_to_cpu(desc->bg_free_inodes_count)) return group; } @@ -421,7 +421,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent) * For other inodes, search forward from the parent directory's block * group to find a free inode. */ -struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) +struct inode *ext4_new_inode(handle_t *handle, struct inode * dir, int mode) { struct super_block *sb; struct buffer_head *bitmap_bh = NULL; @@ -429,10 +429,10 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) int group; unsigned long ino = 0; struct inode * inode; - struct ext3_group_desc * gdp = NULL; - struct ext3_super_block * es; - struct ext3_inode_info *ei; - struct ext3_sb_info *sbi; + struct ext4_group_desc * gdp = NULL; + struct ext4_super_block * es; + struct ext4_inode_info *ei; + struct ext4_sb_info *sbi; int err = 0; struct inode *ret; int i; @@ -445,9 +445,9 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) inode = new_inode(sb); if (!inode) return ERR_PTR(-ENOMEM); - ei = EXT3_I(inode); + ei = EXT4_I(inode); - sbi = EXT3_SB(sb); + sbi = EXT4_SB(sb); es = sbi->s_es; if (S_ISDIR(mode)) { if (test_opt (sb, OLDALLOC)) @@ -464,7 +464,7 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) for (i = 0; i < sbi->s_groups_count; i++) { err = -EIO; - gdp = ext3_get_group_desc(sb, group, &bh2); + gdp = ext4_get_group_desc(sb, group, &bh2); if (!gdp) goto fail; @@ -476,21 +476,21 @@ struct inode *ext3_new_inode(handle_t *handle, struct inode * dir, int mode) ino = 0; repeat_in_this_group: - ino = ext3_find_next_zero_bit((unsigned long *) - bitmap_bh->b_data, EXT3_INODES_PER_GROUP(sb), ino); - if (ino < EXT3_INODES_PER_GROUP(sb)) { + ino = ext4_find_next_zero_bit((unsigned long *) + bitmap_bh->b_data, EXT4_INODES_PER_GROUP(sb), ino); + if (ino < EXT4_INODES_PER_GROUP(sb)) { BUFFER_TRACE(bitmap_bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, bitmap_bh); + err = ext4_journal_get_write_access(handle, bitmap_bh); if (err) goto fail; - if (!ext3_set_bit_atomic(sb_bgl_lock(sbi, group), + if (!ext4_set_bit_atomic(sb_bgl_lock(sbi, group), ino, bitmap_bh->b_data)) { /* we won it */ BUFFER_TRACE(bitmap_bh, - "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, + "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, bitmap_bh); if (err) goto fail; @@ -499,7 +499,7 @@ repeat_in_this_group: /* we lost it */ journal_release_buffer(handle, bitmap_bh); - if (++ino < EXT3_INODES_PER_GROUP(sb)) + if (++ino < EXT4_INODES_PER_GROUP(sb)) goto repeat_in_this_group; } @@ -517,9 +517,9 @@ repeat_in_this_group: goto out; got: - ino += group * EXT3_INODES_PER_GROUP(sb) + 1; - if (ino < EXT3_FIRST_INO(sb) || ino > 
le32_to_cpu(es->s_inodes_count)) { - ext3_error (sb, "ext3_new_inode", + ino += group * EXT4_INODES_PER_GROUP(sb) + 1; + if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) { + ext4_error (sb, "ext4_new_inode", "reserved inode or inode > inodes count - " "block_group = %d, inode=%lu", group, ino); err = -EIO; @@ -527,7 +527,7 @@ got: } BUFFER_TRACE(bh2, "get_write_access"); - err = ext3_journal_get_write_access(handle, bh2); + err = ext4_journal_get_write_access(handle, bh2); if (err) goto fail; spin_lock(sb_bgl_lock(sbi, group)); gdp->bg_free_inodes_count = @@ -537,8 +537,8 @@ got: cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); } spin_unlock(sb_bgl_lock(sbi, group)); - BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bh2); + BUFFER_TRACE(bh2, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, bh2); if (err) goto fail; percpu_counter_dec(&sbi->s_freeinodes_counter); @@ -566,13 +566,13 @@ got: ei->i_dir_start_lookup = 0; ei->i_disksize = 0; - ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; + ei->i_flags = EXT4_I(dir)->i_flags & ~EXT4_INDEX_FL; if (S_ISLNK(mode)) - ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); + ei->i_flags &= ~(EXT4_IMMUTABLE_FL|EXT4_APPEND_FL); /* dirsync only applies to directories */ if (!S_ISDIR(mode)) - ei->i_flags &= ~EXT3_DIRSYNC_FL; -#ifdef EXT3_FRAGMENTS + ei->i_flags &= ~EXT4_DIRSYNC_FL; +#ifdef EXT4_FRAGMENTS ei->i_faddr = 0; ei->i_frag_no = 0; ei->i_frag_size = 0; @@ -583,7 +583,7 @@ got: ei->i_block_alloc_info = NULL; ei->i_block_group = group; - ext3_set_inode_flags(inode); + ext4_set_inode_flags(inode); if (IS_DIRSYNC(inode)) handle->h_sync = 1; insert_inode_hash(inode); @@ -591,10 +591,10 @@ got: inode->i_generation = sbi->s_next_generation++; spin_unlock(&sbi->s_next_gen_lock); - ei->i_state = EXT3_STATE_NEW; + ei->i_state = EXT4_STATE_NEW; ei->i_extra_isize = - (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) ? - sizeof(struct ext3_inode) - EXT3_GOOD_OLD_INODE_SIZE : 0; + (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) ? 
+ sizeof(struct ext4_inode) - EXT4_GOOD_OLD_INODE_SIZE : 0; ret = inode; if(DQUOT_ALLOC_INODE(inode)) { @@ -602,24 +602,24 @@ got: goto fail_drop; } - err = ext3_init_acl(handle, inode, dir); + err = ext4_init_acl(handle, inode, dir); if (err) goto fail_free_drop; - err = ext3_init_security(handle,inode, dir); + err = ext4_init_security(handle,inode, dir); if (err) goto fail_free_drop; - err = ext3_mark_inode_dirty(handle, inode); + err = ext4_mark_inode_dirty(handle, inode); if (err) { - ext3_std_error(sb, err); + ext4_std_error(sb, err); goto fail_free_drop; } - ext3_debug("allocating inode %lu\n", inode->i_ino); + ext4_debug("allocating inode %lu\n", inode->i_ino); goto really_out; fail: - ext3_std_error(sb, err); + ext4_std_error(sb, err); out: iput(inode); ret = ERR_PTR(err); @@ -640,9 +640,9 @@ fail_drop: } /* Verify that we are loading a valid orphan from disk */ -struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) +struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino) { - unsigned long max_ino = le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count); + unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count); unsigned long block_group; int bit; struct buffer_head *bitmap_bh = NULL; @@ -650,16 +650,16 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) /* Error cases - e2fsck has already cleaned up for us */ if (ino > max_ino) { - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "bad orphan ino %lu! e2fsck was run?", ino); goto out; } - block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); - bit = (ino - 1) % EXT3_INODES_PER_GROUP(sb); + block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); + bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb); bitmap_bh = read_inode_bitmap(sb, block_group); if (!bitmap_bh) { - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "inode bitmap error for orphan %lu", ino); goto out; } @@ -668,14 +668,14 @@ struct inode *ext3_orphan_get(struct super_block *sb, unsigned long ino) * is a valid orphan (no e2fsck run on fs). Orphans also include * inodes that were being truncated, so we can't check i_nlink==0. */ - if (!ext3_test_bit(bit, bitmap_bh->b_data) || + if (!ext4_test_bit(bit, bitmap_bh->b_data) || !(inode = iget(sb, ino)) || is_bad_inode(inode) || NEXT_ORPHAN(inode) > max_ino) { - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "bad orphan inode %lu! 
e2fsck was run?", ino); - printk(KERN_NOTICE "ext3_test_bit(bit=%d, block=%llu) = %d\n", + printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n", bit, (unsigned long long)bitmap_bh->b_blocknr, - ext3_test_bit(bit, bitmap_bh->b_data)); + ext4_test_bit(bit, bitmap_bh->b_data)); printk(KERN_NOTICE "inode=%p\n", inode); if (inode) { printk(KERN_NOTICE "is_bad_inode(inode)=%d\n", @@ -695,22 +695,22 @@ out: return inode; } -unsigned long ext3_count_free_inodes (struct super_block * sb) +unsigned long ext4_count_free_inodes (struct super_block * sb) { unsigned long desc_count; - struct ext3_group_desc *gdp; + struct ext4_group_desc *gdp; int i; -#ifdef EXT3FS_DEBUG - struct ext3_super_block *es; +#ifdef EXT4FS_DEBUG + struct ext4_super_block *es; unsigned long bitmap_count, x; struct buffer_head *bitmap_bh = NULL; - es = EXT3_SB(sb)->s_es; + es = EXT4_SB(sb)->s_es; desc_count = 0; bitmap_count = 0; gdp = NULL; - for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); + for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + gdp = ext4_get_group_desc (sb, i, NULL); if (!gdp) continue; desc_count += le16_to_cpu(gdp->bg_free_inodes_count); @@ -719,19 +719,19 @@ unsigned long ext3_count_free_inodes (struct super_block * sb) if (!bitmap_bh) continue; - x = ext3_count_free(bitmap_bh, EXT3_INODES_PER_GROUP(sb) / 8); + x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8); printk("group %d: stored = %d, counted = %lu\n", i, le16_to_cpu(gdp->bg_free_inodes_count), x); bitmap_count += x; } brelse(bitmap_bh); - printk("ext3_count_free_inodes: stored = %u, computed = %lu, %lu\n", + printk("ext4_count_free_inodes: stored = %u, computed = %lu, %lu\n", le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count); return desc_count; #else desc_count = 0; - for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { - gdp = ext3_get_group_desc (sb, i, NULL); + for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + gdp = ext4_get_group_desc (sb, i, NULL); if (!gdp) continue; desc_count += le16_to_cpu(gdp->bg_free_inodes_count); @@ -742,13 +742,13 @@ unsigned long ext3_count_free_inodes (struct super_block * sb) } /* Called at mount-time, super-block is locked */ -unsigned long ext3_count_dirs (struct super_block * sb) +unsigned long ext4_count_dirs (struct super_block * sb) { unsigned long count = 0; int i; - for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) { - struct ext3_group_desc *gdp = ext3_get_group_desc (sb, i, NULL); + for (i = 0; i < EXT4_SB(sb)->s_groups_count; i++) { + struct ext4_group_desc *gdp = ext4_get_group_desc (sb, i, NULL); if (!gdp) continue; count += le16_to_cpu(gdp->bg_used_dirs_count); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 03ba5bcab186..7275d60dcc59 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -1,5 +1,5 @@ /* - * linux/fs/ext3/inode.c + * linux/fs/ext4/inode.c * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) @@ -19,13 +19,13 @@ * 64-bit file support on 64-bit platforms by Jakub Jelinek * (jj@sunsite.ms.mff.cuni.cz) * - * Assorted race fixes, rewrite of ext3_get_block() by Al Viro, 2000 + * Assorted race fixes, rewrite of ext4_get_block() by Al Viro, 2000 */ #include <linux/module.h> #include <linux/fs.h> #include <linux/time.h> -#include <linux/ext3_jbd.h> +#include <linux/ext4_jbd.h> #include <linux/jbd.h> #include <linux/smp_lock.h> #include <linux/highuid.h> @@ -40,21 +40,21 @@ #include "xattr.h" #include "acl.h" -static int ext3_writepage_trans_blocks(struct inode *inode); +static int 
ext4_writepage_trans_blocks(struct inode *inode); /* * Test whether an inode is a fast symlink. */ -static int ext3_inode_is_fast_symlink(struct inode *inode) +static int ext4_inode_is_fast_symlink(struct inode *inode) { - int ea_blocks = EXT3_I(inode)->i_file_acl ? + int ea_blocks = EXT4_I(inode)->i_file_acl ? (inode->i_sb->s_blocksize >> 9) : 0; return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); } /* - * The ext3 forget function must perform a revoke if we are freeing data + * The ext4 forget function must perform a revoke if we are freeing data * which has been journaled. Metadata (eg. indirect blocks) must be * revoked in all cases. * @@ -62,8 +62,8 @@ static int ext3_inode_is_fast_symlink(struct inode *inode) * but there may still be a record of it in the journal, and that record * still needs to be revoked. */ -int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, - struct buffer_head *bh, ext3_fsblk_t blocknr) +int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, + struct buffer_head *bh, ext4_fsblk_t blocknr) { int err; @@ -81,11 +81,11 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, * support it. Otherwise, only skip the revoke on un-journaled * data blocks. */ - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA || - (!is_metadata && !ext3_should_journal_data(inode))) { + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA || + (!is_metadata && !ext4_should_journal_data(inode))) { if (bh) { BUFFER_TRACE(bh, "call journal_forget"); - return ext3_journal_forget(handle, bh); + return ext4_journal_forget(handle, bh); } return 0; } @@ -93,10 +93,10 @@ int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, /* * data!=journal && (is_metadata || should_journal_data(inode)) */ - BUFFER_TRACE(bh, "call ext3_journal_revoke"); - err = ext3_journal_revoke(handle, blocknr, bh); + BUFFER_TRACE(bh, "call ext4_journal_revoke"); + err = ext4_journal_revoke(handle, blocknr, bh); if (err) - ext3_abort(inode->i_sb, __FUNCTION__, + ext4_abort(inode->i_sb, __FUNCTION__, "error %d when attempting revoke", err); BUFFER_TRACE(bh, "exit"); return err; @@ -115,7 +115,7 @@ static unsigned long blocks_for_truncate(struct inode *inode) /* Give ourselves just enough room to cope with inodes in which * i_blocks is corrupt: we've seen disk corruptions in the past * which resulted in random data in an inode which looked enough - * like a regular file for ext3 to try to delete it. Things + * like a regular file for ext4 to try to delete it. Things * will go a bit crazy if that happens, but at least we should * try not to panic the whole kernel. */ if (needed < 2) @@ -123,10 +123,10 @@ static unsigned long blocks_for_truncate(struct inode *inode) /* But we need to bound the transaction so we don't overflow the * journal. 
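
blocks_for_truncate() here pads a possibly corrupt estimate up to 2, then (just below) caps it so a single handle cannot overflow the journal, and finally adds the fixed per-transaction cost. A standalone sketch of that bounding; both constants are stand-ins, since EXT4_MAX_TRANS_DATA's actual value is not shown in this hunk:

    enum {
        SKETCH_MAX_TRANS_DATA    = 64,  /* assumed journal-safe ceiling */
        SKETCH_DATA_TRANS_BLOCKS = 10,  /* hypothetical fixed overhead */
    };

    static unsigned long bounded_truncate_credits(unsigned long estimate)
    {
        if (estimate < 2)
            estimate = 2;                     /* room for corrupt i_blocks */
        if (estimate > SKETCH_MAX_TRANS_DATA)
            estimate = SKETCH_MAX_TRANS_DATA; /* don't overflow the journal */
        return SKETCH_DATA_TRANS_BLOCKS + estimate;
    }
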
*/ - if (needed > EXT3_MAX_TRANS_DATA) - needed = EXT3_MAX_TRANS_DATA; + if (needed > EXT4_MAX_TRANS_DATA) + needed = EXT4_MAX_TRANS_DATA; - return EXT3_DATA_TRANS_BLOCKS(inode->i_sb) + needed; + return EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + needed; } /* @@ -143,11 +143,11 @@ static handle_t *start_transaction(struct inode *inode) { handle_t *result; - result = ext3_journal_start(inode, blocks_for_truncate(inode)); + result = ext4_journal_start(inode, blocks_for_truncate(inode)); if (!IS_ERR(result)) return result; - ext3_std_error(inode->i_sb, PTR_ERR(result)); + ext4_std_error(inode->i_sb, PTR_ERR(result)); return result; } @@ -159,9 +159,9 @@ static handle_t *start_transaction(struct inode *inode) */ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) { - if (handle->h_buffer_credits > EXT3_RESERVE_TRANS_BLOCKS) + if (handle->h_buffer_credits > EXT4_RESERVE_TRANS_BLOCKS) return 0; - if (!ext3_journal_extend(handle, blocks_for_truncate(inode))) + if (!ext4_journal_extend(handle, blocks_for_truncate(inode))) return 0; return 1; } @@ -171,16 +171,16 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode) * so before we call here everything must be consistently dirtied against * this transaction. */ -static int ext3_journal_test_restart(handle_t *handle, struct inode *inode) +static int ext4_journal_test_restart(handle_t *handle, struct inode *inode) { jbd_debug(2, "restarting handle %p\n", handle); - return ext3_journal_restart(handle, blocks_for_truncate(inode)); + return ext4_journal_restart(handle, blocks_for_truncate(inode)); } /* * Called at the last iput() if i_nlink is zero. */ -void ext3_delete_inode (struct inode * inode) +void ext4_delete_inode (struct inode * inode) { handle_t *handle; @@ -196,7 +196,7 @@ void ext3_delete_inode (struct inode * inode) * make sure that the in-core orphan linked list is properly * cleaned up. */ - ext3_orphan_del(NULL, inode); + ext4_orphan_del(NULL, inode); goto no_delete; } @@ -204,17 +204,17 @@ void ext3_delete_inode (struct inode * inode) handle->h_sync = 1; inode->i_size = 0; if (inode->i_blocks) - ext3_truncate(inode); + ext4_truncate(inode); /* - * Kill off the orphan record which ext3_truncate created. + * Kill off the orphan record which ext4_truncate created. * AKPM: I think this can be inside the above `if'. - * Note that ext3_orphan_del() has to be able to cope with the + * Note that ext4_orphan_del() has to be able to cope with the * deletion of a non-existent orphan - this is because we don't - * know if ext3_truncate() actually created an orphan record. + * know if ext4_truncate() actually created an orphan record. * (Well, we could do this if we need to, but heck - it works) */ - ext3_orphan_del(handle, inode); - EXT3_I(inode)->i_dtime = get_seconds(); + ext4_orphan_del(handle, inode); + EXT4_I(inode)->i_dtime = get_seconds(); /* * One subtle ordering requirement: if anything has gone wrong @@ -223,12 +223,12 @@ void ext3_delete_inode (struct inode * inode) * having errors), but we can't free the inode if the mark_dirty * fails. */ - if (ext3_mark_inode_dirty(handle, inode)) + if (ext4_mark_inode_dirty(handle, inode)) /* If that failed, just do the required in-core inode clear. */ clear_inode(inode); else - ext3_free_inode(handle, inode); - ext3_journal_stop(handle); + ext4_free_inode(handle, inode); + ext4_journal_stop(handle); return; no_delete: clear_inode(inode); /* We must guarantee clearing of inode... 
*/ @@ -254,14 +254,14 @@ static int verify_chain(Indirect *from, Indirect *to) } /** - * ext3_block_to_path - parse the block number into array of offsets + * ext4_block_to_path - parse the block number into array of offsets * @inode: inode in question (we are only interested in its superblock) * @i_block: block number to be parsed * @offsets: array to store the offsets in * @boundary: set this non-zero if the referred-to block is likely to be * followed (on disk) by an indirect block. * - * To store the locations of file's data ext3 uses a data structure common + * To store the locations of file's data ext4 uses a data structure common * for UNIX filesystems - tree of pointers anchored in the inode, with * data blocks at leaves and indirect blocks in intermediate nodes. * This function translates the block number into path in that tree - @@ -284,39 +284,39 @@ static int verify_chain(Indirect *from, Indirect *to) * get there at all. */ -static int ext3_block_to_path(struct inode *inode, +static int ext4_block_to_path(struct inode *inode, long i_block, int offsets[4], int *boundary) { - int ptrs = EXT3_ADDR_PER_BLOCK(inode->i_sb); - int ptrs_bits = EXT3_ADDR_PER_BLOCK_BITS(inode->i_sb); - const long direct_blocks = EXT3_NDIR_BLOCKS, + int ptrs = EXT4_ADDR_PER_BLOCK(inode->i_sb); + int ptrs_bits = EXT4_ADDR_PER_BLOCK_BITS(inode->i_sb); + const long direct_blocks = EXT4_NDIR_BLOCKS, indirect_blocks = ptrs, double_blocks = (1 << (ptrs_bits * 2)); int n = 0; int final = 0; if (i_block < 0) { - ext3_warning (inode->i_sb, "ext3_block_to_path", "block < 0"); + ext4_warning (inode->i_sb, "ext4_block_to_path", "block < 0"); } else if (i_block < direct_blocks) { offsets[n++] = i_block; final = direct_blocks; } else if ( (i_block -= direct_blocks) < indirect_blocks) { - offsets[n++] = EXT3_IND_BLOCK; + offsets[n++] = EXT4_IND_BLOCK; offsets[n++] = i_block; final = ptrs; } else if ((i_block -= indirect_blocks) < double_blocks) { - offsets[n++] = EXT3_DIND_BLOCK; + offsets[n++] = EXT4_DIND_BLOCK; offsets[n++] = i_block >> ptrs_bits; offsets[n++] = i_block & (ptrs - 1); final = ptrs; } else if (((i_block -= double_blocks) >> (ptrs_bits * 2)) < ptrs) { - offsets[n++] = EXT3_TIND_BLOCK; + offsets[n++] = EXT4_TIND_BLOCK; offsets[n++] = i_block >> (ptrs_bits * 2); offsets[n++] = (i_block >> ptrs_bits) & (ptrs - 1); offsets[n++] = i_block & (ptrs - 1); final = ptrs; } else { - ext3_warning(inode->i_sb, "ext3_block_to_path", "block > big"); + ext4_warning(inode->i_sb, "ext4_block_to_path", "block > big"); } if (boundary) *boundary = final - 1 - (i_block & (ptrs - 1)); @@ -324,7 +324,7 @@ static int ext3_block_to_path(struct inode *inode, } /** - * ext3_get_branch - read the chain of indirect blocks leading to data + * ext4_get_branch - read the chain of indirect blocks leading to data * @inode: inode in question * @depth: depth of the chain (1 - direct pointer, etc.) * @offsets: offsets of pointers in inode/indirect blocks @@ -352,7 +352,7 @@ static int ext3_block_to_path(struct inode *inode, * or when it reads all @depth-1 indirect blocks successfully and finds * the whole chain, all way to the data (returns %NULL, *err == 0). 
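
ext4_block_to_path() above is easiest to follow with concrete numbers. A self-contained userspace version for one geometry, assuming 4 KiB blocks with 4-byte pointers (1024 entries per indirect block) and the 12-direct / slot 12-13-14 layout visible in the hunk:

    /* Path of array indices for logical block i_block; returns the depth
     * (0 means out of range). */
    static int block_to_path_4k(long i_block, int offsets[4])
    {
        const int  ptrs = 1024, ptrs_bits = 10;
        const long direct = 12, dbl = 1L << (ptrs_bits * 2);
        int n = 0;

        if (i_block < 0)
            return 0;
        if (i_block < direct) {
            offsets[n++] = (int)i_block;
        } else if ((i_block -= direct) < ptrs) {
            offsets[n++] = 12;                        /* EXT4_IND_BLOCK */
            offsets[n++] = (int)i_block;
        } else if ((i_block -= ptrs) < dbl) {
            offsets[n++] = 13;                        /* EXT4_DIND_BLOCK */
            offsets[n++] = (int)(i_block >> ptrs_bits);
            offsets[n++] = (int)(i_block & (ptrs - 1));
        } else if (((i_block -= dbl) >> (ptrs_bits * 2)) < ptrs) {
            offsets[n++] = 14;                        /* EXT4_TIND_BLOCK */
            offsets[n++] = (int)(i_block >> (ptrs_bits * 2));
            offsets[n++] = (int)((i_block >> ptrs_bits) & (ptrs - 1));
            offsets[n++] = (int)(i_block & (ptrs - 1));
        }
        return n;
    }
    /* Example: block 1036 = 12 direct + 1024 single-indirect, so it is the
     * very first double-indirect entry: depth 3, path {13, 0, 0}. */
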
*/ -static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets, +static Indirect *ext4_get_branch(struct inode *inode, int depth, int *offsets, Indirect chain[4], int *err) { struct super_block *sb = inode->i_sb; @@ -361,7 +361,7 @@ static Indirect *ext3_get_branch(struct inode *inode, int depth, int *offsets, *err = 0; /* i_data is not going away, no lock needed */ - add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets); + add_chain (chain, NULL, EXT4_I(inode)->i_data + *offsets); if (!p->key) goto no_block; while (--depth) { @@ -389,7 +389,7 @@ no_block: } /** - * ext3_find_near - find a place for allocation with sufficient locality + * ext4_find_near - find a place for allocation with sufficient locality * @inode: owner * @ind: descriptor of indirect block. * @@ -408,13 +408,13 @@ no_block: * * Caller must make sure that @ind is valid and will stay that way. */ -static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind) +static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) { - struct ext3_inode_info *ei = EXT3_I(inode); + struct ext4_inode_info *ei = EXT4_I(inode); __le32 *start = ind->bh ? (__le32*) ind->bh->b_data : ei->i_data; __le32 *p; - ext3_fsblk_t bg_start; - ext3_grpblk_t colour; + ext4_fsblk_t bg_start; + ext4_grpblk_t colour; /* Try to find previous block */ for (p = ind->p - 1; p >= start; p--) { @@ -430,14 +430,14 @@ static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind) * It is going to be referred to from the inode itself? OK, just put it * into the same cylinder group then. */ - bg_start = ext3_group_first_block_no(inode->i_sb, ei->i_block_group); + bg_start = ext4_group_first_block_no(inode->i_sb, ei->i_block_group); colour = (current->pid % 16) * - (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); + (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); return bg_start + colour; } /** - * ext3_find_goal - find a prefered place for allocation. + * ext4_find_goal - find a prefered place for allocation. * @inode: owner * @block: block we want * @chain: chain of indirect blocks @@ -448,12 +448,12 @@ static ext3_fsblk_t ext3_find_near(struct inode *inode, Indirect *ind) * stores it in *@goal and returns zero. */ -static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block, +static ext4_fsblk_t ext4_find_goal(struct inode *inode, long block, Indirect chain[4], Indirect *partial) { - struct ext3_block_alloc_info *block_i; + struct ext4_block_alloc_info *block_i; - block_i = EXT3_I(inode)->i_block_alloc_info; + block_i = EXT4_I(inode)->i_block_alloc_info; /* * try the heuristic for sequential allocation, @@ -464,11 +464,11 @@ static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block, return block_i->last_alloc_physical_block + 1; } - return ext3_find_near(inode, partial); + return ext4_find_near(inode, partial); } /** - * ext3_blks_to_allocate: Look up the block map and count the number + * ext4_blks_to_allocate: Look up the block map and count the number * of direct blocks need to be allocated for the given branch. * * @branch: chain of indirect blocks @@ -479,7 +479,7 @@ static ext3_fsblk_t ext3_find_goal(struct inode *inode, long block, * return the total number of blocks to be allocate, including the * direct and indirect blocks. 
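
The tail of ext4_find_near() above aims unrelated allocations at different sixteenths of the block group, keyed off the caller's PID. The arithmetic in isolation, with a worked example:

    /* Spread concurrent allocators across 16 slices of one block group. */
    static unsigned long coloured_goal(unsigned long bg_start,
                                       unsigned long blocks_per_group,
                                       int pid)
    {
        unsigned long colour = (unsigned long)(pid % 16) *
                               (blocks_per_group / 16);
        return bg_start + colour;
    }
    /* e.g. blocks_per_group = 32768, pid = 4242: 4242 % 16 = 2, so the goal
     * lands 2 * 2048 = 4096 blocks into the group. */
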
*/ -static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks, +static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned long blks, int blocks_to_boundary) { unsigned long count = 0; @@ -506,7 +506,7 @@ static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks, } /** - * ext3_alloc_blocks: multiple allocate blocks needed for a branch + * ext4_alloc_blocks: multiple allocate blocks needed for a branch * @indirect_blks: the number of blocks need to allocate for indirect * blocks * @@ -515,14 +515,14 @@ static int ext3_blks_to_allocate(Indirect *branch, int k, unsigned long blks, * @blks: on return it will store the total number of allocated * direct blocks */ -static int ext3_alloc_blocks(handle_t *handle, struct inode *inode, - ext3_fsblk_t goal, int indirect_blks, int blks, - ext3_fsblk_t new_blocks[4], int *err) +static int ext4_alloc_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, int indirect_blks, int blks, + ext4_fsblk_t new_blocks[4], int *err) { int target, i; unsigned long count = 0; int index = 0; - ext3_fsblk_t current_block = 0; + ext4_fsblk_t current_block = 0; int ret = 0; /* @@ -538,7 +538,7 @@ static int ext3_alloc_blocks(handle_t *handle, struct inode *inode, while (1) { count = target; /* allocating blocks for indirect blocks and direct blocks */ - current_block = ext3_new_blocks(handle,inode,goal,&count,err); + current_block = ext4_new_blocks(handle,inode,goal,&count,err); if (*err) goto failed_out; @@ -562,12 +562,12 @@ static int ext3_alloc_blocks(handle_t *handle, struct inode *inode, return ret; failed_out: for (i = 0; i <index; i++) - ext3_free_blocks(handle, inode, new_blocks[i], 1); + ext4_free_blocks(handle, inode, new_blocks[i], 1); return ret; } /** - * ext3_alloc_branch - allocate and set up a chain of blocks. + * ext4_alloc_branch - allocate and set up a chain of blocks. * @inode: owner * @indirect_blks: number of allocated indirect blocks * @blks: number of allocated direct blocks @@ -578,21 +578,21 @@ failed_out: * links them into chain and (if we are synchronous) writes them to disk. * In other words, it prepares a branch that can be spliced onto the * inode. It stores the information about that chain in the branch[], in - * the same format as ext3_get_branch() would do. We are calling it after + * the same format as ext4_get_branch() would do. We are calling it after * we had read the existing part of chain and partial points to the last * triple of that (one with zero ->key). Upon the exit we have the same - * picture as after the successful ext3_get_block(), except that in one + * picture as after the successful ext4_get_block(), except that in one * place chain is disconnected - *branch->p is still zero (we did not * set the last link), but branch->key contains the number that should * be placed into *branch->p to fill that gap. * * If allocation fails we free all blocks we've allocated (and forget * their buffer_heads) and return the error value the from failed - * ext3_alloc_block() (normally -ENOSPC). Otherwise we set the chain + * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain * as described above and return 0. 
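
ext4_alloc_blocks() above can be handed fewer contiguous blocks than it asked for on each ext4_new_blocks() call, so it loops and banks each run until the branch is covered. A generic sketch of that accumulate-until-satisfied shape; the callback and run bookkeeping are hypothetical, not the patch's exact structure:

    struct blk_run { unsigned long start, len; };

    /* alloc() may shorten *count and returns 0 when space runs out. At most
     * four runs: up to three indirect blocks plus one run of data blocks. */
    static int gather_blocks(unsigned long target, unsigned long goal,
                             unsigned long (*alloc)(unsigned long goal,
                                                    unsigned long *count),
                             struct blk_run runs[4])
    {
        int nruns = 0;
        unsigned long got = 0;

        while (got < target && nruns < 4) {
            unsigned long count = target - got;
            unsigned long first = alloc(goal, &count);
            if (!first)
                return -1;            /* ENOSPC: caller must free the runs */
            runs[nruns].start = first;
            runs[nruns].len   = count;
            nruns++;
            got += count;
            goal = first + count;     /* try to keep the next run adjacent */
        }
        return nruns;
    }
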
*/ -static int ext3_alloc_branch(handle_t *handle, struct inode *inode, - int indirect_blks, int *blks, ext3_fsblk_t goal, +static int ext4_alloc_branch(handle_t *handle, struct inode *inode, + int indirect_blks, int *blks, ext4_fsblk_t goal, int *offsets, Indirect *branch) { int blocksize = inode->i_sb->s_blocksize; @@ -600,10 +600,10 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode, int err = 0; struct buffer_head *bh; int num; - ext3_fsblk_t new_blocks[4]; - ext3_fsblk_t current_block; + ext4_fsblk_t new_blocks[4]; + ext4_fsblk_t current_block; - num = ext3_alloc_blocks(handle, inode, goal, indirect_blks, + num = ext4_alloc_blocks(handle, inode, goal, indirect_blks, *blks, new_blocks, &err); if (err) return err; @@ -622,7 +622,7 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode, branch[n].bh = bh; lock_buffer(bh); BUFFER_TRACE(bh, "call get_create_access"); - err = ext3_journal_get_create_access(handle, bh); + err = ext4_journal_get_create_access(handle, bh); if (err) { unlock_buffer(bh); brelse(bh); @@ -647,8 +647,8 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode, set_buffer_uptodate(bh); unlock_buffer(bh); - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bh); + BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, bh); if (err) goto failed; } @@ -658,22 +658,22 @@ failed: /* Allocation failed, free what we already allocated */ for (i = 1; i <= n ; i++) { BUFFER_TRACE(branch[i].bh, "call journal_forget"); - ext3_journal_forget(handle, branch[i].bh); + ext4_journal_forget(handle, branch[i].bh); } for (i = 0; i <indirect_blks; i++) - ext3_free_blocks(handle, inode, new_blocks[i], 1); + ext4_free_blocks(handle, inode, new_blocks[i], 1); - ext3_free_blocks(handle, inode, new_blocks[i], num); + ext4_free_blocks(handle, inode, new_blocks[i], num); return err; } /** - * ext3_splice_branch - splice the allocated branch onto inode. + * ext4_splice_branch - splice the allocated branch onto inode. * @inode: owner * @block: (logical) number of block we are adding * @chain: chain of indirect blocks (with a missing link - see - * ext3_alloc_branch) + * ext4_alloc_branch) * @where: location of missing link * @num: number of indirect blocks we are adding * @blks: number of direct blocks we are adding @@ -682,15 +682,15 @@ failed: * inode (->i_blocks, etc.). In case of success we end up with the full * chain to new block and return 0. 
*/ -static int ext3_splice_branch(handle_t *handle, struct inode *inode, +static int ext4_splice_branch(handle_t *handle, struct inode *inode, long block, Indirect *where, int num, int blks) { int i; int err = 0; - struct ext3_block_alloc_info *block_i; - ext3_fsblk_t current_block; + struct ext4_block_alloc_info *block_i; + ext4_fsblk_t current_block; - block_i = EXT3_I(inode)->i_block_alloc_info; + block_i = EXT4_I(inode)->i_block_alloc_info; /* * If we're splicing into a [td]indirect block (as opposed to the * inode) then we need to get write access to the [td]indirect block @@ -698,7 +698,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, */ if (where->bh) { BUFFER_TRACE(where->bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, where->bh); + err = ext4_journal_get_write_access(handle, where->bh); if (err) goto err_out; } @@ -730,7 +730,7 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, /* We are done with atomic stuff, now do the rest of housekeeping */ inode->i_ctime = CURRENT_TIME_SEC; - ext3_mark_inode_dirty(handle, inode); + ext4_mark_inode_dirty(handle, inode); /* had we spliced it onto indirect block? */ if (where->bh) { @@ -740,11 +740,11 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, * onto an indirect block at the very end of the file (the * file is growing) then we *will* alter the inode to reflect * the new i_size. But that is not done here - it is done in - * generic_commit_write->__mark_inode_dirty->ext3_dirty_inode. + * generic_commit_write->__mark_inode_dirty->ext4_dirty_inode. */ jbd_debug(5, "splicing indirect only\n"); - BUFFER_TRACE(where->bh, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, where->bh); + BUFFER_TRACE(where->bh, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, where->bh); if (err) goto err_out; } else { @@ -759,10 +759,10 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, err_out: for (i = 1; i <= num; i++) { BUFFER_TRACE(where[i].bh, "call journal_forget"); - ext3_journal_forget(handle, where[i].bh); - ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1); + ext4_journal_forget(handle, where[i].bh); + ext4_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1); } - ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks); + ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks); return err; } @@ -786,7 +786,7 @@ err_out: * return = 0, if plain lookup failed. * return < 0, error case. 
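
The return convention spelled out above (> 0 blocks mapped, 0 for a failed plain lookup, < 0 error) is worth seeing from the caller's side. A sketch with a plain struct standing in for buffer_head:

    #include <stddef.h>

    struct map_result {
        unsigned long long start;   /* first physical block, if mapped */
        size_t            bytes;    /* length of the mapped extent */
        int               hole;     /* plain lookup found nothing */
    };

    /* ret is the value documented above: >0 mapped, 0 hole, <0 error. */
    static int interpret_get_blocks(int ret, unsigned long long blocknr,
                                    unsigned blkbits, struct map_result *out)
    {
        if (ret < 0)
            return ret;                       /* propagate the error */
        out->hole  = (ret == 0);
        out->start = blocknr;
        out->bytes = (size_t)ret << blkbits;  /* contiguous run mapped */
        return 0;
    }
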
*/ -int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, +int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, int create, int extend_disksize) @@ -795,22 +795,22 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, int offsets[4]; Indirect chain[4]; Indirect *partial; - ext3_fsblk_t goal; + ext4_fsblk_t goal; int indirect_blks; int blocks_to_boundary = 0; int depth; - struct ext3_inode_info *ei = EXT3_I(inode); + struct ext4_inode_info *ei = EXT4_I(inode); int count = 0; - ext3_fsblk_t first_block = 0; + ext4_fsblk_t first_block = 0; J_ASSERT(handle != NULL || create == 0); - depth = ext3_block_to_path(inode,iblock,offsets,&blocks_to_boundary); + depth = ext4_block_to_path(inode,iblock,offsets,&blocks_to_boundary); if (depth == 0) goto out; - partial = ext3_get_branch(inode, depth, offsets, chain, &err); + partial = ext4_get_branch(inode, depth, offsets, chain, &err); /* Simplest case - block found, no allocation needed */ if (!partial) { @@ -819,7 +819,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, count++; /*map more blocks*/ while (count < maxblocks && count <= blocks_to_boundary) { - ext3_fsblk_t blk; + ext4_fsblk_t blk; if (!verify_chain(chain, partial)) { /* @@ -852,7 +852,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, /* * If the indirect block is missing while we are reading - * the chain(ext3_get_branch() returns -EAGAIN err), or + * the chain(ext4_get_branch() returns -EAGAIN err), or * if the chain has been changed after we grab the semaphore, * (either because another process truncated this branch, or * another get_block allocated this branch) re-grab the chain to see if @@ -867,7 +867,7 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, brelse(partial->bh); partial--; } - partial = ext3_get_branch(inode, depth, offsets, chain, &err); + partial = ext4_get_branch(inode, depth, offsets, chain, &err); if (!partial) { count++; mutex_unlock(&ei->truncate_mutex); @@ -883,9 +883,9 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, * allocation info here if necessary */ if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) - ext3_init_block_alloc_info(inode); + ext4_init_block_alloc_info(inode); - goal = ext3_find_goal(inode, iblock, chain, partial); + goal = ext4_find_goal(inode, iblock, chain, partial); /* the number of blocks need to allocate for [d,t]indirect blocks */ indirect_blks = (chain + depth) - partial - 1; @@ -894,28 +894,28 @@ int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, * Next look up the indirect map to count the totoal number of * direct blocks to allocate for this branch. */ - count = ext3_blks_to_allocate(partial, indirect_blks, + count = ext4_blks_to_allocate(partial, indirect_blks, maxblocks, blocks_to_boundary); /* - * Block out ext3_truncate while we alter the tree + * Block out ext4_truncate while we alter the tree */ - err = ext3_alloc_branch(handle, inode, indirect_blks, &count, goal, + err = ext4_alloc_branch(handle, inode, indirect_blks, &count, goal, offsets + (partial - chain), partial); /* - * The ext3_splice_branch call will free and forget any buffers + * The ext4_splice_branch call will free and forget any buffers * on the new chain if there is a failure, but that risks using * up transaction credits, especially for bitmaps where the * credits cannot be returned. Can we handle this somehow? 
We * may need to return -EAGAIN upwards in the worst case. --sct */ if (!err) - err = ext3_splice_branch(handle, inode, iblock, + err = ext4_splice_branch(handle, inode, iblock, partial, indirect_blks, count); /* * i_disksize growing is protected by truncate_mutex. Don't forget to * protect it if you're about to implement concurrent - * ext3_get_block() -bzzz + * ext4_get_block() -bzzz */ if (!err && extend_disksize && inode->i_size > ei->i_disksize) ei->i_disksize = inode->i_size; @@ -942,9 +942,9 @@ out: return err; } -#define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32) +#define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32) -static int ext3_get_block(struct inode *inode, sector_t iblock, +static int ext4_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { handle_t *handle = journal_current_handle(); @@ -962,29 +962,29 @@ static int ext3_get_block(struct inode *inode, sector_t iblock, * Huge direct-io writes can hold off commits for long * periods of time. Let this commit run. */ - ext3_journal_stop(handle); - handle = ext3_journal_start(inode, DIO_CREDITS); + ext4_journal_stop(handle); + handle = ext4_journal_start(inode, DIO_CREDITS); if (IS_ERR(handle)) ret = PTR_ERR(handle); goto get_block; } - if (handle->h_buffer_credits <= EXT3_RESERVE_TRANS_BLOCKS) { + if (handle->h_buffer_credits <= EXT4_RESERVE_TRANS_BLOCKS) { /* * Getting low on buffer credits... */ - ret = ext3_journal_extend(handle, DIO_CREDITS); + ret = ext4_journal_extend(handle, DIO_CREDITS); if (ret > 0) { /* * Couldn't extend the transaction. Start a new one. */ - ret = ext3_journal_restart(handle, DIO_CREDITS); + ret = ext4_journal_restart(handle, DIO_CREDITS); } } get_block: if (ret == 0) { - ret = ext3_get_blocks_handle(handle, inode, iblock, + ret = ext4_get_blocks_handle(handle, inode, iblock, max_blocks, bh_result, create, 0); if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); @@ -997,7 +997,7 @@ get_block: /* * `handle' can be NULL if create is zero */ -struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode, +struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, long block, int create, int *errp) { struct buffer_head dummy; @@ -1008,10 +1008,10 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode, dummy.b_state = 0; dummy.b_blocknr = -1000; buffer_trace_init(&dummy.b_history); - err = ext3_get_blocks_handle(handle, inode, block, 1, + err = ext4_get_blocks_handle(handle, inode, block, 1, &dummy, create, 1); /* - * ext3_get_blocks_handle() returns number of blocks + * ext4_get_blocks_handle() returns number of blocks * mapped. 0 in case of a HOLE. */ if (err > 0) { @@ -1035,19 +1035,19 @@ struct buffer_head *ext3_getblk(handle_t *handle, struct inode *inode, * Now that we do not always journal data, we should * keep in mind whether this should always journal the * new buffer as metadata. For now, regular file - * writes use ext3_get_block instead, so it's not a + * writes use ext4_get_block instead, so it's not a * problem. 
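
ext4_get_block() above keeps long-lived direct-I/O handles healthy: when credits fall to the reserve it first tries ext4_journal_extend() and only falls back to ext4_journal_restart() when the extend fails. The control flow reduced to a sketch; the callback semantics (0 on success) are an assumption for illustration:

    /* Keep a handle topped up: extend in place if possible, restart if not. */
    static int top_up_credits(int credits_left, int reserve, int batch,
                              int (*extend)(int batch),   /* 0 on success */
                              int (*restart)(int batch))  /* commits, reopens */
    {
        if (credits_left > reserve)
            return 0;                 /* plenty left, nothing to do */
        if (extend(batch) == 0)
            return 0;                 /* same transaction, more credits */
        return restart(batch);        /* could not extend: fresh transaction */
    }
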
*/ lock_buffer(bh); BUFFER_TRACE(bh, "call get_create_access"); - fatal = ext3_journal_get_create_access(handle, bh); + fatal = ext4_journal_get_create_access(handle, bh); if (!fatal && !buffer_uptodate(bh)) { memset(bh->b_data,0,inode->i_sb->s_blocksize); set_buffer_uptodate(bh); } unlock_buffer(bh); - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bh); + BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, bh); if (!fatal) fatal = err; } else { @@ -1064,12 +1064,12 @@ err: return NULL; } -struct buffer_head *ext3_bread(handle_t *handle, struct inode *inode, +struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, int block, int create, int *err) { struct buffer_head * bh; - bh = ext3_getblk(handle, inode, block, create, err); + bh = ext4_getblk(handle, inode, block, create, err); if (!bh) return bh; if (buffer_uptodate(bh)) @@ -1118,17 +1118,17 @@ static int walk_page_buffers( handle_t *handle, /* * To preserve ordering, it is essential that the hole instantiation and * the data write be encapsulated in a single transaction. We cannot - * close off a transaction and start a new one between the ext3_get_block() + * close off a transaction and start a new one between the ext4_get_block() * and the commit_write(). So doing the journal_start at the start of * prepare_write() is the right place. * - * Also, this function can nest inside ext3_writepage() -> - * block_write_full_page(). In that case, we *know* that ext3_writepage() + * Also, this function can nest inside ext4_writepage() -> + * block_write_full_page(). In that case, we *know* that ext4_writepage() * has generated enough buffer credits to do the whole page. So we won't * block on the journal in that case, which is good, because the caller may * be PF_MEMALLOC. * - * By accident, ext3 can be reentered when a transaction is open via + * By accident, ext4 can be reentered when a transaction is open via * quota file writes. 
If we were to commit the transaction while thus * reentered, there can be a deadlock - we would be holding a quota * lock, and the commit would never complete if another thread had a @@ -1145,48 +1145,48 @@ static int do_journal_get_write_access(handle_t *handle, { if (!buffer_mapped(bh) || buffer_freed(bh)) return 0; - return ext3_journal_get_write_access(handle, bh); + return ext4_journal_get_write_access(handle, bh); } -static int ext3_prepare_write(struct file *file, struct page *page, +static int ext4_prepare_write(struct file *file, struct page *page, unsigned from, unsigned to) { struct inode *inode = page->mapping->host; - int ret, needed_blocks = ext3_writepage_trans_blocks(inode); + int ret, needed_blocks = ext4_writepage_trans_blocks(inode); handle_t *handle; int retries = 0; retry: - handle = ext3_journal_start(inode, needed_blocks); + handle = ext4_journal_start(inode, needed_blocks); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto out; } - if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) - ret = nobh_prepare_write(page, from, to, ext3_get_block); + if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) + ret = nobh_prepare_write(page, from, to, ext4_get_block); else - ret = block_prepare_write(page, from, to, ext3_get_block); + ret = block_prepare_write(page, from, to, ext4_get_block); if (ret) goto prepare_write_failed; - if (ext3_should_journal_data(inode)) { + if (ext4_should_journal_data(inode)) { ret = walk_page_buffers(handle, page_buffers(page), from, to, NULL, do_journal_get_write_access); } prepare_write_failed: if (ret) - ext3_journal_stop(handle); - if (ret == -ENOSPC && ext3_should_retry_alloc(inode->i_sb, &retries)) + ext4_journal_stop(handle); + if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; out: return ret; } -int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh) +int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh) { int err = journal_dirty_data(handle, bh); if (err) - ext3_journal_abort_handle(__FUNCTION__, __FUNCTION__, + ext4_journal_abort_handle(__FUNCTION__, __FUNCTION__, bh, handle,err); return err; } @@ -1197,25 +1197,25 @@ static int commit_write_fn(handle_t *handle, struct buffer_head *bh) if (!buffer_mapped(bh) || buffer_freed(bh)) return 0; set_buffer_uptodate(bh); - return ext3_journal_dirty_metadata(handle, bh); + return ext4_journal_dirty_metadata(handle, bh); } /* * We need to pick up the new inode size which generic_commit_write gave us * `file' can be NULL - eg, when called from page_symlink(). * - * ext3 never places buffers on inode->i_mapping->private_list. metadata + * ext4 never places buffers on inode->i_mapping->private_list. metadata * buffers are managed internally. 
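
The commit_write variants just below all compute the byte where the write ends and only ever ratchet i_disksize upward, so the on-disk size never claims blocks that were not instantiated. The arithmetic in isolation; the 4 KiB page shift is an assumption:

    #define PAGE_SHIFT_SKETCH 12        /* assumed 4 KiB page size */

    /* Byte offset where a write covering [from, to) of page `index` ends. */
    static long long commit_end_pos(unsigned long index, unsigned to)
    {
        return ((long long)index << PAGE_SHIFT_SKETCH) + to;
    }

    /* i_disksize only ever grows here; shrinking is truncate's job. */
    static void ratchet_disksize(long long *i_disksize, long long new_i_size)
    {
        if (new_i_size > *i_disksize)
            *i_disksize = new_i_size;
    }
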
*/ -static int ext3_ordered_commit_write(struct file *file, struct page *page, +static int ext4_ordered_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { - handle_t *handle = ext3_journal_current_handle(); + handle_t *handle = ext4_journal_current_handle(); struct inode *inode = page->mapping->host; int ret = 0, ret2; ret = walk_page_buffers(handle, page_buffers(page), - from, to, NULL, ext3_journal_dirty_data); + from, to, NULL, ext4_journal_dirty_data); if (ret == 0) { /* @@ -1226,43 +1226,43 @@ static int ext3_ordered_commit_write(struct file *file, struct page *page, loff_t new_i_size; new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - if (new_i_size > EXT3_I(inode)->i_disksize) - EXT3_I(inode)->i_disksize = new_i_size; + if (new_i_size > EXT4_I(inode)->i_disksize) + EXT4_I(inode)->i_disksize = new_i_size; ret = generic_commit_write(file, page, from, to); } - ret2 = ext3_journal_stop(handle); + ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; return ret; } -static int ext3_writeback_commit_write(struct file *file, struct page *page, +static int ext4_writeback_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { - handle_t *handle = ext3_journal_current_handle(); + handle_t *handle = ext4_journal_current_handle(); struct inode *inode = page->mapping->host; int ret = 0, ret2; loff_t new_i_size; new_i_size = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to; - if (new_i_size > EXT3_I(inode)->i_disksize) - EXT3_I(inode)->i_disksize = new_i_size; + if (new_i_size > EXT4_I(inode)->i_disksize) + EXT4_I(inode)->i_disksize = new_i_size; - if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) + if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) ret = nobh_commit_write(file, page, from, to); else ret = generic_commit_write(file, page, from, to); - ret2 = ext3_journal_stop(handle); + ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; return ret; } -static int ext3_journalled_commit_write(struct file *file, +static int ext4_journalled_commit_write(struct file *file, struct page *page, unsigned from, unsigned to) { - handle_t *handle = ext3_journal_current_handle(); + handle_t *handle = ext4_journal_current_handle(); struct inode *inode = page->mapping->host; int ret = 0, ret2; int partial = 0; @@ -1279,14 +1279,14 @@ static int ext3_journalled_commit_write(struct file *file, SetPageUptodate(page); if (pos > inode->i_size) i_size_write(inode, pos); - EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; - if (inode->i_size > EXT3_I(inode)->i_disksize) { - EXT3_I(inode)->i_disksize = inode->i_size; - ret2 = ext3_mark_inode_dirty(handle, inode); + EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; + if (inode->i_size > EXT4_I(inode)->i_disksize) { + EXT4_I(inode)->i_disksize = inode->i_size; + ret2 = ext4_mark_inode_dirty(handle, inode); if (!ret) ret = ret2; } - ret2 = ext3_journal_stop(handle); + ret2 = ext4_journal_stop(handle); if (!ret) ret = ret2; return ret; @@ -1297,7 +1297,7 @@ static int ext3_journalled_commit_write(struct file *file, * the swapper to find the on-disk block of a specific piece of data. * * Naturally, this is dangerous if the block concerned is still in the - * journal. If somebody makes a swapfile on an ext3 data-journaling + * journal. 
If somebody makes a swapfile on an ext4 data-journaling * filesystem and enables swap, then they may get a nasty shock when the * data getting swapped to that swapfile suddenly gets overwritten by * the original zero's written out previously to the journal and @@ -1306,13 +1306,13 @@ static int ext3_journalled_commit_write(struct file *file, * So, if we see any bmap calls here on a modified, data-journaled file, * take extra steps to flush any blocks which might be in the cache. */ -static sector_t ext3_bmap(struct address_space *mapping, sector_t block) +static sector_t ext4_bmap(struct address_space *mapping, sector_t block) { struct inode *inode = mapping->host; journal_t *journal; int err; - if (EXT3_I(inode)->i_state & EXT3_STATE_JDATA) { + if (EXT4_I(inode)->i_state & EXT4_STATE_JDATA) { /* * This is a REALLY heavyweight approach, but the use of * bmap on dirty files is expected to be extremely rare: @@ -1324,15 +1324,15 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block) * in trouble if mortal users could trigger this path at * will.) * - * NB. EXT3_STATE_JDATA is not set on files other than + * NB. EXT4_STATE_JDATA is not set on files other than * regular files. If somebody wants to bmap a directory * or symlink and gets confused because the buffer * hasn't yet been flushed to disk, they deserve * everything they get. */ - EXT3_I(inode)->i_state &= ~EXT3_STATE_JDATA; - journal = EXT3_JOURNAL(inode); + EXT4_I(inode)->i_state &= ~EXT4_STATE_JDATA; + journal = EXT4_JOURNAL(inode); journal_lock_updates(journal); err = journal_flush(journal); journal_unlock_updates(journal); @@ -1341,7 +1341,7 @@ static sector_t ext3_bmap(struct address_space *mapping, sector_t block) return 0; } - return generic_block_bmap(mapping,block,ext3_get_block); + return generic_block_bmap(mapping,block,ext4_get_block); } static int bget_one(handle_t *handle, struct buffer_head *bh) @@ -1359,14 +1359,14 @@ static int bput_one(handle_t *handle, struct buffer_head *bh) static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) { if (buffer_mapped(bh)) - return ext3_journal_dirty_data(handle, bh); + return ext4_journal_dirty_data(handle, bh); return 0; } /* * Note that we always start a transaction even if we're not journalling * data. This is to preserve ordering: any hole instantiation within - * __block_write_full_page -> ext3_get_block() should be journalled + * __block_write_full_page -> ext4_get_block() should be journalled * along with the data so we don't crash and then get metadata which * refers to old data. * @@ -1374,14 +1374,14 @@ static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) * * Problem: * - * ext3_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> - * ext3_writepage() + * ext4_writepage() -> kmalloc() -> __alloc_pages() -> page_launder() -> + * ext4_writepage() * * Similar for: * - * ext3_file_write() -> generic_file_write() -> __alloc_pages() -> ... + * ext4_file_write() -> generic_file_write() -> __alloc_pages() -> ... * - * Same applies to ext3_get_block(). We will deadlock on various things like + * Same applies to ext4_get_block(). We will deadlock on various things like * lock_journal and i_truncate_mutex. * * Setting PF_MEMALLOC here doesn't work - too many internal memory @@ -1415,7 +1415,7 @@ static int journal_dirty_data_fn(handle_t *handle, struct buffer_head *bh) * AKPM2: if all the page's buffers are mapped to disk and !data=journal, * we don't need to open a transaction here. 
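The heavyweight journal_flush() in ext4_bmap() above is normally reached from userspace through the FIBMAP ioctl, which maps a logical file block to a physical block number. A small sketch of that caller (FIBMAP requires CAP_SYS_RAWIO; on a data=journal file this is exactly the path the comment warns about):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>           /* FIBMAP */

int main(int argc, char **argv)
{
        int fd, block = 0;      /* logical block 0; in/out argument */

        if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
                return 1;
        if (ioctl(fd, FIBMAP, &block) < 0) {
                perror("FIBMAP");
                return 1;
        }
        printf("logical block 0 -> physical block %d\n", block);
        close(fd);
        return 0;
}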
*/ -static int ext3_ordered_writepage(struct page *page, +static int ext4_ordered_writepage(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; @@ -1430,10 +1430,10 @@ static int ext3_ordered_writepage(struct page *page, * We give up here if we're reentered, because it might be for a * different filesystem. */ - if (ext3_journal_current_handle()) + if (ext4_journal_current_handle()) goto out_fail; - handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); + handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); if (IS_ERR(handle)) { ret = PTR_ERR(handle); @@ -1448,7 +1448,7 @@ static int ext3_ordered_writepage(struct page *page, walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL, bget_one); - ret = block_write_full_page(page, ext3_get_block, wbc); + ret = block_write_full_page(page, ext4_get_block, wbc); /* * The page can become unlocked at any point now, and @@ -1470,7 +1470,7 @@ static int ext3_ordered_writepage(struct page *page, } walk_page_buffers(handle, page_bufs, 0, PAGE_CACHE_SIZE, NULL, bput_one); - err = ext3_journal_stop(handle); + err = ext4_journal_stop(handle); if (!ret) ret = err; return ret; @@ -1481,7 +1481,7 @@ out_fail: return ret; } -static int ext3_writeback_writepage(struct page *page, +static int ext4_writeback_writepage(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; @@ -1489,21 +1489,21 @@ static int ext3_writeback_writepage(struct page *page, int ret = 0; int err; - if (ext3_journal_current_handle()) + if (ext4_journal_current_handle()) goto out_fail; - handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); + handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto out_fail; } - if (test_opt(inode->i_sb, NOBH) && ext3_should_writeback_data(inode)) - ret = nobh_writepage(page, ext3_get_block, wbc); + if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) + ret = nobh_writepage(page, ext4_get_block, wbc); else - ret = block_write_full_page(page, ext3_get_block, wbc); + ret = block_write_full_page(page, ext4_get_block, wbc); - err = ext3_journal_stop(handle); + err = ext4_journal_stop(handle); if (!ret) ret = err; return ret; @@ -1514,7 +1514,7 @@ out_fail: return ret; } -static int ext3_journalled_writepage(struct page *page, +static int ext4_journalled_writepage(struct page *page, struct writeback_control *wbc) { struct inode *inode = page->mapping->host; @@ -1522,10 +1522,10 @@ static int ext3_journalled_writepage(struct page *page, int ret = 0; int err; - if (ext3_journal_current_handle()) + if (ext4_journal_current_handle()) goto no_write; - handle = ext3_journal_start(inode, ext3_writepage_trans_blocks(inode)); + handle = ext4_journal_start(inode, ext4_writepage_trans_blocks(inode)); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto no_write; @@ -1538,9 +1538,9 @@ static int ext3_journalled_writepage(struct page *page, */ ClearPageChecked(page); ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE, - ext3_get_block); + ext4_get_block); if (ret != 0) { - ext3_journal_stop(handle); + ext4_journal_stop(handle); goto out_unlock; } ret = walk_page_buffers(handle, page_buffers(page), 0, @@ -1550,7 +1550,7 @@ static int ext3_journalled_writepage(struct page *page, PAGE_CACHE_SIZE, NULL, commit_write_fn); if (ret == 0) ret = err; - EXT3_I(inode)->i_state |= EXT3_STATE_JDATA; + EXT4_I(inode)->i_state |= EXT4_STATE_JDATA; unlock_page(page); } 
else { /* @@ -1558,9 +1558,9 @@ static int ext3_journalled_writepage(struct page *page, * really know unless we go poke around in the buffer_heads. * But block_write_full_page will do the right thing. */ - ret = block_write_full_page(page, ext3_get_block, wbc); + ret = block_write_full_page(page, ext4_get_block, wbc); } - err = ext3_journal_stop(handle); + err = ext4_journal_stop(handle); if (!ret) ret = err; out: @@ -1573,21 +1573,21 @@ out_unlock: goto out; } -static int ext3_readpage(struct file *file, struct page *page) +static int ext4_readpage(struct file *file, struct page *page) { - return mpage_readpage(page, ext3_get_block); + return mpage_readpage(page, ext4_get_block); } static int -ext3_readpages(struct file *file, struct address_space *mapping, +ext4_readpages(struct file *file, struct address_space *mapping, struct list_head *pages, unsigned nr_pages) { - return mpage_readpages(mapping, pages, nr_pages, ext3_get_block); + return mpage_readpages(mapping, pages, nr_pages, ext4_get_block); } -static void ext3_invalidatepage(struct page *page, unsigned long offset) +static void ext4_invalidatepage(struct page *page, unsigned long offset) { - journal_t *journal = EXT3_JOURNAL(page->mapping->host); + journal_t *journal = EXT4_JOURNAL(page->mapping->host); /* * If it's a full truncate we just forget about the pending dirtying @@ -1598,9 +1598,9 @@ static void ext3_invalidatepage(struct page *page, unsigned long offset) journal_invalidatepage(journal, page, offset); } -static int ext3_releasepage(struct page *page, gfp_t wait) +static int ext4_releasepage(struct page *page, gfp_t wait) { - journal_t *journal = EXT3_JOURNAL(page->mapping->host); + journal_t *journal = EXT4_JOURNAL(page->mapping->host); WARN_ON(PageChecked(page)); if (!page_has_buffers(page)) @@ -1616,13 +1616,13 @@ static int ext3_releasepage(struct page *page, gfp_t wait) * If the O_DIRECT write is intantiating holes inside i_size and the machine * crashes then stale disk data _may_ be exposed inside the file. 
*/ -static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, +static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, loff_t offset, unsigned long nr_segs) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; - struct ext3_inode_info *ei = EXT3_I(inode); + struct ext4_inode_info *ei = EXT4_I(inode); handle_t *handle = NULL; ssize_t ret; int orphan = 0; @@ -1631,13 +1631,13 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, if (rw == WRITE) { loff_t final_size = offset + count; - handle = ext3_journal_start(inode, DIO_CREDITS); + handle = ext4_journal_start(inode, DIO_CREDITS); if (IS_ERR(handle)) { ret = PTR_ERR(handle); goto out; } if (final_size > inode->i_size) { - ret = ext3_orphan_add(handle, inode); + ret = ext4_orphan_add(handle, inode); if (ret) goto out_stop; orphan = 1; @@ -1647,10 +1647,10 @@ static ssize_t ext3_direct_IO(int rw, struct kiocb *iocb, ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, offset, nr_segs, - ext3_get_block, NULL); + ext4_get_block, NULL); /* - * Reacquire the handle: ext3_get_block() can restart the transaction + * Reacquire the handle: ext4_get_block() can restart the transaction */ handle = journal_current_handle(); @@ -1659,7 +1659,7 @@ out_stop: int err; if (orphan && inode->i_nlink) - ext3_orphan_del(handle, inode); + ext4_orphan_del(handle, inode); if (orphan && ret > 0) { loff_t end = offset + ret; if (end > inode->i_size) { @@ -1669,13 +1669,13 @@ out_stop: * We're going to return a positive `ret' * here due to non-zero-length I/O, so there's * no way of reporting error returns from - * ext3_mark_inode_dirty() to userspace. So + * ext4_mark_inode_dirty() to userspace. So * ignore it. */ - ext3_mark_inode_dirty(handle, inode); + ext4_mark_inode_dirty(handle, inode); } } - err = ext3_journal_stop(handle); + err = ext4_journal_stop(handle); if (ret == 0) ret = err; } @@ -1684,7 +1684,7 @@ out: } /* - * Pages can be marked dirty completely asynchronously from ext3's journalling + * Pages can be marked dirty completely asynchronously from ext4's journalling * activity. By filemap_sync_pte(), try_to_unmap_one(), etc. We cannot do * much here because ->set_page_dirty is called under VFS locks. The page is * not necessarily locked. @@ -1696,73 +1696,73 @@ out: * So what we do is to mark the page "pending dirty" and next time writepage * is called, propagate that into the buffers appropriately. 
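The orphan-list protocol in ext4_direct_IO() above is what keeps an extending O_DIRECT write from exposing stale blocks after a crash. A trimmed skeleton of the pattern; issue_direct_io() is a hypothetical stand-in for blockdev_direct_IO(), and most error paths are elided:

static ssize_t dio_write_extending(handle_t *handle, struct inode *inode,
                                   loff_t offset, size_t count)
{
        loff_t final_size = offset + count;
        int orphan = 0;
        ssize_t ret;

        if (final_size > inode->i_size) {
                /* 1. put the inode on the on-disk orphan list first */
                if (ext4_orphan_add(handle, inode))
                        return -EIO;            /* simplified */
                orphan = 1;
        }

        /* 2. instantiate blocks and perform the I/O */
        ret = issue_direct_io(inode, offset, count);    /* hypothetical */

        /* 3. done: drop the orphan record. A crash between 1 and 3 makes
         * ext4_orphan_cleanup() truncate back to the committed i_size,
         * so uninitialised blocks past EOF are never exposed. */
        if (orphan && inode->i_nlink)
                ext4_orphan_del(handle, inode);
        return ret;
}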
*/ -static int ext3_journalled_set_page_dirty(struct page *page) +static int ext4_journalled_set_page_dirty(struct page *page) { SetPageChecked(page); return __set_page_dirty_nobuffers(page); } -static const struct address_space_operations ext3_ordered_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_ordered_writepage, +static const struct address_space_operations ext4_ordered_aops = { + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_ordered_writepage, .sync_page = block_sync_page, - .prepare_write = ext3_prepare_write, - .commit_write = ext3_ordered_commit_write, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, - .direct_IO = ext3_direct_IO, + .prepare_write = ext4_prepare_write, + .commit_write = ext4_ordered_commit_write, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, + .direct_IO = ext4_direct_IO, .migratepage = buffer_migrate_page, }; -static const struct address_space_operations ext3_writeback_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_writeback_writepage, +static const struct address_space_operations ext4_writeback_aops = { + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_writeback_writepage, .sync_page = block_sync_page, - .prepare_write = ext3_prepare_write, - .commit_write = ext3_writeback_commit_write, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, - .direct_IO = ext3_direct_IO, + .prepare_write = ext4_prepare_write, + .commit_write = ext4_writeback_commit_write, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, + .direct_IO = ext4_direct_IO, .migratepage = buffer_migrate_page, }; -static const struct address_space_operations ext3_journalled_aops = { - .readpage = ext3_readpage, - .readpages = ext3_readpages, - .writepage = ext3_journalled_writepage, +static const struct address_space_operations ext4_journalled_aops = { + .readpage = ext4_readpage, + .readpages = ext4_readpages, + .writepage = ext4_journalled_writepage, .sync_page = block_sync_page, - .prepare_write = ext3_prepare_write, - .commit_write = ext3_journalled_commit_write, - .set_page_dirty = ext3_journalled_set_page_dirty, - .bmap = ext3_bmap, - .invalidatepage = ext3_invalidatepage, - .releasepage = ext3_releasepage, + .prepare_write = ext4_prepare_write, + .commit_write = ext4_journalled_commit_write, + .set_page_dirty = ext4_journalled_set_page_dirty, + .bmap = ext4_bmap, + .invalidatepage = ext4_invalidatepage, + .releasepage = ext4_releasepage, }; -void ext3_set_aops(struct inode *inode) +void ext4_set_aops(struct inode *inode) { - if (ext3_should_order_data(inode)) - inode->i_mapping->a_ops = &ext3_ordered_aops; - else if (ext3_should_writeback_data(inode)) - inode->i_mapping->a_ops = &ext3_writeback_aops; + if (ext4_should_order_data(inode)) + inode->i_mapping->a_ops = &ext4_ordered_aops; + else if (ext4_should_writeback_data(inode)) + inode->i_mapping->a_ops = &ext4_writeback_aops; else - inode->i_mapping->a_ops = &ext3_journalled_aops; + inode->i_mapping->a_ops = &ext4_journalled_aops; } /* - * ext3_block_truncate_page() zeroes out a mapping from file offset `from' + * ext4_block_truncate_page() zeroes out a mapping from file offset `from' * up to the end of the block which corresponds to `from'. * This required during truncate. 
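A standalone check of the offset arithmetic that ext4_block_truncate_page() below performs, assuming 4 KiB pages and a 1 KiB filesystem block size:

#include <stdio.h>

int main(void)
{
        unsigned long from = 5000;      /* the new i_size */
        unsigned blocksize = 1024;
        unsigned offset = from & (4096 - 1);                    /* 904 */
        unsigned length = blocksize - (from & (blocksize - 1)); /* 120 */

        printf("zero %u byte(s) at page offset %u\n", length, offset);
        return 0;
}

So for a file truncated to 5000 bytes, the last 120 bytes of the block containing offset 5000 are zeroed, which is what keeps a later extension of the file from resurrecting old data.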
We need to physically zero the tail end * of that block so it doesn't yield old data if the file is later grown. */ -static int ext3_block_truncate_page(handle_t *handle, struct page *page, +static int ext4_block_truncate_page(handle_t *handle, struct page *page, struct address_space *mapping, loff_t from) { - ext3_fsblk_t index = from >> PAGE_CACHE_SHIFT; + ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; unsigned offset = from & (PAGE_CACHE_SIZE-1); unsigned blocksize, iblock, length, pos; struct inode *inode = mapping->host; @@ -1779,7 +1779,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, * read-in the page - otherwise we create buffers to do the IO. */ if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && - ext3_should_writeback_data(inode) && PageUptodate(page)) { + ext4_should_writeback_data(inode) && PageUptodate(page)) { kaddr = kmap_atomic(page, KM_USER0); memset(kaddr + offset, 0, length); flush_dcache_page(page); @@ -1808,7 +1808,7 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, if (!buffer_mapped(bh)) { BUFFER_TRACE(bh, "unmapped"); - ext3_get_block(inode, iblock, bh, 0); + ext4_get_block(inode, iblock, bh, 0); /* unmapped? It's a hole - nothing to do */ if (!buffer_mapped(bh)) { BUFFER_TRACE(bh, "still unmapped"); @@ -1829,9 +1829,9 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, goto unlock; } - if (ext3_should_journal_data(inode)) { + if (ext4_should_journal_data(inode)) { BUFFER_TRACE(bh, "get write access"); - err = ext3_journal_get_write_access(handle, bh); + err = ext4_journal_get_write_access(handle, bh); if (err) goto unlock; } @@ -1844,11 +1844,11 @@ static int ext3_block_truncate_page(handle_t *handle, struct page *page, BUFFER_TRACE(bh, "zeroed end of block"); err = 0; - if (ext3_should_journal_data(inode)) { - err = ext3_journal_dirty_metadata(handle, bh); + if (ext4_should_journal_data(inode)) { + err = ext4_journal_dirty_metadata(handle, bh); } else { - if (ext3_should_order_data(inode)) - err = ext3_journal_dirty_data(handle, bh); + if (ext4_should_order_data(inode)) + err = ext4_journal_dirty_data(handle, bh); mark_buffer_dirty(bh); } @@ -1872,14 +1872,14 @@ static inline int all_zeroes(__le32 *p, __le32 *q) } /** - * ext3_find_shared - find the indirect blocks for partial truncation. + * ext4_find_shared - find the indirect blocks for partial truncation. * @inode: inode in question * @depth: depth of the affected branch - * @offsets: offsets of pointers in that branch (see ext3_block_to_path) + * @offsets: offsets of pointers in that branch (see ext4_block_to_path) * @chain: place to store the pointers to partial indirect blocks * @top: place to the (detached) top of branch * - * This is a helper function used by ext3_truncate(). + * This is a helper function used by ext4_truncate(). * * When we do truncate() we may have to clean the ends of several * indirect blocks but leave the blocks themselves alive. Block is @@ -1887,7 +1887,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q) * from it (and it is on the path to the first completely truncated * data block, indeed). We have to free the top of that path along * with everything to the right of the path. Since no allocation - * past the truncation point is possible until ext3_truncate() + * past the truncation point is possible until ext4_truncate() * finishes, we may safely do the latter, but top of branch may * require special attention - pageout below the truncation point * might try to populate it. 
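The @offsets array mentioned above comes from ext4_block_to_path(). A standalone rendering of its classification logic for the classic direct/indirect scheme, assuming 4 KiB blocks (so 1024 block pointers per indirect block, and i_data slots 12/13/14 as the single, double and triple indirect roots):

#include <stdio.h>

int main(void)
{
        const long direct = 12, apb = 1024;     /* addresses per block */
        long block = 5000;                      /* logical file block */

        if (block < direct)
                printf("direct: offsets = { %ld }\n", block);
        else if ((block -= direct) < apb)
                printf("indirect: offsets = { 12, %ld }\n", block);
        else if ((block -= apb) < apb * apb)
                printf("double: offsets = { 13, %ld, %ld }\n",
                       block / apb, block % apb);
        else {
                block -= apb * apb;
                printf("triple: offsets = { 14, %ld, %ld, %ld }\n",
                       block / (apb * apb), (block / apb) % apb,
                       block % apb);
        }
        return 0;
}

Logical block 5000 lands in the double-indirect tree as { 13, 3, 892 }.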
@@ -1906,7 +1906,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q) * c) free the subtrees growing from the inode past the @chain[0]. * (no partially truncated stuff there). */ -static Indirect *ext3_find_shared(struct inode *inode, int depth, +static Indirect *ext4_find_shared(struct inode *inode, int depth, int offsets[4], Indirect chain[4], __le32 *top) { Indirect *partial, *p; @@ -1916,7 +1916,7 @@ static Indirect *ext3_find_shared(struct inode *inode, int depth, /* Make k index the deepest non-null offest + 1 */ for (k = depth; k > 1 && !offsets[k-1]; k--) ; - partial = ext3_get_branch(inode, k, offsets, chain, &err); + partial = ext4_get_branch(inode, k, offsets, chain, &err); /* Writer: pointers */ if (!partial) partial = chain + k-1; @@ -1939,7 +1939,7 @@ static Indirect *ext3_find_shared(struct inode *inode, int depth, p->p--; } else { *top = *p->p; - /* Nope, don't do this in ext3. Must leave the tree intact */ + /* Nope, don't do this in ext4. Must leave the tree intact */ #if 0 *p->p = 0; #endif @@ -1962,21 +1962,21 @@ no_top: * We release `count' blocks on disk, but (last - first) may be greater * than `count' because there can be holes in there. */ -static void ext3_clear_blocks(handle_t *handle, struct inode *inode, - struct buffer_head *bh, ext3_fsblk_t block_to_free, +static void ext4_clear_blocks(handle_t *handle, struct inode *inode, + struct buffer_head *bh, ext4_fsblk_t block_to_free, unsigned long count, __le32 *first, __le32 *last) { __le32 *p; if (try_to_extend_transaction(handle, inode)) { if (bh) { - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, bh); + BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); + ext4_journal_dirty_metadata(handle, bh); } - ext3_mark_inode_dirty(handle, inode); - ext3_journal_test_restart(handle, inode); + ext4_mark_inode_dirty(handle, inode); + ext4_journal_test_restart(handle, inode); if (bh) { BUFFER_TRACE(bh, "retaking write access"); - ext3_journal_get_write_access(handle, bh); + ext4_journal_get_write_access(handle, bh); } } @@ -1995,15 +1995,15 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode, *p = 0; bh = sb_find_get_block(inode->i_sb, nr); - ext3_forget(handle, 0, inode, bh, nr); + ext4_forget(handle, 0, inode, bh, nr); } } - ext3_free_blocks(handle, inode, block_to_free, count); + ext4_free_blocks(handle, inode, block_to_free, count); } /** - * ext3_free_data - free a list of data blocks + * ext4_free_data - free a list of data blocks * @handle: handle for this transaction * @inode: inode we are dealing with * @this_bh: indirect buffer_head which contains *@first and *@last @@ -2021,23 +2021,23 @@ static void ext3_clear_blocks(handle_t *handle, struct inode *inode, * @this_bh will be %NULL if @first and @last point into the inode's direct * block pointers. 
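The loop in ext4_free_data() below batches adjacent physical blocks into a single free operation rather than freeing them one at a time; holes (zero pointers) are simply skipped. A standalone rendering of just that coalescing logic:

#include <stdio.h>

static void free_run(unsigned long start, unsigned long count)
{
        printf("free %lu block(s) starting at %lu\n", count, start);
}

int main(void)
{
        unsigned long blocks[] = { 100, 101, 102, 0, 200, 201, 150 };
        unsigned long start = 0, count = 0;
        unsigned int i;

        for (i = 0; i < sizeof(blocks) / sizeof(blocks[0]); i++) {
                unsigned long nr = blocks[i];

                if (!nr)
                        continue;               /* a hole: nothing here */
                if (count && nr == start + count) {
                        count++;                /* extends current run */
                } else {
                        if (count)
                                free_run(start, count);
                        start = nr;
                        count = 1;
                }
        }
        if (count)
                free_run(start, count);         /* flush the last run */
        return 0;
}

This prints three runs (100-102, 200-201, then 150), mirroring how block_to_free, count and block_to_free_p advance in the real function.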
*/ -static void ext3_free_data(handle_t *handle, struct inode *inode, +static void ext4_free_data(handle_t *handle, struct inode *inode, struct buffer_head *this_bh, __le32 *first, __le32 *last) { - ext3_fsblk_t block_to_free = 0; /* Starting block # of a run */ + ext4_fsblk_t block_to_free = 0; /* Starting block # of a run */ unsigned long count = 0; /* Number of blocks in the run */ __le32 *block_to_free_p = NULL; /* Pointer into inode/ind corresponding to block_to_free */ - ext3_fsblk_t nr; /* Current block # */ + ext4_fsblk_t nr; /* Current block # */ __le32 *p; /* Pointer into inode/ind for current block */ int err; if (this_bh) { /* For indirect block */ BUFFER_TRACE(this_bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, this_bh); + err = ext4_journal_get_write_access(handle, this_bh); /* Important: if we can't update the indirect pointers * to the blocks, we can't free them. */ if (err) @@ -2055,7 +2055,7 @@ static void ext3_free_data(handle_t *handle, struct inode *inode, } else if (nr == block_to_free + count) { count++; } else { - ext3_clear_blocks(handle, inode, this_bh, + ext4_clear_blocks(handle, inode, this_bh, block_to_free, count, block_to_free_p, p); block_to_free = nr; @@ -2066,17 +2066,17 @@ static void ext3_free_data(handle_t *handle, struct inode *inode, } if (count > 0) - ext3_clear_blocks(handle, inode, this_bh, block_to_free, + ext4_clear_blocks(handle, inode, this_bh, block_to_free, count, block_to_free_p, p); if (this_bh) { - BUFFER_TRACE(this_bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, this_bh); + BUFFER_TRACE(this_bh, "call ext4_journal_dirty_metadata"); + ext4_journal_dirty_metadata(handle, this_bh); } } /** - * ext3_free_branches - free an array of branches + * ext4_free_branches - free an array of branches * @handle: JBD handle for this transaction * @inode: inode we are dealing with * @parent_bh: the buffer_head which contains *@first and *@last @@ -2088,11 +2088,11 @@ static void ext3_free_data(handle_t *handle, struct inode *inode, * stored as little-endian 32-bit) and updating @inode->i_blocks * appropriately. */ -static void ext3_free_branches(handle_t *handle, struct inode *inode, +static void ext4_free_branches(handle_t *handle, struct inode *inode, struct buffer_head *parent_bh, __le32 *first, __le32 *last, int depth) { - ext3_fsblk_t nr; + ext4_fsblk_t nr; __le32 *p; if (is_handle_aborted(handle)) @@ -2100,7 +2100,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, if (depth--) { struct buffer_head *bh; - int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); + int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); p = last; while (--p >= first) { nr = le32_to_cpu(*p); @@ -2115,7 +2115,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, * (should be rare). */ if (!bh) { - ext3_error(inode->i_sb, "ext3_free_branches", + ext4_error(inode->i_sb, "ext4_free_branches", "Read failure, inode=%lu, block="E3FSBLK, inode->i_ino, nr); continue; @@ -2123,7 +2123,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, /* This zaps the entire block. Bottom up. */ BUFFER_TRACE(bh, "free child branches"); - ext3_free_branches(handle, inode, bh, + ext4_free_branches(handle, inode, bh, (__le32*)bh->b_data, (__le32*)bh->b_data + addr_per_block, depth); @@ -2138,7 +2138,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, * transaction. 
But if it's part of the committing * transaction then journal_forget() will simply * brelse() it. That means that if the underlying - * block is reallocated in ext3_get_block(), + * block is reallocated in ext4_get_block(), * unmap_underlying_metadata() will find this block * and will try to get rid of it. damn, damn. * @@ -2147,7 +2147,7 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, * revoke records must be emitted *before* clearing * this block's bit in the bitmaps. */ - ext3_forget(handle, 1, inode, bh, bh->b_blocknr); + ext4_forget(handle, 1, inode, bh, bh->b_blocknr); /* * Everything below this this pointer has been @@ -2168,11 +2168,11 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, if (is_handle_aborted(handle)) return; if (try_to_extend_transaction(handle, inode)) { - ext3_mark_inode_dirty(handle, inode); - ext3_journal_test_restart(handle, inode); + ext4_mark_inode_dirty(handle, inode); + ext4_journal_test_restart(handle, inode); } - ext3_free_blocks(handle, inode, nr, 1); + ext4_free_blocks(handle, inode, nr, 1); if (parent_bh) { /* @@ -2180,12 +2180,12 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, * pointed to by an indirect block: journal it */ BUFFER_TRACE(parent_bh, "get_write_access"); - if (!ext3_journal_get_write_access(handle, + if (!ext4_journal_get_write_access(handle, parent_bh)){ *p = 0; BUFFER_TRACE(parent_bh, - "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, + "call ext4_journal_dirty_metadata"); + ext4_journal_dirty_metadata(handle, parent_bh); } } @@ -2193,15 +2193,15 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, } else { /* We have reached the bottom of the tree. */ BUFFER_TRACE(parent_bh, "free data blocks"); - ext3_free_data(handle, inode, parent_bh, first, last); + ext4_free_data(handle, inode, parent_bh, first, last); } } /* - * ext3_truncate() + * ext4_truncate() * - * We block out ext3_get_block() block instantiations across the entire - * transaction, and VFS/VM ensures that ext3_truncate() cannot run + * We block out ext4_get_block() block instantiations across the entire + * transaction, and VFS/VM ensures that ext4_truncate() cannot run * simultaneously on behalf of the same inode. * * As we work through the truncate and commmit bits of it to the journal there @@ -2218,19 +2218,19 @@ static void ext3_free_branches(handle_t *handle, struct inode *inode, * truncate against the orphan inode list. * * The committed inode has the new, desired i_size (which is the same as - * i_disksize in this case). After a crash, ext3_orphan_cleanup() will see + * i_disksize in this case). After a crash, ext4_orphan_cleanup() will see * that this inode's truncate did not complete and it will again call - * ext3_truncate() to have another go. So there will be instantiated blocks - * to the right of the truncation point in a crashed ext3 filesystem. But + * ext4_truncate() to have another go. So there will be instantiated blocks + * to the right of the truncation point in a crashed ext4 filesystem. But * that's fine - as long as they are linked from the inode, the post-crash - * ext3_truncate() run will find them and release them. + * ext4_truncate() run will find them and release them. 
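Both ext4_clear_blocks() and ext4_free_branches() above lean on the same extend-or-restart idiom, which is what makes the multi-transaction truncate described here safe: whenever the handle runs low on credits and cannot be extended, the inode is dirtied and the transaction restarted, and the orphan entry keeps every intermediate state crash-recoverable. A kernel-context skeleton of that idiom:

static void truncate_checkpoint(handle_t *handle, struct inode *inode)
{
        /* try_to_extend_transaction() returns nonzero when credits are
         * low and the handle could not be extended in place */
        if (try_to_extend_transaction(handle, inode)) {
                ext4_mark_inode_dirty(handle, inode);
                ext4_journal_test_restart(handle, inode);
        }
}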
*/ -void ext3_truncate(struct inode *inode) +void ext4_truncate(struct inode *inode) { handle_t *handle; - struct ext3_inode_info *ei = EXT3_I(inode); + struct ext4_inode_info *ei = EXT4_I(inode); __le32 *i_data = ei->i_data; - int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb); + int addr_per_block = EXT4_ADDR_PER_BLOCK(inode->i_sb); struct address_space *mapping = inode->i_mapping; int offsets[4]; Indirect chain[4]; @@ -2244,7 +2244,7 @@ void ext3_truncate(struct inode *inode) if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) return; - if (ext3_inode_is_fast_symlink(inode)) + if (ext4_inode_is_fast_symlink(inode)) return; if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) return; @@ -2275,12 +2275,12 @@ void ext3_truncate(struct inode *inode) } last_block = (inode->i_size + blocksize-1) - >> EXT3_BLOCK_SIZE_BITS(inode->i_sb); + >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); if (page) - ext3_block_truncate_page(handle, page, mapping, inode->i_size); + ext4_block_truncate_page(handle, page, mapping, inode->i_size); - n = ext3_block_to_path(inode, last_block, offsets, NULL); + n = ext4_block_to_path(inode, last_block, offsets, NULL); if (n == 0) goto out_stop; /* error */ @@ -2293,7 +2293,7 @@ void ext3_truncate(struct inode *inode) * Implication: the file must always be in a sane, consistent * truncatable state while each transaction commits. */ - if (ext3_orphan_add(handle, inode)) + if (ext4_orphan_add(handle, inode)) goto out_stop; /* @@ -2301,28 +2301,28 @@ void ext3_truncate(struct inode *inode) * occurs before the truncate completes, so it is now safe to propagate * the new, shorter inode size (held for now in i_size) into the * on-disk inode. We do this via i_disksize, which is the value which - * ext3 *really* writes onto the disk inode. + * ext4 *really* writes onto the disk inode. */ ei->i_disksize = inode->i_size; /* - * From here we block out all ext3_get_block() callers who want to + * From here we block out all ext4_get_block() callers who want to * modify the block allocation tree. 
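A standalone check of the last_block computation above: it is ceil(i_size / blocksize), the first logical block lying wholly beyond the new size, shown here assuming 4 KiB blocks:

#include <stdio.h>

int main(void)
{
        unsigned long long i_size = 10000;      /* the new size */
        unsigned bits = 12;                     /* EXT4_BLOCK_SIZE_BITS */
        unsigned blocksize = 1u << bits;
        unsigned long last_block = (i_size + blocksize - 1) >> bits;

        /* blocks 0..2 still hold data; whole-block freeing starts at 3,
         * and the partial block 2 is handled by block_truncate_page */
        printf("last_block = %lu\n", last_block);       /* prints 3 */
        return 0;
}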
*/ mutex_lock(&ei->truncate_mutex); if (n == 1) { /* direct blocks */ - ext3_free_data(handle, inode, NULL, i_data+offsets[0], - i_data + EXT3_NDIR_BLOCKS); + ext4_free_data(handle, inode, NULL, i_data+offsets[0], + i_data + EXT4_NDIR_BLOCKS); goto do_indirects; } - partial = ext3_find_shared(inode, n, offsets, chain, &nr); + partial = ext4_find_shared(inode, n, offsets, chain, &nr); /* Kill the top of shared branch (not detached) */ if (nr) { if (partial == chain) { /* Shared branch grows from the inode */ - ext3_free_branches(handle, inode, NULL, + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, (chain+n-1) - partial); *partial->p = 0; /* @@ -2332,14 +2332,14 @@ void ext3_truncate(struct inode *inode) } else { /* Shared branch grows from an indirect block */ BUFFER_TRACE(partial->bh, "get_write_access"); - ext3_free_branches(handle, inode, partial->bh, + ext4_free_branches(handle, inode, partial->bh, partial->p, partial->p+1, (chain+n-1) - partial); } } /* Clear the ends of indirect blocks on the shared branch */ while (partial > chain) { - ext3_free_branches(handle, inode, partial->bh, partial->p + 1, + ext4_free_branches(handle, inode, partial->bh, partial->p + 1, (__le32*)partial->bh->b_data+addr_per_block, (chain+n-1) - partial); BUFFER_TRACE(partial->bh, "call brelse"); @@ -2350,32 +2350,32 @@ do_indirects: /* Kill the remaining (whole) subtrees */ switch (offsets[0]) { default: - nr = i_data[EXT3_IND_BLOCK]; + nr = i_data[EXT4_IND_BLOCK]; if (nr) { - ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 1); - i_data[EXT3_IND_BLOCK] = 0; + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 1); + i_data[EXT4_IND_BLOCK] = 0; } - case EXT3_IND_BLOCK: - nr = i_data[EXT3_DIND_BLOCK]; + case EXT4_IND_BLOCK: + nr = i_data[EXT4_DIND_BLOCK]; if (nr) { - ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 2); - i_data[EXT3_DIND_BLOCK] = 0; + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 2); + i_data[EXT4_DIND_BLOCK] = 0; } - case EXT3_DIND_BLOCK: - nr = i_data[EXT3_TIND_BLOCK]; + case EXT4_DIND_BLOCK: + nr = i_data[EXT4_TIND_BLOCK]; if (nr) { - ext3_free_branches(handle, inode, NULL, &nr, &nr+1, 3); - i_data[EXT3_TIND_BLOCK] = 0; + ext4_free_branches(handle, inode, NULL, &nr, &nr+1, 3); + i_data[EXT4_TIND_BLOCK] = 0; } - case EXT3_TIND_BLOCK: + case EXT4_TIND_BLOCK: ; } - ext3_discard_reservation(inode); + ext4_discard_reservation(inode); mutex_unlock(&ei->truncate_mutex); inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC; - ext3_mark_inode_dirty(handle, inode); + ext4_mark_inode_dirty(handle, inode); /* * In a multi-transaction truncate, we only make the final transaction @@ -2388,25 +2388,25 @@ out_stop: * If this was a simple ftruncate(), and the file will remain alive * then we need to clear up the orphan record which we created above. * However, if this was a real unlink then we were called by - * ext3_delete_inode(), and we allow that function to clean up the + * ext4_delete_inode(), and we allow that function to clean up the * orphan info for us. 
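The do_indirects switch above depends on deliberate case fall-through: entering at default (truncation point in the direct blocks) frees all three indirect trees, entering at EXT4_IND_BLOCK frees only the double and triple trees, and so on. An equivalent straight-line rendering, where maybe_free_tree() is a hypothetical helper standing in for the nr = i_data[x] / ext4_free_branches() / i_data[x] = 0 triple:

/* i_data slots 12, 13, 14 are EXT4_IND_BLOCK, EXT4_DIND_BLOCK and
 * EXT4_TIND_BLOCK respectively */
if (offsets[0] < 12)                    /* the switch's default case */
        maybe_free_tree(handle, inode, i_data, 12, 1);
if (offsets[0] <= 12)                   /* case EXT4_IND_BLOCK */
        maybe_free_tree(handle, inode, i_data, 13, 2);
if (offsets[0] <= 13)                   /* case EXT4_DIND_BLOCK */
        maybe_free_tree(handle, inode, i_data, 14, 3);
/* offsets[0] == 14 (EXT4_TIND_BLOCK): nothing whole left to free */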
*/ if (inode->i_nlink) - ext3_orphan_del(handle, inode); + ext4_orphan_del(handle, inode); - ext3_journal_stop(handle); + ext4_journal_stop(handle); } -static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, - unsigned long ino, struct ext3_iloc *iloc) +static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, + unsigned long ino, struct ext4_iloc *iloc) { unsigned long desc, group_desc, block_group; unsigned long offset; - ext3_fsblk_t block; + ext4_fsblk_t block; struct buffer_head *bh; - struct ext3_group_desc * gdp; + struct ext4_group_desc * gdp; - if (!ext3_valid_inum(sb, ino)) { + if (!ext4_valid_inum(sb, ino)) { /* * This error is already checked for in namei.c unless we are * looking at an NFS filehandle, in which case no error @@ -2415,54 +2415,54 @@ static ext3_fsblk_t ext3_get_inode_block(struct super_block *sb, return 0; } - block_group = (ino - 1) / EXT3_INODES_PER_GROUP(sb); - if (block_group >= EXT3_SB(sb)->s_groups_count) { - ext3_error(sb,"ext3_get_inode_block","group >= groups count"); + block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb); + if (block_group >= EXT4_SB(sb)->s_groups_count) { + ext4_error(sb,"ext4_get_inode_block","group >= groups count"); return 0; } smp_rmb(); - group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(sb); - desc = block_group & (EXT3_DESC_PER_BLOCK(sb) - 1); - bh = EXT3_SB(sb)->s_group_desc[group_desc]; + group_desc = block_group >> EXT4_DESC_PER_BLOCK_BITS(sb); + desc = block_group & (EXT4_DESC_PER_BLOCK(sb) - 1); + bh = EXT4_SB(sb)->s_group_desc[group_desc]; if (!bh) { - ext3_error (sb, "ext3_get_inode_block", + ext4_error (sb, "ext4_get_inode_block", "Descriptor not loaded"); return 0; } - gdp = (struct ext3_group_desc *)bh->b_data; + gdp = (struct ext4_group_desc *)bh->b_data; /* * Figure out the offset within the block group inode table */ - offset = ((ino - 1) % EXT3_INODES_PER_GROUP(sb)) * - EXT3_INODE_SIZE(sb); + offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) * + EXT4_INODE_SIZE(sb); block = le32_to_cpu(gdp[desc].bg_inode_table) + - (offset >> EXT3_BLOCK_SIZE_BITS(sb)); + (offset >> EXT4_BLOCK_SIZE_BITS(sb)); iloc->block_group = block_group; - iloc->offset = offset & (EXT3_BLOCK_SIZE(sb) - 1); + iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1); return block; } /* - * ext3_get_inode_loc returns with an extra refcount against the inode's + * ext4_get_inode_loc returns with an extra refcount against the inode's * underlying buffer_head on success. If 'in_mem' is true, we have all * data in memory that is needed to recreate the on-disk version of this * inode. 
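A standalone check of the inode-location arithmetic in ext4_get_inode_block() above. The geometry is illustrative: 4 KiB blocks, 8192 inodes per group, 128-byte inodes, and a group-2 inode table starting at block 1000 (in reality that start comes from the group descriptor):

#include <stdio.h>

int main(void)
{
        unsigned long ino = 20000;
        unsigned long inodes_per_group = 8192;
        unsigned inode_size = 128, block_bits = 12;
        unsigned long inode_table = 1000;   /* bg_inode_table, assumed */

        unsigned long group = (ino - 1) / inodes_per_group;
        unsigned long offset = ((ino - 1) % inodes_per_group) * inode_size;
        unsigned long block = inode_table + (offset >> block_bits);

        /* prints: group 2, block 1112, offset-in-block 3968 */
        printf("group %lu, block %lu, offset-in-block %lu\n",
               group, block, offset & ((1ul << block_bits) - 1));
        return 0;
}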
*/ -static int __ext3_get_inode_loc(struct inode *inode, - struct ext3_iloc *iloc, int in_mem) +static int __ext4_get_inode_loc(struct inode *inode, + struct ext4_iloc *iloc, int in_mem) { - ext3_fsblk_t block; + ext4_fsblk_t block; struct buffer_head *bh; - block = ext3_get_inode_block(inode->i_sb, inode->i_ino, iloc); + block = ext4_get_inode_block(inode->i_sb, inode->i_ino, iloc); if (!block) return -EIO; bh = sb_getblk(inode->i_sb, block); if (!bh) { - ext3_error (inode->i_sb, "ext3_get_inode_loc", + ext4_error (inode->i_sb, "ext4_get_inode_loc", "unable to read inode block - " "inode=%lu, block="E3FSBLK, inode->i_ino, block); @@ -2483,22 +2483,22 @@ static int __ext3_get_inode_loc(struct inode *inode, */ if (in_mem) { struct buffer_head *bitmap_bh; - struct ext3_group_desc *desc; + struct ext4_group_desc *desc; int inodes_per_buffer; int inode_offset, i; int block_group; int start; block_group = (inode->i_ino - 1) / - EXT3_INODES_PER_GROUP(inode->i_sb); + EXT4_INODES_PER_GROUP(inode->i_sb); inodes_per_buffer = bh->b_size / - EXT3_INODE_SIZE(inode->i_sb); + EXT4_INODE_SIZE(inode->i_sb); inode_offset = ((inode->i_ino - 1) % - EXT3_INODES_PER_GROUP(inode->i_sb)); + EXT4_INODES_PER_GROUP(inode->i_sb)); start = inode_offset & ~(inodes_per_buffer - 1); /* Is the inode bitmap in cache? */ - desc = ext3_get_group_desc(inode->i_sb, + desc = ext4_get_group_desc(inode->i_sb, block_group, NULL); if (!desc) goto make_io; @@ -2520,7 +2520,7 @@ static int __ext3_get_inode_loc(struct inode *inode, for (i = start; i < start + inodes_per_buffer; i++) { if (i == inode_offset) continue; - if (ext3_test_bit(i, bitmap_bh->b_data)) + if (ext4_test_bit(i, bitmap_bh->b_data)) break; } brelse(bitmap_bh); @@ -2544,7 +2544,7 @@ make_io: submit_bh(READ_META, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { - ext3_error(inode->i_sb, "ext3_get_inode_loc", + ext4_error(inode->i_sb, "ext4_get_inode_loc", "unable to read inode block - " "inode=%lu, block="E3FSBLK, inode->i_ino, block); @@ -2557,48 +2557,48 @@ has_buffer: return 0; } -int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc) +int ext4_get_inode_loc(struct inode *inode, struct ext4_iloc *iloc) { /* We have all inode data except xattrs in memory here. 
*/ - return __ext3_get_inode_loc(inode, iloc, - !(EXT3_I(inode)->i_state & EXT3_STATE_XATTR)); + return __ext4_get_inode_loc(inode, iloc, + !(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)); } -void ext3_set_inode_flags(struct inode *inode) +void ext4_set_inode_flags(struct inode *inode) { - unsigned int flags = EXT3_I(inode)->i_flags; + unsigned int flags = EXT4_I(inode)->i_flags; inode->i_flags &= ~(S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC); - if (flags & EXT3_SYNC_FL) + if (flags & EXT4_SYNC_FL) inode->i_flags |= S_SYNC; - if (flags & EXT3_APPEND_FL) + if (flags & EXT4_APPEND_FL) inode->i_flags |= S_APPEND; - if (flags & EXT3_IMMUTABLE_FL) + if (flags & EXT4_IMMUTABLE_FL) inode->i_flags |= S_IMMUTABLE; - if (flags & EXT3_NOATIME_FL) + if (flags & EXT4_NOATIME_FL) inode->i_flags |= S_NOATIME; - if (flags & EXT3_DIRSYNC_FL) + if (flags & EXT4_DIRSYNC_FL) inode->i_flags |= S_DIRSYNC; } -void ext3_read_inode(struct inode * inode) +void ext4_read_inode(struct inode * inode) { - struct ext3_iloc iloc; - struct ext3_inode *raw_inode; - struct ext3_inode_info *ei = EXT3_I(inode); + struct ext4_iloc iloc; + struct ext4_inode *raw_inode; + struct ext4_inode_info *ei = EXT4_I(inode); struct buffer_head *bh; int block; -#ifdef CONFIG_EXT3_FS_POSIX_ACL - ei->i_acl = EXT3_ACL_NOT_CACHED; - ei->i_default_acl = EXT3_ACL_NOT_CACHED; +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL + ei->i_acl = EXT4_ACL_NOT_CACHED; + ei->i_default_acl = EXT4_ACL_NOT_CACHED; #endif ei->i_block_alloc_info = NULL; - if (__ext3_get_inode_loc(inode, &iloc, 0)) + if (__ext4_get_inode_loc(inode, &iloc, 0)) goto bad_inode; bh = iloc.bh; - raw_inode = ext3_raw_inode(&iloc); + raw_inode = ext4_raw_inode(&iloc); inode->i_mode = le16_to_cpu(raw_inode->i_mode); inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low); @@ -2623,7 +2623,7 @@ void ext3_read_inode(struct inode * inode) */ if (inode->i_nlink == 0) { if (inode->i_mode == 0 || - !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { + !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { /* this inode is deleted */ brelse (bh); goto bad_inode; @@ -2635,7 +2635,7 @@ void ext3_read_inode(struct inode * inode) } inode->i_blocks = le32_to_cpu(raw_inode->i_blocks); ei->i_flags = le32_to_cpu(raw_inode->i_flags); -#ifdef EXT3_FRAGMENTS +#ifdef EXT4_FRAGMENTS ei->i_faddr = le32_to_cpu(raw_inode->i_faddr); ei->i_frag_no = raw_inode->i_frag; ei->i_frag_size = raw_inode->i_fsize; @@ -2654,51 +2654,51 @@ void ext3_read_inode(struct inode * inode) * NOTE! The in-memory inode i_data array is in little-endian order * even on big-endian machines: we do NOT byteswap the block numbers! */ - for (block = 0; block < EXT3_N_BLOCKS; block++) + for (block = 0; block < EXT4_N_BLOCKS; block++) ei->i_data[block] = raw_inode->i_block[block]; INIT_LIST_HEAD(&ei->i_orphan); - if (inode->i_ino >= EXT3_FIRST_INO(inode->i_sb) + 1 && - EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) { + if (inode->i_ino >= EXT4_FIRST_INO(inode->i_sb) + 1 && + EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { /* * When mke2fs creates big inodes it does not zero out - * the unused bytes above EXT3_GOOD_OLD_INODE_SIZE, + * the unused bytes above EXT4_GOOD_OLD_INODE_SIZE, * so ignore those first few inodes. 
*/ ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); - if (EXT3_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > - EXT3_INODE_SIZE(inode->i_sb)) + if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > + EXT4_INODE_SIZE(inode->i_sb)) goto bad_inode; if (ei->i_extra_isize == 0) { /* The extra space is currently unused. Use it. */ - ei->i_extra_isize = sizeof(struct ext3_inode) - - EXT3_GOOD_OLD_INODE_SIZE; + ei->i_extra_isize = sizeof(struct ext4_inode) - + EXT4_GOOD_OLD_INODE_SIZE; } else { __le32 *magic = (void *)raw_inode + - EXT3_GOOD_OLD_INODE_SIZE + + EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize; - if (*magic == cpu_to_le32(EXT3_XATTR_MAGIC)) - ei->i_state |= EXT3_STATE_XATTR; + if (*magic == cpu_to_le32(EXT4_XATTR_MAGIC)) + ei->i_state |= EXT4_STATE_XATTR; } } else ei->i_extra_isize = 0; if (S_ISREG(inode->i_mode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; - ext3_set_aops(inode); + inode->i_op = &ext4_file_inode_operations; + inode->i_fop = &ext4_file_operations; + ext4_set_aops(inode); } else if (S_ISDIR(inode->i_mode)) { - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; + inode->i_op = &ext4_dir_inode_operations; + inode->i_fop = &ext4_dir_operations; } else if (S_ISLNK(inode->i_mode)) { - if (ext3_inode_is_fast_symlink(inode)) - inode->i_op = &ext3_fast_symlink_inode_operations; + if (ext4_inode_is_fast_symlink(inode)) + inode->i_op = &ext4_fast_symlink_inode_operations; else { - inode->i_op = &ext3_symlink_inode_operations; - ext3_set_aops(inode); + inode->i_op = &ext4_symlink_inode_operations; + ext4_set_aops(inode); } } else { - inode->i_op = &ext3_special_inode_operations; + inode->i_op = &ext4_special_inode_operations; if (raw_inode->i_block[0]) init_special_inode(inode, inode->i_mode, old_decode_dev(le32_to_cpu(raw_inode->i_block[0]))); @@ -2707,7 +2707,7 @@ void ext3_read_inode(struct inode * inode) new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); } brelse (iloc.bh); - ext3_set_inode_flags(inode); + ext4_set_inode_flags(inode); return; bad_inode: @@ -2722,19 +2722,19 @@ bad_inode: * * The caller must have write access to iloc->bh. */ -static int ext3_do_update_inode(handle_t *handle, +static int ext4_do_update_inode(handle_t *handle, struct inode *inode, - struct ext3_iloc *iloc) + struct ext4_iloc *iloc) { - struct ext3_inode *raw_inode = ext3_raw_inode(iloc); - struct ext3_inode_info *ei = EXT3_I(inode); + struct ext4_inode *raw_inode = ext4_raw_inode(iloc); + struct ext4_inode_info *ei = EXT4_I(inode); struct buffer_head *bh = iloc->bh; int err = 0, rc, block; /* For fields not not tracking in the in-memory inode, * initialise them to zero for new inodes. 
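The magic probe above encodes the enlarged on-disk inode layout. A sketch of that layout, with offsets per the constants used in the patch (everything past the 128-byte core exists only when EXT4_INODE_SIZE is larger than EXT4_GOOD_OLD_INODE_SIZE):

        +--------------------------------+  byte 0
        | classic ext2/ext3 inode fields |
        +--------------------------------+  EXT4_GOOD_OLD_INODE_SIZE (128)
        | i_extra_isize bytes of newer   |
        | fixed fields                   |
        +--------------------------------+  128 + i_extra_isize
        | if the __le32 here equals      |
        | EXT4_XATTR_MAGIC, in-inode     |
        | xattr entries follow           |
        +--------------------------------+  EXT4_INODE_SIZE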
*/ - if (ei->i_state & EXT3_STATE_NEW) - memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); + if (ei->i_state & EXT4_STATE_NEW) + memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); raw_inode->i_mode = cpu_to_le16(inode->i_mode); if(!(test_opt(inode->i_sb, NO_UID32))) { @@ -2769,7 +2769,7 @@ static int ext3_do_update_inode(handle_t *handle, raw_inode->i_blocks = cpu_to_le32(inode->i_blocks); raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); raw_inode->i_flags = cpu_to_le32(ei->i_flags); -#ifdef EXT3_FRAGMENTS +#ifdef EXT4_FRAGMENTS raw_inode->i_faddr = cpu_to_le32(ei->i_faddr); raw_inode->i_frag = ei->i_frag_no; raw_inode->i_fsize = ei->i_frag_size; @@ -2782,24 +2782,24 @@ static int ext3_do_update_inode(handle_t *handle, cpu_to_le32(ei->i_disksize >> 32); if (ei->i_disksize > 0x7fffffffULL) { struct super_block *sb = inode->i_sb; - if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_LARGE_FILE) || - EXT3_SB(sb)->s_es->s_rev_level == - cpu_to_le32(EXT3_GOOD_OLD_REV)) { + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_LARGE_FILE) || + EXT4_SB(sb)->s_es->s_rev_level == + cpu_to_le32(EXT4_GOOD_OLD_REV)) { /* If this is the first large file * created, add a flag to the superblock. */ - err = ext3_journal_get_write_access(handle, - EXT3_SB(sb)->s_sbh); + err = ext4_journal_get_write_access(handle, + EXT4_SB(sb)->s_sbh); if (err) goto out_brelse; - ext3_update_dynamic_rev(sb); - EXT3_SET_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_LARGE_FILE); + ext4_update_dynamic_rev(sb); + EXT4_SET_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_LARGE_FILE); sb->s_dirt = 1; handle->h_sync = 1; - err = ext3_journal_dirty_metadata(handle, - EXT3_SB(sb)->s_sbh); + err = ext4_journal_dirty_metadata(handle, + EXT4_SB(sb)->s_sbh); } } } @@ -2815,26 +2815,26 @@ static int ext3_do_update_inode(handle_t *handle, cpu_to_le32(new_encode_dev(inode->i_rdev)); raw_inode->i_block[2] = 0; } - } else for (block = 0; block < EXT3_N_BLOCKS; block++) + } else for (block = 0; block < EXT4_N_BLOCKS; block++) raw_inode->i_block[block] = ei->i_data[block]; if (ei->i_extra_isize) raw_inode->i_extra_isize = cpu_to_le16(ei->i_extra_isize); - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - rc = ext3_journal_dirty_metadata(handle, bh); + BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); + rc = ext4_journal_dirty_metadata(handle, bh); if (!err) err = rc; - ei->i_state &= ~EXT3_STATE_NEW; + ei->i_state &= ~EXT4_STATE_NEW; out_brelse: brelse (bh); - ext3_std_error(inode->i_sb, err); + ext4_std_error(inode->i_sb, err); return err; } /* - * ext3_write_inode() + * ext4_write_inode() * * We are called from a few places: * @@ -2851,7 +2851,7 @@ out_brelse: * * In all cases it is actually safe for us to return without doing anything, * because the inode has been copied into a raw inode buffer in - * ext3_mark_inode_dirty(). This is a correctness thing for O_SYNC and for + * ext4_mark_inode_dirty(). This is a correctness thing for O_SYNC and for * knfsd. * * Note that we are absolutely dependent upon all inode dirtiers doing the @@ -2868,12 +2868,12 @@ out_brelse: * `stuff()' is running, and the new i_size will be lost. Plus the inode * will no longer be on the superblock's dirty inode list. 
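A standalone check of the 64-bit size split performed above, where i_disksize is stored in the raw inode as two 32-bit halves once the file grows past what the classic field can hold:

#include <stdio.h>

int main(void)
{
        unsigned long long disksize = 0x123456789aULL;  /* ~73 GiB */
        unsigned int size_low = (unsigned int)disksize;
        unsigned int size_high = (unsigned int)(disksize >> 32);

        printf("low = 0x%x, high = 0x%x\n", size_low, size_high);
        printf("reassembled = 0x%llx\n",
               ((unsigned long long)size_high << 32) | size_low);
        return 0;
}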
*/ -int ext3_write_inode(struct inode *inode, int wait) +int ext4_write_inode(struct inode *inode, int wait) { if (current->flags & PF_MEMALLOC) return 0; - if (ext3_journal_current_handle()) { + if (ext4_journal_current_handle()) { jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n"); dump_stack(); return -EIO; @@ -2882,11 +2882,11 @@ int ext3_write_inode(struct inode *inode, int wait) if (!wait) return 0; - return ext3_force_commit(inode->i_sb); + return ext4_force_commit(inode->i_sb); } /* - * ext3_setattr() + * ext4_setattr() * * Called from notify_change. * @@ -2902,7 +2902,7 @@ int ext3_write_inode(struct inode *inode, int wait) * * Called with inode->sem down. */ -int ext3_setattr(struct dentry *dentry, struct iattr *attr) +int ext4_setattr(struct dentry *dentry, struct iattr *attr) { struct inode *inode = dentry->d_inode; int error, rc = 0; @@ -2918,15 +2918,15 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) /* (user+group)*(old+new) structure, inode write (sb, * inode block, ? - but truncate inode update has it) */ - handle = ext3_journal_start(inode, 2*(EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)+ - EXT3_QUOTA_DEL_BLOCKS(inode->i_sb))+3); + handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+ + EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3); if (IS_ERR(handle)) { error = PTR_ERR(handle); goto err_out; } error = DQUOT_TRANSFER(inode, attr) ? -EDQUOT : 0; if (error) { - ext3_journal_stop(handle); + ext4_journal_stop(handle); return error; } /* Update corresponding info in inode so that everything is in @@ -2935,41 +2935,41 @@ int ext3_setattr(struct dentry *dentry, struct iattr *attr) inode->i_uid = attr->ia_uid; if (attr->ia_valid & ATTR_GID) inode->i_gid = attr->ia_gid; - error = ext3_mark_inode_dirty(handle, inode); - ext3_journal_stop(handle); + error = ext4_mark_inode_dirty(handle, inode); + ext4_journal_stop(handle); } if (S_ISREG(inode->i_mode) && attr->ia_valid & ATTR_SIZE && attr->ia_size < inode->i_size) { handle_t *handle; - handle = ext3_journal_start(inode, 3); + handle = ext4_journal_start(inode, 3); if (IS_ERR(handle)) { error = PTR_ERR(handle); goto err_out; } - error = ext3_orphan_add(handle, inode); - EXT3_I(inode)->i_disksize = attr->ia_size; - rc = ext3_mark_inode_dirty(handle, inode); + error = ext4_orphan_add(handle, inode); + EXT4_I(inode)->i_disksize = attr->ia_size; + rc = ext4_mark_inode_dirty(handle, inode); if (!error) error = rc; - ext3_journal_stop(handle); + ext4_journal_stop(handle); } rc = inode_setattr(inode, attr); - /* If inode_setattr's call to ext3_truncate failed to get a + /* If inode_setattr's call to ext4_truncate failed to get a * transaction handle at all, we need to clean up the in-core * orphan list manually. */ if (inode->i_nlink) - ext3_orphan_del(NULL, inode); + ext4_orphan_del(NULL, inode); if (!rc && (ia_valid & ATTR_MODE)) - rc = ext3_acl_chmod(inode); + rc = ext4_acl_chmod(inode); err_out: - ext3_std_error(inode->i_sb, error); + ext4_std_error(inode->i_sb, error); if (!error) error = rc; return error; @@ -2988,9 +2988,9 @@ err_out: * N+5 group descriptor summary blocks * 1 inode block * 1 superblock. - * 2 * EXT3_SINGLEDATA_TRANS_BLOCKS for the quote files + * 2 * EXT4_SINGLEDATA_TRANS_BLOCKS for the quote files * - * 3 * (N + 5) + 2 + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS + * 3 * (N + 5) + 2 + 2 * EXT4_SINGLEDATA_TRANS_BLOCKS * * With ordered or writeback data it's the same, less the N data blocks. * @@ -3003,13 +3003,13 @@ err_out: * block and work out the exact number of indirects which are touched. Pah. 
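A standalone check of the credit estimate implemented just below in ext4_writepage_trans_blocks(), assuming 4 KiB pages on a 4 KiB-block filesystem (one block per page) and quota disabled:

#include <stdio.h>

int main(void)
{
        int bpp = 1;                            /* blocks per page */
        int ndir = 12;                          /* EXT4_NDIR_BLOCKS */
        int indirects = (ndir % bpp) ? 5 : 3;
        int journalled = 3 * (bpp + indirects) + 2;  /* data=journal */
        int ordered = 2 * (bpp + indirects) + 2;     /* ordered/writeback */

        /* prints 14 and 10 */
        printf("data=journal: %d, ordered: %d\n", journalled, ordered);
        return 0;
}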
*/ -static int ext3_writepage_trans_blocks(struct inode *inode) +static int ext4_writepage_trans_blocks(struct inode *inode) { - int bpp = ext3_journal_blocks_per_page(inode); - int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; + int bpp = ext4_journal_blocks_per_page(inode); + int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3; int ret; - if (ext3_should_journal_data(inode)) + if (ext4_should_journal_data(inode)) ret = 3 * (bpp + indirects) + 2; else ret = 2 * (bpp + indirects) + 2; @@ -3017,26 +3017,26 @@ static int ext3_writepage_trans_blocks(struct inode *inode) #ifdef CONFIG_QUOTA /* We know that structure was already allocated during DQUOT_INIT so * we will be updating only the data blocks + inodes */ - ret += 2*EXT3_QUOTA_TRANS_BLOCKS(inode->i_sb); + ret += 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); #endif return ret; } /* - * The caller must have previously called ext3_reserve_inode_write(). + * The caller must have previously called ext4_reserve_inode_write(). * Give this, we know that the caller already has write access to iloc->bh. */ -int ext3_mark_iloc_dirty(handle_t *handle, - struct inode *inode, struct ext3_iloc *iloc) +int ext4_mark_iloc_dirty(handle_t *handle, + struct inode *inode, struct ext4_iloc *iloc) { int err = 0; /* the do_update_inode consumes one bh->b_count */ get_bh(iloc->bh); - /* ext3_do_update_inode() does journal_dirty_metadata */ - err = ext3_do_update_inode(handle, inode, iloc); + /* ext4_do_update_inode() does journal_dirty_metadata */ + err = ext4_do_update_inode(handle, inode, iloc); put_bh(iloc->bh); return err; } @@ -3047,22 +3047,22 @@ int ext3_mark_iloc_dirty(handle_t *handle, */ int -ext3_reserve_inode_write(handle_t *handle, struct inode *inode, - struct ext3_iloc *iloc) +ext4_reserve_inode_write(handle_t *handle, struct inode *inode, + struct ext4_iloc *iloc) { int err = 0; if (handle) { - err = ext3_get_inode_loc(inode, iloc); + err = ext4_get_inode_loc(inode, iloc); if (!err) { BUFFER_TRACE(iloc->bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, iloc->bh); + err = ext4_journal_get_write_access(handle, iloc->bh); if (err) { brelse(iloc->bh); iloc->bh = NULL; } } } - ext3_std_error(inode->i_sb, err); + ext4_std_error(inode->i_sb, err); return err; } @@ -3087,20 +3087,20 @@ ext3_reserve_inode_write(handle_t *handle, struct inode *inode, * to do a write_super() to free up some memory. It has the desired * effect. */ -int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) +int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) { - struct ext3_iloc iloc; + struct ext4_iloc iloc; int err; might_sleep(); - err = ext3_reserve_inode_write(handle, inode, &iloc); + err = ext4_reserve_inode_write(handle, inode, &iloc); if (!err) - err = ext3_mark_iloc_dirty(handle, inode, &iloc); + err = ext4_mark_iloc_dirty(handle, inode, &iloc); return err; } /* - * ext3_dirty_inode() is called from __mark_inode_dirty() + * ext4_dirty_inode() is called from __mark_inode_dirty() * * We're really interested in the case where a file is being extended. * i_size has been changed by generic_commit_write() and we thus need @@ -3113,12 +3113,12 @@ int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode) * so would cause a commit on atime updates, which we don't bother doing. * We handle synchronous inodes at the highest possible level. 
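The reserve/dirty pair above is split on purpose: a caller that changes several inode fields reserves write access once, mutates, then marks the iloc dirty once. A kernel-context sketch of that caller shape:

struct ext4_iloc iloc;
int err;

err = ext4_reserve_inode_write(handle, inode, &iloc);
if (!err) {
        inode->i_ctime = CURRENT_TIME_SEC;      /* example mutation */
        err = ext4_mark_iloc_dirty(handle, inode, &iloc);
}

The EXT4_IOC_SETVERSION handler in the ioctl.c hunk further down follows exactly this shape, and ext4_mark_inode_dirty() above is just the degenerate reserve-then-mark case.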
*/ -void ext3_dirty_inode(struct inode *inode) +void ext4_dirty_inode(struct inode *inode) { - handle_t *current_handle = ext3_journal_current_handle(); + handle_t *current_handle = ext4_journal_current_handle(); handle_t *handle; - handle = ext3_journal_start(inode, 2); + handle = ext4_journal_start(inode, 2); if (IS_ERR(handle)) goto out; if (current_handle && @@ -3129,9 +3129,9 @@ void ext3_dirty_inode(struct inode *inode) } else { jbd_debug(5, "marking dirty. outer handle=%p\n", current_handle); - ext3_mark_inode_dirty(handle, inode); + ext4_mark_inode_dirty(handle, inode); } - ext3_journal_stop(handle); + ext4_journal_stop(handle); out: return; } @@ -3140,32 +3140,32 @@ out: /* * Bind an inode's backing buffer_head into this transaction, to prevent * it from being flushed to disk early. Unlike - * ext3_reserve_inode_write, this leaves behind no bh reference and + * ext4_reserve_inode_write, this leaves behind no bh reference and * returns no iloc structure, so the caller needs to repeat the iloc * lookup to mark the inode dirty later. */ -static int ext3_pin_inode(handle_t *handle, struct inode *inode) +static int ext4_pin_inode(handle_t *handle, struct inode *inode) { - struct ext3_iloc iloc; + struct ext4_iloc iloc; int err = 0; if (handle) { - err = ext3_get_inode_loc(inode, &iloc); + err = ext4_get_inode_loc(inode, &iloc); if (!err) { BUFFER_TRACE(iloc.bh, "get_write_access"); err = journal_get_write_access(handle, iloc.bh); if (!err) - err = ext3_journal_dirty_metadata(handle, + err = ext4_journal_dirty_metadata(handle, iloc.bh); brelse(iloc.bh); } } - ext3_std_error(inode->i_sb, err); + ext4_std_error(inode->i_sb, err); return err; } #endif -int ext3_change_inode_journal_flag(struct inode *inode, int val) +int ext4_change_inode_journal_flag(struct inode *inode, int val) { journal_t *journal; handle_t *handle; @@ -3181,7 +3181,7 @@ int ext3_change_inode_journal_flag(struct inode *inode, int val) * nobody is changing anything. */ - journal = EXT3_JOURNAL(inode); + journal = EXT4_JOURNAL(inode); if (is_journal_aborted(journal) || IS_RDONLY(inode)) return -EROFS; @@ -3197,23 +3197,23 @@ int ext3_change_inode_journal_flag(struct inode *inode, int val) */ if (val) - EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL; + EXT4_I(inode)->i_flags |= EXT4_JOURNAL_DATA_FL; else - EXT3_I(inode)->i_flags &= ~EXT3_JOURNAL_DATA_FL; - ext3_set_aops(inode); + EXT4_I(inode)->i_flags &= ~EXT4_JOURNAL_DATA_FL; + ext4_set_aops(inode); journal_unlock_updates(journal); /* Finally we can mark the inode as dirty. 
*/ - handle = ext3_journal_start(inode, 1); + handle = ext4_journal_start(inode, 1); if (IS_ERR(handle)) return PTR_ERR(handle); - err = ext3_mark_inode_dirty(handle, inode); + err = ext4_mark_inode_dirty(handle, inode); handle->h_sync = 1; - ext3_journal_stop(handle); - ext3_std_error(inode->i_sb, err); + ext4_journal_stop(handle); + ext4_std_error(inode->i_sb, err); return err; } diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index 12daa6869572..a567af161b06 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -1,5 +1,5 @@ /* - * linux/fs/ext3/ioctl.c + * linux/fs/ext4/ioctl.c * * Copyright (C) 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) @@ -10,30 +10,30 @@ #include <linux/fs.h> #include <linux/jbd.h> #include <linux/capability.h> -#include <linux/ext3_fs.h> -#include <linux/ext3_jbd.h> +#include <linux/ext4_fs.h> +#include <linux/ext4_jbd.h> #include <linux/time.h> #include <linux/compat.h> #include <linux/smp_lock.h> #include <asm/uaccess.h> -int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, +int ext4_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, unsigned long arg) { - struct ext3_inode_info *ei = EXT3_I(inode); + struct ext4_inode_info *ei = EXT4_I(inode); unsigned int flags; unsigned short rsv_window_size; - ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg); + ext4_debug ("cmd = %u, arg = %lu\n", cmd, arg); switch (cmd) { - case EXT3_IOC_GETFLAGS: - flags = ei->i_flags & EXT3_FL_USER_VISIBLE; + case EXT4_IOC_GETFLAGS: + flags = ei->i_flags & EXT4_FL_USER_VISIBLE; return put_user(flags, (int __user *) arg); - case EXT3_IOC_SETFLAGS: { + case EXT4_IOC_SETFLAGS: { handle_t *handle = NULL; int err; - struct ext3_iloc iloc; + struct ext4_iloc iloc; unsigned int oldflags; unsigned int jflag; @@ -47,13 +47,13 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, return -EFAULT; if (!S_ISDIR(inode->i_mode)) - flags &= ~EXT3_DIRSYNC_FL; + flags &= ~EXT4_DIRSYNC_FL; mutex_lock(&inode->i_mutex); oldflags = ei->i_flags; /* The JOURNAL_DATA flag is modifiable only by root */ - jflag = flags & EXT3_JOURNAL_DATA_FL; + jflag = flags & EXT4_JOURNAL_DATA_FL; /* * The IMMUTABLE and APPEND_ONLY flags can only be changed by @@ -61,7 +61,7 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, * * This test looks nicer. Thanks to Pauline Middelink */ - if ((flags ^ oldflags) & (EXT3_APPEND_FL | EXT3_IMMUTABLE_FL)) { + if ((flags ^ oldflags) & (EXT4_APPEND_FL | EXT4_IMMUTABLE_FL)) { if (!capable(CAP_LINUX_IMMUTABLE)) { mutex_unlock(&inode->i_mutex); return -EPERM; @@ -72,7 +72,7 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, * The JOURNAL_DATA flag can only be changed by * the relevant capability. 
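The GETFLAGS/SETFLAGS handlers here are what lsattr and chattr sit on top of. A userspace sketch using the generic FS_IOC_* spellings from <linux/fs.h> (numerically the same ioctls as the EXT4_IOC_* ones; an int is used because, as the handler above shows, the kernel side reads and writes an int):

#include <stdio.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>           /* FS_IOC_GETFLAGS, FS_APPEND_FL */

int main(int argc, char **argv)
{
        int fd, flags;

        if (argc < 2 || (fd = open(argv[1], O_RDONLY)) < 0)
                return 1;
        if (ioctl(fd, FS_IOC_GETFLAGS, &flags) < 0) {
                perror("FS_IOC_GETFLAGS");
                return 1;
        }
        flags |= FS_APPEND_FL;  /* needs CAP_LINUX_IMMUTABLE, as above */
        if (ioctl(fd, FS_IOC_SETFLAGS, &flags) < 0)
                perror("FS_IOC_SETFLAGS");
        close(fd);
        return 0;
}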
*/ - if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) { + if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) { if (!capable(CAP_SYS_RESOURCE)) { mutex_unlock(&inode->i_mutex); return -EPERM; @@ -80,44 +80,44 @@ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd, } - handle = ext3_journal_start(inode, 1); + handle = ext4_journal_start(inode, 1); if (IS_ERR(handle)) { mutex_unlock(&inode->i_mutex); return PTR_ERR(handle); } if (IS_SYNC(inode)) handle->h_sync = 1; - err = ext3_reserve_inode_write(handle, inode, &iloc); + err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) goto flags_err; - flags = flags & EXT3_FL_USER_MODIFIABLE; - flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE; + flags = flags & EXT4_FL_USER_MODIFIABLE; + flags |= oldflags & ~EXT4_FL_USER_MODIFIABLE; ei->i_flags = flags; - ext3_set_inode_flags(inode); + ext4_set_inode_flags(inode); inode->i_ctime = CURRENT_TIME_SEC; - err = ext3_mark_iloc_dirty(handle, inode, &iloc); + err = ext4_mark_iloc_dirty(handle, inode, &iloc); flags_err: - ext3_journal_stop(handle); + ext4_journal_stop(handle); if (err) { mutex_unlock(&inode->i_mutex); return err; } - if ((jflag ^ oldflags) & (EXT3_JOURNAL_DATA_FL)) - err = ext3_change_inode_journal_flag(inode, jflag); + if ((jflag ^ oldflags) & (EXT4_JOURNAL_DATA_FL)) + err = ext4_change_inode_journal_flag(inode, jflag); mutex_unlock(&inode->i_mutex); return err; } - case EXT3_IOC_GETVERSION: - case EXT3_IOC_GETVERSION_OLD: + case EXT4_IOC_GETVERSION: + case EXT4_IOC_GETVERSION_OLD: return put_user(inode->i_generation, (int __user *) arg); - case EXT3_IOC_SETVERSION: - case EXT3_IOC_SETVERSION_OLD: { + case EXT4_IOC_SETVERSION: + case EXT4_IOC_SETVERSION_OLD: { handle_t *handle; - struct ext3_iloc iloc; + struct ext4_iloc iloc; __u32 generation; int err; @@ -128,20 +128,20 @@ flags_err: if (get_user(generation, (int __user *) arg)) return -EFAULT; - handle = ext3_journal_start(inode, 1); + handle = ext4_journal_start(inode, 1); if (IS_ERR(handle)) return PTR_ERR(handle); - err = ext3_reserve_inode_write(handle, inode, &iloc); + err = ext4_reserve_inode_write(handle, inode, &iloc); if (err == 0) { inode->i_ctime = CURRENT_TIME_SEC; inode->i_generation = generation; - err = ext3_mark_iloc_dirty(handle, inode, &iloc); + err = ext4_mark_iloc_dirty(handle, inode, &iloc); } - ext3_journal_stop(handle); + ext4_journal_stop(handle); return err; } #ifdef CONFIG_JBD_DEBUG - case EXT3_IOC_WAIT_FOR_READONLY: + case EXT4_IOC_WAIT_FOR_READONLY: /* * This is racy - by the time we're woken up and running, * the superblock could be released. 
And the module could @@ -155,16 +155,16 @@ flags_err: int ret = 0; set_current_state(TASK_INTERRUPTIBLE); - add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait); - if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) { + add_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait); + if (timer_pending(&EXT4_SB(sb)->turn_ro_timer)) { schedule(); ret = 1; } - remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait); + remove_wait_queue(&EXT4_SB(sb)->ro_wait_queue, &wait); return ret; } #endif - case EXT3_IOC_GETRSVSZ: + case EXT4_IOC_GETRSVSZ: if (test_opt(inode->i_sb, RESERVATION) && S_ISREG(inode->i_mode) && ei->i_block_alloc_info) { @@ -172,7 +172,7 @@ flags_err: return put_user(rsv_window_size, (int __user *)arg); } return -ENOTTY; - case EXT3_IOC_SETRSVSZ: { + case EXT4_IOC_SETRSVSZ: { if (!test_opt(inode->i_sb, RESERVATION) ||!S_ISREG(inode->i_mode)) return -ENOTTY; @@ -186,8 +186,8 @@ flags_err: if (get_user(rsv_window_size, (int __user *)arg)) return -EFAULT; - if (rsv_window_size > EXT3_MAX_RESERVE_BLOCKS) - rsv_window_size = EXT3_MAX_RESERVE_BLOCKS; + if (rsv_window_size > EXT4_MAX_RESERVE_BLOCKS) + rsv_window_size = EXT4_MAX_RESERVE_BLOCKS; /* * need to allocate reservation structure for this inode @@ -195,17 +195,17 @@ flags_err: */ mutex_lock(&ei->truncate_mutex); if (!ei->i_block_alloc_info) - ext3_init_block_alloc_info(inode); + ext4_init_block_alloc_info(inode); if (ei->i_block_alloc_info){ - struct ext3_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; + struct ext4_reserve_window_node *rsv = &ei->i_block_alloc_info->rsv_window_node; rsv->rsv_goal_size = rsv_window_size; } mutex_unlock(&ei->truncate_mutex); return 0; } - case EXT3_IOC_GROUP_EXTEND: { - ext3_fsblk_t n_blocks_count; + case EXT4_IOC_GROUP_EXTEND: { + ext4_fsblk_t n_blocks_count; struct super_block *sb = inode->i_sb; int err; @@ -218,15 +218,15 @@ flags_err: if (get_user(n_blocks_count, (__u32 __user *)arg)) return -EFAULT; - err = ext3_group_extend(sb, EXT3_SB(sb)->s_es, n_blocks_count); - journal_lock_updates(EXT3_SB(sb)->s_journal); - journal_flush(EXT3_SB(sb)->s_journal); - journal_unlock_updates(EXT3_SB(sb)->s_journal); + err = ext4_group_extend(sb, EXT4_SB(sb)->s_es, n_blocks_count); + journal_lock_updates(EXT4_SB(sb)->s_journal); + journal_flush(EXT4_SB(sb)->s_journal); + journal_unlock_updates(EXT4_SB(sb)->s_journal); return err; } - case EXT3_IOC_GROUP_ADD: { - struct ext3_new_group_data input; + case EXT4_IOC_GROUP_ADD: { + struct ext4_new_group_data input; struct super_block *sb = inode->i_sb; int err; @@ -236,14 +236,14 @@ flags_err: if (IS_RDONLY(inode)) return -EROFS; - if (copy_from_user(&input, (struct ext3_new_group_input __user *)arg, + if (copy_from_user(&input, (struct ext4_new_group_input __user *)arg, sizeof(input))) return -EFAULT; - err = ext3_group_add(sb, &input); - journal_lock_updates(EXT3_SB(sb)->s_journal); - journal_flush(EXT3_SB(sb)->s_journal); - journal_unlock_updates(EXT3_SB(sb)->s_journal); + err = ext4_group_add(sb, &input); + journal_lock_updates(EXT4_SB(sb)->s_journal); + journal_flush(EXT4_SB(sb)->s_journal); + journal_unlock_updates(EXT4_SB(sb)->s_journal); return err; } @@ -255,52 +255,52 @@ flags_err: } #ifdef CONFIG_COMPAT -long ext3_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) +long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { struct inode *inode = file->f_dentry->d_inode; int ret; /* These are just misnamed, they actually get/put from/to user an int */ switch (cmd) { - case EXT3_IOC32_GETFLAGS: - cmd = 
EXT3_IOC_GETFLAGS; + case EXT4_IOC32_GETFLAGS: + cmd = EXT4_IOC_GETFLAGS; break; - case EXT3_IOC32_SETFLAGS: - cmd = EXT3_IOC_SETFLAGS; + case EXT4_IOC32_SETFLAGS: + cmd = EXT4_IOC_SETFLAGS; break; - case EXT3_IOC32_GETVERSION: - cmd = EXT3_IOC_GETVERSION; + case EXT4_IOC32_GETVERSION: + cmd = EXT4_IOC_GETVERSION; break; - case EXT3_IOC32_SETVERSION: - cmd = EXT3_IOC_SETVERSION; + case EXT4_IOC32_SETVERSION: + cmd = EXT4_IOC_SETVERSION; break; - case EXT3_IOC32_GROUP_EXTEND: - cmd = EXT3_IOC_GROUP_EXTEND; + case EXT4_IOC32_GROUP_EXTEND: + cmd = EXT4_IOC_GROUP_EXTEND; break; - case EXT3_IOC32_GETVERSION_OLD: - cmd = EXT3_IOC_GETVERSION_OLD; + case EXT4_IOC32_GETVERSION_OLD: + cmd = EXT4_IOC_GETVERSION_OLD; break; - case EXT3_IOC32_SETVERSION_OLD: - cmd = EXT3_IOC_SETVERSION_OLD; + case EXT4_IOC32_SETVERSION_OLD: + cmd = EXT4_IOC_SETVERSION_OLD; break; #ifdef CONFIG_JBD_DEBUG - case EXT3_IOC32_WAIT_FOR_READONLY: - cmd = EXT3_IOC_WAIT_FOR_READONLY; + case EXT4_IOC32_WAIT_FOR_READONLY: + cmd = EXT4_IOC_WAIT_FOR_READONLY; break; #endif - case EXT3_IOC32_GETRSVSZ: - cmd = EXT3_IOC_GETRSVSZ; + case EXT4_IOC32_GETRSVSZ: + cmd = EXT4_IOC_GETRSVSZ; break; - case EXT3_IOC32_SETRSVSZ: - cmd = EXT3_IOC_SETRSVSZ; + case EXT4_IOC32_SETRSVSZ: + cmd = EXT4_IOC_SETRSVSZ; break; - case EXT3_IOC_GROUP_ADD: + case EXT4_IOC_GROUP_ADD: break; default: return -ENOIOCTLCMD; } lock_kernel(); - ret = ext3_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); + ret = ext4_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg)); unlock_kernel(); return ret; } diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 906731a20f1a..956b38113f62 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -1,5 +1,5 @@ /* - * linux/fs/ext3/namei.c + * linux/fs/ext4/namei.c * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) @@ -28,8 +28,8 @@ #include <linux/pagemap.h> #include <linux/jbd.h> #include <linux/time.h> -#include <linux/ext3_fs.h> -#include <linux/ext3_jbd.h> +#include <linux/ext4_fs.h> +#include <linux/ext4_jbd.h> #include <linux/fcntl.h> #include <linux/stat.h> #include <linux/string.h> @@ -50,7 +50,7 @@ #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS) #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b)) -static struct buffer_head *ext3_append(handle_t *handle, +static struct buffer_head *ext4_append(handle_t *handle, struct inode *inode, u32 *block, int *err) { @@ -58,10 +58,10 @@ static struct buffer_head *ext3_append(handle_t *handle, *block = inode->i_size >> inode->i_sb->s_blocksize_bits; - if ((bh = ext3_bread(handle, inode, *block, 1, err))) { + if ((bh = ext4_bread(handle, inode, *block, 1, err))) { inode->i_size += inode->i_sb->s_blocksize; - EXT3_I(inode)->i_disksize = inode->i_size; - ext3_journal_get_write_access(handle,bh); + EXT4_I(inode)->i_disksize = inode->i_size; + ext4_journal_get_write_access(handle,bh); } return bh; } @@ -144,7 +144,7 @@ struct dx_map_entry u32 offs; }; -#ifdef CONFIG_EXT3_INDEX +#ifdef CONFIG_EXT4_INDEX static inline unsigned dx_get_block (struct dx_entry *entry); static void dx_set_block (struct dx_entry *entry, unsigned value); static inline unsigned dx_get_hash (struct dx_entry *entry); @@ -161,20 +161,20 @@ static struct dx_frame *dx_probe(struct dentry *dentry, struct dx_frame *frame, int *err); static void dx_release (struct dx_frame *frames); -static int dx_make_map (struct ext3_dir_entry_2 *de, int size, +static int dx_make_map (struct ext4_dir_entry_2 *de, int size, struct dx_hash_info *hinfo, struct dx_map_entry map[]); static void 
dx_sort_map(struct dx_map_entry *map, unsigned count); -static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, +static struct ext4_dir_entry_2 *dx_move_dirents (char *from, char *to, struct dx_map_entry *offsets, int count); -static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size); +static struct ext4_dir_entry_2* dx_pack_dirents (char *base, int size); static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block); -static int ext3_htree_next_block(struct inode *dir, __u32 hash, +static int ext4_htree_next_block(struct inode *dir, __u32 hash, struct dx_frame *frame, struct dx_frame *frames, __u32 *start_hash); -static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, - struct ext3_dir_entry_2 **res_dir, int *err); -static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, +static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, + struct ext4_dir_entry_2 **res_dir, int *err); +static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, struct inode *inode); /* @@ -224,14 +224,14 @@ static inline void dx_set_limit (struct dx_entry *entries, unsigned value) static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize) { - unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) - - EXT3_DIR_REC_LEN(2) - infosize; + unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - + EXT4_DIR_REC_LEN(2) - infosize; return 0? 20: entry_space / sizeof(struct dx_entry); } static inline unsigned dx_node_limit (struct inode *dir) { - unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0); + unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); return 0? 22: entry_space / sizeof(struct dx_entry); } @@ -257,7 +257,7 @@ struct stats unsigned bcount; }; -static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_entry_2 *de, +static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext4_dir_entry_2 *de, int size, int show_names) { unsigned names = 0, space = 0; @@ -274,14 +274,14 @@ static struct stats dx_show_leaf(struct dx_hash_info *hinfo, struct ext3_dir_ent int len = de->name_len; char *name = de->name; while (len--) printk("%c", *name++); - ext3fs_dirhash(de->name, de->name_len, &h); + ext4fs_dirhash(de->name, de->name_len, &h); printk(":%x.%u ", h.hash, ((char *) de - base)); } - space += EXT3_DIR_REC_LEN(de->name_len); + space += EXT4_DIR_REC_LEN(de->name_len); names++; } - de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); + de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); } printk("(%i)\n", names); return (struct stats) { names, space, 1 }; @@ -302,10 +302,10 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir, u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; struct stats stats; printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); - if (!(bh = ext3_bread (NULL,dir, block, 0,&err))) continue; + if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue; stats = levels? 
dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): - dx_show_leaf(hinfo, (struct ext3_dir_entry_2 *) bh->b_data, blocksize, 0); + dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0); names += stats.names; space += stats.space; bcount += stats.bcount; @@ -341,13 +341,13 @@ dx_probe(struct dentry *dentry, struct inode *dir, frame->bh = NULL; if (dentry) dir = dentry->d_parent->d_inode; - if (!(bh = ext3_bread (NULL,dir, 0, 0, err))) + if (!(bh = ext4_bread (NULL,dir, 0, 0, err))) goto fail; root = (struct dx_root *) bh->b_data; if (root->info.hash_version != DX_HASH_TEA && root->info.hash_version != DX_HASH_HALF_MD4 && root->info.hash_version != DX_HASH_LEGACY) { - ext3_warning(dir->i_sb, __FUNCTION__, + ext4_warning(dir->i_sb, __FUNCTION__, "Unrecognised inode hash code %d", root->info.hash_version); brelse(bh); @@ -355,13 +355,13 @@ dx_probe(struct dentry *dentry, struct inode *dir, goto fail; } hinfo->hash_version = root->info.hash_version; - hinfo->seed = EXT3_SB(dir->i_sb)->s_hash_seed; + hinfo->seed = EXT4_SB(dir->i_sb)->s_hash_seed; if (dentry) - ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); + ext4fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo); hash = hinfo->hash; if (root->info.unused_flags & 1) { - ext3_warning(dir->i_sb, __FUNCTION__, + ext4_warning(dir->i_sb, __FUNCTION__, "Unimplemented inode hash flags: %#06x", root->info.unused_flags); brelse(bh); @@ -370,7 +370,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, } if ((indirect = root->info.indirect_levels) > 1) { - ext3_warning(dir->i_sb, __FUNCTION__, + ext4_warning(dir->i_sb, __FUNCTION__, "Unimplemented inode hash depth: %#06x", root->info.indirect_levels); brelse(bh); @@ -421,7 +421,7 @@ dx_probe(struct dentry *dentry, struct inode *dir, frame->entries = entries; frame->at = at; if (!indirect--) return frame; - if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err))) + if (!(bh = ext4_bread (NULL,dir, dx_get_block(at), 0, err))) goto fail2; at = entries = ((struct dx_node *) bh->b_data)->entries; assert (dx_get_limit(entries) == dx_node_limit (dir)); @@ -463,7 +463,7 @@ static void dx_release (struct dx_frame *frames) * If start_hash is non-null, it will be filled in with the starting * hash of the next page. 
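 *
 * A typical caller walks the leaf blocks in hash order, roughly:
 *
 *	while (1) {
 *		(hash every entry in the current leaf block)
 *		ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS,
 *					    frame, frames, &hashval);
 *		if (ret <= 0)
 *			break;	(negative: error; zero: no more leaves)
 *	}
 *
 * as ext4_htree_fill_tree() does further down in this file.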
*/ -static int ext3_htree_next_block(struct inode *dir, __u32 hash, +static int ext4_htree_next_block(struct inode *dir, __u32 hash, struct dx_frame *frame, struct dx_frame *frames, __u32 *start_hash) @@ -509,7 +509,7 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash, * block so no check is necessary */ while (num_frames--) { - if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), + if (!(bh = ext4_bread(NULL, dir, dx_get_block(p->at), 0, &err))) return err; /* Failure */ p++; @@ -524,9 +524,9 @@ static int ext3_htree_next_block(struct inode *dir, __u32 hash, /* * p is at least 6 bytes before the end of page */ -static inline struct ext3_dir_entry_2 *ext3_next_entry(struct ext3_dir_entry_2 *p) +static inline struct ext4_dir_entry_2 *ext4_next_entry(struct ext4_dir_entry_2 *p) { - return (struct ext3_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); + return (struct ext4_dir_entry_2 *)((char*)p + le16_to_cpu(p->rec_len)); } /* @@ -540,26 +540,26 @@ static int htree_dirblock_to_tree(struct file *dir_file, __u32 start_hash, __u32 start_minor_hash) { struct buffer_head *bh; - struct ext3_dir_entry_2 *de, *top; + struct ext4_dir_entry_2 *de, *top; int err, count = 0; dxtrace(printk("In htree dirblock_to_tree: block %d\n", block)); - if (!(bh = ext3_bread (NULL, dir, block, 0, &err))) + if (!(bh = ext4_bread (NULL, dir, block, 0, &err))) return err; - de = (struct ext3_dir_entry_2 *) bh->b_data; - top = (struct ext3_dir_entry_2 *) ((char *) de + + de = (struct ext4_dir_entry_2 *) bh->b_data; + top = (struct ext4_dir_entry_2 *) ((char *) de + dir->i_sb->s_blocksize - - EXT3_DIR_REC_LEN(0)); - for (; de < top; de = ext3_next_entry(de)) { - ext3fs_dirhash(de->name, de->name_len, hinfo); + EXT4_DIR_REC_LEN(0)); + for (; de < top; de = ext4_next_entry(de)) { + ext4fs_dirhash(de->name, de->name_len, hinfo); if ((hinfo->hash < start_hash) || ((hinfo->hash == start_hash) && (hinfo->minor_hash < start_minor_hash))) continue; if (de->inode == 0) continue; - if ((err = ext3_htree_store_dirent(dir_file, + if ((err = ext4_htree_store_dirent(dir_file, hinfo->hash, hinfo->minor_hash, de)) != 0) { brelse(bh); return err; @@ -579,11 +579,11 @@ static int htree_dirblock_to_tree(struct file *dir_file, * This function returns the number of entries inserted into the tree, * or a negative error code. */ -int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, +int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, __u32 start_minor_hash, __u32 *next_hash) { struct dx_hash_info hinfo; - struct ext3_dir_entry_2 *de; + struct ext4_dir_entry_2 *de; struct dx_frame frames[2], *frame; struct inode *dir; int block, err; @@ -594,9 +594,9 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash, start_minor_hash)); dir = dir_file->f_dentry->d_inode; - if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) { - hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version; - hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed; + if (!(EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) { + hinfo.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; + hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; count = htree_dirblock_to_tree(dir_file, dir, 0, &hinfo, start_hash, start_minor_hash); *next_hash = ~0; @@ -610,15 +610,15 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, /* Add '.' and '..' 
from the htree header */ if (!start_hash && !start_minor_hash) { - de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data; - if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0) + de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; + if ((err = ext4_htree_store_dirent(dir_file, 0, 0, de)) != 0) goto errout; count++; } if (start_hash < 2 || (start_hash ==2 && start_minor_hash==0)) { - de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data; - de = ext3_next_entry(de); - if ((err = ext3_htree_store_dirent(dir_file, 2, 0, de)) != 0) + de = (struct ext4_dir_entry_2 *) frames[0].bh->b_data; + de = ext4_next_entry(de); + if ((err = ext4_htree_store_dirent(dir_file, 2, 0, de)) != 0) goto errout; count++; } @@ -633,7 +633,7 @@ int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, } count += ret; hashval = ~0; - ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, + ret = ext4_htree_next_block(dir, HASH_NB_ALWAYS, frame, frames, &hashval); *next_hash = hashval; if (ret < 0) { @@ -663,7 +663,7 @@ errout: * Directory block splitting, compacting */ -static int dx_make_map (struct ext3_dir_entry_2 *de, int size, +static int dx_make_map (struct ext4_dir_entry_2 *de, int size, struct dx_hash_info *hinfo, struct dx_map_entry *map_tail) { int count = 0; @@ -673,7 +673,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size, while ((char *) de < base + size) { if (de->name_len && de->inode) { - ext3fs_dirhash(de->name, de->name_len, &h); + ext4fs_dirhash(de->name, de->name_len, &h); map_tail--; map_tail->hash = h.hash; map_tail->offs = (u32) ((char *) de - base); @@ -681,7 +681,7 @@ static int dx_make_map (struct ext3_dir_entry_2 *de, int size, cond_resched(); } /* XXX: do we need to check rec_len == 0 case? -Chris */ - de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); + de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); } return count; } @@ -730,21 +730,21 @@ static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block) #endif -static void ext3_update_dx_flag(struct inode *inode) +static void ext4_update_dx_flag(struct inode *inode) { - if (!EXT3_HAS_COMPAT_FEATURE(inode->i_sb, - EXT3_FEATURE_COMPAT_DIR_INDEX)) - EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; + if (!EXT4_HAS_COMPAT_FEATURE(inode->i_sb, + EXT4_FEATURE_COMPAT_DIR_INDEX)) + EXT4_I(inode)->i_flags &= ~EXT4_INDEX_FL; } /* - * NOTE! unlike strncmp, ext3_match returns 1 for success, 0 for failure. + * NOTE! unlike strncmp, ext4_match returns 1 for success, 0 for failure. * - * `len <= EXT3_NAME_LEN' is guaranteed by caller. + * `len <= EXT4_NAME_LEN' is guaranteed by caller. * `de != NULL' is guaranteed by caller. 
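 *
 * Only the types in the signature change in this diff; the test
 * itself is, in effect:
 *
 *	len == de->name_len && de->inode != 0 &&
 *	memcmp(name, de->name, len) == 0
 *
 * so a deleted entry (inode cleared to 0) can never match.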
*/ -static inline int ext3_match (int len, const char * const name, - struct ext3_dir_entry_2 * de) +static inline int ext4_match (int len, const char * const name, + struct ext4_dir_entry_2 * de) { if (len != de->name_len) return 0; @@ -760,24 +760,24 @@ static inline int search_dirblock(struct buffer_head * bh, struct inode *dir, struct dentry *dentry, unsigned long offset, - struct ext3_dir_entry_2 ** res_dir) + struct ext4_dir_entry_2 ** res_dir) { - struct ext3_dir_entry_2 * de; + struct ext4_dir_entry_2 * de; char * dlimit; int de_len; const char *name = dentry->d_name.name; int namelen = dentry->d_name.len; - de = (struct ext3_dir_entry_2 *) bh->b_data; + de = (struct ext4_dir_entry_2 *) bh->b_data; dlimit = bh->b_data + dir->i_sb->s_blocksize; while ((char *) de < dlimit) { /* this code is executed quadratically often */ /* do minimal checking `by hand' */ if ((char *) de + namelen <= dlimit && - ext3_match (namelen, name, de)) { + ext4_match (namelen, name, de)) { /* found a match - just to be sure, do a full check */ - if (!ext3_check_dir_entry("ext3_find_entry", + if (!ext4_check_dir_entry("ext4_find_entry", dir, de, bh, offset)) return -1; *res_dir = de; @@ -788,14 +788,14 @@ static inline int search_dirblock(struct buffer_head * bh, if (de_len <= 0) return -1; offset += de_len; - de = (struct ext3_dir_entry_2 *) ((char *) de + de_len); + de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); } return 0; } /* - * ext3_find_entry() + * ext4_find_entry() * * finds an entry in the specified directory with the wanted name. It * returns the cache buffer in which the entry was found, and the entry @@ -805,8 +805,8 @@ static inline int search_dirblock(struct buffer_head * bh, * The returned buffer_head has ->b_count elevated. The caller is expected * to brelse() it when appropriate. */ -static struct buffer_head * ext3_find_entry (struct dentry *dentry, - struct ext3_dir_entry_2 ** res_dir) +static struct buffer_head * ext4_find_entry (struct dentry *dentry, + struct ext4_dir_entry_2 ** res_dir) { struct super_block * sb; struct buffer_head * bh_use[NAMEI_RA_SIZE]; @@ -828,11 +828,11 @@ static struct buffer_head * ext3_find_entry (struct dentry *dentry, blocksize = sb->s_blocksize; namelen = dentry->d_name.len; name = dentry->d_name.name; - if (namelen > EXT3_NAME_LEN) + if (namelen > EXT4_NAME_LEN) return NULL; -#ifdef CONFIG_EXT3_INDEX +#ifdef CONFIG_EXT4_INDEX if (is_dx(dir)) { - bh = ext3_dx_find_entry(dentry, res_dir, &err); + bh = ext4_dx_find_entry(dentry, res_dir, &err); /* * On success, or if the error was file not found, * return. 
Otherwise, fall back to doing a search the @@ -840,11 +840,11 @@ static struct buffer_head * ext3_find_entry (struct dentry *dentry, */ if (bh || (err != ERR_BAD_DX_DIR)) return bh; - dxtrace(printk("ext3_find_entry: dx failed, falling back\n")); + dxtrace(printk("ext4_find_entry: dx failed, falling back\n")); } #endif - nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); - start = EXT3_I(dir)->i_dir_start_lookup; + nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); + start = EXT4_I(dir)->i_dir_start_lookup; if (start >= nblocks) start = 0; block = start; @@ -868,7 +868,7 @@ restart: break; } num++; - bh = ext3_getblk(NULL, dir, b++, 0, &err); + bh = ext4_getblk(NULL, dir, b++, 0, &err); bh_use[ra_max] = bh; if (bh) ll_rw_block(READ_META, 1, &bh); @@ -879,15 +879,15 @@ restart: wait_on_buffer(bh); if (!buffer_uptodate(bh)) { /* read error, skip block & hope for the best */ - ext3_error(sb, __FUNCTION__, "reading directory #%lu " + ext4_error(sb, __FUNCTION__, "reading directory #%lu " "offset %lu", dir->i_ino, block); brelse(bh); goto next; } i = search_dirblock(bh, dir, dentry, - block << EXT3_BLOCK_SIZE_BITS(sb), res_dir); + block << EXT4_BLOCK_SIZE_BITS(sb), res_dir); if (i == 1) { - EXT3_I(dir)->i_dir_start_lookup = block; + EXT4_I(dir)->i_dir_start_lookup = block; ret = bh; goto cleanup_and_exit; } else { @@ -905,7 +905,7 @@ restart: * search the last part of the directory before giving up. */ block = nblocks; - nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb); + nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); if (block < nblocks) { start = 0; goto restart; @@ -918,15 +918,15 @@ cleanup_and_exit: return ret; } -#ifdef CONFIG_EXT3_INDEX -static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, - struct ext3_dir_entry_2 **res_dir, int *err) +#ifdef CONFIG_EXT4_INDEX +static struct buffer_head * ext4_dx_find_entry(struct dentry *dentry, + struct ext4_dir_entry_2 **res_dir, int *err) { struct super_block * sb; struct dx_hash_info hinfo; u32 hash; struct dx_frame frames[2], *frame; - struct ext3_dir_entry_2 *de, *top; + struct ext4_dir_entry_2 *de, *top; struct buffer_head *bh; unsigned long block; int retval; @@ -948,16 +948,16 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, hash = hinfo.hash; do { block = dx_get_block(frame->at); - if (!(bh = ext3_bread (NULL,dir, block, 0, err))) + if (!(bh = ext4_bread (NULL,dir, block, 0, err))) goto errout; - de = (struct ext3_dir_entry_2 *) bh->b_data; - top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize - - EXT3_DIR_REC_LEN(0)); - for (; de < top; de = ext3_next_entry(de)) - if (ext3_match (namelen, name, de)) { - if (!ext3_check_dir_entry("ext3_find_entry", + de = (struct ext4_dir_entry_2 *) bh->b_data; + top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize - + EXT4_DIR_REC_LEN(0)); + for (; de < top; de = ext4_next_entry(de)) + if (ext4_match (namelen, name, de)) { + if (!ext4_check_dir_entry("ext4_find_entry", dir, de, bh, - (block<<EXT3_BLOCK_SIZE_BITS(sb)) + (block<<EXT4_BLOCK_SIZE_BITS(sb)) +((char *)de - bh->b_data))) { brelse (bh); goto errout; @@ -968,10 +968,10 @@ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry, } brelse (bh); /* Check to see if we should continue to search */ - retval = ext3_htree_next_block(dir, hash, frame, + retval = ext4_htree_next_block(dir, hash, frame, frames, NULL); if (retval < 0) { - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "error reading index page in directory #%lu", dir->i_ino); *err = retval; @@ 
-987,22 +987,22 @@ errout: } #endif -static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) +static struct dentry *ext4_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) { struct inode * inode; - struct ext3_dir_entry_2 * de; + struct ext4_dir_entry_2 * de; struct buffer_head * bh; - if (dentry->d_name.len > EXT3_NAME_LEN) + if (dentry->d_name.len > EXT4_NAME_LEN) return ERR_PTR(-ENAMETOOLONG); - bh = ext3_find_entry(dentry, &de); + bh = ext4_find_entry(dentry, &de); inode = NULL; if (bh) { unsigned long ino = le32_to_cpu(de->inode); brelse (bh); - if (!ext3_valid_inum(dir->i_sb, ino)) { - ext3_error(dir->i_sb, "ext3_lookup", + if (!ext4_valid_inum(dir->i_sb, ino)) { + ext4_error(dir->i_sb, "ext4_lookup", "bad inode number: %lu", ino); inode = NULL; } else @@ -1015,28 +1015,28 @@ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, str } -struct dentry *ext3_get_parent(struct dentry *child) +struct dentry *ext4_get_parent(struct dentry *child) { unsigned long ino; struct dentry *parent; struct inode *inode; struct dentry dotdot; - struct ext3_dir_entry_2 * de; + struct ext4_dir_entry_2 * de; struct buffer_head *bh; dotdot.d_name.name = ".."; dotdot.d_name.len = 2; dotdot.d_parent = child; /* confusing, isn't it! */ - bh = ext3_find_entry(&dotdot, &de); + bh = ext4_find_entry(&dotdot, &de); inode = NULL; if (!bh) return ERR_PTR(-ENOENT); ino = le32_to_cpu(de->inode); brelse(bh); - if (!ext3_valid_inum(child->d_inode->i_sb, ino)) { - ext3_error(child->d_inode->i_sb, "ext3_get_parent", + if (!ext4_valid_inum(child->d_inode->i_sb, ino)) { + ext4_error(child->d_inode->i_sb, "ext4_get_parent", "bad inode number: %lu", ino); inode = NULL; } else @@ -1054,65 +1054,65 @@ struct dentry *ext3_get_parent(struct dentry *child) } #define S_SHIFT 12 -static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = { - [S_IFREG >> S_SHIFT] = EXT3_FT_REG_FILE, - [S_IFDIR >> S_SHIFT] = EXT3_FT_DIR, - [S_IFCHR >> S_SHIFT] = EXT3_FT_CHRDEV, - [S_IFBLK >> S_SHIFT] = EXT3_FT_BLKDEV, - [S_IFIFO >> S_SHIFT] = EXT3_FT_FIFO, - [S_IFSOCK >> S_SHIFT] = EXT3_FT_SOCK, - [S_IFLNK >> S_SHIFT] = EXT3_FT_SYMLINK, +static unsigned char ext4_type_by_mode[S_IFMT >> S_SHIFT] = { + [S_IFREG >> S_SHIFT] = EXT4_FT_REG_FILE, + [S_IFDIR >> S_SHIFT] = EXT4_FT_DIR, + [S_IFCHR >> S_SHIFT] = EXT4_FT_CHRDEV, + [S_IFBLK >> S_SHIFT] = EXT4_FT_BLKDEV, + [S_IFIFO >> S_SHIFT] = EXT4_FT_FIFO, + [S_IFSOCK >> S_SHIFT] = EXT4_FT_SOCK, + [S_IFLNK >> S_SHIFT] = EXT4_FT_SYMLINK, }; -static inline void ext3_set_de_type(struct super_block *sb, - struct ext3_dir_entry_2 *de, +static inline void ext4_set_de_type(struct super_block *sb, + struct ext4_dir_entry_2 *de, umode_t mode) { - if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_FILETYPE)) - de->file_type = ext3_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FILETYPE)) + de->file_type = ext4_type_by_mode[(mode & S_IFMT)>>S_SHIFT]; } -#ifdef CONFIG_EXT3_INDEX -static struct ext3_dir_entry_2 * +#ifdef CONFIG_EXT4_INDEX +static struct ext4_dir_entry_2 * dx_move_dirents(char *from, char *to, struct dx_map_entry *map, int count) { unsigned rec_len = 0; while (count--) { - struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs); - rec_len = EXT3_DIR_REC_LEN(de->name_len); + struct ext4_dir_entry_2 *de = (struct ext4_dir_entry_2 *) (from + map->offs); + rec_len = EXT4_DIR_REC_LEN(de->name_len); memcpy (to, de, rec_len); - ((struct 
ext3_dir_entry_2 *) to)->rec_len = + ((struct ext4_dir_entry_2 *) to)->rec_len = cpu_to_le16(rec_len); de->inode = 0; map++; to += rec_len; } - return (struct ext3_dir_entry_2 *) (to - rec_len); + return (struct ext4_dir_entry_2 *) (to - rec_len); } -static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size) +static struct ext4_dir_entry_2* dx_pack_dirents(char *base, int size) { - struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base; + struct ext4_dir_entry_2 *next, *to, *prev, *de = (struct ext4_dir_entry_2 *) base; unsigned rec_len = 0; prev = to = de; while ((char*)de < base + size) { - next = (struct ext3_dir_entry_2 *) ((char *) de + + next = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); if (de->inode && de->name_len) { - rec_len = EXT3_DIR_REC_LEN(de->name_len); + rec_len = EXT4_DIR_REC_LEN(de->name_len); if (de > to) memmove(to, de, rec_len); to->rec_len = cpu_to_le16(rec_len); prev = to; - to = (struct ext3_dir_entry_2 *) (((char *) to) + rec_len); + to = (struct ext4_dir_entry_2 *) (((char *) to) + rec_len); } de = next; } return prev; } -static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, +static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, struct buffer_head **bh,struct dx_frame *frame, struct dx_hash_info *hinfo, int *error) { @@ -1124,10 +1124,10 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, struct dx_map_entry *map; char *data1 = (*bh)->b_data, *data2; unsigned split; - struct ext3_dir_entry_2 *de = NULL, *de2; + struct ext4_dir_entry_2 *de = NULL, *de2; int err; - bh2 = ext3_append (handle, dir, &newblock, error); + bh2 = ext4_append (handle, dir, &newblock, error); if (!(bh2)) { brelse(*bh); *bh = NULL; @@ -1135,17 +1135,17 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, } BUFFER_TRACE(*bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, *bh); + err = ext4_journal_get_write_access(handle, *bh); if (err) { journal_error: brelse(*bh); brelse(bh2); *bh = NULL; - ext3_std_error(dir->i_sb, err); + ext4_std_error(dir->i_sb, err); goto errout; } BUFFER_TRACE(frame->bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, frame->bh); + err = ext4_journal_get_write_access(handle, frame->bh); if (err) goto journal_error; @@ -1153,7 +1153,7 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, /* create map in the end of data2 block */ map = (struct dx_map_entry *) (data2 + blocksize); - count = dx_make_map ((struct ext3_dir_entry_2 *) data1, + count = dx_make_map ((struct ext4_dir_entry_2 *) data1, blocksize, hinfo, map); map -= count; split = count/2; // need to adjust to actual middle @@ -1168,8 +1168,8 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, de = dx_pack_dirents(data1,blocksize); de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2); - dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1)); - dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1)); + dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data1, blocksize, 1)); + dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); /* Which block gets the new entry? 
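 *
 * (A worked example: dx_make_map/dx_sort_map left the map sorted by
 *  hash and do_split cut it at the middle, so hash2 -- the hash at
 *  the split point -- is the lowest hash moved to the new block.
 *  With leaf hashes {10, 20, 30, 40} the halves are {10, 20} and
 *  {30, 40}, hash2 == 30, and a new entry hashing to 35 goes to the
 *  new block while one hashing to 15 stays put.)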
*/ if (hinfo->hash >= hash2) @@ -1178,10 +1178,10 @@ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, de = de2; } dx_insert_block (frame, hash2 + continued, newblock); - err = ext3_journal_dirty_metadata (handle, bh2); + err = ext4_journal_dirty_metadata (handle, bh2); if (err) goto journal_error; - err = ext3_journal_dirty_metadata (handle, frame->bh); + err = ext4_journal_dirty_metadata (handle, frame->bh); if (err) goto journal_error; brelse (bh2); @@ -1204,7 +1204,7 @@ errout: * all other cases bh is released. */ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, - struct inode *inode, struct ext3_dir_entry_2 *de, + struct inode *inode, struct ext4_dir_entry_2 *de, struct buffer_head * bh) { struct inode *dir = dentry->d_parent->d_inode; @@ -1215,51 +1215,51 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, int nlen, rlen, err; char *top; - reclen = EXT3_DIR_REC_LEN(namelen); + reclen = EXT4_DIR_REC_LEN(namelen); if (!de) { - de = (struct ext3_dir_entry_2 *)bh->b_data; + de = (struct ext4_dir_entry_2 *)bh->b_data; top = bh->b_data + dir->i_sb->s_blocksize - reclen; while ((char *) de <= top) { - if (!ext3_check_dir_entry("ext3_add_entry", dir, de, + if (!ext4_check_dir_entry("ext4_add_entry", dir, de, bh, offset)) { brelse (bh); return -EIO; } - if (ext3_match (namelen, name, de)) { + if (ext4_match (namelen, name, de)) { brelse (bh); return -EEXIST; } - nlen = EXT3_DIR_REC_LEN(de->name_len); + nlen = EXT4_DIR_REC_LEN(de->name_len); rlen = le16_to_cpu(de->rec_len); if ((de->inode? rlen - nlen: rlen) >= reclen) break; - de = (struct ext3_dir_entry_2 *)((char *)de + rlen); + de = (struct ext4_dir_entry_2 *)((char *)de + rlen); offset += rlen; } if ((char *) de > top) return -ENOSPC; } BUFFER_TRACE(bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, bh); + err = ext4_journal_get_write_access(handle, bh); if (err) { - ext3_std_error(dir->i_sb, err); + ext4_std_error(dir->i_sb, err); brelse(bh); return err; } /* By now the buffer is marked for journaling */ - nlen = EXT3_DIR_REC_LEN(de->name_len); + nlen = EXT4_DIR_REC_LEN(de->name_len); rlen = le16_to_cpu(de->rec_len); if (de->inode) { - struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen); + struct ext4_dir_entry_2 *de1 = (struct ext4_dir_entry_2 *)((char *)de + nlen); de1->rec_len = cpu_to_le16(rlen - nlen); de->rec_len = cpu_to_le16(nlen); de = de1; } - de->file_type = EXT3_FT_UNKNOWN; + de->file_type = EXT4_FT_UNKNOWN; if (inode) { de->inode = cpu_to_le32(inode->i_ino); - ext3_set_de_type(dir->i_sb, de, inode->i_mode); + ext4_set_de_type(dir->i_sb, de, inode->i_mode); } else de->inode = 0; de->name_len = namelen; @@ -1270,24 +1270,24 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, * on this. * * XXX similarly, too many callers depend on - * ext3_new_inode() setting the times, but error + * ext4_new_inode() setting the times, but error * recovery deletes the inode, so the worst that can * happen is that the times are slightly out of date * and/or different from the directory change time. 
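 *
 * (Recap of the record arithmetic above, where EXT4_DIR_REC_LEN(n)
 *  is 8 + n rounded up to a multiple of 4: a live entry named "foo"
 *  really needs nlen = EXT4_DIR_REC_LEN(3) = 12 bytes, so an on-disk
 *  rec_len of 40 leaves 28 bytes of slack -- room for any new name
 *  of up to 20 characters, since EXT4_DIR_REC_LEN(20) == 28.)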
*/ dir->i_mtime = dir->i_ctime = CURRENT_TIME_SEC; - ext3_update_dx_flag(dir); + ext4_update_dx_flag(dir); dir->i_version++; - ext3_mark_inode_dirty(handle, dir); - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - err = ext3_journal_dirty_metadata(handle, bh); + ext4_mark_inode_dirty(handle, dir); + BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, bh); if (err) - ext3_std_error(dir->i_sb, err); + ext4_std_error(dir->i_sb, err); brelse(bh); return 0; } -#ifdef CONFIG_EXT3_INDEX +#ifdef CONFIG_EXT4_INDEX /* * This converts a one block unindexed directory to a 3 block indexed * directory, and adds the dentry to the indexed directory. @@ -1302,7 +1302,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, struct dx_root *root; struct dx_frame frames[2], *frame; struct dx_entry *entries; - struct ext3_dir_entry_2 *de, *de2; + struct ext4_dir_entry_2 *de, *de2; char *data1, *top; unsigned len; int retval; @@ -1313,38 +1313,38 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, blocksize = dir->i_sb->s_blocksize; dxtrace(printk("Creating index\n")); - retval = ext3_journal_get_write_access(handle, bh); + retval = ext4_journal_get_write_access(handle, bh); if (retval) { - ext3_std_error(dir->i_sb, retval); + ext4_std_error(dir->i_sb, retval); brelse(bh); return retval; } root = (struct dx_root *) bh->b_data; - bh2 = ext3_append (handle, dir, &block, &retval); + bh2 = ext4_append (handle, dir, &block, &retval); if (!(bh2)) { brelse(bh); return retval; } - EXT3_I(dir)->i_flags |= EXT3_INDEX_FL; + EXT4_I(dir)->i_flags |= EXT4_INDEX_FL; data1 = bh2->b_data; /* The 0th block becomes the root, move the dirents out */ fde = &root->dotdot; - de = (struct ext3_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len)); + de = (struct ext4_dir_entry_2 *)((char *)fde + le16_to_cpu(fde->rec_len)); len = ((char *) root) + blocksize - (char *) de; memcpy (data1, de, len); - de = (struct ext3_dir_entry_2 *) data1; + de = (struct ext4_dir_entry_2 *) data1; top = data1 + len; while ((char *)(de2=(void*)de+le16_to_cpu(de->rec_len)) < top) de = de2; de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de); /* Initialize the root; the dot dirents already exist */ - de = (struct ext3_dir_entry_2 *) (&root->dotdot); - de->rec_len = cpu_to_le16(blocksize - EXT3_DIR_REC_LEN(2)); + de = (struct ext4_dir_entry_2 *) (&root->dotdot); + de->rec_len = cpu_to_le16(blocksize - EXT4_DIR_REC_LEN(2)); memset (&root->info, 0, sizeof(root->info)); root->info.info_length = sizeof(root->info); - root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version; + root->info.hash_version = EXT4_SB(dir->i_sb)->s_def_hash_version; entries = root->entries; dx_set_block (entries, 1); dx_set_count (entries, 1); @@ -1352,8 +1352,8 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, /* Initialize as for dx_probe */ hinfo.hash_version = root->info.hash_version; - hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed; - ext3fs_dirhash(name, namelen, &hinfo); + hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; + ext4fs_dirhash(name, namelen, &hinfo); frame = frames; frame->entries = entries; frame->at = entries; @@ -1369,25 +1369,25 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry, #endif /* - * ext3_add_entry() + * ext4_add_entry() * * adds a file entry to the specified directory, using the same - * semantics as ext3_find_entry(). It returns NULL if it failed. + * semantics as ext4_find_entry(). It returns NULL if it failed. 
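 *
 * In outline: the htree path is tried first, and on ERR_BAD_DX_DIR
 * the EXT4_INDEX_FL flag is cleared and we fall back to the linear
 * scan; a single-block directory that fills up is converted to an
 * htree via make_indexed_dir(); only when every existing block
 * reports -ENOSPC is a fresh block appended with ext4_append().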
* * NOTE!! The inode part of 'de' is left at 0 - which means you * may not sleep between calling this and putting something into * the entry, as someone else might have used it while you slept. */ -static int ext3_add_entry (handle_t *handle, struct dentry *dentry, +static int ext4_add_entry (handle_t *handle, struct dentry *dentry, struct inode *inode) { struct inode *dir = dentry->d_parent->d_inode; unsigned long offset; struct buffer_head * bh; - struct ext3_dir_entry_2 *de; + struct ext4_dir_entry_2 *de; struct super_block * sb; int retval; -#ifdef CONFIG_EXT3_INDEX +#ifdef CONFIG_EXT4_INDEX int dx_fallback=0; #endif unsigned blocksize; @@ -1397,46 +1397,46 @@ static int ext3_add_entry (handle_t *handle, struct dentry *dentry, blocksize = sb->s_blocksize; if (!dentry->d_name.len) return -EINVAL; -#ifdef CONFIG_EXT3_INDEX +#ifdef CONFIG_EXT4_INDEX if (is_dx(dir)) { - retval = ext3_dx_add_entry(handle, dentry, inode); + retval = ext4_dx_add_entry(handle, dentry, inode); if (!retval || (retval != ERR_BAD_DX_DIR)) return retval; - EXT3_I(dir)->i_flags &= ~EXT3_INDEX_FL; + EXT4_I(dir)->i_flags &= ~EXT4_INDEX_FL; dx_fallback++; - ext3_mark_inode_dirty(handle, dir); + ext4_mark_inode_dirty(handle, dir); } #endif blocks = dir->i_size >> sb->s_blocksize_bits; for (block = 0, offset = 0; block < blocks; block++) { - bh = ext3_bread(handle, dir, block, 0, &retval); + bh = ext4_bread(handle, dir, block, 0, &retval); if(!bh) return retval; retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); if (retval != -ENOSPC) return retval; -#ifdef CONFIG_EXT3_INDEX +#ifdef CONFIG_EXT4_INDEX if (blocks == 1 && !dx_fallback && - EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) + EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) return make_indexed_dir(handle, dentry, inode, bh); #endif brelse(bh); } - bh = ext3_append(handle, dir, &block, &retval); + bh = ext4_append(handle, dir, &block, &retval); if (!bh) return retval; - de = (struct ext3_dir_entry_2 *) bh->b_data; + de = (struct ext4_dir_entry_2 *) bh->b_data; de->inode = 0; de->rec_len = cpu_to_le16(blocksize); return add_dirent_to_buf(handle, dentry, inode, de, bh); } -#ifdef CONFIG_EXT3_INDEX +#ifdef CONFIG_EXT4_INDEX /* * Returns 0 for success, or a negative error value */ -static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, +static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, struct inode *inode) { struct dx_frame frames[2], *frame; @@ -1445,7 +1445,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, struct buffer_head * bh; struct inode *dir = dentry->d_parent->d_inode; struct super_block * sb = dir->i_sb; - struct ext3_dir_entry_2 *de; + struct ext4_dir_entry_2 *de; int err; frame = dx_probe(dentry, NULL, &hinfo, frames, &err); @@ -1454,11 +1454,11 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, entries = frame->entries; at = frame->at; - if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err))) + if (!(bh = ext4_bread(handle,dir, dx_get_block(frame->at), 0, &err))) goto cleanup; BUFFER_TRACE(bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, bh); + err = ext4_journal_get_write_access(handle, bh); if (err) goto journal_error; @@ -1482,12 +1482,12 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, if (levels && (dx_get_count(frames->entries) == dx_get_limit(frames->entries))) { - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "Directory index full!"); err = -ENOSPC; goto 
cleanup; } - bh2 = ext3_append (handle, dir, &newblock, &err); + bh2 = ext4_append (handle, dir, &newblock, &err); if (!(bh2)) goto cleanup; node2 = (struct dx_node *)(bh2->b_data); @@ -1495,7 +1495,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, node2->fake.rec_len = cpu_to_le16(sb->s_blocksize); node2->fake.inode = 0; BUFFER_TRACE(frame->bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, frame->bh); + err = ext4_journal_get_write_access(handle, frame->bh); if (err) goto journal_error; if (levels) { @@ -1504,7 +1504,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, dxtrace(printk("Split index %i/%i\n", icount1, icount2)); BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */ - err = ext3_journal_get_write_access(handle, + err = ext4_journal_get_write_access(handle, frames[0].bh); if (err) goto journal_error; @@ -1525,7 +1525,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, dxtrace(dx_show_index ("node", frames[1].entries)); dxtrace(dx_show_index ("node", ((struct dx_node *) bh2->b_data)->entries)); - err = ext3_journal_dirty_metadata(handle, bh2); + err = ext4_journal_dirty_metadata(handle, bh2); if (err) goto journal_error; brelse (bh2); @@ -1545,12 +1545,12 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, frame->at = at = at - entries + entries2; frame->entries = entries = entries2; frame->bh = bh2; - err = ext3_journal_get_write_access(handle, + err = ext4_journal_get_write_access(handle, frame->bh); if (err) goto journal_error; } - ext3_journal_dirty_metadata(handle, frames[0].bh); + ext4_journal_dirty_metadata(handle, frames[0].bh); } de = do_split(handle, dir, &bh, frame, &hinfo, &err); if (!de) @@ -1560,7 +1560,7 @@ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry, goto cleanup; journal_error: - ext3_std_error(dir->i_sb, err); + ext4_std_error(dir->i_sb, err); cleanup: if (bh) brelse(bh); @@ -1570,26 +1570,26 @@ cleanup: #endif /* - * ext3_delete_entry deletes a directory entry by merging it with the + * ext4_delete_entry deletes a directory entry by merging it with the * previous entry */ -static int ext3_delete_entry (handle_t *handle, +static int ext4_delete_entry (handle_t *handle, struct inode * dir, - struct ext3_dir_entry_2 * de_del, + struct ext4_dir_entry_2 * de_del, struct buffer_head * bh) { - struct ext3_dir_entry_2 * de, * pde; + struct ext4_dir_entry_2 * de, * pde; int i; i = 0; pde = NULL; - de = (struct ext3_dir_entry_2 *) bh->b_data; + de = (struct ext4_dir_entry_2 *) bh->b_data; while (i < bh->b_size) { - if (!ext3_check_dir_entry("ext3_delete_entry", dir, de, bh, i)) + if (!ext4_check_dir_entry("ext4_delete_entry", dir, de, bh, i)) return -EIO; if (de == de_del) { BUFFER_TRACE(bh, "get_write_access"); - ext3_journal_get_write_access(handle, bh); + ext4_journal_get_write_access(handle, bh); if (pde) pde->rec_len = cpu_to_le16(le16_to_cpu(pde->rec_len) + @@ -1597,43 +1597,43 @@ static int ext3_delete_entry (handle_t *handle, else de->inode = 0; dir->i_version++; - BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, bh); + BUFFER_TRACE(bh, "call ext4_journal_dirty_metadata"); + ext4_journal_dirty_metadata(handle, bh); return 0; } i += le16_to_cpu(de->rec_len); pde = de; - de = (struct ext3_dir_entry_2 *) + de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); } return -ENOENT; } /* - * ext3_mark_inode_dirty is somewhat expensive, so unlike ext2 we + * 
ext4_mark_inode_dirty is somewhat expensive, so unlike ext2 we * do not perform it in these functions. We perform it at the call site, * if it is needed. */ -static inline void ext3_inc_count(handle_t *handle, struct inode *inode) +static inline void ext4_inc_count(handle_t *handle, struct inode *inode) { inc_nlink(inode); } -static inline void ext3_dec_count(handle_t *handle, struct inode *inode) +static inline void ext4_dec_count(handle_t *handle, struct inode *inode) { drop_nlink(inode); } -static int ext3_add_nondir(handle_t *handle, +static int ext4_add_nondir(handle_t *handle, struct dentry *dentry, struct inode *inode) { - int err = ext3_add_entry(handle, dentry, inode); + int err = ext4_add_entry(handle, dentry, inode); if (!err) { - ext3_mark_inode_dirty(handle, inode); + ext4_mark_inode_dirty(handle, inode); d_instantiate(dentry, inode); return 0; } - ext3_dec_count(handle, inode); + ext4_dec_count(handle, inode); iput(inode); return err; } @@ -1646,7 +1646,7 @@ static int ext3_add_nondir(handle_t *handle, * If the create succeeds, we fill in the inode information * with d_instantiate(). */ -static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, +static int ext4_create (struct inode * dir, struct dentry * dentry, int mode, struct nameidata *nd) { handle_t *handle; @@ -1654,30 +1654,30 @@ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, int err, retries = 0; retry: - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); + handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + + 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); if (IS_DIRSYNC(dir)) handle->h_sync = 1; - inode = ext3_new_inode (handle, dir, mode); + inode = ext4_new_inode (handle, dir, mode); err = PTR_ERR(inode); if (!IS_ERR(inode)) { - inode->i_op = &ext3_file_inode_operations; - inode->i_fop = &ext3_file_operations; - ext3_set_aops(inode); - err = ext3_add_nondir(handle, dentry, inode); + inode->i_op = &ext4_file_inode_operations; + inode->i_fop = &ext4_file_operations; + ext4_set_aops(inode); + err = ext4_add_nondir(handle, dentry, inode); } - ext3_journal_stop(handle); - if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) + ext4_journal_stop(handle); + if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; } -static int ext3_mknod (struct inode * dir, struct dentry *dentry, +static int ext4_mknod (struct inode * dir, struct dentry *dentry, int mode, dev_t rdev) { handle_t *handle; @@ -1688,100 +1688,100 @@ static int ext3_mknod (struct inode * dir, struct dentry *dentry, return -EINVAL; retry: - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); + handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + + 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); if (IS_DIRSYNC(dir)) handle->h_sync = 1; - inode = ext3_new_inode (handle, dir, mode); + inode = ext4_new_inode (handle, dir, mode); err = PTR_ERR(inode); if (!IS_ERR(inode)) { init_special_inode(inode, inode->i_mode, rdev); -#ifdef CONFIG_EXT3_FS_XATTR - inode->i_op = &ext3_special_inode_operations; +#ifdef CONFIG_EXT4DEV_FS_XATTR + inode->i_op = &ext4_special_inode_operations; #endif - err = 
ext3_add_nondir(handle, dentry, inode); + err = ext4_add_nondir(handle, dentry, inode); } - ext3_journal_stop(handle); - if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) + ext4_journal_stop(handle); + if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; } -static int ext3_mkdir(struct inode * dir, struct dentry * dentry, int mode) +static int ext4_mkdir(struct inode * dir, struct dentry * dentry, int mode) { handle_t *handle; struct inode * inode; struct buffer_head * dir_block; - struct ext3_dir_entry_2 * de; + struct ext4_dir_entry_2 * de; int err, retries = 0; - if (dir->i_nlink >= EXT3_LINK_MAX) + if (dir->i_nlink >= EXT4_LINK_MAX) return -EMLINK; retry: - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3 + - 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); + handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + + 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); if (IS_DIRSYNC(dir)) handle->h_sync = 1; - inode = ext3_new_inode (handle, dir, S_IFDIR | mode); + inode = ext4_new_inode (handle, dir, S_IFDIR | mode); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_stop; - inode->i_op = &ext3_dir_inode_operations; - inode->i_fop = &ext3_dir_operations; - inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize; - dir_block = ext3_bread (handle, inode, 0, 1, &err); + inode->i_op = &ext4_dir_inode_operations; + inode->i_fop = &ext4_dir_operations; + inode->i_size = EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; + dir_block = ext4_bread (handle, inode, 0, 1, &err); if (!dir_block) { drop_nlink(inode); /* is this nlink == 0? */ - ext3_mark_inode_dirty(handle, inode); + ext4_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; } BUFFER_TRACE(dir_block, "get_write_access"); - ext3_journal_get_write_access(handle, dir_block); - de = (struct ext3_dir_entry_2 *) dir_block->b_data; + ext4_journal_get_write_access(handle, dir_block); + de = (struct ext4_dir_entry_2 *) dir_block->b_data; de->inode = cpu_to_le32(inode->i_ino); de->name_len = 1; - de->rec_len = cpu_to_le16(EXT3_DIR_REC_LEN(de->name_len)); + de->rec_len = cpu_to_le16(EXT4_DIR_REC_LEN(de->name_len)); strcpy (de->name, "."); - ext3_set_de_type(dir->i_sb, de, S_IFDIR); - de = (struct ext3_dir_entry_2 *) + ext4_set_de_type(dir->i_sb, de, S_IFDIR); + de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); de->inode = cpu_to_le32(dir->i_ino); - de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT3_DIR_REC_LEN(1)); + de->rec_len = cpu_to_le16(inode->i_sb->s_blocksize-EXT4_DIR_REC_LEN(1)); de->name_len = 2; strcpy (de->name, ".."); - ext3_set_de_type(dir->i_sb, de, S_IFDIR); + ext4_set_de_type(dir->i_sb, de, S_IFDIR); inode->i_nlink = 2; - BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_block); + BUFFER_TRACE(dir_block, "call ext4_journal_dirty_metadata"); + ext4_journal_dirty_metadata(handle, dir_block); brelse (dir_block); - ext3_mark_inode_dirty(handle, inode); - err = ext3_add_entry (handle, dentry, inode); + ext4_mark_inode_dirty(handle, inode); + err = ext4_add_entry (handle, dentry, inode); if (err) { inode->i_nlink = 0; - ext3_mark_inode_dirty(handle, inode); + ext4_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; } inc_nlink(dir); - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); + ext4_update_dx_flag(dir); + 
ext4_mark_inode_dirty(handle, dir); d_instantiate(dentry, inode); out_stop: - ext3_journal_stop(handle); - if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) + ext4_journal_stop(handle); + if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; } @@ -1793,59 +1793,59 @@ static int empty_dir (struct inode * inode) { unsigned long offset; struct buffer_head * bh; - struct ext3_dir_entry_2 * de, * de1; + struct ext4_dir_entry_2 * de, * de1; struct super_block * sb; int err = 0; sb = inode->i_sb; - if (inode->i_size < EXT3_DIR_REC_LEN(1) + EXT3_DIR_REC_LEN(2) || - !(bh = ext3_bread (NULL, inode, 0, 0, &err))) { + if (inode->i_size < EXT4_DIR_REC_LEN(1) + EXT4_DIR_REC_LEN(2) || + !(bh = ext4_bread (NULL, inode, 0, 0, &err))) { if (err) - ext3_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __FUNCTION__, "error %d reading directory #%lu offset 0", err, inode->i_ino); else - ext3_warning(inode->i_sb, __FUNCTION__, + ext4_warning(inode->i_sb, __FUNCTION__, "bad directory (dir #%lu) - no data block", inode->i_ino); return 1; } - de = (struct ext3_dir_entry_2 *) bh->b_data; - de1 = (struct ext3_dir_entry_2 *) + de = (struct ext4_dir_entry_2 *) bh->b_data; + de1 = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); if (le32_to_cpu(de->inode) != inode->i_ino || !le32_to_cpu(de1->inode) || strcmp (".", de->name) || strcmp ("..", de1->name)) { - ext3_warning (inode->i_sb, "empty_dir", + ext4_warning (inode->i_sb, "empty_dir", "bad directory (dir #%lu) - no `.' or `..'", inode->i_ino); brelse (bh); return 1; } offset = le16_to_cpu(de->rec_len) + le16_to_cpu(de1->rec_len); - de = (struct ext3_dir_entry_2 *) + de = (struct ext4_dir_entry_2 *) ((char *) de1 + le16_to_cpu(de1->rec_len)); while (offset < inode->i_size ) { if (!bh || (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) { err = 0; brelse (bh); - bh = ext3_bread (NULL, inode, - offset >> EXT3_BLOCK_SIZE_BITS(sb), 0, &err); + bh = ext4_bread (NULL, inode, + offset >> EXT4_BLOCK_SIZE_BITS(sb), 0, &err); if (!bh) { if (err) - ext3_error(sb, __FUNCTION__, + ext4_error(sb, __FUNCTION__, "error %d reading directory" " #%lu offset %lu", err, inode->i_ino, offset); offset += sb->s_blocksize; continue; } - de = (struct ext3_dir_entry_2 *) bh->b_data; + de = (struct ext4_dir_entry_2 *) bh->b_data; } - if (!ext3_check_dir_entry("empty_dir", inode, de, bh, offset)) { - de = (struct ext3_dir_entry_2 *)(bh->b_data + + if (!ext4_check_dir_entry("empty_dir", inode, de, bh, offset)) { + de = (struct ext4_dir_entry_2 *)(bh->b_data + sb->s_blocksize); offset = (offset | (sb->s_blocksize - 1)) + 1; continue; @@ -1855,57 +1855,57 @@ static int empty_dir (struct inode * inode) return 0; } offset += le16_to_cpu(de->rec_len); - de = (struct ext3_dir_entry_2 *) + de = (struct ext4_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len)); } brelse (bh); return 1; } -/* ext3_orphan_add() links an unlinked or truncated inode into a list of +/* ext4_orphan_add() links an unlinked or truncated inode into a list of * such inodes, starting at the superblock, in case we crash before the * file is closed/deleted, or in case the inode truncate spans multiple * transactions and the last transaction is not recovered after a crash. * * At filesystem recovery time, we walk this list deleting unlinked - * inodes and truncating linked inodes in ext3_orphan_cleanup(). + * inodes and truncating linked inodes in ext4_orphan_cleanup(). 
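 *
 * On disk the list is a head-inserted chain threaded through the
 * orphaned inodes themselves (the link reuses the on-disk i_dtime
 * field); after adding A, then B, then C it looks like:
 *
 *	es->s_last_orphan -> C -> B -> A -> 0
 *
 * where every arrow past the first is the NEXT_ORPHAN() of the
 * inode before it, exactly the two stores made below.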
*/ -int ext3_orphan_add(handle_t *handle, struct inode *inode) +int ext4_orphan_add(handle_t *handle, struct inode *inode) { struct super_block *sb = inode->i_sb; - struct ext3_iloc iloc; + struct ext4_iloc iloc; int err = 0, rc; lock_super(sb); - if (!list_empty(&EXT3_I(inode)->i_orphan)) + if (!list_empty(&EXT4_I(inode)->i_orphan)) goto out_unlock; /* Orphan handling is only valid for files with data blocks * being truncated, or files being unlinked. */ /* @@@ FIXME: Observation from aviro: - * I think I can trigger J_ASSERT in ext3_orphan_add(). We block - * here (on lock_super()), so race with ext3_link() which might bump + * I think I can trigger J_ASSERT in ext4_orphan_add(). We block + * here (on lock_super()), so race with ext4_link() which might bump * ->i_nlink. For, say it, character device. Not a regular file, * not a directory, not a symlink and ->i_nlink > 0. */ J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); - BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access"); - err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); + BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); + err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); if (err) goto out_unlock; - err = ext3_reserve_inode_write(handle, inode, &iloc); + err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) goto out_unlock; /* Insert this inode at the head of the on-disk orphan list... */ - NEXT_ORPHAN(inode) = le32_to_cpu(EXT3_SB(sb)->s_es->s_last_orphan); - EXT3_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); - err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); - rc = ext3_mark_iloc_dirty(handle, inode, &iloc); + NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); + EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); + err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); + rc = ext4_mark_iloc_dirty(handle, inode, &iloc); if (!err) err = rc; @@ -1918,28 +1918,28 @@ int ext3_orphan_add(handle_t *handle, struct inode *inode) * This is safe: on error we're going to ignore the orphan list * anyway on the next recovery. */ if (!err) - list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); + list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); jbd_debug(4, "orphan inode %lu will point to %d\n", inode->i_ino, NEXT_ORPHAN(inode)); out_unlock: unlock_super(sb); - ext3_std_error(inode->i_sb, err); + ext4_std_error(inode->i_sb, err); return err; } /* - * ext3_orphan_del() removes an unlinked or truncated inode from the list + * ext4_orphan_del() removes an unlinked or truncated inode from the list * of such inodes stored on disk, because it is finally being cleaned up. 
*/ -int ext3_orphan_del(handle_t *handle, struct inode *inode) +int ext4_orphan_del(handle_t *handle, struct inode *inode) { struct list_head *prev; - struct ext3_inode_info *ei = EXT3_I(inode); - struct ext3_sb_info *sbi; + struct ext4_inode_info *ei = EXT4_I(inode); + struct ext4_sb_info *sbi; unsigned long ino_next; - struct ext3_iloc iloc; + struct ext4_iloc iloc; int err = 0; lock_super(inode->i_sb); @@ -1950,7 +1950,7 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode) ino_next = NEXT_ORPHAN(inode); prev = ei->i_orphan.prev; - sbi = EXT3_SB(inode->i_sb); + sbi = EXT4_SB(inode->i_sb); jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); @@ -1963,38 +1963,38 @@ int ext3_orphan_del(handle_t *handle, struct inode *inode) if (!handle) goto out; - err = ext3_reserve_inode_write(handle, inode, &iloc); + err = ext4_reserve_inode_write(handle, inode, &iloc); if (err) goto out_err; if (prev == &sbi->s_orphan) { jbd_debug(4, "superblock will point to %lu\n", ino_next); BUFFER_TRACE(sbi->s_sbh, "get_write_access"); - err = ext3_journal_get_write_access(handle, sbi->s_sbh); + err = ext4_journal_get_write_access(handle, sbi->s_sbh); if (err) goto out_brelse; sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); - err = ext3_journal_dirty_metadata(handle, sbi->s_sbh); + err = ext4_journal_dirty_metadata(handle, sbi->s_sbh); } else { - struct ext3_iloc iloc2; + struct ext4_iloc iloc2; struct inode *i_prev = - &list_entry(prev, struct ext3_inode_info, i_orphan)->vfs_inode; + &list_entry(prev, struct ext4_inode_info, i_orphan)->vfs_inode; jbd_debug(4, "orphan inode %lu will point to %lu\n", i_prev->i_ino, ino_next); - err = ext3_reserve_inode_write(handle, i_prev, &iloc2); + err = ext4_reserve_inode_write(handle, i_prev, &iloc2); if (err) goto out_brelse; NEXT_ORPHAN(i_prev) = ino_next; - err = ext3_mark_iloc_dirty(handle, i_prev, &iloc2); + err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2); } if (err) goto out_brelse; NEXT_ORPHAN(inode) = 0; - err = ext3_mark_iloc_dirty(handle, inode, &iloc); + err = ext4_mark_iloc_dirty(handle, inode, &iloc); out_err: - ext3_std_error(inode->i_sb, err); + ext4_std_error(inode->i_sb, err); out: unlock_super(inode->i_sb); return err; @@ -2004,23 +2004,23 @@ out_brelse: goto out_err; } -static int ext3_rmdir (struct inode * dir, struct dentry *dentry) +static int ext4_rmdir (struct inode * dir, struct dentry *dentry) { int retval; struct inode * inode; struct buffer_head * bh; - struct ext3_dir_entry_2 * de; + struct ext4_dir_entry_2 * de; handle_t *handle; /* Initialize quotas before so that eventual writes go in * separate transaction */ DQUOT_INIT(dentry->d_inode); - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); + handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); retval = -ENOENT; - bh = ext3_find_entry (dentry, &de); + bh = ext4_find_entry (dentry, &de); if (!bh) goto end_rmdir; @@ -2037,11 +2037,11 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry) if (!empty_dir (inode)) goto end_rmdir; - retval = ext3_delete_entry(handle, dir, de, bh); + retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto end_rmdir; if (inode->i_nlink != 2) - ext3_warning (inode->i_sb, "ext3_rmdir", + ext4_warning (inode->i_sb, "ext4_rmdir", "empty directory has nlink!=2 (%d)", inode->i_nlink); inode->i_version++; @@ -2050,31 +2050,31 @@ static int ext3_rmdir (struct inode * dir, struct dentry *dentry) * zero will ensure that the right thing happens 
during any * recovery. */ inode->i_size = 0; - ext3_orphan_add(handle, inode); + ext4_orphan_add(handle, inode); inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; - ext3_mark_inode_dirty(handle, inode); + ext4_mark_inode_dirty(handle, inode); drop_nlink(dir); - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); + ext4_update_dx_flag(dir); + ext4_mark_inode_dirty(handle, dir); end_rmdir: - ext3_journal_stop(handle); + ext4_journal_stop(handle); brelse (bh); return retval; } -static int ext3_unlink(struct inode * dir, struct dentry *dentry) +static int ext4_unlink(struct inode * dir, struct dentry *dentry) { int retval; struct inode * inode; struct buffer_head * bh; - struct ext3_dir_entry_2 * de; + struct ext4_dir_entry_2 * de; handle_t *handle; /* Initialize quotas before so that eventual writes go * in separate transaction */ DQUOT_INIT(dentry->d_inode); - handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS(dir->i_sb)); + handle = ext4_journal_start(dir, EXT4_DELETE_TRANS_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2082,7 +2082,7 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry) handle->h_sync = 1; retval = -ENOENT; - bh = ext3_find_entry (dentry, &de); + bh = ext4_find_entry (dentry, &de); if (!bh) goto end_unlink; @@ -2093,31 +2093,31 @@ static int ext3_unlink(struct inode * dir, struct dentry *dentry) goto end_unlink; if (!inode->i_nlink) { - ext3_warning (inode->i_sb, "ext3_unlink", + ext4_warning (inode->i_sb, "ext4_unlink", "Deleting nonexistent file (%lu), %d", inode->i_ino, inode->i_nlink); inode->i_nlink = 1; } - retval = ext3_delete_entry(handle, dir, de, bh); + retval = ext4_delete_entry(handle, dir, de, bh); if (retval) goto end_unlink; dir->i_ctime = dir->i_mtime = CURRENT_TIME_SEC; - ext3_update_dx_flag(dir); - ext3_mark_inode_dirty(handle, dir); + ext4_update_dx_flag(dir); + ext4_mark_inode_dirty(handle, dir); drop_nlink(inode); if (!inode->i_nlink) - ext3_orphan_add(handle, inode); + ext4_orphan_add(handle, inode); inode->i_ctime = dir->i_ctime; - ext3_mark_inode_dirty(handle, inode); + ext4_mark_inode_dirty(handle, inode); retval = 0; end_unlink: - ext3_journal_stop(handle); + ext4_journal_stop(handle); brelse (bh); return retval; } -static int ext3_symlink (struct inode * dir, +static int ext4_symlink (struct inode * dir, struct dentry *dentry, const char * symname) { handle_t *handle; @@ -2129,63 +2129,63 @@ static int ext3_symlink (struct inode * dir, return -ENAMETOOLONG; retry: - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 5 + - 2*EXT3_QUOTA_INIT_BLOCKS(dir->i_sb)); + handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + + 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); if (IS_DIRSYNC(dir)) handle->h_sync = 1; - inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); + inode = ext4_new_inode (handle, dir, S_IFLNK|S_IRWXUGO); err = PTR_ERR(inode); if (IS_ERR(inode)) goto out_stop; - if (l > sizeof (EXT3_I(inode)->i_data)) { - inode->i_op = &ext3_symlink_inode_operations; - ext3_set_aops(inode); + if (l > sizeof (EXT4_I(inode)->i_data)) { + inode->i_op = &ext4_symlink_inode_operations; + ext4_set_aops(inode); /* - * page_symlink() calls into ext3_prepare/commit_write. + * page_symlink() calls into ext4_prepare/commit_write. * We have a transaction open. All is sweetness. It also sets * i_size in generic_commit_write(). 
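 * (Orientation note for the branch around this call: targets longer
 *  than the inode's i_data array take this path and live in a data
 *  block, written through the page cache as a "slow" symlink; shorter
 *  targets fall to the else-branch below and are stored inline in
 *  i_data as a fast symlink.)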
*/ err = __page_symlink(inode, symname, l, mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS); if (err) { - ext3_dec_count(handle, inode); - ext3_mark_inode_dirty(handle, inode); + ext4_dec_count(handle, inode); + ext4_mark_inode_dirty(handle, inode); iput (inode); goto out_stop; } } else { - inode->i_op = &ext3_fast_symlink_inode_operations; - memcpy((char*)&EXT3_I(inode)->i_data,symname,l); + inode->i_op = &ext4_fast_symlink_inode_operations; + memcpy((char*)&EXT4_I(inode)->i_data,symname,l); inode->i_size = l-1; } - EXT3_I(inode)->i_disksize = inode->i_size; - err = ext3_add_nondir(handle, dentry, inode); + EXT4_I(inode)->i_disksize = inode->i_size; + err = ext4_add_nondir(handle, dentry, inode); out_stop: - ext3_journal_stop(handle); - if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) + ext4_journal_stop(handle); + if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; } -static int ext3_link (struct dentry * old_dentry, +static int ext4_link (struct dentry * old_dentry, struct inode * dir, struct dentry *dentry) { handle_t *handle; struct inode *inode = old_dentry->d_inode; int err, retries = 0; - if (inode->i_nlink >= EXT3_LINK_MAX) + if (inode->i_nlink >= EXT4_LINK_MAX) return -EMLINK; retry: - handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS(dir->i_sb) + - EXT3_INDEX_EXTRA_TRANS_BLOCKS); + handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -2193,31 +2193,31 @@ retry: handle->h_sync = 1; inode->i_ctime = CURRENT_TIME_SEC; - ext3_inc_count(handle, inode); + ext4_inc_count(handle, inode); atomic_inc(&inode->i_count); - err = ext3_add_nondir(handle, dentry, inode); - ext3_journal_stop(handle); - if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) + err = ext4_add_nondir(handle, dentry, inode); + ext4_journal_stop(handle); + if (err == -ENOSPC && ext4_should_retry_alloc(dir->i_sb, &retries)) goto retry; return err; } #define PARENT_INO(buffer) \ - ((struct ext3_dir_entry_2 *) ((char *) buffer + \ - le16_to_cpu(((struct ext3_dir_entry_2 *) buffer)->rec_len)))->inode + ((struct ext4_dir_entry_2 *) ((char *) buffer + \ + le16_to_cpu(((struct ext4_dir_entry_2 *) buffer)->rec_len)))->inode /* * Anybody can rename anything with this: the permission checks are left to the * higher-level routines. 
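 * (Informal note on the PARENT_INO() macro above: the first directory
 *  block begins with the "." entry, whose rec_len gives the offset of
 *  the ".." entry that follows, and the macro yields that second
 *  entry's inode field, i.e. the on-disk parent pointer that gets
 *  rewritten below when a directory changes parents.)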
*/ -static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, +static int ext4_rename (struct inode * old_dir, struct dentry *old_dentry, struct inode * new_dir,struct dentry *new_dentry) { handle_t *handle; struct inode * old_inode, * new_inode; struct buffer_head * old_bh, * new_bh, * dir_bh; - struct ext3_dir_entry_2 * old_de, * new_de; + struct ext4_dir_entry_2 * old_de, * new_de; int retval; old_bh = new_bh = dir_bh = NULL; @@ -2226,16 +2226,16 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, * in separate transaction */ if (new_dentry->d_inode) DQUOT_INIT(new_dentry->d_inode); - handle = ext3_journal_start(old_dir, 2 * - EXT3_DATA_TRANS_BLOCKS(old_dir->i_sb) + - EXT3_INDEX_EXTRA_TRANS_BLOCKS + 2); + handle = ext4_journal_start(old_dir, 2 * + EXT4_DATA_TRANS_BLOCKS(old_dir->i_sb) + + EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2); if (IS_ERR(handle)) return PTR_ERR(handle); if (IS_DIRSYNC(old_dir) || IS_DIRSYNC(new_dir)) handle->h_sync = 1; - old_bh = ext3_find_entry (old_dentry, &old_de); + old_bh = ext4_find_entry (old_dentry, &old_de); /* * Check for inode number is _not_ due to possible IO errors. * We might rmdir the source, keep it as pwd of some process @@ -2248,7 +2248,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, goto end_rename; new_inode = new_dentry->d_inode; - new_bh = ext3_find_entry (new_dentry, &new_de); + new_bh = ext4_find_entry (new_dentry, &new_de); if (new_bh) { if (!new_inode) { brelse (new_bh); @@ -2262,30 +2262,30 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, goto end_rename; } retval = -EIO; - dir_bh = ext3_bread (handle, old_inode, 0, 0, &retval); + dir_bh = ext4_bread (handle, old_inode, 0, 0, &retval); if (!dir_bh) goto end_rename; if (le32_to_cpu(PARENT_INO(dir_bh->b_data)) != old_dir->i_ino) goto end_rename; retval = -EMLINK; if (!new_inode && new_dir!=old_dir && - new_dir->i_nlink >= EXT3_LINK_MAX) + new_dir->i_nlink >= EXT4_LINK_MAX) goto end_rename; } if (!new_bh) { - retval = ext3_add_entry (handle, new_dentry, old_inode); + retval = ext4_add_entry (handle, new_dentry, old_inode); if (retval) goto end_rename; } else { BUFFER_TRACE(new_bh, "get write access"); - ext3_journal_get_write_access(handle, new_bh); + ext4_journal_get_write_access(handle, new_bh); new_de->inode = cpu_to_le32(old_inode->i_ino); - if (EXT3_HAS_INCOMPAT_FEATURE(new_dir->i_sb, - EXT3_FEATURE_INCOMPAT_FILETYPE)) + if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb, + EXT4_FEATURE_INCOMPAT_FILETYPE)) new_de->file_type = old_de->file_type; new_dir->i_version++; - BUFFER_TRACE(new_bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, new_bh); + BUFFER_TRACE(new_bh, "call ext4_journal_dirty_metadata"); + ext4_journal_dirty_metadata(handle, new_bh); brelse(new_bh); new_bh = NULL; } @@ -2295,7 +2295,7 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, * rename. 
*/ old_inode->i_ctime = CURRENT_TIME_SEC; - ext3_mark_inode_dirty(handle, old_inode); + ext4_mark_inode_dirty(handle, old_inode); /* * ok, that's it @@ -2303,24 +2303,24 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, if (le32_to_cpu(old_de->inode) != old_inode->i_ino || old_de->name_len != old_dentry->d_name.len || strncmp(old_de->name, old_dentry->d_name.name, old_de->name_len) || - (retval = ext3_delete_entry(handle, old_dir, + (retval = ext4_delete_entry(handle, old_dir, old_de, old_bh)) == -ENOENT) { /* old_de could have moved from under us during htree split, so * make sure that we are deleting the right entry. We might * also be pointing to a stale entry in the unused part of * old_bh so just checking inum and the name isn't enough. */ struct buffer_head *old_bh2; - struct ext3_dir_entry_2 *old_de2; + struct ext4_dir_entry_2 *old_de2; - old_bh2 = ext3_find_entry(old_dentry, &old_de2); + old_bh2 = ext4_find_entry(old_dentry, &old_de2); if (old_bh2) { - retval = ext3_delete_entry(handle, old_dir, + retval = ext4_delete_entry(handle, old_dir, old_de2, old_bh2); brelse(old_bh2); } } if (retval) { - ext3_warning(old_dir->i_sb, "ext3_rename", + ext4_warning(old_dir->i_sb, "ext4_rename", "Deleting old file (%lu), %d, error=%d", old_dir->i_ino, old_dir->i_nlink, retval); } @@ -2330,27 +2330,27 @@ static int ext3_rename (struct inode * old_dir, struct dentry *old_dentry, new_inode->i_ctime = CURRENT_TIME_SEC; } old_dir->i_ctime = old_dir->i_mtime = CURRENT_TIME_SEC; - ext3_update_dx_flag(old_dir); + ext4_update_dx_flag(old_dir); if (dir_bh) { BUFFER_TRACE(dir_bh, "get_write_access"); - ext3_journal_get_write_access(handle, dir_bh); + ext4_journal_get_write_access(handle, dir_bh); PARENT_INO(dir_bh->b_data) = cpu_to_le32(new_dir->i_ino); - BUFFER_TRACE(dir_bh, "call ext3_journal_dirty_metadata"); - ext3_journal_dirty_metadata(handle, dir_bh); + BUFFER_TRACE(dir_bh, "call ext4_journal_dirty_metadata"); + ext4_journal_dirty_metadata(handle, dir_bh); drop_nlink(old_dir); if (new_inode) { drop_nlink(new_inode); } else { inc_nlink(new_dir); - ext3_update_dx_flag(new_dir); - ext3_mark_inode_dirty(handle, new_dir); + ext4_update_dx_flag(new_dir); + ext4_mark_inode_dirty(handle, new_dir); } } - ext3_mark_inode_dirty(handle, old_dir); + ext4_mark_inode_dirty(handle, old_dir); if (new_inode) { - ext3_mark_inode_dirty(handle, new_inode); + ext4_mark_inode_dirty(handle, new_inode); if (!new_inode->i_nlink) - ext3_orphan_add(handle, new_inode); + ext4_orphan_add(handle, new_inode); } retval = 0; @@ -2358,40 +2358,40 @@ end_rename: brelse (dir_bh); brelse (old_bh); brelse (new_bh); - ext3_journal_stop(handle); + ext4_journal_stop(handle); return retval; } /* * directories can handle most operations... 
*/ -struct inode_operations ext3_dir_inode_operations = { - .create = ext3_create, - .lookup = ext3_lookup, - .link = ext3_link, - .unlink = ext3_unlink, - .symlink = ext3_symlink, - .mkdir = ext3_mkdir, - .rmdir = ext3_rmdir, - .mknod = ext3_mknod, - .rename = ext3_rename, - .setattr = ext3_setattr, -#ifdef CONFIG_EXT3_FS_XATTR +struct inode_operations ext4_dir_inode_operations = { + .create = ext4_create, + .lookup = ext4_lookup, + .link = ext4_link, + .unlink = ext4_unlink, + .symlink = ext4_symlink, + .mkdir = ext4_mkdir, + .rmdir = ext4_rmdir, + .mknod = ext4_mknod, + .rename = ext4_rename, + .setattr = ext4_setattr, +#ifdef CONFIG_EXT4DEV_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, - .listxattr = ext3_listxattr, + .listxattr = ext4_listxattr, .removexattr = generic_removexattr, #endif - .permission = ext3_permission, + .permission = ext4_permission, }; -struct inode_operations ext3_special_inode_operations = { - .setattr = ext3_setattr, -#ifdef CONFIG_EXT3_FS_XATTR +struct inode_operations ext4_special_inode_operations = { + .setattr = ext4_setattr, +#ifdef CONFIG_EXT4DEV_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, - .listxattr = ext3_listxattr, + .listxattr = ext4_listxattr, .removexattr = generic_removexattr, #endif - .permission = ext3_permission, + .permission = ext4_permission, }; diff --git a/fs/ext4/namei.h b/fs/ext4/namei.h index f2ce2b0065c9..5e4dfff36a00 100644 --- a/fs/ext4/namei.h +++ b/fs/ext4/namei.h @@ -1,8 +1,8 @@ -/* linux/fs/ext3/namei.h +/* linux/fs/ext4/namei.h * * Copyright (C) 2005 Simtec Electronics * Ben Dooks <ben@simtec.co.uk> * */ -extern struct dentry *ext3_get_parent(struct dentry *child); +extern struct dentry *ext4_get_parent(struct dentry *child); diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index b73cba12f79c..4a47895d9d6d 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -1,7 +1,7 @@ /* - * linux/fs/ext3/resize.c + * linux/fs/ext4/resize.c * - * Support for resizing an ext3 filesystem while it is mounted. + * Support for resizing an ext4 filesystem while it is mounted. * * Copyright (C) 2001, 2002 Andreas Dilger <adilger@clusterfs.com> * @@ -9,11 +9,11 @@ */ -#define EXT3FS_DEBUG +#define EXT4FS_DEBUG #include <linux/sched.h> #include <linux/smp_lock.h> -#include <linux/ext3_jbd.h> +#include <linux/ext4_jbd.h> #include <linux/errno.h> #include <linux/slab.h> @@ -23,87 +23,87 @@ #define inside(b, first, last) ((b) >= (first) && (b) < (last)) static int verify_group_input(struct super_block *sb, - struct ext3_new_group_data *input) + struct ext4_new_group_data *input) { - struct ext3_sb_info *sbi = EXT3_SB(sb); - struct ext3_super_block *es = sbi->s_es; - ext3_fsblk_t start = le32_to_cpu(es->s_blocks_count); - ext3_fsblk_t end = start + input->blocks_count; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + ext4_fsblk_t start = le32_to_cpu(es->s_blocks_count); + ext4_fsblk_t end = start + input->blocks_count; unsigned group = input->group; - ext3_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; - unsigned overhead = ext3_bg_has_super(sb, group) ? - (1 + ext3_bg_num_gdb(sb, group) + + ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; + unsigned overhead = ext4_bg_has_super(sb, group) ? 
+ (1 + ext4_bg_num_gdb(sb, group) + le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; - ext3_fsblk_t metaend = start + overhead; + ext4_fsblk_t metaend = start + overhead; struct buffer_head *bh = NULL; - ext3_grpblk_t free_blocks_count; + ext4_grpblk_t free_blocks_count; int err = -EINVAL; input->free_blocks_count = free_blocks_count = input->blocks_count - 2 - overhead - sbi->s_itb_per_group; if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT3-fs: adding %s group %u: %u blocks " + printk(KERN_DEBUG "EXT4-fs: adding %s group %u: %u blocks " "(%d free, %u reserved)\n", - ext3_bg_has_super(sb, input->group) ? "normal" : + ext4_bg_has_super(sb, input->group) ? "normal" : "no-super", input->group, input->blocks_count, free_blocks_count, input->reserved_blocks); if (group != sbi->s_groups_count) - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "Cannot add at group %u (only %lu groups)", input->group, sbi->s_groups_count); else if ((start - le32_to_cpu(es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb)) - ext3_warning(sb, __FUNCTION__, "Last group not full"); + EXT4_BLOCKS_PER_GROUP(sb)) + ext4_warning(sb, __FUNCTION__, "Last group not full"); else if (input->reserved_blocks > input->blocks_count / 5) - ext3_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", + ext4_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", input->reserved_blocks); else if (free_blocks_count < 0) - ext3_warning(sb, __FUNCTION__, "Bad blocks count %u", + ext4_warning(sb, __FUNCTION__, "Bad blocks count %u", input->blocks_count); else if (!(bh = sb_bread(sb, end - 1))) - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "Cannot read last block ("E3FSBLK")", end - 1); else if (outside(input->block_bitmap, start, end)) - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "Block bitmap not in group (block %u)", input->block_bitmap); else if (outside(input->inode_bitmap, start, end)) - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "Inode bitmap not in group (block %u)", input->inode_bitmap); else if (outside(input->inode_table, start, end) || outside(itend - 1, start, end)) - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "Inode table not in group (blocks %u-"E3FSBLK")", input->inode_table, itend - 1); else if (input->inode_bitmap == input->block_bitmap) - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "Block bitmap same as inode bitmap (%u)", input->block_bitmap); else if (inside(input->block_bitmap, input->inode_table, itend)) - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "Block bitmap (%u) in inode table (%u-"E3FSBLK")", input->block_bitmap, input->inode_table, itend-1); else if (inside(input->inode_bitmap, input->inode_table, itend)) - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "Inode bitmap (%u) in inode table (%u-"E3FSBLK")", input->inode_bitmap, input->inode_table, itend-1); else if (inside(input->block_bitmap, start, metaend)) - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "Block bitmap (%u) in GDT table" " ("E3FSBLK"-"E3FSBLK")", input->block_bitmap, start, metaend - 1); else if (inside(input->inode_bitmap, start, metaend)) - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "Inode bitmap (%u) in GDT table" " ("E3FSBLK"-"E3FSBLK")", input->inode_bitmap, start, metaend - 1); else if (inside(input->inode_table, start, metaend) || inside(itend - 1, start, metaend)) - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "Inode 
table (%u-"E3FSBLK") overlaps" "GDT table ("E3FSBLK"-"E3FSBLK")", input->inode_table, itend - 1, start, metaend - 1); @@ -115,7 +115,7 @@ static int verify_group_input(struct super_block *sb, } static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, - ext3_fsblk_t blk) + ext4_fsblk_t blk) { struct buffer_head *bh; int err; @@ -123,7 +123,7 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb, bh = sb_getblk(sb, blk); if (!bh) return ERR_PTR(-EIO); - if ((err = ext3_journal_get_write_access(handle, bh))) { + if ((err = ext4_journal_get_write_access(handle, bh))) { brelse(bh); bh = ERR_PTR(err); } else { @@ -148,9 +148,9 @@ static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) if (start_bit >= end_bit) return; - ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); + ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) - ext3_set_bit(i, bitmap); + ext4_set_bit(i, bitmap); if (i < end_bit) memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); } @@ -163,21 +163,21 @@ static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) * If any part of this fails, we simply abort the resize. */ static int setup_new_group_blocks(struct super_block *sb, - struct ext3_new_group_data *input) + struct ext4_new_group_data *input) { - struct ext3_sb_info *sbi = EXT3_SB(sb); - ext3_fsblk_t start = ext3_group_first_block_no(sb, input->group); - int reserved_gdb = ext3_bg_has_super(sb, input->group) ? + struct ext4_sb_info *sbi = EXT4_SB(sb); + ext4_fsblk_t start = ext4_group_first_block_no(sb, input->group); + int reserved_gdb = ext4_bg_has_super(sb, input->group) ? le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) : 0; - unsigned long gdblocks = ext3_bg_num_gdb(sb, input->group); + unsigned long gdblocks = ext4_bg_num_gdb(sb, input->group); struct buffer_head *bh; handle_t *handle; - ext3_fsblk_t block; - ext3_grpblk_t bit; + ext4_fsblk_t block; + ext4_grpblk_t bit; int i; int err = 0, err2; - handle = ext3_journal_start_sb(sb, reserved_gdb + gdblocks + + handle = ext4_journal_start_sb(sb, reserved_gdb + gdblocks + 2 + sbi->s_itb_per_group); if (IS_ERR(handle)) return PTR_ERR(handle); @@ -193,9 +193,9 @@ static int setup_new_group_blocks(struct super_block *sb, goto exit_journal; } - if (ext3_bg_has_super(sb, input->group)) { - ext3_debug("mark backup superblock %#04lx (+0)\n", start); - ext3_set_bit(0, bh->b_data); + if (ext4_bg_has_super(sb, input->group)) { + ext4_debug("mark backup superblock %#04lx (+0)\n", start); + ext4_set_bit(0, bh->b_data); } /* Copy all of the GDT blocks into the backup in this group */ @@ -203,14 +203,14 @@ static int setup_new_group_blocks(struct super_block *sb, i < gdblocks; i++, block++, bit++) { struct buffer_head *gdb; - ext3_debug("update backup group %#04lx (+%d)\n", block, bit); + ext4_debug("update backup group %#04lx (+%d)\n", block, bit); gdb = sb_getblk(sb, block); if (!gdb) { err = -EIO; goto exit_bh; } - if ((err = ext3_journal_get_write_access(handle, gdb))) { + if ((err = ext4_journal_get_write_access(handle, gdb))) { brelse(gdb); goto exit_bh; } @@ -218,8 +218,8 @@ static int setup_new_group_blocks(struct super_block *sb, memcpy(gdb->b_data, sbi->s_group_desc[i]->b_data, bh->b_size); set_buffer_uptodate(gdb); unlock_buffer(bh); - ext3_journal_dirty_metadata(handle, gdb); - ext3_set_bit(bit, bh->b_data); + ext4_journal_dirty_metadata(handle, gdb); + ext4_set_bit(bit, bh->b_data); brelse(gdb); } @@ -228,59 
+228,59 @@ static int setup_new_group_blocks(struct super_block *sb, i < reserved_gdb; i++, block++, bit++) { struct buffer_head *gdb; - ext3_debug("clear reserved block %#04lx (+%d)\n", block, bit); + ext4_debug("clear reserved block %#04lx (+%d)\n", block, bit); if (IS_ERR(gdb = bclean(handle, sb, block))) { err = PTR_ERR(bh); goto exit_bh; } - ext3_journal_dirty_metadata(handle, gdb); - ext3_set_bit(bit, bh->b_data); + ext4_journal_dirty_metadata(handle, gdb); + ext4_set_bit(bit, bh->b_data); brelse(gdb); } - ext3_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap, + ext4_debug("mark block bitmap %#04x (+%ld)\n", input->block_bitmap, input->block_bitmap - start); - ext3_set_bit(input->block_bitmap - start, bh->b_data); - ext3_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap, + ext4_set_bit(input->block_bitmap - start, bh->b_data); + ext4_debug("mark inode bitmap %#04x (+%ld)\n", input->inode_bitmap, input->inode_bitmap - start); - ext3_set_bit(input->inode_bitmap - start, bh->b_data); + ext4_set_bit(input->inode_bitmap - start, bh->b_data); /* Zero out all of the inode table blocks */ for (i = 0, block = input->inode_table, bit = block - start; i < sbi->s_itb_per_group; i++, bit++, block++) { struct buffer_head *it; - ext3_debug("clear inode block %#04lx (+%d)\n", block, bit); + ext4_debug("clear inode block %#04lx (+%d)\n", block, bit); if (IS_ERR(it = bclean(handle, sb, block))) { err = PTR_ERR(it); goto exit_bh; } - ext3_journal_dirty_metadata(handle, it); + ext4_journal_dirty_metadata(handle, it); brelse(it); - ext3_set_bit(bit, bh->b_data); + ext4_set_bit(bit, bh->b_data); } - mark_bitmap_end(input->blocks_count, EXT3_BLOCKS_PER_GROUP(sb), + mark_bitmap_end(input->blocks_count, EXT4_BLOCKS_PER_GROUP(sb), bh->b_data); - ext3_journal_dirty_metadata(handle, bh); + ext4_journal_dirty_metadata(handle, bh); brelse(bh); /* Mark unused entries in inode bitmap used */ - ext3_debug("clear inode bitmap %#04x (+%ld)\n", + ext4_debug("clear inode bitmap %#04x (+%ld)\n", input->inode_bitmap, input->inode_bitmap - start); if (IS_ERR(bh = bclean(handle, sb, input->inode_bitmap))) { err = PTR_ERR(bh); goto exit_journal; } - mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb), + mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), EXT4_BLOCKS_PER_GROUP(sb), bh->b_data); - ext3_journal_dirty_metadata(handle, bh); + ext4_journal_dirty_metadata(handle, bh); exit_bh: brelse(bh); exit_journal: unlock_super(sb); - if ((err2 = ext3_journal_stop(handle)) && !err) + if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; return err; @@ -288,20 +288,20 @@ exit_journal: /* * Iterate through the groups which hold BACKUP superblock/GDT copies in an - * ext3 filesystem. The counters should be initialized to 1, 5, and 7 before + * ext4 filesystem. The counters should be initialized to 1, 5, and 7 before * calling this for the first time. In a sparse filesystem it will be the * sequence of powers of 3, 5, and 7: 1, 3, 5, 7, 9, 25, 27, 49, 81, ... * For a non-sparse filesystem it will be every group: 1, 2, 3, 4, ... 
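 * (Worked example for the sparse case, following the description above:
 *  starting from three=1, five=5, seven=7, successive calls return
 *  1, 3, 5, 7, 9, 25, 27, 49, 81, ...; each call hands back the
 *  smallest of the three counters and then multiplies that counter by
 *  its base, which merges the three power sequences in order.)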
*/ -static unsigned ext3_list_backups(struct super_block *sb, unsigned *three, +static unsigned ext4_list_backups(struct super_block *sb, unsigned *three, unsigned *five, unsigned *seven) { unsigned *min = three; int mult = 3; unsigned ret; - if (!EXT3_HAS_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { + if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { ret = *min; *min += 1; return ret; @@ -330,8 +330,8 @@ static unsigned ext3_list_backups(struct super_block *sb, unsigned *three, static int verify_reserved_gdb(struct super_block *sb, struct buffer_head *primary) { - const ext3_fsblk_t blk = primary->b_blocknr; - const unsigned long end = EXT3_SB(sb)->s_groups_count; + const ext4_fsblk_t blk = primary->b_blocknr; + const unsigned long end = EXT4_SB(sb)->s_groups_count; unsigned three = 1; unsigned five = 5; unsigned seven = 7; @@ -339,16 +339,16 @@ static int verify_reserved_gdb(struct super_block *sb, __le32 *p = (__le32 *)primary->b_data; int gdbackups = 0; - while ((grp = ext3_list_backups(sb, &three, &five, &seven)) < end) { - if (le32_to_cpu(*p++) != grp * EXT3_BLOCKS_PER_GROUP(sb) + blk){ - ext3_warning(sb, __FUNCTION__, + while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { + if (le32_to_cpu(*p++) != grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ + ext4_warning(sb, __FUNCTION__, "reserved GDT "E3FSBLK " missing grp %d ("E3FSBLK")", blk, grp, - grp * EXT3_BLOCKS_PER_GROUP(sb) + blk); + grp * EXT4_BLOCKS_PER_GROUP(sb) + blk); return -EINVAL; } - if (++gdbackups > EXT3_ADDR_PER_BLOCK(sb)) + if (++gdbackups > EXT4_ADDR_PER_BLOCK(sb)) return -EFBIG; } @@ -369,23 +369,23 @@ static int verify_reserved_gdb(struct super_block *sb, * fail once we start modifying the data on disk, because JBD has no rollback. */ static int add_new_gdb(handle_t *handle, struct inode *inode, - struct ext3_new_group_data *input, + struct ext4_new_group_data *input, struct buffer_head **primary) { struct super_block *sb = inode->i_sb; - struct ext3_super_block *es = EXT3_SB(sb)->s_es; - unsigned long gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb); - ext3_fsblk_t gdblock = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + unsigned long gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); + ext4_fsblk_t gdblock = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + gdb_num; struct buffer_head **o_group_desc, **n_group_desc; struct buffer_head *dind; int gdbackups; - struct ext3_iloc iloc; + struct ext4_iloc iloc; __le32 *data; int err; if (test_opt(sb, DEBUG)) printk(KERN_DEBUG - "EXT3-fs: ext3_add_new_gdb: adding group block %lu\n", + "EXT4-fs: ext4_add_new_gdb: adding group block %lu\n", gdb_num); /* @@ -393,11 +393,11 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, * because the user tools have no way of handling this. Probably a * bad time to do it anyways. 
*/ - if (EXT3_SB(sb)->s_sbh->b_blocknr != - le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) { - ext3_warning(sb, __FUNCTION__, + if (EXT4_SB(sb)->s_sbh->b_blocknr != + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) { + ext4_warning(sb, __FUNCTION__, "won't resize using backup superblock at %llu", - (unsigned long long)EXT3_SB(sb)->s_sbh->b_blocknr); + (unsigned long long)EXT4_SB(sb)->s_sbh->b_blocknr); return -EPERM; } @@ -410,7 +410,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, goto exit_bh; } - data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK; + data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK; dind = sb_bread(sb, le32_to_cpu(*data)); if (!dind) { err = -EIO; @@ -418,32 +418,32 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, } data = (__le32 *)dind->b_data; - if (le32_to_cpu(data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)]) != gdblock) { - ext3_warning(sb, __FUNCTION__, + if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { + ext4_warning(sb, __FUNCTION__, "new group %u GDT block "E3FSBLK" not reserved", input->group, gdblock); err = -EINVAL; goto exit_dind; } - if ((err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh))) + if ((err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh))) goto exit_dind; - if ((err = ext3_journal_get_write_access(handle, *primary))) + if ((err = ext4_journal_get_write_access(handle, *primary))) goto exit_sbh; - if ((err = ext3_journal_get_write_access(handle, dind))) + if ((err = ext4_journal_get_write_access(handle, dind))) goto exit_primary; - /* ext3_reserve_inode_write() gets a reference on the iloc */ - if ((err = ext3_reserve_inode_write(handle, inode, &iloc))) + /* ext4_reserve_inode_write() gets a reference on the iloc */ + if ((err = ext4_reserve_inode_write(handle, inode, &iloc))) goto exit_dindj; n_group_desc = kmalloc((gdb_num + 1) * sizeof(struct buffer_head *), GFP_KERNEL); if (!n_group_desc) { err = -ENOMEM; - ext3_warning (sb, __FUNCTION__, + ext4_warning (sb, __FUNCTION__, "not enough memory for %lu groups", gdb_num + 1); goto exit_inode; } @@ -457,43 +457,43 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, * these blocks, because they are marked as in-use from being in the * reserved inode, and will become GDT blocks (primary and backup). 
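 * (Summary of the splice performed below, for orientation: the slot for
 *  this GDT block is cleared in the dind block, the resize inode's
 *  i_blocks is debited for the blocks it gives up, and the new
 *  descriptor buffer is copied into a grown s_group_desc[] array before
 *  s_gdb_count is bumped and the change is journaled.)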
*/ - data[gdb_num % EXT3_ADDR_PER_BLOCK(sb)] = 0; - ext3_journal_dirty_metadata(handle, dind); + data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)] = 0; + ext4_journal_dirty_metadata(handle, dind); brelse(dind); inode->i_blocks -= (gdbackups + 1) * sb->s_blocksize >> 9; - ext3_mark_iloc_dirty(handle, inode, &iloc); + ext4_mark_iloc_dirty(handle, inode, &iloc); memset((*primary)->b_data, 0, sb->s_blocksize); - ext3_journal_dirty_metadata(handle, *primary); + ext4_journal_dirty_metadata(handle, *primary); - o_group_desc = EXT3_SB(sb)->s_group_desc; + o_group_desc = EXT4_SB(sb)->s_group_desc; memcpy(n_group_desc, o_group_desc, - EXT3_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); + EXT4_SB(sb)->s_gdb_count * sizeof(struct buffer_head *)); n_group_desc[gdb_num] = *primary; - EXT3_SB(sb)->s_group_desc = n_group_desc; - EXT3_SB(sb)->s_gdb_count++; + EXT4_SB(sb)->s_group_desc = n_group_desc; + EXT4_SB(sb)->s_gdb_count++; kfree(o_group_desc); es->s_reserved_gdt_blocks = cpu_to_le16(le16_to_cpu(es->s_reserved_gdt_blocks) - 1); - ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); return 0; exit_inode: - //ext3_journal_release_buffer(handle, iloc.bh); + //ext4_journal_release_buffer(handle, iloc.bh); brelse(iloc.bh); exit_dindj: - //ext3_journal_release_buffer(handle, dind); + //ext4_journal_release_buffer(handle, dind); exit_primary: - //ext3_journal_release_buffer(handle, *primary); + //ext4_journal_release_buffer(handle, *primary); exit_sbh: - //ext3_journal_release_buffer(handle, *primary); + //ext4_journal_release_buffer(handle, *primary); exit_dind: brelse(dind); exit_bh: brelse(*primary); - ext3_debug("leaving with error %d\n", err); + ext4_debug("leaving with error %d\n", err); return err; } @@ -511,14 +511,14 @@ exit_bh: * backup GDT blocks are stored in their reserved primary GDT block. 
*/ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, - struct ext3_new_group_data *input) + struct ext4_new_group_data *input) { struct super_block *sb = inode->i_sb; - int reserved_gdb =le16_to_cpu(EXT3_SB(sb)->s_es->s_reserved_gdt_blocks); + int reserved_gdb =le16_to_cpu(EXT4_SB(sb)->s_es->s_reserved_gdt_blocks); struct buffer_head **primary; struct buffer_head *dind; - struct ext3_iloc iloc; - ext3_fsblk_t blk; + struct ext4_iloc iloc; + ext4_fsblk_t blk; __le32 *data, *end; int gdbackups = 0; int res, i; @@ -528,21 +528,21 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, if (!primary) return -ENOMEM; - data = EXT3_I(inode)->i_data + EXT3_DIND_BLOCK; + data = EXT4_I(inode)->i_data + EXT4_DIND_BLOCK; dind = sb_bread(sb, le32_to_cpu(*data)); if (!dind) { err = -EIO; goto exit_free; } - blk = EXT3_SB(sb)->s_sbh->b_blocknr + 1 + EXT3_SB(sb)->s_gdb_count; - data = (__le32 *)dind->b_data + EXT3_SB(sb)->s_gdb_count; - end = (__le32 *)dind->b_data + EXT3_ADDR_PER_BLOCK(sb); + blk = EXT4_SB(sb)->s_sbh->b_blocknr + 1 + EXT4_SB(sb)->s_gdb_count; + data = (__le32 *)dind->b_data + EXT4_SB(sb)->s_gdb_count; + end = (__le32 *)dind->b_data + EXT4_ADDR_PER_BLOCK(sb); /* Get each reserved primary GDT block and verify it holds backups */ for (res = 0; res < reserved_gdb; res++, blk++) { if (le32_to_cpu(*data) != blk) { - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "reserved block "E3FSBLK " not at offset %ld", blk, @@ -565,24 +565,24 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, } for (i = 0; i < reserved_gdb; i++) { - if ((err = ext3_journal_get_write_access(handle, primary[i]))) { + if ((err = ext4_journal_get_write_access(handle, primary[i]))) { /* int j; for (j = 0; j < i; j++) - ext3_journal_release_buffer(handle, primary[j]); + ext4_journal_release_buffer(handle, primary[j]); */ goto exit_bh; } } - if ((err = ext3_reserve_inode_write(handle, inode, &iloc))) + if ((err = ext4_reserve_inode_write(handle, inode, &iloc))) goto exit_bh; /* * Finally we can add each of the reserved backup GDT blocks from * the new group to its reserved primary GDT block. */ - blk = input->group * EXT3_BLOCKS_PER_GROUP(sb); + blk = input->group * EXT4_BLOCKS_PER_GROUP(sb); for (i = 0; i < reserved_gdb; i++) { int err2; data = (__le32 *)primary[i]->b_data; @@ -590,12 +590,12 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, primary[i]->b_blocknr, gdbackups, blk + primary[i]->b_blocknr); */ data[gdbackups] = cpu_to_le32(blk + primary[i]->b_blocknr); - err2 = ext3_journal_dirty_metadata(handle, primary[i]); + err2 = ext4_journal_dirty_metadata(handle, primary[i]); if (!err) err = err2; } inode->i_blocks += reserved_gdb * sb->s_blocksize >> 9; - ext3_mark_iloc_dirty(handle, inode, &iloc); + ext4_mark_iloc_dirty(handle, inode, &iloc); exit_bh: while (--res >= 0) @@ -609,7 +609,7 @@ exit_free: } /* - * Update the backup copies of the ext3 metadata. These don't need to be part + * Update the backup copies of the ext4 metadata. These don't need to be part * of the main resize transaction, because e2fsck will re-write them if there * is a problem (basically only OOM will cause a problem). 
However, we * _should_ update the backups if possible, in case the primary gets trashed @@ -626,9 +626,9 @@ exit_free: static void update_backups(struct super_block *sb, int blk_off, char *data, int size) { - struct ext3_sb_info *sbi = EXT3_SB(sb); + struct ext4_sb_info *sbi = EXT4_SB(sb); const unsigned long last = sbi->s_groups_count; - const int bpg = EXT3_BLOCKS_PER_GROUP(sb); + const int bpg = EXT4_BLOCKS_PER_GROUP(sb); unsigned three = 1; unsigned five = 5; unsigned seven = 7; @@ -637,20 +637,20 @@ static void update_backups(struct super_block *sb, handle_t *handle; int err = 0, err2; - handle = ext3_journal_start_sb(sb, EXT3_MAX_TRANS_DATA); + handle = ext4_journal_start_sb(sb, EXT4_MAX_TRANS_DATA); if (IS_ERR(handle)) { group = 1; err = PTR_ERR(handle); goto exit_err; } - while ((group = ext3_list_backups(sb, &three, &five, &seven)) < last) { + while ((group = ext4_list_backups(sb, &three, &five, &seven)) < last) { struct buffer_head *bh; /* Out of journal space, and can't get more - abort - so sad */ if (handle->h_buffer_credits == 0 && - ext3_journal_extend(handle, EXT3_MAX_TRANS_DATA) && - (err = ext3_journal_restart(handle, EXT3_MAX_TRANS_DATA))) + ext4_journal_extend(handle, EXT4_MAX_TRANS_DATA) && + (err = ext4_journal_restart(handle, EXT4_MAX_TRANS_DATA))) break; bh = sb_getblk(sb, group * bpg + blk_off); @@ -658,9 +658,9 @@ static void update_backups(struct super_block *sb, err = -EIO; break; } - ext3_debug("update metadata backup %#04lx\n", + ext4_debug("update metadata backup %#04lx\n", (unsigned long)bh->b_blocknr); - if ((err = ext3_journal_get_write_access(handle, bh))) + if ((err = ext4_journal_get_write_access(handle, bh))) break; lock_buffer(bh); memcpy(bh->b_data, data, size); @@ -668,10 +668,10 @@ static void update_backups(struct super_block *sb, memset(bh->b_data + size, 0, rest); set_buffer_uptodate(bh); unlock_buffer(bh); - ext3_journal_dirty_metadata(handle, bh); + ext4_journal_dirty_metadata(handle, bh); brelse(bh); } - if ((err2 = ext3_journal_stop(handle)) && !err) + if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; /* @@ -686,11 +686,11 @@ static void update_backups(struct super_block *sb, */ exit_err: if (err) { - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "can't update backup for group %d (err %d), " "forcing fsck on next reboot", group, err); - sbi->s_mount_state &= ~EXT3_VALID_FS; - sbi->s_es->s_state &= cpu_to_le16(~EXT3_VALID_FS); + sbi->s_mount_state &= ~EXT4_VALID_FS; + sbi->s_es->s_state &= cpu_to_le16(~EXT4_VALID_FS); mark_buffer_dirty(sbi->s_sbh); } } @@ -708,51 +708,51 @@ exit_err: * not really "added" the group at all. We re-check that we are still * adding in the last group in case things have changed since verifying. */ -int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) +int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) { - struct ext3_sb_info *sbi = EXT3_SB(sb); - struct ext3_super_block *es = sbi->s_es; - int reserved_gdb = ext3_bg_has_super(sb, input->group) ? + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + int reserved_gdb = ext4_bg_has_super(sb, input->group) ? 
le16_to_cpu(es->s_reserved_gdt_blocks) : 0; struct buffer_head *primary = NULL; - struct ext3_group_desc *gdp; + struct ext4_group_desc *gdp; struct inode *inode = NULL; handle_t *handle; int gdb_off, gdb_num; int err, err2; - gdb_num = input->group / EXT3_DESC_PER_BLOCK(sb); - gdb_off = input->group % EXT3_DESC_PER_BLOCK(sb); + gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb); + gdb_off = input->group % EXT4_DESC_PER_BLOCK(sb); - if (gdb_off == 0 && !EXT3_HAS_RO_COMPAT_FEATURE(sb, - EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER)) { - ext3_warning(sb, __FUNCTION__, + if (gdb_off == 0 && !EXT4_HAS_RO_COMPAT_FEATURE(sb, + EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER)) { + ext4_warning(sb, __FUNCTION__, "Can't resize non-sparse filesystem further"); return -EPERM; } if (le32_to_cpu(es->s_blocks_count) + input->blocks_count < le32_to_cpu(es->s_blocks_count)) { - ext3_warning(sb, __FUNCTION__, "blocks_count overflow\n"); + ext4_warning(sb, __FUNCTION__, "blocks_count overflow\n"); return -EINVAL; } - if (le32_to_cpu(es->s_inodes_count) + EXT3_INODES_PER_GROUP(sb) < + if (le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb) < le32_to_cpu(es->s_inodes_count)) { - ext3_warning(sb, __FUNCTION__, "inodes_count overflow\n"); + ext4_warning(sb, __FUNCTION__, "inodes_count overflow\n"); return -EINVAL; } if (reserved_gdb || gdb_off == 0) { - if (!EXT3_HAS_COMPAT_FEATURE(sb, - EXT3_FEATURE_COMPAT_RESIZE_INODE)){ - ext3_warning(sb, __FUNCTION__, + if (!EXT4_HAS_COMPAT_FEATURE(sb, + EXT4_FEATURE_COMPAT_RESIZE_INODE)){ + ext4_warning(sb, __FUNCTION__, "No reserved GDT blocks, can't resize"); return -EPERM; } - inode = iget(sb, EXT3_RESIZE_INO); + inode = iget(sb, EXT4_RESIZE_INO); if (!inode || is_bad_inode(inode)) { - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "Error opening resize inode"); iput(inode); return -ENOENT; @@ -772,8 +772,8 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) * are adding a group with superblock/GDT backups we will also * modify each of the reserved GDT dindirect blocks. */ - handle = ext3_journal_start_sb(sb, - ext3_bg_has_super(sb, input->group) ? + handle = ext4_journal_start_sb(sb, + ext4_bg_has_super(sb, input->group) ? 
3 + reserved_gdb : 4); if (IS_ERR(handle)) { err = PTR_ERR(handle); @@ -782,13 +782,13 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) lock_super(sb); if (input->group != sbi->s_groups_count) { - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "multiple resizers run on filesystem!"); err = -EBUSY; goto exit_journal; } - if ((err = ext3_journal_get_write_access(handle, sbi->s_sbh))) + if ((err = ext4_journal_get_write_access(handle, sbi->s_sbh))) goto exit_journal; /* @@ -799,10 +799,10 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) */ if (gdb_off) { primary = sbi->s_group_desc[gdb_num]; - if ((err = ext3_journal_get_write_access(handle, primary))) + if ((err = ext4_journal_get_write_access(handle, primary))) goto exit_journal; - if (reserved_gdb && ext3_bg_num_gdb(sb, input->group) && + if (reserved_gdb && ext4_bg_num_gdb(sb, input->group) && (err = reserve_backup_gdb(handle, inode, input))) goto exit_journal; } else if ((err = add_new_gdb(handle, inode, input, &primary))) @@ -828,13 +828,13 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) */ /* Update group descriptor block for new group */ - gdp = (struct ext3_group_desc *)primary->b_data + gdb_off; + gdp = (struct ext4_group_desc *)primary->b_data + gdb_off; gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap); gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap); gdp->bg_inode_table = cpu_to_le32(input->inode_table); gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); - gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb)); + gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb)); /* * Make the new blocks and inodes valid next. We do this before @@ -849,7 +849,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) es->s_blocks_count = cpu_to_le32(le32_to_cpu(es->s_blocks_count) + input->blocks_count); es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + - EXT3_INODES_PER_GROUP(sb)); + EXT4_INODES_PER_GROUP(sb)); /* * We need to protect s_groups_count against other CPUs seeing @@ -878,7 +878,7 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) /* Update the global fs size fields */ sbi->s_groups_count++; - ext3_journal_dirty_metadata(handle, primary); + ext4_journal_dirty_metadata(handle, primary); /* Update the reserved block counts only once the new group is * active. */ @@ -889,42 +889,42 @@ int ext3_group_add(struct super_block *sb, struct ext3_new_group_data *input) percpu_counter_mod(&sbi->s_freeblocks_counter, input->free_blocks_count); percpu_counter_mod(&sbi->s_freeinodes_counter, - EXT3_INODES_PER_GROUP(sb)); + EXT4_INODES_PER_GROUP(sb)); - ext3_journal_dirty_metadata(handle, sbi->s_sbh); + ext4_journal_dirty_metadata(handle, sbi->s_sbh); sb->s_dirt = 1; exit_journal: unlock_super(sb); - if ((err2 = ext3_journal_stop(handle)) && !err) + if ((err2 = ext4_journal_stop(handle)) && !err) err = err2; if (!err) { update_backups(sb, sbi->s_sbh->b_blocknr, (char *)es, - sizeof(struct ext3_super_block)); + sizeof(struct ext4_super_block)); update_backups(sb, primary->b_blocknr, primary->b_data, primary->b_size); } exit_put: iput(inode); return err; -} /* ext3_group_add */ +} /* ext4_group_add */ /* Extend the filesystem to the new number of blocks specified. This entry * point is only used to extend the current filesystem to the end of the last * existing group. 
It can be accessed via ioctl, or by "remount,resize=<size>" * for emergencies (because it has no dependencies on reserved blocks). * - * If we _really_ wanted, we could use default values to call ext3_group_add() + * If we _really_ wanted, we could use default values to call ext4_group_add() * allow the "remount" trick to work for arbitrary resizing, assuming enough * GDT blocks are reserved to grow to the desired size. */ -int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, - ext3_fsblk_t n_blocks_count) +int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, + ext4_fsblk_t n_blocks_count) { - ext3_fsblk_t o_blocks_count; + ext4_fsblk_t o_blocks_count; unsigned long o_groups_count; - ext3_grpblk_t last; - ext3_grpblk_t add; + ext4_grpblk_t last; + ext4_grpblk_t add; struct buffer_head * bh; handle_t *handle; int err; @@ -934,45 +934,45 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, * yet: we're going to revalidate es->s_blocks_count after * taking lock_super() below. */ o_blocks_count = le32_to_cpu(es->s_blocks_count); - o_groups_count = EXT3_SB(sb)->s_groups_count; + o_groups_count = EXT4_SB(sb)->s_groups_count; if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT3-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n", + printk(KERN_DEBUG "EXT4-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n", o_blocks_count, n_blocks_count); if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) return 0; if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { - printk(KERN_ERR "EXT3-fs: filesystem on %s:" + printk(KERN_ERR "EXT4-fs: filesystem on %s:" " too large to resize to %lu blocks safely\n", sb->s_id, n_blocks_count); if (sizeof(sector_t) < 8) - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "CONFIG_LBD not enabled\n"); return -EINVAL; } if (n_blocks_count < o_blocks_count) { - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "can't shrink FS - resize aborted"); return -EBUSY; } /* Handle the remaining blocks in the last group only. */ last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) % - EXT3_BLOCKS_PER_GROUP(sb); + EXT4_BLOCKS_PER_GROUP(sb); if (last == 0) { - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "need to use ext2online to resize further"); return -EPERM; } - add = EXT3_BLOCKS_PER_GROUP(sb) - last; + add = EXT4_BLOCKS_PER_GROUP(sb) - last; if (o_blocks_count + add < o_blocks_count) { - ext3_warning(sb, __FUNCTION__, "blocks_count overflow"); + ext4_warning(sb, __FUNCTION__, "blocks_count overflow"); return -EINVAL; } @@ -980,7 +980,7 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, add = n_blocks_count - o_blocks_count; if (o_blocks_count + add < n_blocks_count) - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "will only finish group ("E3FSBLK " blocks, %u new)", o_blocks_count + add, add); @@ -988,55 +988,55 @@ int ext3_group_extend(struct super_block *sb, struct ext3_super_block *es, /* See if the device is actually as big as what was requested */ bh = sb_bread(sb, o_blocks_count + add -1); if (!bh) { - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "can't read last block, resize aborted"); return -ENOSPC; } brelse(bh); /* We will update the superblock, one block bitmap, and - * one group descriptor via ext3_free_blocks(). + * one group descriptor via ext4_free_blocks(). 
*/ - handle = ext3_journal_start_sb(sb, 3); + handle = ext4_journal_start_sb(sb, 3); if (IS_ERR(handle)) { err = PTR_ERR(handle); - ext3_warning(sb, __FUNCTION__, "error %d on journal start",err); + ext4_warning(sb, __FUNCTION__, "error %d on journal start",err); goto exit_put; } lock_super(sb); if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "multiple resizers run on filesystem!"); unlock_super(sb); err = -EBUSY; goto exit_put; } - if ((err = ext3_journal_get_write_access(handle, - EXT3_SB(sb)->s_sbh))) { - ext3_warning(sb, __FUNCTION__, + if ((err = ext4_journal_get_write_access(handle, + EXT4_SB(sb)->s_sbh))) { + ext4_warning(sb, __FUNCTION__, "error %d on journal write access", err); unlock_super(sb); - ext3_journal_stop(handle); + ext4_journal_stop(handle); goto exit_put; } es->s_blocks_count = cpu_to_le32(o_blocks_count + add); - ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); sb->s_dirt = 1; unlock_super(sb); - ext3_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count, + ext4_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count, o_blocks_count + add); - ext3_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); - ext3_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", o_blocks_count, + ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); + ext4_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", o_blocks_count, o_blocks_count + add); - if ((err = ext3_journal_stop(handle))) + if ((err = ext4_journal_stop(handle))) goto exit_put; if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT3-fs: extended group to %u blocks\n", + printk(KERN_DEBUG "EXT4-fs: extended group to %u blocks\n", le32_to_cpu(es->s_blocks_count)); - update_backups(sb, EXT3_SB(sb)->s_sbh->b_blocknr, (char *)es, - sizeof(struct ext3_super_block)); + update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, + sizeof(struct ext4_super_block)); exit_put: return err; -} /* ext3_group_extend */ +} /* ext4_group_extend */ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 8bfd56ef18ca..9e32a2a8d286 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1,5 +1,5 @@ /* - * linux/fs/ext3/super.c + * linux/fs/ext4/super.c * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) @@ -21,8 +21,8 @@ #include <linux/fs.h> #include <linux/time.h> #include <linux/jbd.h> -#include <linux/ext3_fs.h> -#include <linux/ext3_jbd.h> +#include <linux/ext4_fs.h> +#include <linux/ext4_jbd.h> #include <linux/slab.h> #include <linux/init.h> #include <linux/blkdev.h> @@ -42,25 +42,25 @@ #include "acl.h" #include "namei.h" -static int ext3_load_journal(struct super_block *, struct ext3_super_block *, +static int ext4_load_journal(struct super_block *, struct ext4_super_block *, unsigned long journal_devnum); -static int ext3_create_journal(struct super_block *, struct ext3_super_block *, +static int ext4_create_journal(struct super_block *, struct ext4_super_block *, unsigned int); -static void ext3_commit_super (struct super_block * sb, - struct ext3_super_block * es, +static void ext4_commit_super (struct super_block * sb, + struct ext4_super_block * es, int sync); -static void ext3_mark_recovery_complete(struct super_block * sb, - struct ext3_super_block * es); -static void ext3_clear_journal_err(struct super_block * sb, - struct ext3_super_block * es); -static int ext3_sync_fs(struct super_block *sb, int wait); -static const char 
*ext3_decode_error(struct super_block * sb, int errno, +static void ext4_mark_recovery_complete(struct super_block * sb, + struct ext4_super_block * es); +static void ext4_clear_journal_err(struct super_block * sb, + struct ext4_super_block * es); +static int ext4_sync_fs(struct super_block *sb, int wait); +static const char *ext4_decode_error(struct super_block * sb, int errno, char nbuf[16]); -static int ext3_remount (struct super_block * sb, int * flags, char * data); -static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf); -static void ext3_unlockfs(struct super_block *sb); -static void ext3_write_super (struct super_block * sb); -static void ext3_write_super_lockfs(struct super_block *sb); +static int ext4_remount (struct super_block * sb, int * flags, char * data); +static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf); +static void ext4_unlockfs(struct super_block *sb); +static void ext4_write_super (struct super_block * sb); +static void ext4_write_super_lockfs(struct super_block *sb); /* * Wrappers for journal_start/end. @@ -70,7 +70,7 @@ static void ext3_write_super_lockfs(struct super_block *sb); * that sync() will call the filesystem's write_super callback if * appropriate. */ -handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) +handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) { journal_t *journal; @@ -80,9 +80,9 @@ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) /* Special case here: if the journal has aborted behind our * backs (eg. EIO in the commit thread), then we still need to * take the FS itself readonly cleanly. */ - journal = EXT3_SB(sb)->s_journal; + journal = EXT4_SB(sb)->s_journal; if (is_journal_aborted(journal)) { - ext3_abort(sb, __FUNCTION__, + ext4_abort(sb, __FUNCTION__, "Detected aborted journal"); return ERR_PTR(-EROFS); } @@ -96,7 +96,7 @@ handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks) * that sync() will call the filesystem's write_super callback if * appropriate. */ -int __ext3_journal_stop(const char *where, handle_t *handle) +int __ext4_journal_stop(const char *where, handle_t *handle) { struct super_block *sb; int err; @@ -109,15 +109,15 @@ int __ext3_journal_stop(const char *where, handle_t *handle) if (!err) err = rc; if (err) - __ext3_std_error(sb, where, err); + __ext4_std_error(sb, where, err); return err; } -void ext3_journal_abort_handle(const char *caller, const char *err_fn, +void ext4_journal_abort_handle(const char *caller, const char *err_fn, struct buffer_head *bh, handle_t *handle, int err) { char nbuf[16]; - const char *errstr = ext3_decode_error(NULL, err, nbuf); + const char *errstr = ext4_decode_error(NULL, err, nbuf); if (bh) BUFFER_TRACE(bh, "abort"); @@ -138,7 +138,7 @@ void ext3_journal_abort_handle(const char *caller, const char *err_fn, * inconsistencies detected or read IO failures. * * On ext2, we can store the error state of the filesystem in the - * superblock. That is not possible on ext3, because we may have other + * superblock. That is not possible on ext4, because we may have other * write ordering constraints on the superblock which prevent us from * writing it out straight away; and given that the journal is about to * be aborted, we can't rely on the current, or future, transactions to @@ -149,20 +149,20 @@ void ext3_journal_abort_handle(const char *caller, const char *err_fn, * that error until we've noted it down and cleared it. 
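 *
 * (Editorial aside, not part of the original patch: a minimal,
 * hypothetical call site for the helpers defined below. The bitmap
 * range check is made up for illustration; ext4_error() logs the
 * message and then lets ext4_handle_error() apply whichever
 * errors=continue/remount-ro/panic policy the filesystem was
 * mounted with.)
 *
 *	if (le32_to_cpu(gdp->bg_block_bitmap) > last_block)
 *		ext4_error(sb, __FUNCTION__,
 *			   "block bitmap for group %d out of range", i);
 *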
*/ -static void ext3_handle_error(struct super_block *sb) +static void ext4_handle_error(struct super_block *sb) { - struct ext3_super_block *es = EXT3_SB(sb)->s_es; + struct ext4_super_block *es = EXT4_SB(sb)->s_es; - EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; - es->s_state |= cpu_to_le16(EXT3_ERROR_FS); + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); if (sb->s_flags & MS_RDONLY) return; if (!test_opt (sb, ERRORS_CONT)) { - journal_t *journal = EXT3_SB(sb)->s_journal; + journal_t *journal = EXT4_SB(sb)->s_journal; - EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; + EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; if (journal) journal_abort(journal, -EIO); } @@ -170,27 +170,27 @@ static void ext3_handle_error(struct super_block *sb) printk (KERN_CRIT "Remounting filesystem read-only\n"); sb->s_flags |= MS_RDONLY; } - ext3_commit_super(sb, es, 1); + ext4_commit_super(sb, es, 1); if (test_opt(sb, ERRORS_PANIC)) - panic("EXT3-fs (device %s): panic forced after error\n", + panic("EXT4-fs (device %s): panic forced after error\n", sb->s_id); } -void ext3_error (struct super_block * sb, const char * function, +void ext4_error (struct super_block * sb, const char * function, const char * fmt, ...) { va_list args; va_start(args, fmt); - printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function); + printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); vprintk(fmt, args); printk("\n"); va_end(args); - ext3_handle_error(sb); + ext4_handle_error(sb); } -static const char *ext3_decode_error(struct super_block * sb, int errno, +static const char *ext4_decode_error(struct super_block * sb, int errno, char nbuf[16]) { char *errstr = NULL; @@ -203,7 +203,7 @@ static const char *ext3_decode_error(struct super_block * sb, int errno, errstr = "Out of memory"; break; case -EROFS: - if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT) + if (!sb || EXT4_SB(sb)->s_journal->j_flags & JFS_ABORT) errstr = "Journal has aborted"; else errstr = "Readonly filesystem"; @@ -223,10 +223,10 @@ static const char *ext3_decode_error(struct super_block * sb, int errno, return errstr; } -/* __ext3_std_error decodes expected errors from journaling functions +/* __ext4_std_error decodes expected errors from journaling functions * automatically and invokes the appropriate error response. */ -void __ext3_std_error (struct super_block * sb, const char * function, +void __ext4_std_error (struct super_block * sb, const char * function, int errno) { char nbuf[16]; @@ -239,15 +239,15 @@ void __ext3_std_error (struct super_block * sb, const char * function, (sb->s_flags & MS_RDONLY)) return; - errstr = ext3_decode_error(sb, errno, nbuf); - printk (KERN_CRIT "EXT3-fs error (device %s) in %s: %s\n", + errstr = ext4_decode_error(sb, errno, nbuf); + printk (KERN_CRIT "EXT4-fs error (device %s) in %s: %s\n", sb->s_id, function, errstr); - ext3_handle_error(sb); + ext4_handle_error(sb); } /* - * ext3_abort is a much stronger failure handler than ext3_error. The + * ext4_abort is a much stronger failure handler than ext4_error. The * abort function may be used to deal with unrecoverable failures such * as journal IO errors or ENOMEM at a critical moment in log management. * @@ -256,60 +256,60 @@ void __ext3_std_error (struct super_block * sb, const char * function, * case we take the easy way out and panic immediately. */ -void ext3_abort (struct super_block * sb, const char * function, +void ext4_abort (struct super_block * sb, const char * function, const char * fmt, ...) 
{ va_list args; - printk (KERN_CRIT "ext3_abort called.\n"); + printk (KERN_CRIT "ext4_abort called.\n"); va_start(args, fmt); - printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function); + printk(KERN_CRIT "EXT4-fs error (device %s): %s: ",sb->s_id, function); vprintk(fmt, args); printk("\n"); va_end(args); if (test_opt(sb, ERRORS_PANIC)) - panic("EXT3-fs panic from previous error\n"); + panic("EXT4-fs panic from previous error\n"); if (sb->s_flags & MS_RDONLY) return; printk(KERN_CRIT "Remounting filesystem read-only\n"); - EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; sb->s_flags |= MS_RDONLY; - EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT; - journal_abort(EXT3_SB(sb)->s_journal, -EIO); + EXT4_SB(sb)->s_mount_opt |= EXT4_MOUNT_ABORT; + journal_abort(EXT4_SB(sb)->s_journal, -EIO); } -void ext3_warning (struct super_block * sb, const char * function, +void ext4_warning (struct super_block * sb, const char * function, const char * fmt, ...) { va_list args; va_start(args, fmt); - printk(KERN_WARNING "EXT3-fs warning (device %s): %s: ", + printk(KERN_WARNING "EXT4-fs warning (device %s): %s: ", sb->s_id, function); vprintk(fmt, args); printk("\n"); va_end(args); } -void ext3_update_dynamic_rev(struct super_block *sb) +void ext4_update_dynamic_rev(struct super_block *sb) { - struct ext3_super_block *es = EXT3_SB(sb)->s_es; + struct ext4_super_block *es = EXT4_SB(sb)->s_es; - if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV) + if (le32_to_cpu(es->s_rev_level) > EXT4_GOOD_OLD_REV) return; - ext3_warning(sb, __FUNCTION__, + ext4_warning(sb, __FUNCTION__, "updating to rev %d because of new feature flag, " "running e2fsck is recommended", - EXT3_DYNAMIC_REV); + EXT4_DYNAMIC_REV); - es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO); - es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE); - es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV); + es->s_first_ino = cpu_to_le32(EXT4_GOOD_OLD_FIRST_INO); + es->s_inode_size = cpu_to_le16(EXT4_GOOD_OLD_INODE_SIZE); + es->s_rev_level = cpu_to_le32(EXT4_DYNAMIC_REV); /* leave es->s_feature_*compat flags alone */ /* es->s_uuid will be set by e2fsck if empty */ @@ -323,7 +323,7 @@ void ext3_update_dynamic_rev(struct super_block *sb) /* * Open the external journal device */ -static struct block_device *ext3_blkdev_get(dev_t dev) +static struct block_device *ext4_blkdev_get(dev_t dev) { struct block_device *bdev; char b[BDEVNAME_SIZE]; @@ -334,7 +334,7 @@ static struct block_device *ext3_blkdev_get(dev_t dev) return bdev; fail: - printk(KERN_ERR "EXT3: failed to open journal device %s: %ld\n", + printk(KERN_ERR "EXT4: failed to open journal device %s: %ld\n", __bdevname(dev, b), PTR_ERR(bdev)); return NULL; } @@ -342,20 +342,20 @@ fail: /* * Release the journal device */ -static int ext3_blkdev_put(struct block_device *bdev) +static int ext4_blkdev_put(struct block_device *bdev) { bd_release(bdev); return blkdev_put(bdev); } -static int ext3_blkdev_remove(struct ext3_sb_info *sbi) +static int ext4_blkdev_remove(struct ext4_sb_info *sbi) { struct block_device *bdev; int ret = -ENODEV; bdev = sbi->journal_bdev; if (bdev) { - ret = ext3_blkdev_put(bdev); + ret = ext4_blkdev_put(bdev); sbi->journal_bdev = NULL; } return ret; @@ -363,10 +363,10 @@ static int ext3_blkdev_remove(struct ext3_sb_info *sbi) static inline struct inode *orphan_list_entry(struct list_head *l) { - return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode; + return &list_entry(l, struct ext4_inode_info, 
i_orphan)->vfs_inode; } -static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi) +static void dump_orphan_list(struct super_block *sb, struct ext4_sb_info *sbi) { struct list_head *l; @@ -384,20 +384,20 @@ static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi) } } -static void ext3_put_super (struct super_block * sb) +static void ext4_put_super (struct super_block * sb) { - struct ext3_sb_info *sbi = EXT3_SB(sb); - struct ext3_super_block *es = sbi->s_es; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; int i; - ext3_xattr_put_super(sb); + ext4_xattr_put_super(sb); journal_destroy(sbi->s_journal); if (!(sb->s_flags & MS_RDONLY)) { - EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); + EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); es->s_state = cpu_to_le16(sbi->s_mount_state); BUFFER_TRACE(sbi->s_sbh, "marking dirty"); mark_buffer_dirty(sbi->s_sbh); - ext3_commit_super(sb, es, 1); + ext4_commit_super(sb, es, 1); } for (i = 0; i < sbi->s_gdb_count; i++) @@ -429,47 +429,47 @@ static void ext3_put_super (struct super_block * sb) */ sync_blockdev(sbi->journal_bdev); invalidate_bdev(sbi->journal_bdev, 0); - ext3_blkdev_remove(sbi); + ext4_blkdev_remove(sbi); } sb->s_fs_info = NULL; kfree(sbi); return; } -static kmem_cache_t *ext3_inode_cachep; +static kmem_cache_t *ext4_inode_cachep; /* * Called inside transaction, so use GFP_NOFS */ -static struct inode *ext3_alloc_inode(struct super_block *sb) +static struct inode *ext4_alloc_inode(struct super_block *sb) { - struct ext3_inode_info *ei; + struct ext4_inode_info *ei; - ei = kmem_cache_alloc(ext3_inode_cachep, SLAB_NOFS); + ei = kmem_cache_alloc(ext4_inode_cachep, SLAB_NOFS); if (!ei) return NULL; -#ifdef CONFIG_EXT3_FS_POSIX_ACL - ei->i_acl = EXT3_ACL_NOT_CACHED; - ei->i_default_acl = EXT3_ACL_NOT_CACHED; +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL + ei->i_acl = EXT4_ACL_NOT_CACHED; + ei->i_default_acl = EXT4_ACL_NOT_CACHED; #endif ei->i_block_alloc_info = NULL; ei->vfs_inode.i_version = 1; return &ei->vfs_inode; } -static void ext3_destroy_inode(struct inode *inode) +static void ext4_destroy_inode(struct inode *inode) { - kmem_cache_free(ext3_inode_cachep, EXT3_I(inode)); + kmem_cache_free(ext4_inode_cachep, EXT4_I(inode)); } static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) { - struct ext3_inode_info *ei = (struct ext3_inode_info *) foo; + struct ext4_inode_info *ei = (struct ext4_inode_info *) foo; if ((flags & (SLAB_CTOR_VERIFY|SLAB_CTOR_CONSTRUCTOR)) == SLAB_CTOR_CONSTRUCTOR) { INIT_LIST_HEAD(&ei->i_orphan); -#ifdef CONFIG_EXT3_FS_XATTR +#ifdef CONFIG_EXT4DEV_FS_XATTR init_rwsem(&ei->xattr_sem); #endif mutex_init(&ei->truncate_mutex); @@ -479,46 +479,46 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags) static int init_inodecache(void) { - ext3_inode_cachep = kmem_cache_create("ext3_inode_cache", - sizeof(struct ext3_inode_info), + ext4_inode_cachep = kmem_cache_create("ext4_inode_cache", + sizeof(struct ext4_inode_info), 0, (SLAB_RECLAIM_ACCOUNT| SLAB_MEM_SPREAD), init_once, NULL); - if (ext3_inode_cachep == NULL) + if (ext4_inode_cachep == NULL) return -ENOMEM; return 0; } static void destroy_inodecache(void) { - kmem_cache_destroy(ext3_inode_cachep); + kmem_cache_destroy(ext4_inode_cachep); } -static void ext3_clear_inode(struct inode *inode) +static void ext4_clear_inode(struct inode *inode) { - struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info; -#ifdef 
CONFIG_EXT3_FS_POSIX_ACL - if (EXT3_I(inode)->i_acl && - EXT3_I(inode)->i_acl != EXT3_ACL_NOT_CACHED) { - posix_acl_release(EXT3_I(inode)->i_acl); - EXT3_I(inode)->i_acl = EXT3_ACL_NOT_CACHED; - } - if (EXT3_I(inode)->i_default_acl && - EXT3_I(inode)->i_default_acl != EXT3_ACL_NOT_CACHED) { - posix_acl_release(EXT3_I(inode)->i_default_acl); - EXT3_I(inode)->i_default_acl = EXT3_ACL_NOT_CACHED; + struct ext4_block_alloc_info *rsv = EXT4_I(inode)->i_block_alloc_info; +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL + if (EXT4_I(inode)->i_acl && + EXT4_I(inode)->i_acl != EXT4_ACL_NOT_CACHED) { + posix_acl_release(EXT4_I(inode)->i_acl); + EXT4_I(inode)->i_acl = EXT4_ACL_NOT_CACHED; + } + if (EXT4_I(inode)->i_default_acl && + EXT4_I(inode)->i_default_acl != EXT4_ACL_NOT_CACHED) { + posix_acl_release(EXT4_I(inode)->i_default_acl); + EXT4_I(inode)->i_default_acl = EXT4_ACL_NOT_CACHED; } #endif - ext3_discard_reservation(inode); - EXT3_I(inode)->i_block_alloc_info = NULL; + ext4_discard_reservation(inode); + EXT4_I(inode)->i_block_alloc_info = NULL; if (unlikely(rsv)) kfree(rsv); } -static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb) +static inline void ext4_show_quota_options(struct seq_file *seq, struct super_block *sb) { #if defined(CONFIG_QUOTA) - struct ext3_sb_info *sbi = EXT3_SB(sb); + struct ext4_sb_info *sbi = EXT4_SB(sb); if (sbi->s_jquota_fmt) seq_printf(seq, ",jqfmt=%s", @@ -530,32 +530,32 @@ static inline void ext3_show_quota_options(struct seq_file *seq, struct super_bl if (sbi->s_qf_names[GRPQUOTA]) seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]); - if (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) + if (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) seq_puts(seq, ",usrquota"); - if (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) + if (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) seq_puts(seq, ",grpquota"); #endif } -static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs) +static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) { struct super_block *sb = vfs->mnt_sb; - if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) + if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) seq_puts(seq, ",data=journal"); - else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA) + else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) seq_puts(seq, ",data=ordered"); - else if (test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) + else if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) seq_puts(seq, ",data=writeback"); - ext3_show_quota_options(seq, sb); + ext4_show_quota_options(seq, sb); return 0; } -static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp) +static struct dentry *ext4_get_dentry(struct super_block *sb, void *vobjp) { __u32 *objp = vobjp; unsigned long ino = objp[0]; @@ -563,14 +563,14 @@ static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp) struct inode *inode; struct dentry *result; - if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO) + if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO) return ERR_PTR(-ESTALE); - if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)) + if (ino > le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)) return ERR_PTR(-ESTALE); /* iget isn't really right if the inode is currently unallocated!! * - * ext3_read_inode will return a bad_inode if the inode had been + * ext4_read_inode will return a bad_inode if the inode had been * deleted, so we should be safe. 
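 *
 * (Editorial aside, not part of the original patch: the reasoning
 * above typically ends in a stale-handle check along these lines,
 * a paraphrased sketch rather than the verbatim continuation of
 * this function.)
 *
 *	inode = iget(sb, ino);
 *	if (inode == NULL)
 *		return ERR_PTR(-ENOMEM);
 *
 *	A deleted inode comes back as a bad inode, and a reused inode
 *	number shows up as a generation mismatch; either way the file
 *	handle the NFS client handed us is stale:
 *
 *	if (is_bad_inode(inode) ||
 *	    (generation && inode->i_generation != generation)) {
 *		iput(inode);
 *		return ERR_PTR(-ESTALE);
 *	}
 *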
* * Currently we don't know the generation for parent directory, so @@ -599,37 +599,37 @@ static struct dentry *ext3_get_dentry(struct super_block *sb, void *vobjp) #define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group") #define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA)) -static int ext3_dquot_initialize(struct inode *inode, int type); -static int ext3_dquot_drop(struct inode *inode); -static int ext3_write_dquot(struct dquot *dquot); -static int ext3_acquire_dquot(struct dquot *dquot); -static int ext3_release_dquot(struct dquot *dquot); -static int ext3_mark_dquot_dirty(struct dquot *dquot); -static int ext3_write_info(struct super_block *sb, int type); -static int ext3_quota_on(struct super_block *sb, int type, int format_id, char *path); -static int ext3_quota_on_mount(struct super_block *sb, int type); -static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, +static int ext4_dquot_initialize(struct inode *inode, int type); +static int ext4_dquot_drop(struct inode *inode); +static int ext4_write_dquot(struct dquot *dquot); +static int ext4_acquire_dquot(struct dquot *dquot); +static int ext4_release_dquot(struct dquot *dquot); +static int ext4_mark_dquot_dirty(struct dquot *dquot); +static int ext4_write_info(struct super_block *sb, int type); +static int ext4_quota_on(struct super_block *sb, int type, int format_id, char *path); +static int ext4_quota_on_mount(struct super_block *sb, int type); +static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off); -static ssize_t ext3_quota_write(struct super_block *sb, int type, +static ssize_t ext4_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off); -static struct dquot_operations ext3_quota_operations = { - .initialize = ext3_dquot_initialize, - .drop = ext3_dquot_drop, +static struct dquot_operations ext4_quota_operations = { + .initialize = ext4_dquot_initialize, + .drop = ext4_dquot_drop, .alloc_space = dquot_alloc_space, .alloc_inode = dquot_alloc_inode, .free_space = dquot_free_space, .free_inode = dquot_free_inode, .transfer = dquot_transfer, - .write_dquot = ext3_write_dquot, - .acquire_dquot = ext3_acquire_dquot, - .release_dquot = ext3_release_dquot, - .mark_dirty = ext3_mark_dquot_dirty, - .write_info = ext3_write_info + .write_dquot = ext4_write_dquot, + .acquire_dquot = ext4_acquire_dquot, + .release_dquot = ext4_release_dquot, + .mark_dirty = ext4_mark_dquot_dirty, + .write_info = ext4_write_info }; -static struct quotactl_ops ext3_qctl_operations = { - .quota_on = ext3_quota_on, +static struct quotactl_ops ext4_qctl_operations = { + .quota_on = ext4_quota_on, .quota_off = vfs_quota_off, .quota_sync = vfs_quota_sync, .get_info = vfs_get_dqinfo, @@ -639,31 +639,31 @@ static struct quotactl_ops ext3_qctl_operations = { }; #endif -static struct super_operations ext3_sops = { - .alloc_inode = ext3_alloc_inode, - .destroy_inode = ext3_destroy_inode, - .read_inode = ext3_read_inode, - .write_inode = ext3_write_inode, - .dirty_inode = ext3_dirty_inode, - .delete_inode = ext3_delete_inode, - .put_super = ext3_put_super, - .write_super = ext3_write_super, - .sync_fs = ext3_sync_fs, - .write_super_lockfs = ext3_write_super_lockfs, - .unlockfs = ext3_unlockfs, - .statfs = ext3_statfs, - .remount_fs = ext3_remount, - .clear_inode = ext3_clear_inode, - .show_options = ext3_show_options, +static struct super_operations ext4_sops = { + .alloc_inode = ext4_alloc_inode, + .destroy_inode = ext4_destroy_inode, + .read_inode = 
ext4_read_inode, + .write_inode = ext4_write_inode, + .dirty_inode = ext4_dirty_inode, + .delete_inode = ext4_delete_inode, + .put_super = ext4_put_super, + .write_super = ext4_write_super, + .sync_fs = ext4_sync_fs, + .write_super_lockfs = ext4_write_super_lockfs, + .unlockfs = ext4_unlockfs, + .statfs = ext4_statfs, + .remount_fs = ext4_remount, + .clear_inode = ext4_clear_inode, + .show_options = ext4_show_options, #ifdef CONFIG_QUOTA - .quota_read = ext3_quota_read, - .quota_write = ext3_quota_write, + .quota_read = ext4_quota_read, + .quota_write = ext4_quota_write, #endif }; -static struct export_operations ext3_export_ops = { - .get_parent = ext3_get_parent, - .get_dentry = ext3_get_dentry, +static struct export_operations ext4_export_ops = { + .get_parent = ext4_get_parent, + .get_dentry = ext4_get_dentry, }; enum { @@ -731,18 +731,18 @@ static match_table_t tokens = { {Opt_resize, "resize"}, }; -static ext3_fsblk_t get_sb_block(void **data) +static ext4_fsblk_t get_sb_block(void **data) { - ext3_fsblk_t sb_block; + ext4_fsblk_t sb_block; char *options = (char *) *data; if (!options || strncmp(options, "sb=", 3) != 0) return 1; /* Default location */ options += 3; - /*todo: use simple_strtoll with >32bit ext3 */ + /*todo: use simple_strtoll with >32bit ext4 */ sb_block = simple_strtoul(options, &options, 0); if (*options && *options != ',') { - printk("EXT3-fs: Invalid sb specification: %s\n", + printk("EXT4-fs: Invalid sb specification: %s\n", (char *) *data); return 1; } @@ -754,9 +754,9 @@ static ext3_fsblk_t get_sb_block(void **data) static int parse_options (char *options, struct super_block *sb, unsigned int *inum, unsigned long *journal_devnum, - ext3_fsblk_t *n_blocks_count, int is_remount) + ext4_fsblk_t *n_blocks_count, int is_remount) { - struct ext3_sb_info *sbi = EXT3_SB(sb); + struct ext4_sb_info *sbi = EXT4_SB(sb); char * p; substring_t args[MAX_OPT_ARGS]; int data_opt = 0; @@ -832,7 +832,7 @@ static int parse_options (char *options, struct super_block *sb, case Opt_orlov: clear_opt (sbi->s_mount_opt, OLDALLOC); break; -#ifdef CONFIG_EXT3_FS_XATTR +#ifdef CONFIG_EXT4DEV_FS_XATTR case Opt_user_xattr: set_opt (sbi->s_mount_opt, XATTR_USER); break; @@ -842,10 +842,10 @@ static int parse_options (char *options, struct super_block *sb, #else case Opt_user_xattr: case Opt_nouser_xattr: - printk("EXT3 (no)user_xattr options not supported\n"); + printk("EXT4 (no)user_xattr options not supported\n"); break; #endif -#ifdef CONFIG_EXT3_FS_POSIX_ACL +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL case Opt_acl: set_opt(sbi->s_mount_opt, POSIX_ACL); break; @@ -855,7 +855,7 @@ static int parse_options (char *options, struct super_block *sb, #else case Opt_acl: case Opt_noacl: - printk("EXT3 (no)acl options not supported\n"); + printk("EXT4 (no)acl options not supported\n"); break; #endif case Opt_reservation: @@ -871,7 +871,7 @@ static int parse_options (char *options, struct super_block *sb, user to specify an existing inode to be the journal file. 
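 *
 * (Editorial aside, not part of the original patch: from userspace
 * these journal options arrive as ordinary mount options, parsed via
 * the tokens[] match_table_t above. A hypothetical invocation, using
 * the journal_inum option handled just below and an illustrative
 * inode number, would look something like)
 *
 *	mount -o journal_inum=8 /dev/sdb1 /mnt
 *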
*/ if (is_remount) { - printk(KERN_ERR "EXT3-fs: cannot specify " + printk(KERN_ERR "EXT4-fs: cannot specify " "journal on remount\n"); return 0; } @@ -879,7 +879,7 @@ static int parse_options (char *options, struct super_block *sb, break; case Opt_journal_inum: if (is_remount) { - printk(KERN_ERR "EXT3-fs: cannot specify " + printk(KERN_ERR "EXT4-fs: cannot specify " "journal on remount\n"); return 0; } @@ -889,7 +889,7 @@ static int parse_options (char *options, struct super_block *sb, break; case Opt_journal_dev: if (is_remount) { - printk(KERN_ERR "EXT3-fs: cannot specify " + printk(KERN_ERR "EXT4-fs: cannot specify " "journal on remount\n"); return 0; } @@ -910,24 +910,24 @@ static int parse_options (char *options, struct super_block *sb, sbi->s_commit_interval = HZ * option; break; case Opt_data_journal: - data_opt = EXT3_MOUNT_JOURNAL_DATA; + data_opt = EXT4_MOUNT_JOURNAL_DATA; goto datacheck; case Opt_data_ordered: - data_opt = EXT3_MOUNT_ORDERED_DATA; + data_opt = EXT4_MOUNT_ORDERED_DATA; goto datacheck; case Opt_data_writeback: - data_opt = EXT3_MOUNT_WRITEBACK_DATA; + data_opt = EXT4_MOUNT_WRITEBACK_DATA; datacheck: if (is_remount) { - if ((sbi->s_mount_opt & EXT3_MOUNT_DATA_FLAGS) + if ((sbi->s_mount_opt & EXT4_MOUNT_DATA_FLAGS) != data_opt) { printk(KERN_ERR - "EXT3-fs: cannot change data " + "EXT4-fs: cannot change data " "mode on remount\n"); return 0; } } else { - sbi->s_mount_opt &= ~EXT3_MOUNT_DATA_FLAGS; + sbi->s_mount_opt &= ~EXT4_MOUNT_DATA_FLAGS; sbi->s_mount_opt |= data_opt; } break; @@ -940,21 +940,21 @@ static int parse_options (char *options, struct super_block *sb, set_qf_name: if (sb_any_quota_enabled(sb)) { printk(KERN_ERR - "EXT3-fs: Cannot change journalled " + "EXT4-fs: Cannot change journalled " "quota options when quota turned on.\n"); return 0; } qname = match_strdup(&args[0]); if (!qname) { printk(KERN_ERR - "EXT3-fs: not enough memory for " + "EXT4-fs: not enough memory for " "storing quotafile name.\n"); return 0; } if (sbi->s_qf_names[qtype] && strcmp(sbi->s_qf_names[qtype], qname)) { printk(KERN_ERR - "EXT3-fs: %s quota file already " + "EXT4-fs: %s quota file already " "specified.\n", QTYPE2NAME(qtype)); kfree(qname); return 0; @@ -962,7 +962,7 @@ set_qf_name: sbi->s_qf_names[qtype] = qname; if (strchr(sbi->s_qf_names[qtype], '/')) { printk(KERN_ERR - "EXT3-fs: quotafile must be on " + "EXT4-fs: quotafile must be on " "filesystem root.\n"); kfree(sbi->s_qf_names[qtype]); sbi->s_qf_names[qtype] = NULL; @@ -977,7 +977,7 @@ set_qf_name: qtype = GRPQUOTA; clear_qf_name: if (sb_any_quota_enabled(sb)) { - printk(KERN_ERR "EXT3-fs: Cannot change " + printk(KERN_ERR "EXT4-fs: Cannot change " "journalled quota options when " "quota turned on.\n"); return 0; @@ -1005,7 +1005,7 @@ clear_qf_name: break; case Opt_noquota: if (sb_any_quota_enabled(sb)) { - printk(KERN_ERR "EXT3-fs: Cannot change quota " + printk(KERN_ERR "EXT4-fs: Cannot change quota " "options when quota turned on.\n"); return 0; } @@ -1024,7 +1024,7 @@ clear_qf_name: case Opt_jqfmt_vfsold: case Opt_jqfmt_vfsv0: printk(KERN_ERR - "EXT3-fs: journalled quota options not " + "EXT4-fs: journalled quota options not " "supported.\n"); break; case Opt_noquota: @@ -1045,7 +1045,7 @@ clear_qf_name: break; case Opt_resize: if (!is_remount) { - printk("EXT3-fs: resize option only available " + printk("EXT4-fs: resize option only available " "for remount\n"); return 0; } @@ -1061,38 +1061,38 @@ clear_qf_name: break; default: printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " + "EXT4-fs: 
Unrecognized mount option \"%s\" " "or missing value\n", p); return 0; } } #ifdef CONFIG_QUOTA if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) { - if ((sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA) && + if ((sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA) && sbi->s_qf_names[USRQUOTA]) clear_opt(sbi->s_mount_opt, USRQUOTA); - if ((sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA) && + if ((sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA) && sbi->s_qf_names[GRPQUOTA]) clear_opt(sbi->s_mount_opt, GRPQUOTA); if ((sbi->s_qf_names[USRQUOTA] && - (sbi->s_mount_opt & EXT3_MOUNT_GRPQUOTA)) || + (sbi->s_mount_opt & EXT4_MOUNT_GRPQUOTA)) || (sbi->s_qf_names[GRPQUOTA] && - (sbi->s_mount_opt & EXT3_MOUNT_USRQUOTA))) { - printk(KERN_ERR "EXT3-fs: old and new quota " + (sbi->s_mount_opt & EXT4_MOUNT_USRQUOTA))) { + printk(KERN_ERR "EXT4-fs: old and new quota " "format mixing.\n"); return 0; } if (!sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT3-fs: journalled quota format " + printk(KERN_ERR "EXT4-fs: journalled quota format " "not specified.\n"); return 0; } } else { if (sbi->s_jquota_fmt) { - printk(KERN_ERR "EXT3-fs: journalled quota format " + printk(KERN_ERR "EXT4-fs: journalled quota format " "specified with no journalling " "enabled.\n"); return 0; @@ -1102,68 +1102,68 @@ clear_qf_name: return 1; } -static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es, +static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es, int read_only) { - struct ext3_sb_info *sbi = EXT3_SB(sb); + struct ext4_sb_info *sbi = EXT4_SB(sb); int res = 0; - if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) { - printk (KERN_ERR "EXT3-fs warning: revision level too high, " + if (le32_to_cpu(es->s_rev_level) > EXT4_MAX_SUPP_REV) { + printk (KERN_ERR "EXT4-fs warning: revision level too high, " "forcing read-only mode\n"); res = MS_RDONLY; } if (read_only) return res; - if (!(sbi->s_mount_state & EXT3_VALID_FS)) - printk (KERN_WARNING "EXT3-fs warning: mounting unchecked fs, " + if (!(sbi->s_mount_state & EXT4_VALID_FS)) + printk (KERN_WARNING "EXT4-fs warning: mounting unchecked fs, " "running e2fsck is recommended\n"); - else if ((sbi->s_mount_state & EXT3_ERROR_FS)) + else if ((sbi->s_mount_state & EXT4_ERROR_FS)) printk (KERN_WARNING - "EXT3-fs warning: mounting fs with errors, " + "EXT4-fs warning: mounting fs with errors, " "running e2fsck is recommended\n"); else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 && le16_to_cpu(es->s_mnt_count) >= (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count)) printk (KERN_WARNING - "EXT3-fs warning: maximal mount count reached, " + "EXT4-fs warning: maximal mount count reached, " "running e2fsck is recommended\n"); else if (le32_to_cpu(es->s_checkinterval) && (le32_to_cpu(es->s_lastcheck) + le32_to_cpu(es->s_checkinterval) <= get_seconds())) printk (KERN_WARNING - "EXT3-fs warning: checktime reached, " + "EXT4-fs warning: checktime reached, " "running e2fsck is recommended\n"); #if 0 /* @@@ We _will_ want to clear the valid bit if we find inconsistencies, to force a fsck at reboot. But for a plain journaled filesystem we can keep it set as valid forever! 
:) */ - es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT3_VALID_FS); + es->s_state = cpu_to_le16(le16_to_cpu(es->s_state) & ~EXT4_VALID_FS); #endif if (!(__s16) le16_to_cpu(es->s_max_mnt_count)) - es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT); + es->s_max_mnt_count = cpu_to_le16(EXT4_DFL_MAX_MNT_COUNT); es->s_mnt_count=cpu_to_le16(le16_to_cpu(es->s_mnt_count) + 1); es->s_mtime = cpu_to_le32(get_seconds()); - ext3_update_dynamic_rev(sb); - EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); + ext4_update_dynamic_rev(sb); + EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); - ext3_commit_super(sb, es, 1); + ext4_commit_super(sb, es, 1); if (test_opt(sb, DEBUG)) - printk(KERN_INFO "[EXT3 FS bs=%lu, gc=%lu, " + printk(KERN_INFO "[EXT4 FS bs=%lu, gc=%lu, " "bpg=%lu, ipg=%lu, mo=%04lx]\n", sb->s_blocksize, sbi->s_groups_count, - EXT3_BLOCKS_PER_GROUP(sb), - EXT3_INODES_PER_GROUP(sb), + EXT4_BLOCKS_PER_GROUP(sb), + EXT4_INODES_PER_GROUP(sb), sbi->s_mount_opt); - printk(KERN_INFO "EXT3 FS on %s, ", sb->s_id); - if (EXT3_SB(sb)->s_journal->j_inode == NULL) { + printk(KERN_INFO "EXT4 FS on %s, ", sb->s_id); + if (EXT4_SB(sb)->s_journal->j_inode == NULL) { char b[BDEVNAME_SIZE]; printk("external journal on %s\n", - bdevname(EXT3_SB(sb)->s_journal->j_dev, b)); + bdevname(EXT4_SB(sb)->s_journal->j_dev, b)); } else { printk("internal journal\n"); } @@ -1171,16 +1171,16 @@ static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es, } /* Called at mount-time, super-block is locked */ -static int ext3_check_descriptors (struct super_block * sb) +static int ext4_check_descriptors (struct super_block * sb) { - struct ext3_sb_info *sbi = EXT3_SB(sb); - ext3_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); - ext3_fsblk_t last_block; - struct ext3_group_desc * gdp = NULL; + struct ext4_sb_info *sbi = EXT4_SB(sb); + ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); + ext4_fsblk_t last_block; + struct ext4_group_desc * gdp = NULL; int desc_block = 0; int i; - ext3_debug ("Checking group descriptors"); + ext4_debug ("Checking group descriptors"); for (i = 0; i < sbi->s_groups_count; i++) { @@ -1188,15 +1188,15 @@ static int ext3_check_descriptors (struct super_block * sb) last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; else last_block = first_block + - (EXT3_BLOCKS_PER_GROUP(sb) - 1); + (EXT4_BLOCKS_PER_GROUP(sb) - 1); - if ((i % EXT3_DESC_PER_BLOCK(sb)) == 0) - gdp = (struct ext3_group_desc *) + if ((i % EXT4_DESC_PER_BLOCK(sb)) == 0) + gdp = (struct ext4_group_desc *) sbi->s_group_desc[desc_block++]->b_data; if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || le32_to_cpu(gdp->bg_block_bitmap) > last_block) { - ext3_error (sb, "ext3_check_descriptors", + ext4_error (sb, "ext4_check_descriptors", "Block bitmap for group %d" " not in group (block %lu)!", i, (unsigned long) @@ -1206,7 +1206,7 @@ static int ext3_check_descriptors (struct super_block * sb) if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block || le32_to_cpu(gdp->bg_inode_bitmap) > last_block) { - ext3_error (sb, "ext3_check_descriptors", + ext4_error (sb, "ext4_check_descriptors", "Inode bitmap for group %d" " not in group (block %lu)!", i, (unsigned long) @@ -1217,24 +1217,24 @@ static int ext3_check_descriptors (struct super_block * sb) le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group > last_block) { - ext3_error (sb, "ext3_check_descriptors", + ext4_error (sb, "ext4_check_descriptors", "Inode table for group %d" " not in group 
(block %lu)!", i, (unsigned long) le32_to_cpu(gdp->bg_inode_table)); return 0; } - first_block += EXT3_BLOCKS_PER_GROUP(sb); + first_block += EXT4_BLOCKS_PER_GROUP(sb); gdp++; } - sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb)); - sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb)); + sbi->s_es->s_free_blocks_count=cpu_to_le32(ext4_count_free_blocks(sb)); + sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb)); return 1; } -/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at +/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at * the superblock) which were deleted from all directories, but held open by * a process at the time of a crash. We walk the list and try to delete these * inodes at recovery time (only with a read-write filesystem). @@ -1247,12 +1247,12 @@ static int ext3_check_descriptors (struct super_block * sb) * We only do an iget() and an iput() on each inode, which is very safe if we * accidentally point at an in-use or already deleted inode. The worst that * can happen in this case is that we get a "bit already cleared" message from - * ext3_free_inode(). The only reason we would point at a wrong inode is if + * ext4_free_inode(). The only reason we would point at a wrong inode is if * e2fsck was run on this filesystem, and it must have already done the orphan * inode cleanup for us, so we can safely abort without any further action. */ -static void ext3_orphan_cleanup (struct super_block * sb, - struct ext3_super_block * es) +static void ext4_orphan_cleanup (struct super_block * sb, + struct ext4_super_block * es) { unsigned int s_flags = sb->s_flags; int nr_orphans = 0, nr_truncates = 0; @@ -1264,7 +1264,7 @@ static void ext3_orphan_cleanup (struct super_block * sb, return; } - if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) { + if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { if (es->s_last_orphan) jbd_debug(1, "Errors on filesystem, " "clearing orphan list.\n"); @@ -1274,7 +1274,7 @@ static void ext3_orphan_cleanup (struct super_block * sb, } if (s_flags & MS_RDONLY) { - printk(KERN_INFO "EXT3-fs: %s: orphan cleanup on readonly fs\n", + printk(KERN_INFO "EXT4-fs: %s: orphan cleanup on readonly fs\n", sb->s_id); sb->s_flags &= ~MS_RDONLY; } @@ -1283,11 +1283,11 @@ static void ext3_orphan_cleanup (struct super_block * sb, sb->s_flags |= MS_ACTIVE; /* Turn on quotas so that they are updated correctly */ for (i = 0; i < MAXQUOTAS; i++) { - if (EXT3_SB(sb)->s_qf_names[i]) { - int ret = ext3_quota_on_mount(sb, i); + if (EXT4_SB(sb)->s_qf_names[i]) { + int ret = ext4_quota_on_mount(sb, i); if (ret < 0) printk(KERN_ERR - "EXT3-fs: Cannot turn on journalled " + "EXT4-fs: Cannot turn on journalled " "quota: error %d\n", ret); } } @@ -1297,12 +1297,12 @@ static void ext3_orphan_cleanup (struct super_block * sb, struct inode *inode; if (!(inode = - ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { + ext4_orphan_get(sb, le32_to_cpu(es->s_last_orphan)))) { es->s_last_orphan = 0; break; } - list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan); + list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); DQUOT_INIT(inode); if (inode->i_nlink) { printk(KERN_DEBUG @@ -1310,7 +1310,7 @@ static void ext3_orphan_cleanup (struct super_block * sb, __FUNCTION__, inode->i_ino, inode->i_size); jbd_debug(2, "truncating inode %lu to %Ld bytes\n", inode->i_ino, inode->i_size); - ext3_truncate(inode); + ext4_truncate(inode); nr_truncates++; } else { printk(KERN_DEBUG @@ -1326,10 
+1326,10 @@ static void ext3_orphan_cleanup (struct super_block * sb, #define PLURAL(x) (x), ((x)==1) ? "" : "s" if (nr_orphans) - printk(KERN_INFO "EXT3-fs: %s: %d orphan inode%s deleted\n", + printk(KERN_INFO "EXT4-fs: %s: %d orphan inode%s deleted\n", sb->s_id, PLURAL(nr_orphans)); if (nr_truncates) - printk(KERN_INFO "EXT3-fs: %s: %d truncate%s cleaned up\n", + printk(KERN_INFO "EXT4-fs: %s: %d truncate%s cleaned up\n", sb->s_id, PLURAL(nr_truncates)); #ifdef CONFIG_QUOTA /* Turn quotas off */ @@ -1348,9 +1348,9 @@ static void ext3_orphan_cleanup (struct super_block * sb, * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks. * We need to be 1 filesystem block less than the 2^32 sector limit. */ -static loff_t ext3_max_size(int bits) +static loff_t ext4_max_size(int bits) { - loff_t res = EXT3_NDIR_BLOCKS; + loff_t res = EXT4_NDIR_BLOCKS; /* This constant is calculated to be the largest file size for a * dense, 4k-blocksize file such that the total number of * sectors in the file, including data and all indirect blocks, @@ -1366,34 +1366,34 @@ static loff_t ext3_max_size(int bits) return res; } -static ext3_fsblk_t descriptor_loc(struct super_block *sb, - ext3_fsblk_t logic_sb_block, +static ext4_fsblk_t descriptor_loc(struct super_block *sb, + ext4_fsblk_t logic_sb_block, int nr) { - struct ext3_sb_info *sbi = EXT3_SB(sb); + struct ext4_sb_info *sbi = EXT4_SB(sb); unsigned long bg, first_meta_bg; int has_super = 0; first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg); - if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_META_BG) || nr < first_meta_bg) return (logic_sb_block + nr + 1); bg = sbi->s_desc_per_block * nr; - if (ext3_bg_has_super(sb, bg)) + if (ext4_bg_has_super(sb, bg)) has_super = 1; - return (has_super + ext3_group_first_block_no(sb, bg)); + return (has_super + ext4_group_first_block_no(sb, bg)); } -static int ext3_fill_super (struct super_block *sb, void *data, int silent) +static int ext4_fill_super (struct super_block *sb, void *data, int silent) { struct buffer_head * bh; - struct ext3_super_block *es = NULL; - struct ext3_sb_info *sbi; - ext3_fsblk_t block; - ext3_fsblk_t sb_block = get_sb_block(&data); - ext3_fsblk_t logic_sb_block; + struct ext4_super_block *es = NULL; + struct ext4_sb_info *sbi; + ext4_fsblk_t block; + ext4_fsblk_t sb_block = get_sb_block(&data); + ext4_fsblk_t logic_sb_block; unsigned long offset = 0; unsigned int journal_inum = 0; unsigned long journal_devnum = 0; @@ -1411,64 +1411,64 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) return -ENOMEM; sb->s_fs_info = sbi; sbi->s_mount_opt = 0; - sbi->s_resuid = EXT3_DEF_RESUID; - sbi->s_resgid = EXT3_DEF_RESGID; + sbi->s_resuid = EXT4_DEF_RESUID; + sbi->s_resgid = EXT4_DEF_RESGID; unlock_kernel(); - blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE); + blocksize = sb_min_blocksize(sb, EXT4_MIN_BLOCK_SIZE); if (!blocksize) { - printk(KERN_ERR "EXT3-fs: unable to set blocksize\n"); + printk(KERN_ERR "EXT4-fs: unable to set blocksize\n"); goto out_fail; } /* - * The ext3 superblock will not be buffer aligned for other than 1kB + * The ext4 superblock will not be buffer aligned for other than 1kB * block sizes. We need to calculate the offset from buffer start. 
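 *
 * (Editorial aside, not part of the original patch: with the default
 * sb_block of 1 returned by get_sb_block() and an illustrative
 * 4096-byte device blocksize, the computation just below works out
 * to)
 *
 *	logic_sb_block = (1 * 1024) / 4096;	yields 0
 *	offset         = (1 * 1024) % 4096;	yields 1024
 *
 * i.e. the 1 KiB superblock sits 1024 bytes into device block 0 and
 * is read from bh->b_data + offset.
 *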
*/ - if (blocksize != EXT3_MIN_BLOCK_SIZE) { - logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; - offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; + if (blocksize != EXT4_MIN_BLOCK_SIZE) { + logic_sb_block = (sb_block * EXT4_MIN_BLOCK_SIZE) / blocksize; + offset = (sb_block * EXT4_MIN_BLOCK_SIZE) % blocksize; } else { logic_sb_block = sb_block; } if (!(bh = sb_bread(sb, logic_sb_block))) { - printk (KERN_ERR "EXT3-fs: unable to read superblock\n"); + printk (KERN_ERR "EXT4-fs: unable to read superblock\n"); goto out_fail; } /* * Note: s_es must be initialized as soon as possible because - * some ext3 macro-instructions depend on its value + * some ext4 macro-instructions depend on its value */ - es = (struct ext3_super_block *) (((char *)bh->b_data) + offset); + es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); sbi->s_es = es; sb->s_magic = le16_to_cpu(es->s_magic); - if (sb->s_magic != EXT3_SUPER_MAGIC) - goto cantfind_ext3; + if (sb->s_magic != EXT4_SUPER_MAGIC) + goto cantfind_ext4; /* Set defaults before we parse the mount options */ def_mount_opts = le32_to_cpu(es->s_default_mount_opts); - if (def_mount_opts & EXT3_DEFM_DEBUG) + if (def_mount_opts & EXT4_DEFM_DEBUG) set_opt(sbi->s_mount_opt, DEBUG); - if (def_mount_opts & EXT3_DEFM_BSDGROUPS) + if (def_mount_opts & EXT4_DEFM_BSDGROUPS) set_opt(sbi->s_mount_opt, GRPID); - if (def_mount_opts & EXT3_DEFM_UID16) + if (def_mount_opts & EXT4_DEFM_UID16) set_opt(sbi->s_mount_opt, NO_UID32); - if (def_mount_opts & EXT3_DEFM_XATTR_USER) + if (def_mount_opts & EXT4_DEFM_XATTR_USER) set_opt(sbi->s_mount_opt, XATTR_USER); - if (def_mount_opts & EXT3_DEFM_ACL) + if (def_mount_opts & EXT4_DEFM_ACL) set_opt(sbi->s_mount_opt, POSIX_ACL); - if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA) - sbi->s_mount_opt |= EXT3_MOUNT_JOURNAL_DATA; - else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED) - sbi->s_mount_opt |= EXT3_MOUNT_ORDERED_DATA; - else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK) - sbi->s_mount_opt |= EXT3_MOUNT_WRITEBACK_DATA; - - if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC) + if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) + sbi->s_mount_opt |= EXT4_MOUNT_JOURNAL_DATA; + else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) + sbi->s_mount_opt |= EXT4_MOUNT_ORDERED_DATA; + else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_WBACK) + sbi->s_mount_opt |= EXT4_MOUNT_WRITEBACK_DATA; + + if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_PANIC) set_opt(sbi->s_mount_opt, ERRORS_PANIC); - else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_RO) + else if (le16_to_cpu(sbi->s_es->s_errors) == EXT4_ERRORS_RO) set_opt(sbi->s_mount_opt, ERRORS_RO); sbi->s_resuid = le16_to_cpu(es->s_def_resuid); @@ -1481,40 +1481,40 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount; sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? 
MS_POSIXACL : 0); - if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV && - (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) || - EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) || - EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U))) + if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV && + (EXT4_HAS_COMPAT_FEATURE(sb, ~0U) || + EXT4_HAS_RO_COMPAT_FEATURE(sb, ~0U) || + EXT4_HAS_INCOMPAT_FEATURE(sb, ~0U))) printk(KERN_WARNING - "EXT3-fs warning: feature flags set on rev 0 fs, " + "EXT4-fs warning: feature flags set on rev 0 fs, " "running e2fsck is recommended\n"); /* * Check feature flags regardless of the revision level, since we * previously didn't change the revision level when setting the flags, * so there is a chance incompat flags are set on a rev 0 filesystem. */ - features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP); + features = EXT4_HAS_INCOMPAT_FEATURE(sb, ~EXT4_FEATURE_INCOMPAT_SUPP); if (features) { - printk(KERN_ERR "EXT3-fs: %s: couldn't mount because of " + printk(KERN_ERR "EXT4-fs: %s: couldn't mount because of " "unsupported optional features (%x).\n", sb->s_id, le32_to_cpu(features)); goto failed_mount; } - features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP); + features = EXT4_HAS_RO_COMPAT_FEATURE(sb, ~EXT4_FEATURE_RO_COMPAT_SUPP); if (!(sb->s_flags & MS_RDONLY) && features) { - printk(KERN_ERR "EXT3-fs: %s: couldn't mount RDWR because of " + printk(KERN_ERR "EXT4-fs: %s: couldn't mount RDWR because of " "unsupported optional features (%x).\n", sb->s_id, le32_to_cpu(features)); goto failed_mount; } blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size); - if (blocksize < EXT3_MIN_BLOCK_SIZE || - blocksize > EXT3_MAX_BLOCK_SIZE) { + if (blocksize < EXT4_MIN_BLOCK_SIZE || + blocksize > EXT4_MAX_BLOCK_SIZE) { printk(KERN_ERR - "EXT3-fs: Unsupported filesystem blocksize %d on %s.\n", + "EXT4-fs: Unsupported filesystem blocksize %d on %s.\n", blocksize, sb->s_id); goto failed_mount; } @@ -1526,52 +1526,52 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) * than the hardware sectorsize for the machine. 
*/ if (blocksize < hblock) { - printk(KERN_ERR "EXT3-fs: blocksize %d too small for " + printk(KERN_ERR "EXT4-fs: blocksize %d too small for " "device blocksize %d.\n", blocksize, hblock); goto failed_mount; } brelse (bh); sb_set_blocksize(sb, blocksize); - logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize; - offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize; + logic_sb_block = (sb_block * EXT4_MIN_BLOCK_SIZE) / blocksize; + offset = (sb_block * EXT4_MIN_BLOCK_SIZE) % blocksize; bh = sb_bread(sb, logic_sb_block); if (!bh) { printk(KERN_ERR - "EXT3-fs: Can't read superblock on 2nd try.\n"); + "EXT4-fs: Can't read superblock on 2nd try.\n"); goto failed_mount; } - es = (struct ext3_super_block *)(((char *)bh->b_data) + offset); + es = (struct ext4_super_block *)(((char *)bh->b_data) + offset); sbi->s_es = es; - if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) { + if (es->s_magic != cpu_to_le16(EXT4_SUPER_MAGIC)) { printk (KERN_ERR - "EXT3-fs: Magic mismatch, very weird !\n"); + "EXT4-fs: Magic mismatch, very weird !\n"); goto failed_mount; } } - sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits); + sb->s_maxbytes = ext4_max_size(sb->s_blocksize_bits); - if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) { - sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE; - sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO; + if (le32_to_cpu(es->s_rev_level) == EXT4_GOOD_OLD_REV) { + sbi->s_inode_size = EXT4_GOOD_OLD_INODE_SIZE; + sbi->s_first_ino = EXT4_GOOD_OLD_FIRST_INO; } else { sbi->s_inode_size = le16_to_cpu(es->s_inode_size); sbi->s_first_ino = le32_to_cpu(es->s_first_ino); - if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) || + if ((sbi->s_inode_size < EXT4_GOOD_OLD_INODE_SIZE) || (sbi->s_inode_size & (sbi->s_inode_size - 1)) || (sbi->s_inode_size > blocksize)) { printk (KERN_ERR - "EXT3-fs: unsupported inode size: %d\n", + "EXT4-fs: unsupported inode size: %d\n", sbi->s_inode_size); goto failed_mount; } } - sbi->s_frag_size = EXT3_MIN_FRAG_SIZE << + sbi->s_frag_size = EXT4_MIN_FRAG_SIZE << le32_to_cpu(es->s_log_frag_size); if (blocksize != sbi->s_frag_size) { printk(KERN_ERR - "EXT3-fs: fragsize %lu != blocksize %u (unsupported)\n", + "EXT4-fs: fragsize %lu != blocksize %u (unsupported)\n", sbi->s_frag_size, blocksize); goto failed_mount; } @@ -1579,62 +1579,62 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); - if (EXT3_INODE_SIZE(sb) == 0) - goto cantfind_ext3; - sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb); + if (EXT4_INODE_SIZE(sb) == 0) + goto cantfind_ext4; + sbi->s_inodes_per_block = blocksize / EXT4_INODE_SIZE(sb); if (sbi->s_inodes_per_block == 0) - goto cantfind_ext3; + goto cantfind_ext4; sbi->s_itb_per_group = sbi->s_inodes_per_group / sbi->s_inodes_per_block; - sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc); + sbi->s_desc_per_block = blocksize / sizeof(struct ext4_group_desc); sbi->s_sbh = bh; sbi->s_mount_state = le16_to_cpu(es->s_state); - sbi->s_addr_per_block_bits = log2(EXT3_ADDR_PER_BLOCK(sb)); - sbi->s_desc_per_block_bits = log2(EXT3_DESC_PER_BLOCK(sb)); + sbi->s_addr_per_block_bits = log2(EXT4_ADDR_PER_BLOCK(sb)); + sbi->s_desc_per_block_bits = log2(EXT4_DESC_PER_BLOCK(sb)); for (i=0; i < 4; i++) sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]); sbi->s_def_hash_version = es->s_def_hash_version; if 
(sbi->s_blocks_per_group > blocksize * 8) { printk (KERN_ERR - "EXT3-fs: #blocks per group too big: %lu\n", + "EXT4-fs: #blocks per group too big: %lu\n", sbi->s_blocks_per_group); goto failed_mount; } if (sbi->s_frags_per_group > blocksize * 8) { printk (KERN_ERR - "EXT3-fs: #fragments per group too big: %lu\n", + "EXT4-fs: #fragments per group too big: %lu\n", sbi->s_frags_per_group); goto failed_mount; } if (sbi->s_inodes_per_group > blocksize * 8) { printk (KERN_ERR - "EXT3-fs: #inodes per group too big: %lu\n", + "EXT4-fs: #inodes per group too big: %lu\n", sbi->s_inodes_per_group); goto failed_mount; } if (le32_to_cpu(es->s_blocks_count) > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { - printk(KERN_ERR "EXT3-fs: filesystem on %s:" + printk(KERN_ERR "EXT4-fs: filesystem on %s:" " too large to mount safely\n", sb->s_id); if (sizeof(sector_t) < 8) - printk(KERN_WARNING "EXT3-fs: CONFIG_LBD not " + printk(KERN_WARNING "EXT4-fs: CONFIG_LBD not " "enabled\n"); goto failed_mount; } - if (EXT3_BLOCKS_PER_GROUP(sb) == 0) - goto cantfind_ext3; + if (EXT4_BLOCKS_PER_GROUP(sb) == 0) + goto cantfind_ext4; sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - le32_to_cpu(es->s_first_data_block) - 1) - / EXT3_BLOCKS_PER_GROUP(sb)) + 1; - db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) / - EXT3_DESC_PER_BLOCK(sb); + / EXT4_BLOCKS_PER_GROUP(sb)) + 1; + db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / + EXT4_DESC_PER_BLOCK(sb); sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), GFP_KERNEL); if (sbi->s_group_desc == NULL) { - printk (KERN_ERR "EXT3-fs: not enough memory\n"); + printk (KERN_ERR "EXT4-fs: not enough memory\n"); goto failed_mount; } @@ -1644,14 +1644,14 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) block = descriptor_loc(sb, logic_sb_block, i); sbi->s_group_desc[i] = sb_bread(sb, block); if (!sbi->s_group_desc[i]) { - printk (KERN_ERR "EXT3-fs: " + printk (KERN_ERR "EXT4-fs: " "can't read group descriptor %d\n", i); db_count = i; goto failed_mount2; } } - if (!ext3_check_descriptors (sb)) { - printk(KERN_ERR "EXT3-fs: group descriptors corrupted!\n"); + if (!ext4_check_descriptors (sb)) { + printk(KERN_ERR "EXT4-fs: group descriptors corrupted!\n"); goto failed_mount2; } sbi->s_gdb_count = db_count; @@ -1659,11 +1659,11 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) spin_lock_init(&sbi->s_next_gen_lock); percpu_counter_init(&sbi->s_freeblocks_counter, - ext3_count_free_blocks(sb)); + ext4_count_free_blocks(sb)); percpu_counter_init(&sbi->s_freeinodes_counter, - ext3_count_free_inodes(sb)); + ext4_count_free_inodes(sb)); percpu_counter_init(&sbi->s_dirs_counter, - ext3_count_dirs(sb)); + ext4_count_dirs(sb)); /* per fileystem reservation list head & lock */ spin_lock_init(&sbi->s_rsv_window_lock); @@ -1672,45 +1672,45 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) * reservation window list --- it gives us a placeholder for * append-at-start-of-list which makes the allocation logic * _much_ simpler. 
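 *
 * (Editorial aside, not part of the original patch: this is the
 * classic sentinel-node trick. Keeping a permanent dummy head
 * element means insert-at-front never needs an empty-list special
 * case. A generic sketch of the same idea, with made-up names:)
 *
 *	struct node { struct node *next; };
 *	static struct node head;		the permanent sentinel
 *
 *	static void insert_front(struct node *n)
 *	{
 *		n->next = head.next;	no empty-list check needed
 *		head.next = n;
 *	}
 *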
*/ - sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; - sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; + sbi->s_rsv_window_head.rsv_start = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; + sbi->s_rsv_window_head.rsv_end = EXT4_RESERVE_WINDOW_NOT_ALLOCATED; sbi->s_rsv_window_head.rsv_alloc_hit = 0; sbi->s_rsv_window_head.rsv_goal_size = 0; - ext3_rsv_window_add(sb, &sbi->s_rsv_window_head); + ext4_rsv_window_add(sb, &sbi->s_rsv_window_head); /* * set up enough so that it can read an inode */ - sb->s_op = &ext3_sops; - sb->s_export_op = &ext3_export_ops; - sb->s_xattr = ext3_xattr_handlers; + sb->s_op = &ext4_sops; + sb->s_export_op = &ext4_export_ops; + sb->s_xattr = ext4_xattr_handlers; #ifdef CONFIG_QUOTA - sb->s_qcop = &ext3_qctl_operations; - sb->dq_op = &ext3_quota_operations; + sb->s_qcop = &ext4_qctl_operations; + sb->dq_op = &ext4_quota_operations; #endif INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */ sb->s_root = NULL; needs_recovery = (es->s_last_orphan != 0 || - EXT3_HAS_INCOMPAT_FEATURE(sb, - EXT3_FEATURE_INCOMPAT_RECOVER)); + EXT4_HAS_INCOMPAT_FEATURE(sb, + EXT4_FEATURE_INCOMPAT_RECOVER)); /* * The first inode we look at is the journal inode. Don't try * root first: it may be modified in the journal! */ if (!test_opt(sb, NOLOAD) && - EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) { - if (ext3_load_journal(sb, es, journal_devnum)) + EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { + if (ext4_load_journal(sb, es, journal_devnum)) goto failed_mount3; } else if (journal_inum) { - if (ext3_create_journal(sb, es, journal_inum)) + if (ext4_create_journal(sb, es, journal_inum)) goto failed_mount3; } else { if (!silent) printk (KERN_ERR - "ext3: No journal on filesystem on %s\n", + "ext4: No journal on filesystem on %s\n", sb->s_id); goto failed_mount3; } @@ -1729,11 +1729,11 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) set_opt(sbi->s_mount_opt, JOURNAL_DATA); break; - case EXT3_MOUNT_ORDERED_DATA: - case EXT3_MOUNT_WRITEBACK_DATA: + case EXT4_MOUNT_ORDERED_DATA: + case EXT4_MOUNT_WRITEBACK_DATA: if (!journal_check_available_features (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) { - printk(KERN_ERR "EXT3-fs: Journal does not support " + printk(KERN_ERR "EXT4-fs: Journal does not support " "requested data journaling mode\n"); goto failed_mount4; } @@ -1742,8 +1742,8 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) } if (test_opt(sb, NOBH)) { - if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) { - printk(KERN_WARNING "EXT3-fs: Ignoring nobh option - " + if (!(test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)) { + printk(KERN_WARNING "EXT4-fs: Ignoring nobh option - " "its supported only with writeback mode\n"); clear_opt(sbi->s_mount_opt, NOBH); } @@ -1753,21 +1753,21 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) * so we can safely mount the rest of the filesystem now. 
*/ - root = iget(sb, EXT3_ROOT_INO); + root = iget(sb, EXT4_ROOT_INO); sb->s_root = d_alloc_root(root); if (!sb->s_root) { - printk(KERN_ERR "EXT3-fs: get root inode failed\n"); + printk(KERN_ERR "EXT4-fs: get root inode failed\n"); iput(root); goto failed_mount4; } if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { dput(sb->s_root); sb->s_root = NULL; - printk(KERN_ERR "EXT3-fs: corrupt root inode, run e2fsck\n"); + printk(KERN_ERR "EXT4-fs: corrupt root inode, run e2fsck\n"); goto failed_mount4; } - ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); + ext4_setup_super (sb, es, sb->s_flags & MS_RDONLY); /* * akpm: core read_super() calls in here with the superblock locked. * That deadlocks, because orphan cleanup needs to lock the superblock @@ -1776,23 +1776,23 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) * and aviro says that's the only reason for hanging onto the * superblock lock. */ - EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS; - ext3_orphan_cleanup(sb, es); - EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS; + EXT4_SB(sb)->s_mount_state |= EXT4_ORPHAN_FS; + ext4_orphan_cleanup(sb, es); + EXT4_SB(sb)->s_mount_state &= ~EXT4_ORPHAN_FS; if (needs_recovery) - printk (KERN_INFO "EXT3-fs: recovery complete.\n"); - ext3_mark_recovery_complete(sb, es); - printk (KERN_INFO "EXT3-fs: mounted filesystem with %s data mode.\n", - test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal": - test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered": + printk (KERN_INFO "EXT4-fs: recovery complete.\n"); + ext4_mark_recovery_complete(sb, es); + printk (KERN_INFO "EXT4-fs: mounted filesystem with %s data mode.\n", + test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ? "journal": + test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? "ordered": "writeback"); lock_kernel(); return 0; -cantfind_ext3: +cantfind_ext4: if (!silent) - printk(KERN_ERR "VFS: Can't find ext3 filesystem on dev %s.\n", + printk(KERN_ERR "VFS: Can't find ext4 filesystem on dev %s.\n", sb->s_id); goto failed_mount; @@ -1811,7 +1811,7 @@ failed_mount: for (i = 0; i < MAXQUOTAS; i++) kfree(sbi->s_qf_names[i]); #endif - ext3_blkdev_remove(sbi); + ext4_blkdev_remove(sbi); brelse(bh); out_fail: sb->s_fs_info = NULL; @@ -1825,13 +1825,13 @@ out_fail: * initial mount, once the journal has been initialised but before we've * done any recovery; and again on any subsequent remount. */ -static void ext3_init_journal_params(struct super_block *sb, journal_t *journal) +static void ext4_init_journal_params(struct super_block *sb, journal_t *journal) { - struct ext3_sb_info *sbi = EXT3_SB(sb); + struct ext4_sb_info *sbi = EXT4_SB(sb); if (sbi->s_commit_interval) journal->j_commit_interval = sbi->s_commit_interval; - /* We could also set up an ext3-specific default for the commit + /* We could also set up an ext4-specific default for the commit * interval here, but for now we'll just fall back to the jbd * default. 
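 *
 * (Editorial aside, not part of the original patch: s_commit_interval
 * is filled in by parse_options() from the commit=<seconds> mount
 * option earlier in this file, via
 *
 *	sbi->s_commit_interval = HZ * option;
 *
 * so, for example, "mount -o commit=15" lands here as 15*HZ jiffies
 * in journal->j_commit_interval.)
 *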
*/ @@ -1843,7 +1843,7 @@ static void ext3_init_journal_params(struct super_block *sb, journal_t *journal) spin_unlock(&journal->j_state_lock); } -static journal_t *ext3_get_journal(struct super_block *sb, +static journal_t *ext4_get_journal(struct super_block *sb, unsigned int journal_inum) { struct inode *journal_inode; @@ -1855,55 +1855,55 @@ static journal_t *ext3_get_journal(struct super_block *sb, journal_inode = iget(sb, journal_inum); if (!journal_inode) { - printk(KERN_ERR "EXT3-fs: no journal found.\n"); + printk(KERN_ERR "EXT4-fs: no journal found.\n"); return NULL; } if (!journal_inode->i_nlink) { make_bad_inode(journal_inode); iput(journal_inode); - printk(KERN_ERR "EXT3-fs: journal inode is deleted.\n"); + printk(KERN_ERR "EXT4-fs: journal inode is deleted.\n"); return NULL; } jbd_debug(2, "Journal inode found at %p: %Ld bytes\n", journal_inode, journal_inode->i_size); if (is_bad_inode(journal_inode) || !S_ISREG(journal_inode->i_mode)) { - printk(KERN_ERR "EXT3-fs: invalid journal inode.\n"); + printk(KERN_ERR "EXT4-fs: invalid journal inode.\n"); iput(journal_inode); return NULL; } journal = journal_init_inode(journal_inode); if (!journal) { - printk(KERN_ERR "EXT3-fs: Could not load journal inode\n"); + printk(KERN_ERR "EXT4-fs: Could not load journal inode\n"); iput(journal_inode); return NULL; } journal->j_private = sb; - ext3_init_journal_params(sb, journal); + ext4_init_journal_params(sb, journal); return journal; } -static journal_t *ext3_get_dev_journal(struct super_block *sb, +static journal_t *ext4_get_dev_journal(struct super_block *sb, dev_t j_dev) { struct buffer_head * bh; journal_t *journal; - ext3_fsblk_t start; - ext3_fsblk_t len; + ext4_fsblk_t start; + ext4_fsblk_t len; int hblock, blocksize; - ext3_fsblk_t sb_block; + ext4_fsblk_t sb_block; unsigned long offset; - struct ext3_super_block * es; + struct ext4_super_block * es; struct block_device *bdev; - bdev = ext3_blkdev_get(j_dev); + bdev = ext4_blkdev_get(j_dev); if (bdev == NULL) return NULL; if (bd_claim(bdev, sb)) { printk(KERN_ERR - "EXT3: failed to claim external journal device.\n"); + "EXT4: failed to claim external journal device.\n"); blkdev_put(bdev); return NULL; } @@ -1912,31 +1912,31 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb, hblock = bdev_hardsect_size(bdev); if (blocksize < hblock) { printk(KERN_ERR - "EXT3-fs: blocksize too small for journal device.\n"); + "EXT4-fs: blocksize too small for journal device.\n"); goto out_bdev; } - sb_block = EXT3_MIN_BLOCK_SIZE / blocksize; - offset = EXT3_MIN_BLOCK_SIZE % blocksize; + sb_block = EXT4_MIN_BLOCK_SIZE / blocksize; + offset = EXT4_MIN_BLOCK_SIZE % blocksize; set_blocksize(bdev, blocksize); if (!(bh = __bread(bdev, sb_block, blocksize))) { - printk(KERN_ERR "EXT3-fs: couldn't read superblock of " + printk(KERN_ERR "EXT4-fs: couldn't read superblock of " "external journal\n"); goto out_bdev; } - es = (struct ext3_super_block *) (((char *)bh->b_data) + offset); - if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) || + es = (struct ext4_super_block *) (((char *)bh->b_data) + offset); + if ((le16_to_cpu(es->s_magic) != EXT4_SUPER_MAGIC) || !(le32_to_cpu(es->s_feature_incompat) & - EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) { - printk(KERN_ERR "EXT3-fs: external journal has " + EXT4_FEATURE_INCOMPAT_JOURNAL_DEV)) { + printk(KERN_ERR "EXT4-fs: external journal has " "bad superblock\n"); brelse(bh); goto out_bdev; } - if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { - printk(KERN_ERR "EXT3-fs: journal UUID does not 
match\n"); + if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { + printk(KERN_ERR "EXT4-fs: journal UUID does not match\n"); brelse(bh); goto out_bdev; } @@ -1948,34 +1948,34 @@ static journal_t *ext3_get_dev_journal(struct super_block *sb, journal = journal_init_dev(bdev, sb->s_bdev, start, len, blocksize); if (!journal) { - printk(KERN_ERR "EXT3-fs: failed to create device journal\n"); + printk(KERN_ERR "EXT4-fs: failed to create device journal\n"); goto out_bdev; } journal->j_private = sb; ll_rw_block(READ, 1, &journal->j_sb_buffer); wait_on_buffer(journal->j_sb_buffer); if (!buffer_uptodate(journal->j_sb_buffer)) { - printk(KERN_ERR "EXT3-fs: I/O error on journal device\n"); + printk(KERN_ERR "EXT4-fs: I/O error on journal device\n"); goto out_journal; } if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) { - printk(KERN_ERR "EXT3-fs: External journal has more than one " + printk(KERN_ERR "EXT4-fs: External journal has more than one " "user (unsupported) - %d\n", be32_to_cpu(journal->j_superblock->s_nr_users)); goto out_journal; } - EXT3_SB(sb)->journal_bdev = bdev; - ext3_init_journal_params(sb, journal); + EXT4_SB(sb)->journal_bdev = bdev; + ext4_init_journal_params(sb, journal); return journal; out_journal: journal_destroy(journal); out_bdev: - ext3_blkdev_put(bdev); + ext4_blkdev_put(bdev); return NULL; } -static int ext3_load_journal(struct super_block *sb, - struct ext3_super_block *es, +static int ext4_load_journal(struct super_block *sb, + struct ext4_super_block *es, unsigned long journal_devnum) { journal_t *journal; @@ -1986,7 +1986,7 @@ static int ext3_load_journal(struct super_block *sb, if (journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { - printk(KERN_INFO "EXT3-fs: external journal device major/minor " + printk(KERN_INFO "EXT4-fs: external journal device major/minor " "numbers have changed\n"); journal_dev = new_decode_dev(journal_devnum); } else @@ -2000,56 +2000,56 @@ static int ext3_load_journal(struct super_block *sb, * can get read-write access to the device. 
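
ext4_get_dev_journal above has to find the external journal's superblock, which always lives at byte offset EXT4_MIN_BLOCK_SIZE (1024 in the real headers) regardless of the device block size, hence the division and modulo into sb_block and offset. A tiny runnable sketch of that arithmetic, with the constant inlined:

#include <stdio.h>

#define MIN_BLOCK_SIZE 1024

int main(void)
{
	int sizes[] = { 1024, 2048, 4096 };
	for (int i = 0; i < 3; i++) {
		int bs = sizes[i];
		printf("blocksize %4d -> block %d, offset %d\n",
		       bs, MIN_BLOCK_SIZE / bs, MIN_BLOCK_SIZE % bs);
	}
	return 0;
}
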
*/ - if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) { + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { if (sb->s_flags & MS_RDONLY) { - printk(KERN_INFO "EXT3-fs: INFO: recovery " + printk(KERN_INFO "EXT4-fs: INFO: recovery " "required on readonly filesystem.\n"); if (really_read_only) { - printk(KERN_ERR "EXT3-fs: write access " + printk(KERN_ERR "EXT4-fs: write access " "unavailable, cannot proceed.\n"); return -EROFS; } - printk (KERN_INFO "EXT3-fs: write access will " + printk (KERN_INFO "EXT4-fs: write access will " "be enabled during recovery.\n"); } } if (journal_inum && journal_dev) { - printk(KERN_ERR "EXT3-fs: filesystem has both journal " + printk(KERN_ERR "EXT4-fs: filesystem has both journal " "and inode journals!\n"); return -EINVAL; } if (journal_inum) { - if (!(journal = ext3_get_journal(sb, journal_inum))) + if (!(journal = ext4_get_journal(sb, journal_inum))) return -EINVAL; } else { - if (!(journal = ext3_get_dev_journal(sb, journal_dev))) + if (!(journal = ext4_get_dev_journal(sb, journal_dev))) return -EINVAL; } if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) { err = journal_update_format(journal); if (err) { - printk(KERN_ERR "EXT3-fs: error updating journal.\n"); + printk(KERN_ERR "EXT4-fs: error updating journal.\n"); journal_destroy(journal); return err; } } - if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) err = journal_wipe(journal, !really_read_only); if (!err) err = journal_load(journal); if (err) { - printk(KERN_ERR "EXT3-fs: error loading journal.\n"); + printk(KERN_ERR "EXT4-fs: error loading journal.\n"); journal_destroy(journal); return err; } - EXT3_SB(sb)->s_journal = journal; - ext3_clear_journal_err(sb, es); + EXT4_SB(sb)->s_journal = journal; + ext4_clear_journal_err(sb, es); if (journal_devnum && journal_devnum != le32_to_cpu(es->s_journal_dev)) { @@ -2057,62 +2057,62 @@ static int ext3_load_journal(struct super_block *sb, sb->s_dirt = 1; /* Make sure we flush the recovery flag to disk. 
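
A compact sketch of the gating ext4_load_journal applies above, with invented parameter names: recovery on a read-only mount is allowed only while the underlying device is still writable, and exactly one journal source (inode or device) may be configured.

#include <errno.h>
#include <stdio.h>

static int check_journal_setup(int needs_recovery, int sb_rdonly,
			       int dev_read_only, int has_inum, int has_dev)
{
	if (needs_recovery && sb_rdonly && dev_read_only)
		return -EROFS;   /* nowhere to replay the journal from */
	if (has_inum && has_dev)
		return -EINVAL;  /* both an inode and a device journal */
	if (!has_inum && !has_dev)
		return -EINVAL;  /* no journal at all */
	return 0;
}

int main(void)
{
	/* read-only fs on a read-only device that still needs recovery */
	printf("%d\n", check_journal_setup(1, 1, 1, 1, 0)); /* -30 (EROFS) on Linux */
	return 0;
}
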
*/ - ext3_commit_super(sb, es, 1); + ext4_commit_super(sb, es, 1); } return 0; } -static int ext3_create_journal(struct super_block * sb, - struct ext3_super_block * es, +static int ext4_create_journal(struct super_block * sb, + struct ext4_super_block * es, unsigned int journal_inum) { journal_t *journal; if (sb->s_flags & MS_RDONLY) { - printk(KERN_ERR "EXT3-fs: readonly filesystem when trying to " + printk(KERN_ERR "EXT4-fs: readonly filesystem when trying to " "create journal.\n"); return -EROFS; } - if (!(journal = ext3_get_journal(sb, journal_inum))) + if (!(journal = ext4_get_journal(sb, journal_inum))) return -EINVAL; - printk(KERN_INFO "EXT3-fs: creating new journal on inode %u\n", + printk(KERN_INFO "EXT4-fs: creating new journal on inode %u\n", journal_inum); if (journal_create(journal)) { - printk(KERN_ERR "EXT3-fs: error creating journal.\n"); + printk(KERN_ERR "EXT4-fs: error creating journal.\n"); journal_destroy(journal); return -EIO; } - EXT3_SB(sb)->s_journal = journal; + EXT4_SB(sb)->s_journal = journal; - ext3_update_dynamic_rev(sb); - EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); - EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL); + ext4_update_dynamic_rev(sb); + EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL); es->s_journal_inum = cpu_to_le32(journal_inum); sb->s_dirt = 1; /* Make sure we flush the recovery flag to disk. */ - ext3_commit_super(sb, es, 1); + ext4_commit_super(sb, es, 1); return 0; } -static void ext3_commit_super (struct super_block * sb, - struct ext3_super_block * es, +static void ext4_commit_super (struct super_block * sb, + struct ext4_super_block * es, int sync) { - struct buffer_head *sbh = EXT3_SB(sb)->s_sbh; + struct buffer_head *sbh = EXT4_SB(sb)->s_sbh; if (!sbh) return; es->s_wtime = cpu_to_le32(get_seconds()); - es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb)); - es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb)); + es->s_free_blocks_count = cpu_to_le32(ext4_count_free_blocks(sb)); + es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); BUFFER_TRACE(sbh, "marking dirty"); mark_buffer_dirty(sbh); if (sync) @@ -2125,18 +2125,18 @@ static void ext3_commit_super (struct super_block * sb, * remounting) the filesystem readonly, then we will end up with a * consistent fs on disk. Record that fact. */ -static void ext3_mark_recovery_complete(struct super_block * sb, - struct ext3_super_block * es) +static void ext4_mark_recovery_complete(struct super_block * sb, + struct ext4_super_block * es) { - journal_t *journal = EXT3_SB(sb)->s_journal; + journal_t *journal = EXT4_SB(sb)->s_journal; journal_lock_updates(journal); journal_flush(journal); - if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) && + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER) && sb->s_flags & MS_RDONLY) { - EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); + EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); sb->s_dirt = 0; - ext3_commit_super(sb, es, 1); + ext4_commit_super(sb, es, 1); } journal_unlock_updates(journal); } @@ -2146,33 +2146,33 @@ static void ext3_mark_recovery_complete(struct super_block * sb, * has recorded an error from a previous lifetime, move that error to the * main filesystem now. 
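
ext4_commit_super above funnels the free-block and free-inode counters through cpu_to_le32() before touching the buffer, so the on-disk superblock stays little-endian on any host. A portable user-space equivalent of that store, for illustration only:

#include <stdint.h>
#include <stdio.h>

static void store_le32(uint8_t out[4], uint32_t v)
{
	out[0] = v & 0xff;
	out[1] = (v >> 8) & 0xff;
	out[2] = (v >> 16) & 0xff;
	out[3] = (v >> 24) & 0xff;
}

int main(void)
{
	uint8_t b[4];
	store_le32(b, 0x12345678);
	printf("%02x %02x %02x %02x\n", b[0], b[1], b[2], b[3]); /* 78 56 34 12 */
	return 0;
}
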
*/ -static void ext3_clear_journal_err(struct super_block * sb, - struct ext3_super_block * es) +static void ext4_clear_journal_err(struct super_block * sb, + struct ext4_super_block * es) { journal_t *journal; int j_errno; const char *errstr; - journal = EXT3_SB(sb)->s_journal; + journal = EXT4_SB(sb)->s_journal; /* * Now check for any error status which may have been recorded in the - * journal by a prior ext3_error() or ext3_abort() + * journal by a prior ext4_error() or ext4_abort() */ j_errno = journal_errno(journal); if (j_errno) { char nbuf[16]; - errstr = ext3_decode_error(sb, j_errno, nbuf); - ext3_warning(sb, __FUNCTION__, "Filesystem error recorded " + errstr = ext4_decode_error(sb, j_errno, nbuf); + ext4_warning(sb, __FUNCTION__, "Filesystem error recorded " "from previous mount: %s", errstr); - ext3_warning(sb, __FUNCTION__, "Marking fs in need of " + ext4_warning(sb, __FUNCTION__, "Marking fs in need of " "filesystem check."); - EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS; - es->s_state |= cpu_to_le16(EXT3_ERROR_FS); - ext3_commit_super (sb, es, 1); + EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS; + es->s_state |= cpu_to_le16(EXT4_ERROR_FS); + ext4_commit_super (sb, es, 1); journal_clear_err(journal); } @@ -2182,7 +2182,7 @@ static void ext3_clear_journal_err(struct super_block * sb, * Force the running and committing transactions to commit, * and wait on the commit. */ -int ext3_force_commit(struct super_block *sb) +int ext4_force_commit(struct super_block *sb) { journal_t *journal; int ret; @@ -2190,14 +2190,14 @@ int ext3_force_commit(struct super_block *sb) if (sb->s_flags & MS_RDONLY) return 0; - journal = EXT3_SB(sb)->s_journal; + journal = EXT4_SB(sb)->s_journal; sb->s_dirt = 0; - ret = ext3_journal_force_commit(journal); + ret = ext4_journal_force_commit(journal); return ret; } /* - * Ext3 always journals updates to the superblock itself, so we don't + * Ext4 always journals updates to the superblock itself, so we don't * have to propagate any other updates to the superblock on disk at this * point. Just start an async writeback to get the buffers on their way * to the disk. @@ -2205,21 +2205,21 @@ int ext3_force_commit(struct super_block *sb) * This implicitly triggers the writebehind on sync(). */ -static void ext3_write_super (struct super_block * sb) +static void ext4_write_super (struct super_block * sb) { if (mutex_trylock(&sb->s_lock) != 0) BUG(); sb->s_dirt = 0; } -static int ext3_sync_fs(struct super_block *sb, int wait) +static int ext4_sync_fs(struct super_block *sb, int wait) { tid_t target; sb->s_dirt = 0; - if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) { + if (journal_start_commit(EXT4_SB(sb)->s_journal, &target)) { if (wait) - log_wait_commit(EXT3_SB(sb)->s_journal, target); + log_wait_commit(EXT4_SB(sb)->s_journal, target); } return 0; } @@ -2228,20 +2228,20 @@ static int ext3_sync_fs(struct super_block *sb, int wait) * LVM calls this function before a (read-only) snapshot is created. This * gives us a chance to flush the journal completely and mark the fs clean. */ -static void ext3_write_super_lockfs(struct super_block *sb) +static void ext4_write_super_lockfs(struct super_block *sb) { sb->s_dirt = 0; if (!(sb->s_flags & MS_RDONLY)) { - journal_t *journal = EXT3_SB(sb)->s_journal; + journal_t *journal = EXT4_SB(sb)->s_journal; /* Now we set up the journal barrier. */ journal_lock_updates(journal); journal_flush(journal); /* Journal blocked and flushed, clear needs_recovery flag. 
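*/

The freeze path in ext4_write_super_lockfs above depends on strict ordering: block new journal updates, flush what has committed, and only then mark the filesystem clean on disk, so an LVM snapshot taken afterwards mounts without recovery. A stubbed sketch of that ordering (stub names invented, not jbd's):

#include <stdio.h>

static void journal_lock_updates_stub(void) { puts("1: new transactions blocked"); }
static void journal_flush_stub(void)        { puts("2: journal checkpointed"); }
static void clear_recover_flag_stub(void)   { puts("3: RECOVER cleared, super committed"); }

static void freeze_for_snapshot(void)
{
	journal_lock_updates_stub();	/* quiesce writers first */
	journal_flush_stub();		/* then push everything to disk */
	clear_recover_flag_stub();	/* only now is it safe to mark clean */
}

int main(void)
{
	freeze_for_snapshot();
	return 0;
}
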
- EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); - ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); + EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); } } @@ -2249,25 +2249,25 @@ static void ext3_write_super_lockfs(struct super_block *sb) * Called by LVM after the snapshot is done. We need to reset the RECOVER * flag here, even though the filesystem is not technically dirty yet. */ -static void ext3_unlockfs(struct super_block *sb) +static void ext4_unlockfs(struct super_block *sb) { if (!(sb->s_flags & MS_RDONLY)) { lock_super(sb); /* Reset the needs_recovery flag before the fs is unlocked. */ - EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER); - ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1); + EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); + ext4_commit_super(sb, EXT4_SB(sb)->s_es, 1); unlock_super(sb); - journal_unlock_updates(EXT3_SB(sb)->s_journal); + journal_unlock_updates(EXT4_SB(sb)->s_journal); } } -static int ext3_remount (struct super_block * sb, int * flags, char * data) +static int ext4_remount (struct super_block * sb, int * flags, char * data) { - struct ext3_super_block * es; - struct ext3_sb_info *sbi = EXT3_SB(sb); - ext3_fsblk_t n_blocks_count = 0; + struct ext4_super_block * es; + struct ext4_sb_info *sbi = EXT4_SB(sb); + ext4_fsblk_t n_blocks_count = 0; unsigned long old_sb_flags; - struct ext3_mount_options old_opts; + struct ext4_mount_options old_opts; int err; #ifdef CONFIG_QUOTA int i; @@ -2293,19 +2293,19 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) goto restore_opts; } - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) - ext3_abort(sb, __FUNCTION__, "Abort forced by user"); + if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) + ext4_abort(sb, __FUNCTION__, "Abort forced by user"); sb->s_flags = (sb->s_flags & ~MS_POSIXACL) | - ((sbi->s_mount_opt & EXT3_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); + ((sbi->s_mount_opt & EXT4_MOUNT_POSIX_ACL) ? MS_POSIXACL : 0); es = sbi->s_es; - ext3_init_journal_params(sb, sbi->s_journal); + ext4_init_journal_params(sb, sbi->s_journal); if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || n_blocks_count > le32_to_cpu(es->s_blocks_count)) { - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) { + if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { err = -EROFS; goto restore_opts; } @@ -2322,16 +2322,16 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) * readonly, and if so set the rdonly flag and then * mark the partition as valid again. */ - if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) && - (sbi->s_mount_state & EXT3_VALID_FS)) + if (!(es->s_state & cpu_to_le16(EXT4_VALID_FS)) && + (sbi->s_mount_state & EXT4_VALID_FS)) es->s_state = cpu_to_le16(sbi->s_mount_state); - ext3_mark_recovery_complete(sb, es); + ext4_mark_recovery_complete(sb, es); } else { __le32 ret; - if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb, - ~EXT3_FEATURE_RO_COMPAT_SUPP))) { - printk(KERN_WARNING "EXT3-fs: %s: couldn't " + if ((ret = EXT4_HAS_RO_COMPAT_FEATURE(sb, + ~EXT4_FEATURE_RO_COMPAT_SUPP))) { + printk(KERN_WARNING "EXT4-fs: %s: couldn't " "remount RDWR because of unsupported " "optional features (%x).\n", sb->s_id, le32_to_cpu(ret)); @@ -2344,11 +2344,11 @@ static int ext3_remount (struct super_block * sb, int * flags, char * data) * been changed by e2fsck since we originally mounted * the partition.) 
*/ - ext3_clear_journal_err(sb, es); + ext4_clear_journal_err(sb, es); sbi->s_mount_state = le16_to_cpu(es->s_state); - if ((err = ext3_group_extend(sb, es, n_blocks_count))) + if ((err = ext4_group_extend(sb, es, n_blocks_count))) goto restore_opts; - if (!ext3_setup_super (sb, es, 0)) + if (!ext4_setup_super (sb, es, 0)) sb->s_flags &= ~MS_RDONLY; } } @@ -2378,19 +2378,19 @@ restore_opts: return err; } -static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) +static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) { struct super_block *sb = dentry->d_sb; - struct ext3_sb_info *sbi = EXT3_SB(sb); - struct ext3_super_block *es = sbi->s_es; - ext3_fsblk_t overhead; + struct ext4_sb_info *sbi = EXT4_SB(sb); + struct ext4_super_block *es = sbi->s_es; + ext4_fsblk_t overhead; int i; if (test_opt (sb, MINIX_DF)) overhead = 0; else { unsigned long ngroups; - ngroups = EXT3_SB(sb)->s_groups_count; + ngroups = EXT4_SB(sb)->s_groups_count; smp_rmb(); /* @@ -2409,8 +2409,8 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) * feature is turned on, then not all groups have this. */ for (i = 0; i < ngroups; i++) { - overhead += ext3_bg_has_super(sb, i) + - ext3_bg_num_gdb(sb, i); + overhead += ext4_bg_has_super(sb, i) + + ext4_bg_num_gdb(sb, i); cond_resched(); } @@ -2418,10 +2418,10 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) * Every block group has an inode bitmap, a block * bitmap, and an inode table. */ - overhead += (ngroups * (2 + EXT3_SB(sb)->s_itb_per_group)); + overhead += (ngroups * (2 + EXT4_SB(sb)->s_itb_per_group)); } - buf->f_type = EXT3_SUPER_MAGIC; + buf->f_type = EXT4_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); @@ -2430,14 +2430,14 @@ static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf) buf->f_bavail = 0; buf->f_files = le32_to_cpu(es->s_inodes_count); buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); - buf->f_namelen = EXT3_NAME_LEN; + buf->f_namelen = EXT4_NAME_LEN; return 0; } /* Helper function for writing quotas on sync - we need to start transaction before quota file * is locked for write. 
Otherwise there are possible deadlocks: * Process 1 Process 2 - * ext3_create() quota_sync() + * ext4_create() quota_sync() * journal_start() write_dquot() * DQUOT_INIT() down(dqio_mutex) * down(dqio_mutex) journal_start() @@ -2451,111 +2451,111 @@ static inline struct inode *dquot_to_inode(struct dquot *dquot) return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type]; } -static int ext3_dquot_initialize(struct inode *inode, int type) +static int ext4_dquot_initialize(struct inode *inode, int type) { handle_t *handle; int ret, err; /* We may create quota structure so we need to reserve enough blocks */ - handle = ext3_journal_start(inode, 2*EXT3_QUOTA_INIT_BLOCKS(inode->i_sb)); + handle = ext4_journal_start(inode, 2*EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_initialize(inode, type); - err = ext3_journal_stop(handle); + err = ext4_journal_stop(handle); if (!ret) ret = err; return ret; } -static int ext3_dquot_drop(struct inode *inode) +static int ext4_dquot_drop(struct inode *inode) { handle_t *handle; int ret, err; /* We may delete quota structure so we need to reserve enough blocks */ - handle = ext3_journal_start(inode, 2*EXT3_QUOTA_DEL_BLOCKS(inode->i_sb)); + handle = ext4_journal_start(inode, 2*EXT4_QUOTA_DEL_BLOCKS(inode->i_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_drop(inode); - err = ext3_journal_stop(handle); + err = ext4_journal_stop(handle); if (!ret) ret = err; return ret; } -static int ext3_write_dquot(struct dquot *dquot) +static int ext4_write_dquot(struct dquot *dquot) { int ret, err; handle_t *handle; struct inode *inode; inode = dquot_to_inode(dquot); - handle = ext3_journal_start(inode, - EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); + handle = ext4_journal_start(inode, + EXT4_QUOTA_TRANS_BLOCKS(dquot->dq_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit(dquot); - err = ext3_journal_stop(handle); + err = ext4_journal_stop(handle); if (!ret) ret = err; return ret; } -static int ext3_acquire_dquot(struct dquot *dquot) +static int ext4_acquire_dquot(struct dquot *dquot) { int ret, err; handle_t *handle; - handle = ext3_journal_start(dquot_to_inode(dquot), - EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb)); + handle = ext4_journal_start(dquot_to_inode(dquot), + EXT4_QUOTA_INIT_BLOCKS(dquot->dq_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_acquire(dquot); - err = ext3_journal_stop(handle); + err = ext4_journal_stop(handle); if (!ret) ret = err; return ret; } -static int ext3_release_dquot(struct dquot *dquot) +static int ext4_release_dquot(struct dquot *dquot) { int ret, err; handle_t *handle; - handle = ext3_journal_start(dquot_to_inode(dquot), - EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb)); + handle = ext4_journal_start(dquot_to_inode(dquot), + EXT4_QUOTA_DEL_BLOCKS(dquot->dq_sb)); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_release(dquot); - err = ext3_journal_stop(handle); + err = ext4_journal_stop(handle); if (!ret) ret = err; return ret; } -static int ext3_mark_dquot_dirty(struct dquot *dquot) +static int ext4_mark_dquot_dirty(struct dquot *dquot) { /* Are we journalling quotas? */
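
Every helper above follows one rule from the deadlock diagram: the journal handle is opened before the quota code takes dqio_mutex, so both "locks" are always acquired in the same order and the ABBA deadlock cannot occur. A user-space model of that rule; the journal handle is not literally a mutex (many handles can be open at once), it is reduced to one here purely to show the ordering:

#include <pthread.h>

static pthread_mutex_t journal_handle = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t dqio_mutex     = PTHREAD_MUTEX_INITIALIZER;

static void write_dquot_like(void)
{
	pthread_mutex_lock(&journal_handle);   /* ext4_journal_start() first */
	pthread_mutex_lock(&dqio_mutex);       /* quota code locks second */
	/* ... update the quota file under both ... */
	pthread_mutex_unlock(&dqio_mutex);
	pthread_mutex_unlock(&journal_handle); /* ext4_journal_stop() */
}

int main(void)
{
	write_dquot_like();
	return 0;
}
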
- if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || - EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { + if (EXT4_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] || + EXT4_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) { dquot_mark_dquot_dirty(dquot); - return ext3_write_dquot(dquot); + return ext4_write_dquot(dquot); } else { return dquot_mark_dquot_dirty(dquot); } } -static int ext3_write_info(struct super_block *sb, int type) +static int ext4_write_info(struct super_block *sb, int type) { int ret, err; handle_t *handle; /* Data block + inode block */ - handle = ext3_journal_start(sb->s_root->d_inode, 2); + handle = ext4_journal_start(sb->s_root->d_inode, 2); if (IS_ERR(handle)) return PTR_ERR(handle); ret = dquot_commit_info(sb, type); - err = ext3_journal_stop(handle); + err = ext4_journal_stop(handle); if (!ret) ret = err; return ret; @@ -2565,16 +2565,16 @@ static int ext3_write_info(struct super_block *sb, int type) * Turn on quotas during mount time - we need to find * the quota file and such... */ -static int ext3_quota_on_mount(struct super_block *sb, int type) +static int ext4_quota_on_mount(struct super_block *sb, int type) { - return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type], - EXT3_SB(sb)->s_jquota_fmt, type); + return vfs_quota_on_mount(sb, EXT4_SB(sb)->s_qf_names[type], + EXT4_SB(sb)->s_jquota_fmt, type); } /* * Standard function to be called on quota_on */ -static int ext3_quota_on(struct super_block *sb, int type, int format_id, +static int ext4_quota_on(struct super_block *sb, int type, int format_id, char *path) { int err; @@ -2583,8 +2583,8 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, if (!test_opt(sb, QUOTA)) return -EINVAL; /* Not journalling quota? */ - if (!EXT3_SB(sb)->s_qf_names[USRQUOTA] && - !EXT3_SB(sb)->s_qf_names[GRPQUOTA]) + if (!EXT4_SB(sb)->s_qf_names[USRQUOTA] && + !EXT4_SB(sb)->s_qf_names[GRPQUOTA]) return vfs_quota_on(sb, type, format_id, path); err = path_lookup(path, LOOKUP_FOLLOW, &nd); if (err) @@ -2597,7 +2597,7 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, /* Quotafile not of fs root? */ if (nd.dentry->d_parent->d_inode != sb->s_root->d_inode) printk(KERN_WARNING - "EXT3-fs: Quota file not on filesystem root. " + "EXT4-fs: Quota file not on filesystem root. " "Journalled quota will not work.\n"); path_release(&nd); return vfs_quota_on(sb, type, format_id, path); @@ -2607,11 +2607,11 @@ static int ext3_quota_on(struct super_block *sb, int type, int format_id, * acquiring the locks... As quota files are never truncated and quota code * itself serializes the operations (and no one else should touch the files) * we don't have to be afraid of races */ -static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, +static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, size_t len, loff_t off) { struct inode *inode = sb_dqopt(sb)->files[type]; - sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); + sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); int err = 0; int offset = off & (sb->s_blocksize - 1); int tocopy; @@ -2627,7 +2627,7 @@ static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, while (toread > 0) { tocopy = sb->s_blocksize - offset < toread ? sb->s_blocksize - offset : toread; - bh = ext3_bread(NULL, inode, blk, 0, &err); + bh = ext4_bread(NULL, inode, blk, 0, &err); if (err) return err; if (!bh) /* A hole? 
*/ @@ -2645,15 +2645,15 @@ static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data, /* Write to quotafile (we know the transaction is already started and has * enough credits) */ -static ssize_t ext3_quota_write(struct super_block *sb, int type, +static ssize_t ext4_quota_write(struct super_block *sb, int type, const char *data, size_t len, loff_t off) { struct inode *inode = sb_dqopt(sb)->files[type]; - sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb); + sector_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); int err = 0; int offset = off & (sb->s_blocksize - 1); int tocopy; - int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL; + int journal_quota = EXT4_SB(sb)->s_qf_names[type] != NULL; size_t towrite = len; struct buffer_head *bh; handle_t *handle = journal_current_handle(); @@ -2662,11 +2662,11 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, while (towrite > 0) { tocopy = sb->s_blocksize - offset < towrite ? sb->s_blocksize - offset : towrite; - bh = ext3_bread(handle, inode, blk, 1, &err); + bh = ext4_bread(handle, inode, blk, 1, &err); if (!bh) goto out; if (journal_quota) { - err = ext3_journal_get_write_access(handle, bh); + err = ext4_journal_get_write_access(handle, bh); if (err) { brelse(bh); goto out; @@ -2677,10 +2677,10 @@ static ssize_t ext3_quota_write(struct super_block *sb, int type, flush_dcache_page(bh->b_page); unlock_buffer(bh); if (journal_quota) - err = ext3_journal_dirty_metadata(handle, bh); + err = ext4_journal_dirty_metadata(handle, bh); else { /* Always do at least ordered writes for quotas */ - err = ext3_journal_dirty_data(handle, bh); + err = ext4_journal_dirty_data(handle, bh); mark_buffer_dirty(bh); } brelse(bh); @@ -2696,59 +2696,59 @@ out: return err; if (inode->i_size < off+len-towrite) { i_size_write(inode, off+len-towrite); - EXT3_I(inode)->i_disksize = inode->i_size; + EXT4_I(inode)->i_disksize = inode->i_size; } inode->i_version++; inode->i_mtime = inode->i_ctime = CURRENT_TIME; - ext3_mark_inode_dirty(handle, inode); + ext4_mark_inode_dirty(handle, inode); mutex_unlock(&inode->i_mutex); return len - towrite; } #endif -static int ext3_get_sb(struct file_system_type *fs_type, +static int ext4_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { - return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt); + return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super, mnt); } -static struct file_system_type ext3_fs_type = { +static struct file_system_type ext4dev_fs_type = { .owner = THIS_MODULE, - .name = "ext3", - .get_sb = ext3_get_sb, + .name = "ext4dev", + .get_sb = ext4_get_sb, .kill_sb = kill_block_super, .fs_flags = FS_REQUIRES_DEV, }; -static int __init init_ext3_fs(void) +static int __init init_ext4_fs(void) { - int err = init_ext3_xattr(); + int err = init_ext4_xattr(); if (err) return err; err = init_inodecache(); if (err) goto out1; - err = register_filesystem(&ext3_fs_type); + err = register_filesystem(&ext4dev_fs_type); if (err) goto out; return 0; out: destroy_inodecache(); out1: - exit_ext3_xattr(); + exit_ext4_xattr(); return err; } -static void __exit exit_ext3_fs(void) +static void __exit exit_ext4_fs(void) { - unregister_filesystem(&ext3_fs_type); + unregister_filesystem(&ext4dev_fs_type); destroy_inodecache(); - exit_ext3_xattr(); + exit_ext4_xattr(); } MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); -MODULE_DESCRIPTION("Second Extended Filesystem with journaling 
extensions"); +MODULE_DESCRIPTION("Fourth Extended Filesystem with extents"); MODULE_LICENSE("GPL"); -module_init(init_ext3_fs) -module_exit(exit_ext3_fs) +module_init(init_ext4_fs) +module_exit(exit_ext4_fs) diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index 4f79122cde67..9e4c75f912f7 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -1,5 +1,5 @@ /* - * linux/fs/ext3/symlink.c + * linux/fs/ext4/symlink.c * * Only fast symlinks left here - the rest is done by generic code. AV, 1999 * @@ -14,41 +14,41 @@ * * Copyright (C) 1991, 1992 Linus Torvalds * - * ext3 symlink handling code + * ext4 symlink handling code */ #include <linux/fs.h> #include <linux/jbd.h> -#include <linux/ext3_fs.h> +#include <linux/ext4_fs.h> #include <linux/namei.h> #include "xattr.h" -static void * ext3_follow_link(struct dentry *dentry, struct nameidata *nd) +static void * ext4_follow_link(struct dentry *dentry, struct nameidata *nd) { - struct ext3_inode_info *ei = EXT3_I(dentry->d_inode); + struct ext4_inode_info *ei = EXT4_I(dentry->d_inode); nd_set_link(nd, (char*)ei->i_data); return NULL; } -struct inode_operations ext3_symlink_inode_operations = { +struct inode_operations ext4_symlink_inode_operations = { .readlink = generic_readlink, .follow_link = page_follow_link_light, .put_link = page_put_link, -#ifdef CONFIG_EXT3_FS_XATTR +#ifdef CONFIG_EXT4DEV_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, - .listxattr = ext3_listxattr, + .listxattr = ext4_listxattr, .removexattr = generic_removexattr, #endif }; -struct inode_operations ext3_fast_symlink_inode_operations = { +struct inode_operations ext4_fast_symlink_inode_operations = { .readlink = generic_readlink, - .follow_link = ext3_follow_link, -#ifdef CONFIG_EXT3_FS_XATTR + .follow_link = ext4_follow_link, +#ifdef CONFIG_EXT4DEV_FS_XATTR .setxattr = generic_setxattr, .getxattr = generic_getxattr, - .listxattr = ext3_listxattr, + .listxattr = ext4_listxattr, .removexattr = generic_removexattr, #endif }; diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index f86f2482f01d..d3a408154101 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1,10 +1,10 @@ /* - * linux/fs/ext3/xattr.c + * linux/fs/ext4/xattr.c * * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de> * * Fix by Harrison Xing <harrison@mountainviewdata.com>. - * Ext3 code with a lot of help from Eric Jarman <ejarman@acm.org>. + * Ext4 code with a lot of help from Eric Jarman <ejarman@acm.org>. * Extended attributes for symlinks and special files added per * suggestion of Luka Renko <luka.renko@hermes.si>. * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>, @@ -43,7 +43,7 @@ * * Locking strategy * ---------------- - * EXT3_I(inode)->i_file_acl is protected by EXT3_I(inode)->xattr_sem. + * EXT4_I(inode)->i_file_acl is protected by EXT4_I(inode)->xattr_sem. * EA blocks are only changed if they are exclusive to an inode, so * holding xattr_sem also means that nothing but the EA block's reference * count can change. 
Multiple writers to the same block are synchronized @@ -53,27 +53,27 @@ #include <linux/init.h> #include <linux/fs.h> #include <linux/slab.h> -#include <linux/ext3_jbd.h> -#include <linux/ext3_fs.h> +#include <linux/ext4_jbd.h> +#include <linux/ext4_fs.h> #include <linux/mbcache.h> #include <linux/quotaops.h> #include <linux/rwsem.h> #include "xattr.h" #include "acl.h" -#define BHDR(bh) ((struct ext3_xattr_header *)((bh)->b_data)) -#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr)) +#define BHDR(bh) ((struct ext4_xattr_header *)((bh)->b_data)) +#define ENTRY(ptr) ((struct ext4_xattr_entry *)(ptr)) #define BFIRST(bh) ENTRY(BHDR(bh)+1) #define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0) #define IHDR(inode, raw_inode) \ - ((struct ext3_xattr_ibody_header *) \ + ((struct ext4_xattr_ibody_header *) \ ((void *)raw_inode + \ - EXT3_GOOD_OLD_INODE_SIZE + \ - EXT3_I(inode)->i_extra_isize)) -#define IFIRST(hdr) ((struct ext3_xattr_entry *)((hdr)+1)) + EXT4_GOOD_OLD_INODE_SIZE + \ + EXT4_I(inode)->i_extra_isize)) +#define IFIRST(hdr) ((struct ext4_xattr_entry *)((hdr)+1)) -#ifdef EXT3_XATTR_DEBUG +#ifdef EXT4_XATTR_DEBUG # define ea_idebug(inode, f...) do { \ printk(KERN_DEBUG "inode %s:%lu: ", \ inode->i_sb->s_id, inode->i_ino); \ @@ -93,47 +93,47 @@ # define ea_bdebug(f...) #endif -static void ext3_xattr_cache_insert(struct buffer_head *); -static struct buffer_head *ext3_xattr_cache_find(struct inode *, - struct ext3_xattr_header *, +static void ext4_xattr_cache_insert(struct buffer_head *); +static struct buffer_head *ext4_xattr_cache_find(struct inode *, + struct ext4_xattr_header *, struct mb_cache_entry **); -static void ext3_xattr_rehash(struct ext3_xattr_header *, - struct ext3_xattr_entry *); +static void ext4_xattr_rehash(struct ext4_xattr_header *, + struct ext4_xattr_entry *); -static struct mb_cache *ext3_xattr_cache; +static struct mb_cache *ext4_xattr_cache; -static struct xattr_handler *ext3_xattr_handler_map[] = { - [EXT3_XATTR_INDEX_USER] = &ext3_xattr_user_handler, -#ifdef CONFIG_EXT3_FS_POSIX_ACL - [EXT3_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext3_xattr_acl_access_handler, - [EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext3_xattr_acl_default_handler, +static struct xattr_handler *ext4_xattr_handler_map[] = { + [EXT4_XATTR_INDEX_USER] = &ext4_xattr_user_handler, +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL + [EXT4_XATTR_INDEX_POSIX_ACL_ACCESS] = &ext4_xattr_acl_access_handler, + [EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &ext4_xattr_acl_default_handler, #endif - [EXT3_XATTR_INDEX_TRUSTED] = &ext3_xattr_trusted_handler, -#ifdef CONFIG_EXT3_FS_SECURITY - [EXT3_XATTR_INDEX_SECURITY] = &ext3_xattr_security_handler, + [EXT4_XATTR_INDEX_TRUSTED] = &ext4_xattr_trusted_handler, +#ifdef CONFIG_EXT4DEV_FS_SECURITY + [EXT4_XATTR_INDEX_SECURITY] = &ext4_xattr_security_handler, #endif }; -struct xattr_handler *ext3_xattr_handlers[] = { - &ext3_xattr_user_handler, - &ext3_xattr_trusted_handler, -#ifdef CONFIG_EXT3_FS_POSIX_ACL - &ext3_xattr_acl_access_handler, - &ext3_xattr_acl_default_handler, +struct xattr_handler *ext4_xattr_handlers[] = { + &ext4_xattr_user_handler, + &ext4_xattr_trusted_handler, +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL + &ext4_xattr_acl_access_handler, + &ext4_xattr_acl_default_handler, #endif -#ifdef CONFIG_EXT3_FS_SECURITY - &ext3_xattr_security_handler, +#ifdef CONFIG_EXT4DEV_FS_SECURITY + &ext4_xattr_security_handler, #endif NULL }; static inline struct xattr_handler * -ext3_xattr_handler(int name_index) +ext4_xattr_handler(int name_index) { struct xattr_handler *handler = NULL; - if 
(name_index > 0 && name_index < ARRAY_SIZE(ext3_xattr_handler_map)) - handler = ext3_xattr_handler_map[name_index]; + if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map)) + handler = ext4_xattr_handler_map[name_index]; return handler; } @@ -143,16 +143,16 @@ ext3_xattr_handler(int name_index) * dentry->d_inode->i_mutex: don't care */ ssize_t -ext3_listxattr(struct dentry *dentry, char *buffer, size_t size) +ext4_listxattr(struct dentry *dentry, char *buffer, size_t size) { - return ext3_xattr_list(dentry->d_inode, buffer, size); + return ext4_xattr_list(dentry->d_inode, buffer, size); } static int -ext3_xattr_check_names(struct ext3_xattr_entry *entry, void *end) +ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end) { while (!IS_LAST_ENTRY(entry)) { - struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(entry); + struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry); if ((void *)next >= end) return -EIO; entry = next; @@ -161,19 +161,19 @@ ext3_xattr_check_names(struct ext3_xattr_entry *entry, void *end) } static inline int -ext3_xattr_check_block(struct buffer_head *bh) +ext4_xattr_check_block(struct buffer_head *bh) { int error; - if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || + if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) return -EIO; - error = ext3_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); + error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); return error; } static inline int -ext3_xattr_check_entry(struct ext3_xattr_entry *entry, size_t size) +ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size) { size_t value_size = le32_to_cpu(entry->e_value_size); @@ -184,10 +184,10 @@ ext3_xattr_check_entry(struct ext3_xattr_entry *entry, size_t size) } static int -ext3_xattr_find_entry(struct ext3_xattr_entry **pentry, int name_index, +ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index, const char *name, size_t size, int sorted) { - struct ext3_xattr_entry *entry; + struct ext4_xattr_entry *entry; size_t name_len; int cmp = 1; @@ -195,7 +195,7 @@ ext3_xattr_find_entry(struct ext3_xattr_entry **pentry, int name_index, return -EINVAL; name_len = strlen(name); entry = *pentry; - for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) { + for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { cmp = name_index - entry->e_name_index; if (!cmp) cmp = name_len - entry->e_name_len; @@ -205,17 +205,17 @@ ext3_xattr_find_entry(struct ext3_xattr_entry **pentry, int name_index, break; } *pentry = entry; - if (!cmp && ext3_xattr_check_entry(entry, size)) + if (!cmp && ext4_xattr_check_entry(entry, size)) return -EIO; return cmp ? 
-ENODATA : 0; } static int -ext3_xattr_block_get(struct inode *inode, int name_index, const char *name, +ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, void *buffer, size_t buffer_size) { struct buffer_head *bh = NULL; - struct ext3_xattr_entry *entry; + struct ext4_xattr_entry *entry; size_t size; int error; @@ -223,24 +223,24 @@ ext3_xattr_block_get(struct inode *inode, int name_index, const char *name, name_index, name, buffer, (long)buffer_size); error = -ENODATA; - if (!EXT3_I(inode)->i_file_acl) + if (!EXT4_I(inode)->i_file_acl) goto cleanup; - ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl); - bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); + ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl); + bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); if (!bh) goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); - if (ext3_xattr_check_block(bh)) { -bad_block: ext3_error(inode->i_sb, __FUNCTION__, + if (ext4_xattr_check_block(bh)) { +bad_block: ext4_error(inode->i_sb, __FUNCTION__, "inode %lu: bad block "E3FSBLK, inode->i_ino, - EXT3_I(inode)->i_file_acl); + EXT4_I(inode)->i_file_acl); error = -EIO; goto cleanup; } - ext3_xattr_cache_insert(bh); + ext4_xattr_cache_insert(bh); entry = BFIRST(bh); - error = ext3_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); + error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1); if (error == -EIO) goto bad_block; if (error) @@ -261,30 +261,30 @@ cleanup: } static int -ext3_xattr_ibody_get(struct inode *inode, int name_index, const char *name, +ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name, void *buffer, size_t buffer_size) { - struct ext3_xattr_ibody_header *header; - struct ext3_xattr_entry *entry; - struct ext3_inode *raw_inode; - struct ext3_iloc iloc; + struct ext4_xattr_ibody_header *header; + struct ext4_xattr_entry *entry; + struct ext4_inode *raw_inode; + struct ext4_iloc iloc; size_t size; void *end; int error; - if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR)) + if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) return -ENODATA; - error = ext3_get_inode_loc(inode, &iloc); + error = ext4_get_inode_loc(inode, &iloc); if (error) return error; - raw_inode = ext3_raw_inode(&iloc); + raw_inode = ext4_raw_inode(&iloc); header = IHDR(inode, raw_inode); entry = IFIRST(header); - end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; - error = ext3_xattr_check_names(entry, end); + end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; + error = ext4_xattr_check_names(entry, end); if (error) goto cleanup; - error = ext3_xattr_find_entry(&entry, name_index, name, + error = ext4_xattr_find_entry(&entry, name_index, name, end - (void *)entry, 0); if (error) goto cleanup; @@ -304,7 +304,7 @@ cleanup: } /* - * ext3_xattr_get() + * ext4_xattr_get() * * Copy an extended attribute into the buffer * provided, or compute the buffer size required. @@ -314,30 +314,30 @@ cleanup: * used / required on success. 
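
The "compute the buffer size required" convention documented above is the same two-call dance user space does with getxattr(2): ask for the size with an empty buffer, allocate, then fetch. A runnable analog (the path and attribute name are invented for the demo):

#include <stdio.h>
#include <stdlib.h>
#include <sys/xattr.h>

int main(void)
{
	const char *path = "somefile";
	const char *name = "user.comment";

	ssize_t len = getxattr(path, name, NULL, 0);  /* phase 1: size only */
	if (len < 0) {
		perror("getxattr");
		return 1;
	}
	char *buf = malloc(len + 1);
	if (!buf)
		return 1;
	len = getxattr(path, name, buf, len);         /* phase 2: the value */
	if (len >= 0)
		printf("%.*s\n", (int)len, buf);
	free(buf);
	return 0;
}
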
*/ int -ext3_xattr_get(struct inode *inode, int name_index, const char *name, +ext4_xattr_get(struct inode *inode, int name_index, const char *name, void *buffer, size_t buffer_size) { int error; - down_read(&EXT3_I(inode)->xattr_sem); - error = ext3_xattr_ibody_get(inode, name_index, name, buffer, + down_read(&EXT4_I(inode)->xattr_sem); + error = ext4_xattr_ibody_get(inode, name_index, name, buffer, buffer_size); if (error == -ENODATA) - error = ext3_xattr_block_get(inode, name_index, name, buffer, + error = ext4_xattr_block_get(inode, name_index, name, buffer, buffer_size); - up_read(&EXT3_I(inode)->xattr_sem); + up_read(&EXT4_I(inode)->xattr_sem); return error; } static int -ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry, +ext4_xattr_list_entries(struct inode *inode, struct ext4_xattr_entry *entry, char *buffer, size_t buffer_size) { size_t rest = buffer_size; - for (; !IS_LAST_ENTRY(entry); entry = EXT3_XATTR_NEXT(entry)) { + for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) { struct xattr_handler *handler = - ext3_xattr_handler(entry->e_name_index); + ext4_xattr_handler(entry->e_name_index); if (handler) { size_t size = handler->list(inode, buffer, rest, @@ -355,7 +355,7 @@ ext3_xattr_list_entries(struct inode *inode, struct ext3_xattr_entry *entry, } static int -ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) +ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) { struct buffer_head *bh = NULL; int error; @@ -364,24 +364,24 @@ ext3_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) buffer, (long)buffer_size); error = 0; - if (!EXT3_I(inode)->i_file_acl) + if (!EXT4_I(inode)->i_file_acl) goto cleanup; - ea_idebug(inode, "reading block %u", EXT3_I(inode)->i_file_acl); - bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); + ea_idebug(inode, "reading block %u", EXT4_I(inode)->i_file_acl); + bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); error = -EIO; if (!bh) goto cleanup; ea_bdebug(bh, "b_count=%d, refcount=%d", atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); - if (ext3_xattr_check_block(bh)) { - ext3_error(inode->i_sb, __FUNCTION__, + if (ext4_xattr_check_block(bh)) { + ext4_error(inode->i_sb, __FUNCTION__, "inode %lu: bad block "E3FSBLK, inode->i_ino, - EXT3_I(inode)->i_file_acl); + EXT4_I(inode)->i_file_acl); error = -EIO; goto cleanup; } - ext3_xattr_cache_insert(bh); - error = ext3_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); + ext4_xattr_cache_insert(bh); + error = ext4_xattr_list_entries(inode, BFIRST(bh), buffer, buffer_size); cleanup: brelse(bh); @@ -390,26 +390,26 @@ cleanup: } static int -ext3_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) +ext4_xattr_ibody_list(struct inode *inode, char *buffer, size_t buffer_size) { - struct ext3_xattr_ibody_header *header; - struct ext3_inode *raw_inode; - struct ext3_iloc iloc; + struct ext4_xattr_ibody_header *header; + struct ext4_inode *raw_inode; + struct ext4_iloc iloc; void *end; int error; - if (!(EXT3_I(inode)->i_state & EXT3_STATE_XATTR)) + if (!(EXT4_I(inode)->i_state & EXT4_STATE_XATTR)) return 0; - error = ext3_get_inode_loc(inode, &iloc); + error = ext4_get_inode_loc(inode, &iloc); if (error) return error; - raw_inode = ext3_raw_inode(&iloc); + raw_inode = ext4_raw_inode(&iloc); header = IHDR(inode, raw_inode); - end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; - error = ext3_xattr_check_names(IFIRST(header), end); + end = (void 
*)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; + error = ext4_xattr_check_names(IFIRST(header), end); if (error) goto cleanup; - error = ext3_xattr_list_entries(inode, IFIRST(header), + error = ext4_xattr_list_entries(inode, IFIRST(header), buffer, buffer_size); cleanup: @@ -418,7 +418,7 @@ cleanup: } /* - * ext3_xattr_list() + * ext4_xattr_list() * * Copy a list of attribute names into the buffer * provided, or compute the buffer size required. @@ -428,12 +428,12 @@ cleanup: * used / required on success. */ int -ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) +ext4_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) { int i_error, b_error; - down_read(&EXT3_I(inode)->xattr_sem); - i_error = ext3_xattr_ibody_list(inode, buffer, buffer_size); + down_read(&EXT4_I(inode)->xattr_sem); + i_error = ext4_xattr_ibody_list(inode, buffer, buffer_size); if (i_error < 0) { b_error = 0; } else { @@ -441,30 +441,30 @@ ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size) buffer += i_error; buffer_size -= i_error; } - b_error = ext3_xattr_block_list(inode, buffer, buffer_size); + b_error = ext4_xattr_block_list(inode, buffer, buffer_size); if (b_error < 0) i_error = 0; } - up_read(&EXT3_I(inode)->xattr_sem); + up_read(&EXT4_I(inode)->xattr_sem); return i_error + b_error; } /* - * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is + * If the EXT4_FEATURE_COMPAT_EXT_ATTR feature of this file system is * not set, set it. */ -static void ext3_xattr_update_super_block(handle_t *handle, +static void ext4_xattr_update_super_block(handle_t *handle, struct super_block *sb) { - if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR)) + if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR)) return; lock_super(sb); - if (ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh) == 0) { - EXT3_SB(sb)->s_es->s_feature_compat |= - cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR); + if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { + EXT4_SB(sb)->s_es->s_feature_compat |= + cpu_to_le32(EXT4_FEATURE_COMPAT_EXT_ATTR); sb->s_dirt = 1; - ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); + ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); } unlock_super(sb); } @@ -474,25 +474,25 @@ static void ext3_xattr_update_super_block(handle_t *handle, * it; otherwise free the block. 
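
A standalone sketch of the refcount policy described above, plain C with none of the real ext4_xattr_release_block's journaling or buffer locking: an EA block shared by several inodes is freed only when the last reference goes, otherwise the on-disk count is just decremented.

#include <stdio.h>

struct ea_block { unsigned int refcount; };

/* Returns 1 when the caller should free the block itself,
 * 0 when a shared reference was merely dropped. */
static int ea_block_put(struct ea_block *b)
{
	if (b->refcount == 1)
		return 1;	/* last user: free the block */
	b->refcount--;		/* still shared: drop one reference */
	return 0;
}

int main(void)
{
	struct ea_block b = { .refcount = 2 };
	int first = ea_block_put(&b);
	int second = ea_block_put(&b);
	printf("%d %d\n", first, second);	/* 0 1 */
	return 0;
}
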
*/ static void -ext3_xattr_release_block(handle_t *handle, struct inode *inode, +ext4_xattr_release_block(handle_t *handle, struct inode *inode, struct buffer_head *bh) { struct mb_cache_entry *ce = NULL; - ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_bdev, bh->b_blocknr); + ce = mb_cache_entry_get(ext4_xattr_cache, bh->b_bdev, bh->b_blocknr); if (BHDR(bh)->h_refcount == cpu_to_le32(1)) { ea_bdebug(bh, "refcount now=0; freeing"); if (ce) mb_cache_entry_free(ce); - ext3_free_blocks(handle, inode, bh->b_blocknr, 1); + ext4_free_blocks(handle, inode, bh->b_blocknr, 1); get_bh(bh); - ext3_forget(handle, 1, inode, bh, bh->b_blocknr); + ext4_forget(handle, 1, inode, bh, bh->b_blocknr); } else { - if (ext3_journal_get_write_access(handle, bh) == 0) { + if (ext4_journal_get_write_access(handle, bh) == 0) { lock_buffer(bh); BHDR(bh)->h_refcount = cpu_to_le32( le32_to_cpu(BHDR(bh)->h_refcount) - 1); - ext3_journal_dirty_metadata(handle, bh); + ext4_journal_dirty_metadata(handle, bh); if (IS_SYNC(inode)) handle->h_sync = 1; DQUOT_FREE_BLOCK(inode, 1); @@ -505,30 +505,30 @@ ext3_xattr_release_block(handle_t *handle, struct inode *inode, } } -struct ext3_xattr_info { +struct ext4_xattr_info { int name_index; const char *name; const void *value; size_t value_len; }; -struct ext3_xattr_search { - struct ext3_xattr_entry *first; +struct ext4_xattr_search { + struct ext4_xattr_entry *first; void *base; void *end; - struct ext3_xattr_entry *here; + struct ext4_xattr_entry *here; int not_found; }; static int -ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s) +ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s) { - struct ext3_xattr_entry *last; + struct ext4_xattr_entry *last; size_t free, min_offs = s->end - s->base, name_len = strlen(i->name); /* Compute min_offs and last. */ last = s->first; - for (; !IS_LAST_ENTRY(last); last = EXT3_XATTR_NEXT(last)) { + for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) { if (!last->e_value_block && last->e_value_size) { size_t offs = le16_to_cpu(last->e_value_offs); if (offs < min_offs) @@ -539,20 +539,20 @@ ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s) if (!s->not_found) { if (!s->here->e_value_block && s->here->e_value_size) { size_t size = le32_to_cpu(s->here->e_value_size); - free += EXT3_XATTR_SIZE(size); + free += EXT4_XATTR_SIZE(size); } - free += EXT3_XATTR_LEN(name_len); + free += EXT4_XATTR_LEN(name_len); } if (i->value) { - if (free < EXT3_XATTR_SIZE(i->value_len) || - free < EXT3_XATTR_LEN(name_len) + - EXT3_XATTR_SIZE(i->value_len)) + if (free < EXT4_XATTR_SIZE(i->value_len) || + free < EXT4_XATTR_LEN(name_len) + + EXT4_XATTR_SIZE(i->value_len)) return -ENOSPC; } if (i->value && s->not_found) { /* Insert the new name. */ - size_t size = EXT3_XATTR_LEN(name_len); + size_t size = EXT4_XATTR_LEN(name_len); size_t rest = (void *)last - (void *)s->here + sizeof(__u32); memmove((void *)s->here + size, s->here, rest); memset(s->here, 0, size); @@ -564,16 +564,16 @@ ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s) void *first_val = s->base + min_offs; size_t offs = le16_to_cpu(s->here->e_value_offs); void *val = s->base + offs; - size_t size = EXT3_XATTR_SIZE( + size_t size = EXT4_XATTR_SIZE( le32_to_cpu(s->here->e_value_size)); - if (i->value && size == EXT3_XATTR_SIZE(i->value_len)) { + if (i->value && size == EXT4_XATTR_SIZE(i->value_len)) { /* The old and the new value have the same size. Just replace. 
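
The in-place replace above is only taken when the padded sizes match: EXT4_XATTR_SIZE rounds a value length up to a 4-byte boundary, and the pad bytes are zeroed so stale data never reaches disk. The macro shapes below follow the ext3/ext4 headers; the loop is just a worked table of the rounding:

#include <stdio.h>

#define XATTR_PAD   4
#define XATTR_ROUND (XATTR_PAD - 1)
#define XATTR_SIZE(len) (((len) + XATTR_ROUND) & ~XATTR_ROUND)

int main(void)
{
	for (int len = 1; len <= 8; len++)
		printf("value_len %d -> padded size %d\n", len, XATTR_SIZE(len));
	return 0;
}
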
*/ s->here->e_value_size = cpu_to_le32(i->value_len); - memset(val + size - EXT3_XATTR_PAD, 0, - EXT3_XATTR_PAD); /* Clear pad bytes. */ + memset(val + size - EXT4_XATTR_PAD, 0, + EXT4_XATTR_PAD); /* Clear pad bytes. */ memcpy(val, i->value, i->value_len); return 0; } @@ -593,12 +593,12 @@ ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s) last->e_value_size && o < offs) last->e_value_offs = cpu_to_le16(o + size); - last = EXT3_XATTR_NEXT(last); + last = EXT4_XATTR_NEXT(last); } } if (!i->value) { /* Remove the old name. */ - size_t size = EXT3_XATTR_LEN(name_len); + size_t size = EXT4_XATTR_LEN(name_len); last = ENTRY((void *)last - size); memmove(s->here, (void *)s->here + size, (void *)last - (void *)s->here + sizeof(__u32)); @@ -610,25 +610,25 @@ ext3_xattr_set_entry(struct ext3_xattr_info *i, struct ext3_xattr_search *s) /* Insert the new value. */ s->here->e_value_size = cpu_to_le32(i->value_len); if (i->value_len) { - size_t size = EXT3_XATTR_SIZE(i->value_len); + size_t size = EXT4_XATTR_SIZE(i->value_len); void *val = s->base + min_offs - size; s->here->e_value_offs = cpu_to_le16(min_offs - size); - memset(val + size - EXT3_XATTR_PAD, 0, - EXT3_XATTR_PAD); /* Clear the pad bytes. */ + memset(val + size - EXT4_XATTR_PAD, 0, + EXT4_XATTR_PAD); /* Clear the pad bytes. */ memcpy(val, i->value, i->value_len); } } return 0; } -struct ext3_xattr_block_find { - struct ext3_xattr_search s; +struct ext4_xattr_block_find { + struct ext4_xattr_search s; struct buffer_head *bh; }; static int -ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i, - struct ext3_xattr_block_find *bs) +ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, + struct ext4_xattr_block_find *bs) { struct super_block *sb = inode->i_sb; int error; @@ -636,19 +636,19 @@ ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i, ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld", i->name_index, i->name, i->value, (long)i->value_len); - if (EXT3_I(inode)->i_file_acl) { + if (EXT4_I(inode)->i_file_acl) { /* The inode already has an extended attribute block. 
*/ - bs->bh = sb_bread(sb, EXT3_I(inode)->i_file_acl); + bs->bh = sb_bread(sb, EXT4_I(inode)->i_file_acl); error = -EIO; if (!bs->bh) goto cleanup; ea_bdebug(bs->bh, "b_count=%d, refcount=%d", atomic_read(&(bs->bh->b_count)), le32_to_cpu(BHDR(bs->bh)->h_refcount)); - if (ext3_xattr_check_block(bs->bh)) { - ext3_error(sb, __FUNCTION__, + if (ext4_xattr_check_block(bs->bh)) { + ext4_error(sb, __FUNCTION__, "inode %lu: bad block "E3FSBLK, inode->i_ino, - EXT3_I(inode)->i_file_acl); + EXT4_I(inode)->i_file_acl); error = -EIO; goto cleanup; } @@ -657,7 +657,7 @@ ext3_xattr_block_find(struct inode *inode, struct ext3_xattr_info *i, bs->s.first = BFIRST(bs->bh); bs->s.end = bs->bh->b_data + bs->bh->b_size; bs->s.here = bs->s.first; - error = ext3_xattr_find_entry(&bs->s.here, i->name_index, + error = ext4_xattr_find_entry(&bs->s.here, i->name_index, i->name, bs->bh->b_size, 1); if (error && error != -ENODATA) goto cleanup; @@ -670,22 +670,22 @@ cleanup: } static int -ext3_xattr_block_set(handle_t *handle, struct inode *inode, - struct ext3_xattr_info *i, - struct ext3_xattr_block_find *bs) +ext4_xattr_block_set(handle_t *handle, struct inode *inode, + struct ext4_xattr_info *i, + struct ext4_xattr_block_find *bs) { struct super_block *sb = inode->i_sb; struct buffer_head *new_bh = NULL; - struct ext3_xattr_search *s = &bs->s; + struct ext4_xattr_search *s = &bs->s; struct mb_cache_entry *ce = NULL; int error; -#define header(x) ((struct ext3_xattr_header *)(x)) +#define header(x) ((struct ext4_xattr_header *)(x)) if (i->value && i->value_len > sb->s_blocksize) return -ENOSPC; if (s->base) { - ce = mb_cache_entry_get(ext3_xattr_cache, bs->bh->b_bdev, + ce = mb_cache_entry_get(ext4_xattr_cache, bs->bh->b_bdev, bs->bh->b_blocknr); if (header(s->base)->h_refcount == cpu_to_le32(1)) { if (ce) { @@ -693,22 +693,22 @@ ext3_xattr_block_set(handle_t *handle, struct inode *inode, ce = NULL; } ea_bdebug(bs->bh, "modifying in-place"); - error = ext3_journal_get_write_access(handle, bs->bh); + error = ext4_journal_get_write_access(handle, bs->bh); if (error) goto cleanup; lock_buffer(bs->bh); - error = ext3_xattr_set_entry(i, s); + error = ext4_xattr_set_entry(i, s); if (!error) { if (!IS_LAST_ENTRY(s->first)) - ext3_xattr_rehash(header(s->base), + ext4_xattr_rehash(header(s->base), s->here); - ext3_xattr_cache_insert(bs->bh); + ext4_xattr_cache_insert(bs->bh); } unlock_buffer(bs->bh); if (error == -EIO) goto bad_block; if (!error) - error = ext3_journal_dirty_metadata(handle, + error = ext4_journal_dirty_metadata(handle, bs->bh); if (error) goto cleanup; @@ -739,7 +739,7 @@ ext3_xattr_block_set(handle_t *handle, struct inode *inode, if (s->base == NULL) goto cleanup; memset(s->base, 0, sb->s_blocksize); - header(s->base)->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); + header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); header(s->base)->h_blocks = cpu_to_le32(1); header(s->base)->h_refcount = cpu_to_le32(1); s->first = ENTRY(header(s->base)+1); @@ -747,17 +747,17 @@ ext3_xattr_block_set(handle_t *handle, struct inode *inode, s->end = s->base + sb->s_blocksize; } - error = ext3_xattr_set_entry(i, s); + error = ext4_xattr_set_entry(i, s); if (error == -EIO) goto bad_block; if (error) goto cleanup; if (!IS_LAST_ENTRY(s->first)) - ext3_xattr_rehash(header(s->base), s->here); + ext4_xattr_rehash(header(s->base), s->here); inserted: if (!IS_LAST_ENTRY(s->first)) { - new_bh = ext3_xattr_cache_find(inode, header(s->base), &ce); + new_bh = ext4_xattr_cache_find(inode, header(s->base), &ce); if (new_bh) { /* We found 
an identical block in the cache. */ if (new_bh == bs->bh) @@ -768,7 +768,7 @@ inserted: error = -EDQUOT; if (DQUOT_ALLOC_BLOCK(inode, 1)) goto cleanup; - error = ext3_journal_get_write_access(handle, + error = ext4_journal_get_write_access(handle, new_bh); if (error) goto cleanup_dquot; @@ -778,7 +778,7 @@ inserted: ea_bdebug(new_bh, "reusing; refcount now=%d", le32_to_cpu(BHDR(new_bh)->h_refcount)); unlock_buffer(new_bh); - error = ext3_journal_dirty_metadata(handle, + error = ext4_journal_dirty_metadata(handle, new_bh); if (error) goto cleanup_dquot; @@ -792,11 +792,11 @@ inserted: get_bh(new_bh); } else { /* We need to allocate a new block */ - ext3_fsblk_t goal = le32_to_cpu( - EXT3_SB(sb)->s_es->s_first_data_block) + - (ext3_fsblk_t)EXT3_I(inode)->i_block_group * - EXT3_BLOCKS_PER_GROUP(sb); - ext3_fsblk_t block = ext3_new_block(handle, inode, + ext4_fsblk_t goal = le32_to_cpu( + EXT4_SB(sb)->s_es->s_first_data_block) + + (ext4_fsblk_t)EXT4_I(inode)->i_block_group * + EXT4_BLOCKS_PER_GROUP(sb); + ext4_fsblk_t block = ext4_new_block(handle, inode, goal, &error); if (error) goto cleanup; @@ -805,12 +805,12 @@ inserted: new_bh = sb_getblk(sb, block); if (!new_bh) { getblk_failed: - ext3_free_blocks(handle, inode, block, 1); + ext4_free_blocks(handle, inode, block, 1); error = -EIO; goto cleanup; } lock_buffer(new_bh); - error = ext3_journal_get_create_access(handle, new_bh); + error = ext4_journal_get_create_access(handle, new_bh); if (error) { unlock_buffer(new_bh); goto getblk_failed; @@ -818,19 +818,19 @@ getblk_failed: memcpy(new_bh->b_data, s->base, new_bh->b_size); set_buffer_uptodate(new_bh); unlock_buffer(new_bh); - ext3_xattr_cache_insert(new_bh); - error = ext3_journal_dirty_metadata(handle, new_bh); + ext4_xattr_cache_insert(new_bh); + error = ext4_journal_dirty_metadata(handle, new_bh); if (error) goto cleanup; } } /* Update the inode. */ - EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; + EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0; /* Drop the previous xattr block. 
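 * (A short note on what that involves, summarized from the release
 * path, which is outside this hunk: ext4_xattr_release_block()
 * decrements h_refcount, and frees the block and its mb_cache entry
 * once the count reaches zero.)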
*/ if (bs->bh && bs->bh != new_bh) - ext3_xattr_release_block(handle, inode, bs->bh); + ext4_xattr_release_block(handle, inode, bs->bh); error = 0; cleanup: @@ -847,40 +847,40 @@ cleanup_dquot: goto cleanup; bad_block: - ext3_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __FUNCTION__, "inode %lu: bad block "E3FSBLK, inode->i_ino, - EXT3_I(inode)->i_file_acl); + EXT4_I(inode)->i_file_acl); goto cleanup; #undef header } -struct ext3_xattr_ibody_find { - struct ext3_xattr_search s; - struct ext3_iloc iloc; +struct ext4_xattr_ibody_find { + struct ext4_xattr_search s; + struct ext4_iloc iloc; }; static int -ext3_xattr_ibody_find(struct inode *inode, struct ext3_xattr_info *i, - struct ext3_xattr_ibody_find *is) +ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i, + struct ext4_xattr_ibody_find *is) { - struct ext3_xattr_ibody_header *header; - struct ext3_inode *raw_inode; + struct ext4_xattr_ibody_header *header; + struct ext4_inode *raw_inode; int error; - if (EXT3_I(inode)->i_extra_isize == 0) + if (EXT4_I(inode)->i_extra_isize == 0) return 0; - raw_inode = ext3_raw_inode(&is->iloc); + raw_inode = ext4_raw_inode(&is->iloc); header = IHDR(inode, raw_inode); is->s.base = is->s.first = IFIRST(header); is->s.here = is->s.first; - is->s.end = (void *)raw_inode + EXT3_SB(inode->i_sb)->s_inode_size; - if (EXT3_I(inode)->i_state & EXT3_STATE_XATTR) { - error = ext3_xattr_check_names(IFIRST(header), is->s.end); + is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; + if (EXT4_I(inode)->i_state & EXT4_STATE_XATTR) { + error = ext4_xattr_check_names(IFIRST(header), is->s.end); if (error) return error; /* Find the named attribute. */ - error = ext3_xattr_find_entry(&is->s.here, i->name_index, + error = ext4_xattr_find_entry(&is->s.here, i->name_index, i->name, is->s.end - (void *)is->s.base, 0); if (error && error != -ENODATA) @@ -891,32 +891,32 @@ ext3_xattr_ibody_find(struct inode *inode, struct ext3_xattr_info *i, } static int -ext3_xattr_ibody_set(handle_t *handle, struct inode *inode, - struct ext3_xattr_info *i, - struct ext3_xattr_ibody_find *is) +ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, + struct ext4_xattr_info *i, + struct ext4_xattr_ibody_find *is) { - struct ext3_xattr_ibody_header *header; - struct ext3_xattr_search *s = &is->s; + struct ext4_xattr_ibody_header *header; + struct ext4_xattr_search *s = &is->s; int error; - if (EXT3_I(inode)->i_extra_isize == 0) + if (EXT4_I(inode)->i_extra_isize == 0) return -ENOSPC; - error = ext3_xattr_set_entry(i, s); + error = ext4_xattr_set_entry(i, s); if (error) return error; - header = IHDR(inode, ext3_raw_inode(&is->iloc)); + header = IHDR(inode, ext4_raw_inode(&is->iloc)); if (!IS_LAST_ENTRY(s->first)) { - header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); - EXT3_I(inode)->i_state |= EXT3_STATE_XATTR; + header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); + EXT4_I(inode)->i_state |= EXT4_STATE_XATTR; } else { header->h_magic = cpu_to_le32(0); - EXT3_I(inode)->i_state &= ~EXT3_STATE_XATTR; + EXT4_I(inode)->i_state &= ~EXT4_STATE_XATTR; } return 0; } /* - * ext3_xattr_set_handle() + * ext4_xattr_set_handle() * * Create, replace or remove an extended attribute for this inode. Buffer * is NULL to remove an existing extended attribute, and non-NULL to @@ -928,21 +928,21 @@ ext3_xattr_ibody_set(handle_t *handle, struct inode *inode, * Returns 0, or a negative error number on failure. 
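 *
 * A minimal usage sketch (hypothetical caller, for illustration only;
 * the real entry points are ext4_xattr_set() below and the per-prefix
 * handlers in xattr_user.c and friends):
 *
 *	handle = ext4_journal_start(inode,
 *			EXT4_DATA_TRANS_BLOCKS(inode->i_sb));
 *	if (!IS_ERR(handle)) {
 *		err = ext4_xattr_set_handle(handle, inode,
 *				EXT4_XATTR_INDEX_USER, "foo",
 *				"bar", 3, XATTR_CREATE);
 *		err2 = ext4_journal_stop(handle);
 *	}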
*/ int -ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, +ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, const char *name, const void *value, size_t value_len, int flags) { - struct ext3_xattr_info i = { + struct ext4_xattr_info i = { .name_index = name_index, .name = name, .value = value, .value_len = value_len, }; - struct ext3_xattr_ibody_find is = { + struct ext4_xattr_ibody_find is = { .s = { .not_found = -ENODATA, }, }; - struct ext3_xattr_block_find bs = { + struct ext4_xattr_block_find bs = { .s = { .not_found = -ENODATA, }, }; int error; @@ -951,22 +951,22 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, return -EINVAL; if (strlen(name) > 255) return -ERANGE; - down_write(&EXT3_I(inode)->xattr_sem); - error = ext3_get_inode_loc(inode, &is.iloc); + down_write(&EXT4_I(inode)->xattr_sem); + error = ext4_get_inode_loc(inode, &is.iloc); if (error) goto cleanup; - if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) { - struct ext3_inode *raw_inode = ext3_raw_inode(&is.iloc); - memset(raw_inode, 0, EXT3_SB(inode->i_sb)->s_inode_size); - EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW; + if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) { + struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); + memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); + EXT4_I(inode)->i_state &= ~EXT4_STATE_NEW; } - error = ext3_xattr_ibody_find(inode, &i, &is); + error = ext4_xattr_ibody_find(inode, &i, &is); if (error) goto cleanup; if (is.s.not_found) - error = ext3_xattr_block_find(inode, &i, &bs); + error = ext4_xattr_block_find(inode, &i, &bs); if (error) goto cleanup; if (is.s.not_found && bs.s.not_found) { @@ -981,36 +981,36 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, if (flags & XATTR_CREATE) goto cleanup; } - error = ext3_journal_get_write_access(handle, is.iloc.bh); + error = ext4_journal_get_write_access(handle, is.iloc.bh); if (error) goto cleanup; if (!value) { if (!is.s.not_found) - error = ext3_xattr_ibody_set(handle, inode, &i, &is); + error = ext4_xattr_ibody_set(handle, inode, &i, &is); else if (!bs.s.not_found) - error = ext3_xattr_block_set(handle, inode, &i, &bs); + error = ext4_xattr_block_set(handle, inode, &i, &bs); } else { - error = ext3_xattr_ibody_set(handle, inode, &i, &is); + error = ext4_xattr_ibody_set(handle, inode, &i, &is); if (!error && !bs.s.not_found) { i.value = NULL; - error = ext3_xattr_block_set(handle, inode, &i, &bs); + error = ext4_xattr_block_set(handle, inode, &i, &bs); } else if (error == -ENOSPC) { - error = ext3_xattr_block_set(handle, inode, &i, &bs); + error = ext4_xattr_block_set(handle, inode, &i, &bs); if (error) goto cleanup; if (!is.s.not_found) { i.value = NULL; - error = ext3_xattr_ibody_set(handle, inode, &i, + error = ext4_xattr_ibody_set(handle, inode, &i, &is); } } } if (!error) { - ext3_xattr_update_super_block(handle, inode->i_sb); + ext4_xattr_update_super_block(handle, inode->i_sb); inode->i_ctime = CURRENT_TIME_SEC; - error = ext3_mark_iloc_dirty(handle, inode, &is.iloc); + error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); /* - * The bh is consumed by ext3_mark_iloc_dirty, even with + * The bh is consumed by ext4_mark_iloc_dirty, even with * error != 0. 
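 * (Hence is.iloc.bh is cleared immediately below, so that the common
 * cleanup path does not brelse() it a second time.)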
*/ is.iloc.bh = NULL; @@ -1021,37 +1021,37 @@ ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, cleanup: brelse(is.iloc.bh); brelse(bs.bh); - up_write(&EXT3_I(inode)->xattr_sem); + up_write(&EXT4_I(inode)->xattr_sem); return error; } /* - * ext3_xattr_set() + * ext4_xattr_set() * - * Like ext3_xattr_set_handle, but start from an inode. This extended + * Like ext4_xattr_set_handle, but start from an inode. This extended * attribute modification is a filesystem transaction by itself. * * Returns 0, or a negative error number on failure. */ int -ext3_xattr_set(struct inode *inode, int name_index, const char *name, +ext4_xattr_set(struct inode *inode, int name_index, const char *name, const void *value, size_t value_len, int flags) { handle_t *handle; int error, retries = 0; retry: - handle = ext3_journal_start(inode, EXT3_DATA_TRANS_BLOCKS(inode->i_sb)); + handle = ext4_journal_start(inode, EXT4_DATA_TRANS_BLOCKS(inode->i_sb)); if (IS_ERR(handle)) { error = PTR_ERR(handle); } else { int error2; - error = ext3_xattr_set_handle(handle, inode, name_index, name, + error = ext4_xattr_set_handle(handle, inode, name_index, name, value, value_len, flags); - error2 = ext3_journal_stop(handle); + error2 = ext4_journal_stop(handle); if (error == -ENOSPC && - ext3_should_retry_alloc(inode->i_sb, &retries)) + ext4_should_retry_alloc(inode->i_sb, &retries)) goto retry; if (error == 0) error = error2; @@ -1061,53 +1061,53 @@ retry: } /* - * ext3_xattr_delete_inode() + * ext4_xattr_delete_inode() * * Free extended attribute resources associated with this inode. This * is called immediately before an inode is freed. We have exclusive * access to the inode. */ void -ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) +ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) { struct buffer_head *bh = NULL; - if (!EXT3_I(inode)->i_file_acl) + if (!EXT4_I(inode)->i_file_acl) goto cleanup; - bh = sb_bread(inode->i_sb, EXT3_I(inode)->i_file_acl); + bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); if (!bh) { - ext3_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __FUNCTION__, "inode %lu: block "E3FSBLK" read error", inode->i_ino, - EXT3_I(inode)->i_file_acl); + EXT4_I(inode)->i_file_acl); goto cleanup; } - if (BHDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) || + if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) { - ext3_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __FUNCTION__, "inode %lu: bad block "E3FSBLK, inode->i_ino, - EXT3_I(inode)->i_file_acl); + EXT4_I(inode)->i_file_acl); goto cleanup; } - ext3_xattr_release_block(handle, inode, bh); - EXT3_I(inode)->i_file_acl = 0; + ext4_xattr_release_block(handle, inode, bh); + EXT4_I(inode)->i_file_acl = 0; cleanup: brelse(bh); } /* - * ext3_xattr_put_super() + * ext4_xattr_put_super() * * This is called when a file system is unmounted. */ void -ext3_xattr_put_super(struct super_block *sb) +ext4_xattr_put_super(struct super_block *sb) { mb_cache_shrink(sb->s_bdev); } /* - * ext3_xattr_cache_insert() + * ext4_xattr_cache_insert() * * Create a new entry in the extended attribute cache, and insert * it unless such an entry is already in the cache. @@ -1115,13 +1115,13 @@ ext3_xattr_put_super(struct super_block *sb) * Returns 0, or a negative error number on failure. 
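 *
 * The entry is keyed by the block's h_hash and block number, so a later
 * ext4_xattr_cache_find() on an identical block can share it. A sketch
 * of the body that follows, with the error handling trimmed (the
 * mb_cache calls sit in the elided context of this hunk):
 *
 *	ce = mb_cache_entry_alloc(ext4_xattr_cache);
 *	mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, &hash);
 *	mb_cache_entry_release(ce);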
*/ static void -ext3_xattr_cache_insert(struct buffer_head *bh) +ext4_xattr_cache_insert(struct buffer_head *bh) { __u32 hash = le32_to_cpu(BHDR(bh)->h_hash); struct mb_cache_entry *ce; int error; - ce = mb_cache_entry_alloc(ext3_xattr_cache); + ce = mb_cache_entry_alloc(ext4_xattr_cache); if (!ce) { ea_bdebug(bh, "out of memory"); return; @@ -1140,7 +1140,7 @@ ext3_xattr_cache_insert(struct buffer_head *bh) } /* - * ext3_xattr_cmp() + * ext4_xattr_cmp() * * Compare two extended attribute blocks for equality. * @@ -1148,10 +1148,10 @@ ext3_xattr_cache_insert(struct buffer_head *bh) * a negative error number on errors. */ static int -ext3_xattr_cmp(struct ext3_xattr_header *header1, - struct ext3_xattr_header *header2) +ext4_xattr_cmp(struct ext4_xattr_header *header1, + struct ext4_xattr_header *header2) { - struct ext3_xattr_entry *entry1, *entry2; + struct ext4_xattr_entry *entry1, *entry2; entry1 = ENTRY(header1+1); entry2 = ENTRY(header2+1); @@ -1171,8 +1171,8 @@ ext3_xattr_cmp(struct ext3_xattr_header *header1, le32_to_cpu(entry1->e_value_size))) return 1; - entry1 = EXT3_XATTR_NEXT(entry1); - entry2 = EXT3_XATTR_NEXT(entry2); + entry1 = EXT4_XATTR_NEXT(entry1); + entry2 = EXT4_XATTR_NEXT(entry2); } if (!IS_LAST_ENTRY(entry2)) return 1; @@ -1180,7 +1180,7 @@ ext3_xattr_cmp(struct ext3_xattr_header *header1, } /* - * ext3_xattr_cache_find() + * ext4_xattr_cache_find() * * Find an identical extended attribute block. * @@ -1188,7 +1188,7 @@ ext3_xattr_cmp(struct ext3_xattr_header *header1, * not found or an error occurred. */ static struct buffer_head * -ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header, +ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header, struct mb_cache_entry **pce) { __u32 hash = le32_to_cpu(header->h_hash); @@ -1198,7 +1198,7 @@ ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header, return NULL; /* never share */ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash); again: - ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, + ce = mb_cache_entry_find_first(ext4_xattr_cache, 0, inode->i_sb->s_bdev, hash); while (ce) { struct buffer_head *bh; @@ -1210,16 +1210,16 @@ again: } bh = sb_bread(inode->i_sb, ce->e_block); if (!bh) { - ext3_error(inode->i_sb, __FUNCTION__, + ext4_error(inode->i_sb, __FUNCTION__, "inode %lu: block %lu read error", inode->i_ino, (unsigned long) ce->e_block); } else if (le32_to_cpu(BHDR(bh)->h_refcount) >= - EXT3_XATTR_REFCOUNT_MAX) { + EXT4_XATTR_REFCOUNT_MAX) { ea_idebug(inode, "block %lu refcount %d>=%d", (unsigned long) ce->e_block, le32_to_cpu(BHDR(bh)->h_refcount), - EXT3_XATTR_REFCOUNT_MAX); - } else if (ext3_xattr_cmp(header, BHDR(bh)) == 0) { + EXT4_XATTR_REFCOUNT_MAX); + } else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) { *pce = ce; return bh; } @@ -1233,12 +1233,12 @@ again: #define VALUE_HASH_SHIFT 16 /* - * ext3_xattr_hash_entry() + * ext4_xattr_hash_entry() * * Compute the hash of an extended attribute. 
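 *
 * The name bytes and the (32-bit padded) value words are folded in with
 * the same rotate-xor step, using NAME_HASH_SHIFT and VALUE_HASH_SHIFT
 * respectively (both defined alongside this function); for each input
 * unit x:
 *
 *	hash = (hash << SHIFT) ^
 *	       (hash >> (8*sizeof(hash) - SHIFT)) ^ x;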
*/ -static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header, - struct ext3_xattr_entry *entry) +static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header, + struct ext4_xattr_entry *entry) { __u32 hash = 0; char *name = entry->e_name; @@ -1254,7 +1254,7 @@ static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header, __le32 *value = (__le32 *)((char *)header + le16_to_cpu(entry->e_value_offs)); for (n = (le32_to_cpu(entry->e_value_size) + - EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) { + EXT4_XATTR_ROUND) >> EXT4_XATTR_PAD_BITS; n; n--) { hash = (hash << VALUE_HASH_SHIFT) ^ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^ le32_to_cpu(*value++); @@ -1269,17 +1269,17 @@ static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header, #define BLOCK_HASH_SHIFT 16 /* - * ext3_xattr_rehash() + * ext4_xattr_rehash() * * Re-compute the extended attribute hash value after an entry has changed. */ -static void ext3_xattr_rehash(struct ext3_xattr_header *header, - struct ext3_xattr_entry *entry) +static void ext4_xattr_rehash(struct ext4_xattr_header *header, + struct ext4_xattr_entry *entry) { - struct ext3_xattr_entry *here; + struct ext4_xattr_entry *here; __u32 hash = 0; - ext3_xattr_hash_entry(header, entry); + ext4_xattr_hash_entry(header, entry); here = ENTRY(header+1); while (!IS_LAST_ENTRY(here)) { if (!here->e_hash) { @@ -1290,7 +1290,7 @@ static void ext3_xattr_rehash(struct ext3_xattr_header *header, hash = (hash << BLOCK_HASH_SHIFT) ^ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^ le32_to_cpu(here->e_hash); - here = EXT3_XATTR_NEXT(here); + here = EXT4_XATTR_NEXT(here); } header->h_hash = cpu_to_le32(hash); } @@ -1298,20 +1298,20 @@ static void ext3_xattr_rehash(struct ext3_xattr_header *header, #undef BLOCK_HASH_SHIFT int __init -init_ext3_xattr(void) +init_ext4_xattr(void) { - ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL, + ext4_xattr_cache = mb_cache_create("ext4_xattr", NULL, sizeof(struct mb_cache_entry) + sizeof(((struct mb_cache_entry *) 0)->e_indexes[0]), 1, 6); - if (!ext3_xattr_cache) + if (!ext4_xattr_cache) return -ENOMEM; return 0; } void -exit_ext3_xattr(void) +exit_ext4_xattr(void) { - if (ext3_xattr_cache) - mb_cache_destroy(ext3_xattr_cache); - ext3_xattr_cache = NULL; + if (ext4_xattr_cache) + mb_cache_destroy(ext4_xattr_cache); + ext4_xattr_cache = NULL; } diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h index 6b1ae1c6182c..79432b35398f 100644 --- a/fs/ext4/xattr.h +++ b/fs/ext4/xattr.h @@ -1,7 +1,7 @@ /* - File: fs/ext3/xattr.h + File: fs/ext4/xattr.h - On-disk format of extended attributes for the ext3 filesystem. + On-disk format of extended attributes for the ext4 filesystem. 
(C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org> */ @@ -9,20 +9,20 @@ #include <linux/xattr.h> /* Magic value in attribute blocks */ -#define EXT3_XATTR_MAGIC 0xEA020000 +#define EXT4_XATTR_MAGIC 0xEA020000 /* Maximum number of references to one attribute block */ -#define EXT3_XATTR_REFCOUNT_MAX 1024 +#define EXT4_XATTR_REFCOUNT_MAX 1024 /* Name indexes */ -#define EXT3_XATTR_INDEX_USER 1 -#define EXT3_XATTR_INDEX_POSIX_ACL_ACCESS 2 -#define EXT3_XATTR_INDEX_POSIX_ACL_DEFAULT 3 -#define EXT3_XATTR_INDEX_TRUSTED 4 -#define EXT3_XATTR_INDEX_LUSTRE 5 -#define EXT3_XATTR_INDEX_SECURITY 6 - -struct ext3_xattr_header { +#define EXT4_XATTR_INDEX_USER 1 +#define EXT4_XATTR_INDEX_POSIX_ACL_ACCESS 2 +#define EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT 3 +#define EXT4_XATTR_INDEX_TRUSTED 4 +#define EXT4_XATTR_INDEX_LUSTRE 5 +#define EXT4_XATTR_INDEX_SECURITY 6 + +struct ext4_xattr_header { __le32 h_magic; /* magic number for identification */ __le32 h_refcount; /* reference count */ __le32 h_blocks; /* number of disk blocks used */ @@ -30,11 +30,11 @@ struct ext3_xattr_header { __u32 h_reserved[4]; /* zero right now */ }; -struct ext3_xattr_ibody_header { +struct ext4_xattr_ibody_header { __le32 h_magic; /* magic number for identification */ }; -struct ext3_xattr_entry { +struct ext4_xattr_entry { __u8 e_name_len; /* length of name */ __u8 e_name_index; /* attribute name index */ __le16 e_value_offs; /* offset in disk block of value */ @@ -44,100 +44,100 @@ struct ext3_xattr_entry { char e_name[0]; /* attribute name */ }; -#define EXT3_XATTR_PAD_BITS 2 -#define EXT3_XATTR_PAD (1<<EXT3_XATTR_PAD_BITS) -#define EXT3_XATTR_ROUND (EXT3_XATTR_PAD-1) -#define EXT3_XATTR_LEN(name_len) \ - (((name_len) + EXT3_XATTR_ROUND + \ - sizeof(struct ext3_xattr_entry)) & ~EXT3_XATTR_ROUND) -#define EXT3_XATTR_NEXT(entry) \ - ( (struct ext3_xattr_entry *)( \ - (char *)(entry) + EXT3_XATTR_LEN((entry)->e_name_len)) ) -#define EXT3_XATTR_SIZE(size) \ - (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND) +#define EXT4_XATTR_PAD_BITS 2 +#define EXT4_XATTR_PAD (1<<EXT4_XATTR_PAD_BITS) +#define EXT4_XATTR_ROUND (EXT4_XATTR_PAD-1) +#define EXT4_XATTR_LEN(name_len) \ + (((name_len) + EXT4_XATTR_ROUND + \ + sizeof(struct ext4_xattr_entry)) & ~EXT4_XATTR_ROUND) +#define EXT4_XATTR_NEXT(entry) \ + ( (struct ext4_xattr_entry *)( \ + (char *)(entry) + EXT4_XATTR_LEN((entry)->e_name_len)) ) +#define EXT4_XATTR_SIZE(size) \ + (((size) + EXT4_XATTR_ROUND) & ~EXT4_XATTR_ROUND) -# ifdef CONFIG_EXT3_FS_XATTR +# ifdef CONFIG_EXT4DEV_FS_XATTR -extern struct xattr_handler ext3_xattr_user_handler; -extern struct xattr_handler ext3_xattr_trusted_handler; -extern struct xattr_handler ext3_xattr_acl_access_handler; -extern struct xattr_handler ext3_xattr_acl_default_handler; -extern struct xattr_handler ext3_xattr_security_handler; +extern struct xattr_handler ext4_xattr_user_handler; +extern struct xattr_handler ext4_xattr_trusted_handler; +extern struct xattr_handler ext4_xattr_acl_access_handler; +extern struct xattr_handler ext4_xattr_acl_default_handler; +extern struct xattr_handler ext4_xattr_security_handler; -extern ssize_t ext3_listxattr(struct dentry *, char *, size_t); +extern ssize_t ext4_listxattr(struct dentry *, char *, size_t); -extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t); -extern int ext3_xattr_list(struct inode *, char *, size_t); -extern int ext3_xattr_set(struct inode *, int, const char *, const void *, size_t, int); -extern int ext3_xattr_set_handle(handle_t *, struct inode *, int, const 
char *, const void *, size_t, int); +extern int ext4_xattr_get(struct inode *, int, const char *, void *, size_t); +extern int ext4_xattr_list(struct inode *, char *, size_t); +extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_t, int); +extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int); -extern void ext3_xattr_delete_inode(handle_t *, struct inode *); -extern void ext3_xattr_put_super(struct super_block *); +extern void ext4_xattr_delete_inode(handle_t *, struct inode *); +extern void ext4_xattr_put_super(struct super_block *); -extern int init_ext3_xattr(void); -extern void exit_ext3_xattr(void); +extern int init_ext4_xattr(void); +extern void exit_ext4_xattr(void); -extern struct xattr_handler *ext3_xattr_handlers[]; +extern struct xattr_handler *ext4_xattr_handlers[]; -# else /* CONFIG_EXT3_FS_XATTR */ +# else /* CONFIG_EXT4DEV_FS_XATTR */ static inline int -ext3_xattr_get(struct inode *inode, int name_index, const char *name, +ext4_xattr_get(struct inode *inode, int name_index, const char *name, void *buffer, size_t size, int flags) { return -EOPNOTSUPP; } static inline int -ext3_xattr_list(struct inode *inode, void *buffer, size_t size) +ext4_xattr_list(struct inode *inode, void *buffer, size_t size) { return -EOPNOTSUPP; } static inline int -ext3_xattr_set(struct inode *inode, int name_index, const char *name, +ext4_xattr_set(struct inode *inode, int name_index, const char *name, const void *value, size_t size, int flags) { return -EOPNOTSUPP; } static inline int -ext3_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, +ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index, const char *name, const void *value, size_t size, int flags) { return -EOPNOTSUPP; } static inline void -ext3_xattr_delete_inode(handle_t *handle, struct inode *inode) +ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) { } static inline void -ext3_xattr_put_super(struct super_block *sb) +ext4_xattr_put_super(struct super_block *sb) { } static inline int -init_ext3_xattr(void) +init_ext4_xattr(void) { return 0; } static inline void -exit_ext3_xattr(void) +exit_ext4_xattr(void) { } -#define ext3_xattr_handlers NULL +#define ext4_xattr_handlers NULL -# endif /* CONFIG_EXT3_FS_XATTR */ +# endif /* CONFIG_EXT4DEV_FS_XATTR */ -#ifdef CONFIG_EXT3_FS_SECURITY -extern int ext3_init_security(handle_t *handle, struct inode *inode, +#ifdef CONFIG_EXT4DEV_FS_SECURITY +extern int ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir); #else -static inline int ext3_init_security(handle_t *handle, struct inode *inode, +static inline int ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir) { return 0; diff --git a/fs/ext4/xattr_security.c b/fs/ext4/xattr_security.c index b9c40c15647b..d84b1dabeb16 100644 --- a/fs/ext4/xattr_security.c +++ b/fs/ext4/xattr_security.c @@ -1,5 +1,5 @@ /* - * linux/fs/ext3/xattr_security.c + * linux/fs/ext4/xattr_security.c * Handler for storing security labels as extended attributes. 
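 *
 * ext4_init_security() below is intended to be called from the inode
 * creation path; roughly (a hypothetical, simplified caller):
 *
 *	inode = ext4_new_inode(handle, dir, mode);
 *	if (!IS_ERR(inode))
 *		err = ext4_init_security(handle, inode, dir);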
*/ @@ -7,13 +7,13 @@ #include <linux/string.h> #include <linux/fs.h> #include <linux/smp_lock.h> -#include <linux/ext3_jbd.h> -#include <linux/ext3_fs.h> +#include <linux/ext4_jbd.h> +#include <linux/ext4_fs.h> #include <linux/security.h> #include "xattr.h" static size_t -ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size, +ext4_xattr_security_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { const size_t prefix_len = sizeof(XATTR_SECURITY_PREFIX)-1; @@ -29,27 +29,27 @@ ext3_xattr_security_list(struct inode *inode, char *list, size_t list_size, } static int -ext3_xattr_security_get(struct inode *inode, const char *name, +ext4_xattr_security_get(struct inode *inode, const char *name, void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; - return ext3_xattr_get(inode, EXT3_XATTR_INDEX_SECURITY, name, + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_SECURITY, name, buffer, size); } static int -ext3_xattr_security_set(struct inode *inode, const char *name, +ext4_xattr_security_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; - return ext3_xattr_set(inode, EXT3_XATTR_INDEX_SECURITY, name, + return ext4_xattr_set(inode, EXT4_XATTR_INDEX_SECURITY, name, value, size, flags); } int -ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir) +ext4_init_security(handle_t *handle, struct inode *inode, struct inode *dir) { int err; size_t len; @@ -62,16 +62,16 @@ ext3_init_security(handle_t *handle, struct inode *inode, struct inode *dir) return 0; return err; } - err = ext3_xattr_set_handle(handle, inode, EXT3_XATTR_INDEX_SECURITY, + err = ext4_xattr_set_handle(handle, inode, EXT4_XATTR_INDEX_SECURITY, name, value, len, 0); kfree(name); kfree(value); return err; } -struct xattr_handler ext3_xattr_security_handler = { +struct xattr_handler ext4_xattr_security_handler = { .prefix = XATTR_SECURITY_PREFIX, - .list = ext3_xattr_security_list, - .get = ext3_xattr_security_get, - .set = ext3_xattr_security_set, + .list = ext4_xattr_security_list, + .get = ext4_xattr_security_get, + .set = ext4_xattr_security_set, }; diff --git a/fs/ext4/xattr_trusted.c b/fs/ext4/xattr_trusted.c index 86d91f1186dc..11bd58c95a61 100644 --- a/fs/ext4/xattr_trusted.c +++ b/fs/ext4/xattr_trusted.c @@ -1,5 +1,5 @@ /* - * linux/fs/ext3/xattr_trusted.c + * linux/fs/ext4/xattr_trusted.c * Handler for trusted extended attributes. * * Copyright (C) 2003 by Andreas Gruenbacher, <a.gruenbacher@computer.org> @@ -10,14 +10,14 @@ #include <linux/capability.h> #include <linux/fs.h> #include <linux/smp_lock.h> -#include <linux/ext3_jbd.h> -#include <linux/ext3_fs.h> +#include <linux/ext4_jbd.h> +#include <linux/ext4_fs.h> #include "xattr.h" #define XATTR_TRUSTED_PREFIX "trusted." 
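/*
 * A sketch of how the generic VFS code is assumed to route a name such
 * as "trusted.foo" to this handler (simplified pseudo-code; the real
 * matching lives in fs/xattr.c, not in this file):
 *
 *	for each handler in sb->s_xattr:
 *		if (strncmp(name, handler->prefix,
 *			    strlen(handler->prefix)) == 0)
 *			return handler->get(inode, name + prefix_len,
 *					    buffer, size);
 */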
static size_t -ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, +ext4_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { const size_t prefix_len = sizeof(XATTR_TRUSTED_PREFIX)-1; @@ -35,28 +35,28 @@ ext3_xattr_trusted_list(struct inode *inode, char *list, size_t list_size, } static int -ext3_xattr_trusted_get(struct inode *inode, const char *name, +ext4_xattr_trusted_get(struct inode *inode, const char *name, void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; - return ext3_xattr_get(inode, EXT3_XATTR_INDEX_TRUSTED, name, + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_TRUSTED, name, buffer, size); } static int -ext3_xattr_trusted_set(struct inode *inode, const char *name, +ext4_xattr_trusted_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; - return ext3_xattr_set(inode, EXT3_XATTR_INDEX_TRUSTED, name, + return ext4_xattr_set(inode, EXT4_XATTR_INDEX_TRUSTED, name, value, size, flags); } -struct xattr_handler ext3_xattr_trusted_handler = { +struct xattr_handler ext4_xattr_trusted_handler = { .prefix = XATTR_TRUSTED_PREFIX, - .list = ext3_xattr_trusted_list, - .get = ext3_xattr_trusted_get, - .set = ext3_xattr_trusted_set, + .list = ext4_xattr_trusted_list, + .get = ext4_xattr_trusted_get, + .set = ext4_xattr_trusted_set, }; diff --git a/fs/ext4/xattr_user.c b/fs/ext4/xattr_user.c index a85a0a17c4fd..9c5a665e0837 100644 --- a/fs/ext4/xattr_user.c +++ b/fs/ext4/xattr_user.c @@ -1,5 +1,5 @@ /* - * linux/fs/ext3/xattr_user.c + * linux/fs/ext4/xattr_user.c * Handler for extended user attributes. * * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org> @@ -9,14 +9,14 @@ #include <linux/string.h> #include <linux/fs.h> #include <linux/smp_lock.h> -#include <linux/ext3_jbd.h> -#include <linux/ext3_fs.h> +#include <linux/ext4_jbd.h> +#include <linux/ext4_fs.h> #include "xattr.h" #define XATTR_USER_PREFIX "user." 
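/*
 * Unlike the trusted handler, user.* attributes are additionally gated
 * by the "user_xattr" mount option: the get/set paths below bail out
 * with -EOPNOTSUPP unless test_opt(sb, XATTR_USER) is set (see
 * EXT4_MOUNT_XATTR_USER in ext4_fs.h).
 */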
static size_t -ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size, +ext4_xattr_user_list(struct inode *inode, char *list, size_t list_size, const char *name, size_t name_len) { const size_t prefix_len = sizeof(XATTR_USER_PREFIX)-1; @@ -34,31 +34,31 @@ ext3_xattr_user_list(struct inode *inode, char *list, size_t list_size, } static int -ext3_xattr_user_get(struct inode *inode, const char *name, +ext4_xattr_user_get(struct inode *inode, const char *name, void *buffer, size_t size) { if (strcmp(name, "") == 0) return -EINVAL; if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext3_xattr_get(inode, EXT3_XATTR_INDEX_USER, name, buffer, size); + return ext4_xattr_get(inode, EXT4_XATTR_INDEX_USER, name, buffer, size); } static int -ext3_xattr_user_set(struct inode *inode, const char *name, +ext4_xattr_user_set(struct inode *inode, const char *name, const void *value, size_t size, int flags) { if (strcmp(name, "") == 0) return -EINVAL; if (!test_opt(inode->i_sb, XATTR_USER)) return -EOPNOTSUPP; - return ext3_xattr_set(inode, EXT3_XATTR_INDEX_USER, name, + return ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, name, value, size, flags); } -struct xattr_handler ext3_xattr_user_handler = { +struct xattr_handler ext4_xattr_user_handler = { .prefix = XATTR_USER_PREFIX, - .list = ext3_xattr_user_list, - .get = ext3_xattr_user_get, - .set = ext3_xattr_user_set, + .list = ext4_xattr_user_list, + .get = ext4_xattr_user_get, + .set = ext4_xattr_user_set, }; diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 11cca1bdc0c7..f582cd762caf 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -1,5 +1,5 @@ /* - * linux/include/linux/ext3_fs.h + * linux/include/linux/ext4_fs.h * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) @@ -13,8 +13,8 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ -#ifndef _LINUX_EXT3_FS_H -#define _LINUX_EXT3_FS_H +#ifndef _LINUX_EXT4_FS_H +#define _LINUX_EXT4_FS_H #include <linux/types.h> #include <linux/magic.h> @@ -24,102 +24,102 @@ */ /* - * Define EXT3FS_DEBUG to produce debug messages + * Define EXT4FS_DEBUG to produce debug messages */ -#undef EXT3FS_DEBUG +#undef EXT4FS_DEBUG /* - * Define EXT3_RESERVATION to reserve data blocks for expanding files + * Define EXT4_RESERVATION to reserve data blocks for expanding files */ -#define EXT3_DEFAULT_RESERVE_BLOCKS 8 +#define EXT4_DEFAULT_RESERVE_BLOCKS 8 /*max window size: 1024(direct blocks) + 3([t,d]indirect blocks) */ -#define EXT3_MAX_RESERVE_BLOCKS 1027 -#define EXT3_RESERVE_WINDOW_NOT_ALLOCATED 0 +#define EXT4_MAX_RESERVE_BLOCKS 1027 +#define EXT4_RESERVE_WINDOW_NOT_ALLOCATED 0 /* * Always enable hashed directories */ -#define CONFIG_EXT3_INDEX +#define CONFIG_EXT4_INDEX /* * Debug code */ -#ifdef EXT3FS_DEBUG -#define ext3_debug(f, a...) \ +#ifdef EXT4FS_DEBUG +#define ext4_debug(f, a...) \ do { \ - printk (KERN_DEBUG "EXT3-fs DEBUG (%s, %d): %s:", \ + printk (KERN_DEBUG "EXT4-fs DEBUG (%s, %d): %s:", \ __FILE__, __LINE__, __FUNCTION__); \ printk (KERN_DEBUG f, ## a); \ } while (0) #else -#define ext3_debug(f, a...) do {} while (0) +#define ext4_debug(f, a...) 
do {} while (0) #endif /* * Special inodes numbers */ -#define EXT3_BAD_INO 1 /* Bad blocks inode */ -#define EXT3_ROOT_INO 2 /* Root inode */ -#define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */ -#define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */ -#define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */ -#define EXT3_JOURNAL_INO 8 /* Journal inode */ +#define EXT4_BAD_INO 1 /* Bad blocks inode */ +#define EXT4_ROOT_INO 2 /* Root inode */ +#define EXT4_BOOT_LOADER_INO 5 /* Boot loader inode */ +#define EXT4_UNDEL_DIR_INO 6 /* Undelete directory inode */ +#define EXT4_RESIZE_INO 7 /* Reserved group descriptors inode */ +#define EXT4_JOURNAL_INO 8 /* Journal inode */ -/* First non-reserved inode for old ext3 filesystems */ -#define EXT3_GOOD_OLD_FIRST_INO 11 +/* First non-reserved inode for old ext4 filesystems */ +#define EXT4_GOOD_OLD_FIRST_INO 11 /* * Maximal count of links to a file */ -#define EXT3_LINK_MAX 32000 +#define EXT4_LINK_MAX 32000 /* * Macro-instructions used to manage several block sizes */ -#define EXT3_MIN_BLOCK_SIZE 1024 -#define EXT3_MAX_BLOCK_SIZE 4096 -#define EXT3_MIN_BLOCK_LOG_SIZE 10 +#define EXT4_MIN_BLOCK_SIZE 1024 +#define EXT4_MAX_BLOCK_SIZE 4096 +#define EXT4_MIN_BLOCK_LOG_SIZE 10 #ifdef __KERNEL__ -# define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize) +# define EXT4_BLOCK_SIZE(s) ((s)->s_blocksize) #else -# define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size) +# define EXT4_BLOCK_SIZE(s) (EXT4_MIN_BLOCK_SIZE << (s)->s_log_block_size) #endif -#define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32)) +#define EXT4_ADDR_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (__u32)) #ifdef __KERNEL__ -# define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) +# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits) #else -# define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) +# define EXT4_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10) #endif #ifdef __KERNEL__ -#define EXT3_ADDR_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_addr_per_block_bits) -#define EXT3_INODE_SIZE(s) (EXT3_SB(s)->s_inode_size) -#define EXT3_FIRST_INO(s) (EXT3_SB(s)->s_first_ino) +#define EXT4_ADDR_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_addr_per_block_bits) +#define EXT4_INODE_SIZE(s) (EXT4_SB(s)->s_inode_size) +#define EXT4_FIRST_INO(s) (EXT4_SB(s)->s_first_ino) #else -#define EXT3_INODE_SIZE(s) (((s)->s_rev_level == EXT3_GOOD_OLD_REV) ? \ - EXT3_GOOD_OLD_INODE_SIZE : \ +#define EXT4_INODE_SIZE(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? \ + EXT4_GOOD_OLD_INODE_SIZE : \ (s)->s_inode_size) -#define EXT3_FIRST_INO(s) (((s)->s_rev_level == EXT3_GOOD_OLD_REV) ? \ - EXT3_GOOD_OLD_FIRST_INO : \ +#define EXT4_FIRST_INO(s) (((s)->s_rev_level == EXT4_GOOD_OLD_REV) ? 
\ + EXT4_GOOD_OLD_FIRST_INO : \ (s)->s_first_ino) #endif /* * Macro-instructions used to manage fragments */ -#define EXT3_MIN_FRAG_SIZE 1024 -#define EXT3_MAX_FRAG_SIZE 4096 -#define EXT3_MIN_FRAG_LOG_SIZE 10 +#define EXT4_MIN_FRAG_SIZE 1024 +#define EXT4_MAX_FRAG_SIZE 4096 +#define EXT4_MIN_FRAG_LOG_SIZE 10 #ifdef __KERNEL__ -# define EXT3_FRAG_SIZE(s) (EXT3_SB(s)->s_frag_size) -# define EXT3_FRAGS_PER_BLOCK(s) (EXT3_SB(s)->s_frags_per_block) +# define EXT4_FRAG_SIZE(s) (EXT4_SB(s)->s_frag_size) +# define EXT4_FRAGS_PER_BLOCK(s) (EXT4_SB(s)->s_frags_per_block) #else -# define EXT3_FRAG_SIZE(s) (EXT3_MIN_FRAG_SIZE << (s)->s_log_frag_size) -# define EXT3_FRAGS_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / EXT3_FRAG_SIZE(s)) +# define EXT4_FRAG_SIZE(s) (EXT4_MIN_FRAG_SIZE << (s)->s_log_frag_size) +# define EXT4_FRAGS_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_FRAG_SIZE(s)) #endif /* * Structure of a blocks group descriptor */ -struct ext3_group_desc +struct ext4_group_desc { __le32 bg_block_bitmap; /* Blocks bitmap block */ __le32 bg_inode_bitmap; /* Inodes bitmap block */ @@ -135,62 +135,62 @@ struct ext3_group_desc * Macro-instructions used to manage group descriptors */ #ifdef __KERNEL__ -# define EXT3_BLOCKS_PER_GROUP(s) (EXT3_SB(s)->s_blocks_per_group) -# define EXT3_DESC_PER_BLOCK(s) (EXT3_SB(s)->s_desc_per_block) -# define EXT3_INODES_PER_GROUP(s) (EXT3_SB(s)->s_inodes_per_group) -# define EXT3_DESC_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_desc_per_block_bits) +# define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group) +# define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block) +# define EXT4_INODES_PER_GROUP(s) (EXT4_SB(s)->s_inodes_per_group) +# define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits) #else -# define EXT3_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) -# define EXT3_DESC_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_group_desc)) -# define EXT3_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) +# define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) +# define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (struct ext4_group_desc)) +# define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) #endif /* * Constants relative to the data blocks */ -#define EXT3_NDIR_BLOCKS 12 -#define EXT3_IND_BLOCK EXT3_NDIR_BLOCKS -#define EXT3_DIND_BLOCK (EXT3_IND_BLOCK + 1) -#define EXT3_TIND_BLOCK (EXT3_DIND_BLOCK + 1) -#define EXT3_N_BLOCKS (EXT3_TIND_BLOCK + 1) +#define EXT4_NDIR_BLOCKS 12 +#define EXT4_IND_BLOCK EXT4_NDIR_BLOCKS +#define EXT4_DIND_BLOCK (EXT4_IND_BLOCK + 1) +#define EXT4_TIND_BLOCK (EXT4_DIND_BLOCK + 1) +#define EXT4_N_BLOCKS (EXT4_TIND_BLOCK + 1) /* * Inode flags */ -#define EXT3_SECRM_FL 0x00000001 /* Secure deletion */ -#define EXT3_UNRM_FL 0x00000002 /* Undelete */ -#define EXT3_COMPR_FL 0x00000004 /* Compress file */ -#define EXT3_SYNC_FL 0x00000008 /* Synchronous updates */ -#define EXT3_IMMUTABLE_FL 0x00000010 /* Immutable file */ -#define EXT3_APPEND_FL 0x00000020 /* writes to file may only append */ -#define EXT3_NODUMP_FL 0x00000040 /* do not dump file */ -#define EXT3_NOATIME_FL 0x00000080 /* do not update atime */ +#define EXT4_SECRM_FL 0x00000001 /* Secure deletion */ +#define EXT4_UNRM_FL 0x00000002 /* Undelete */ +#define EXT4_COMPR_FL 0x00000004 /* Compress file */ +#define EXT4_SYNC_FL 0x00000008 /* Synchronous updates */ +#define EXT4_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define EXT4_APPEND_FL 0x00000020 /* writes to file may only append */ +#define EXT4_NODUMP_FL 0x00000040 /* do not dump file */ +#define 
EXT4_NOATIME_FL 0x00000080 /* do not update atime */ /* Reserved for compression usage... */ -#define EXT3_DIRTY_FL 0x00000100 -#define EXT3_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ -#define EXT3_NOCOMPR_FL 0x00000400 /* Don't compress */ -#define EXT3_ECOMPR_FL 0x00000800 /* Compression error */ +#define EXT4_DIRTY_FL 0x00000100 +#define EXT4_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ +#define EXT4_NOCOMPR_FL 0x00000400 /* Don't compress */ +#define EXT4_ECOMPR_FL 0x00000800 /* Compression error */ /* End compression flags --- maybe not all used */ -#define EXT3_INDEX_FL 0x00001000 /* hash-indexed directory */ -#define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ -#define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ -#define EXT3_NOTAIL_FL 0x00008000 /* file tail should not be merged */ -#define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ -#define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ -#define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ +#define EXT4_INDEX_FL 0x00001000 /* hash-indexed directory */ +#define EXT4_IMAGIC_FL 0x00002000 /* AFS directory */ +#define EXT4_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ +#define EXT4_NOTAIL_FL 0x00008000 /* file tail should not be merged */ +#define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ +#define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ -#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ -#define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ +#define EXT4_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ +#define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ /* * Inode dynamic state flags */ -#define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ -#define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ -#define EXT3_STATE_XATTR 0x00000004 /* has in-inode xattrs */ +#define EXT4_STATE_JDATA 0x00000001 /* journaled data exists */ +#define EXT4_STATE_NEW 0x00000002 /* inode is newly created */ +#define EXT4_STATE_XATTR 0x00000004 /* has in-inode xattrs */ /* Used to pass group descriptor data when online resize is done */ -struct ext3_new_group_input { +struct ext4_new_group_input { __u32 group; /* Group number for this data */ __u32 block_bitmap; /* Absolute block number of block bitmap */ __u32 inode_bitmap; /* Absolute block number of inode bitmap */ @@ -200,8 +200,8 @@ struct ext3_new_group_input { __u16 unused; }; -/* The struct ext3_new_group_input in kernel space, with free_blocks_count */ -struct ext3_new_group_data { +/* The struct ext4_new_group_input in kernel space, with free_blocks_count */ +struct ext4_new_group_data { __u32 group; __u32 block_bitmap; __u32 inode_bitmap; @@ -216,41 +216,41 @@ struct ext3_new_group_data { /* * ioctl commands */ -#define EXT3_IOC_GETFLAGS FS_IOC_GETFLAGS -#define EXT3_IOC_SETFLAGS FS_IOC_SETFLAGS -#define EXT3_IOC_GETVERSION _IOR('f', 3, long) -#define EXT3_IOC_SETVERSION _IOW('f', 4, long) -#define EXT3_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) -#define EXT3_IOC_GROUP_ADD _IOW('f', 8,struct ext3_new_group_input) -#define EXT3_IOC_GETVERSION_OLD FS_IOC_GETVERSION -#define EXT3_IOC_SETVERSION_OLD FS_IOC_SETVERSION +#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS +#define EXT4_IOC_SETFLAGS FS_IOC_SETFLAGS +#define EXT4_IOC_GETVERSION _IOR('f', 3, long) +#define EXT4_IOC_SETVERSION _IOW('f', 4, 
long) +#define EXT4_IOC_GROUP_EXTEND _IOW('f', 7, unsigned long) +#define EXT4_IOC_GROUP_ADD _IOW('f', 8,struct ext4_new_group_input) +#define EXT4_IOC_GETVERSION_OLD FS_IOC_GETVERSION +#define EXT4_IOC_SETVERSION_OLD FS_IOC_SETVERSION #ifdef CONFIG_JBD_DEBUG -#define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) +#define EXT4_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) #endif -#define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) -#define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) +#define EXT4_IOC_GETRSVSZ _IOR('f', 5, long) +#define EXT4_IOC_SETRSVSZ _IOW('f', 6, long) /* * ioctl commands in 32 bit emulation */ -#define EXT3_IOC32_GETFLAGS FS_IOC32_GETFLAGS -#define EXT3_IOC32_SETFLAGS FS_IOC32_SETFLAGS -#define EXT3_IOC32_GETVERSION _IOR('f', 3, int) -#define EXT3_IOC32_SETVERSION _IOW('f', 4, int) -#define EXT3_IOC32_GETRSVSZ _IOR('f', 5, int) -#define EXT3_IOC32_SETRSVSZ _IOW('f', 6, int) -#define EXT3_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) +#define EXT4_IOC32_GETFLAGS FS_IOC32_GETFLAGS +#define EXT4_IOC32_SETFLAGS FS_IOC32_SETFLAGS +#define EXT4_IOC32_GETVERSION _IOR('f', 3, int) +#define EXT4_IOC32_SETVERSION _IOW('f', 4, int) +#define EXT4_IOC32_GETRSVSZ _IOR('f', 5, int) +#define EXT4_IOC32_SETRSVSZ _IOW('f', 6, int) +#define EXT4_IOC32_GROUP_EXTEND _IOW('f', 7, unsigned int) #ifdef CONFIG_JBD_DEBUG -#define EXT3_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) +#define EXT4_IOC32_WAIT_FOR_READONLY _IOR('f', 99, int) #endif -#define EXT3_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION -#define EXT3_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION +#define EXT4_IOC32_GETVERSION_OLD FS_IOC32_GETVERSION +#define EXT4_IOC32_SETVERSION_OLD FS_IOC32_SETVERSION /* * Mount options */ -struct ext3_mount_options { +struct ext4_mount_options { unsigned long s_mount_opt; uid_t s_resuid; gid_t s_resgid; @@ -264,7 +264,7 @@ struct ext3_mount_options { /* * Structure of an inode on the disk */ -struct ext3_inode { +struct ext4_inode { __le16 i_mode; /* File mode */ __le16 i_uid; /* Low 16 bits of Owner Uid */ __le32 i_size; /* Size in bytes */ @@ -287,7 +287,7 @@ struct ext3_inode { __u32 m_i_reserved1; } masix1; } osd1; /* OS dependent 1 */ - __le32 i_block[EXT3_N_BLOCKS];/* Pointers to blocks */ + __le32 i_block[EXT4_N_BLOCKS];/* Pointers to blocks */ __le32 i_generation; /* File version (for NFS) */ __le32 i_file_acl; /* File ACL */ __le32 i_dir_acl; /* Directory ACL */ @@ -353,76 +353,76 @@ struct ext3_inode { /* * File system states */ -#define EXT3_VALID_FS 0x0001 /* Unmounted cleanly */ -#define EXT3_ERROR_FS 0x0002 /* Errors detected */ -#define EXT3_ORPHAN_FS 0x0004 /* Orphans being recovered */ +#define EXT4_VALID_FS 0x0001 /* Unmounted cleanly */ +#define EXT4_ERROR_FS 0x0002 /* Errors detected */ +#define EXT4_ORPHAN_FS 0x0004 /* Orphans being recovered */ /* * Mount flags */ -#define EXT3_MOUNT_CHECK 0x00001 /* Do mount-time checks */ -#define EXT3_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ -#define EXT3_MOUNT_GRPID 0x00004 /* Create files with directory's group */ -#define EXT3_MOUNT_DEBUG 0x00008 /* Some debugging messages */ -#define EXT3_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ -#define EXT3_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ -#define EXT3_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ -#define EXT3_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ -#define EXT3_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ -#define EXT3_MOUNT_ABORT 0x00200 /* Fatal error detected */ -#define EXT3_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ -#define 
EXT3_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ -#define EXT3_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ -#define EXT3_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ -#define EXT3_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ -#define EXT3_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ -#define EXT3_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ -#define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ -#define EXT3_MOUNT_RESERVATION 0x10000 /* Preallocation */ -#define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */ -#define EXT3_MOUNT_NOBH 0x40000 /* No bufferheads */ -#define EXT3_MOUNT_QUOTA 0x80000 /* Some quota option set */ -#define EXT3_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ -#define EXT3_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ - -/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ +#define EXT4_MOUNT_CHECK 0x00001 /* Do mount-time checks */ +#define EXT4_MOUNT_OLDALLOC 0x00002 /* Don't use the new Orlov allocator */ +#define EXT4_MOUNT_GRPID 0x00004 /* Create files with directory's group */ +#define EXT4_MOUNT_DEBUG 0x00008 /* Some debugging messages */ +#define EXT4_MOUNT_ERRORS_CONT 0x00010 /* Continue on errors */ +#define EXT4_MOUNT_ERRORS_RO 0x00020 /* Remount fs ro on errors */ +#define EXT4_MOUNT_ERRORS_PANIC 0x00040 /* Panic on errors */ +#define EXT4_MOUNT_MINIX_DF 0x00080 /* Mimics the Minix statfs */ +#define EXT4_MOUNT_NOLOAD 0x00100 /* Don't use existing journal*/ +#define EXT4_MOUNT_ABORT 0x00200 /* Fatal error detected */ +#define EXT4_MOUNT_DATA_FLAGS 0x00C00 /* Mode for data writes: */ +#define EXT4_MOUNT_JOURNAL_DATA 0x00400 /* Write data to journal */ +#define EXT4_MOUNT_ORDERED_DATA 0x00800 /* Flush data before commit */ +#define EXT4_MOUNT_WRITEBACK_DATA 0x00C00 /* No data ordering */ +#define EXT4_MOUNT_UPDATE_JOURNAL 0x01000 /* Update the journal format */ +#define EXT4_MOUNT_NO_UID32 0x02000 /* Disable 32-bit UIDs */ +#define EXT4_MOUNT_XATTR_USER 0x04000 /* Extended user attributes */ +#define EXT4_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ +#define EXT4_MOUNT_RESERVATION 0x10000 /* Preallocation */ +#define EXT4_MOUNT_BARRIER 0x20000 /* Use block barriers */ +#define EXT4_MOUNT_NOBH 0x40000 /* No bufferheads */ +#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ +#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ +#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ + +/* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H -#define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt -#define set_opt(o, opt) o |= EXT3_MOUNT_##opt -#define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \ - EXT3_MOUNT_##opt) +#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt +#define set_opt(o, opt) o |= EXT4_MOUNT_##opt +#define test_opt(sb, opt) (EXT4_SB(sb)->s_mount_opt & \ + EXT4_MOUNT_##opt) #else -#define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD -#define EXT2_MOUNT_ABORT EXT3_MOUNT_ABORT -#define EXT2_MOUNT_DATA_FLAGS EXT3_MOUNT_DATA_FLAGS +#define EXT2_MOUNT_NOLOAD EXT4_MOUNT_NOLOAD +#define EXT2_MOUNT_ABORT EXT4_MOUNT_ABORT +#define EXT2_MOUNT_DATA_FLAGS EXT4_MOUNT_DATA_FLAGS #endif -#define ext3_set_bit ext2_set_bit -#define ext3_set_bit_atomic ext2_set_bit_atomic -#define ext3_clear_bit ext2_clear_bit -#define ext3_clear_bit_atomic ext2_clear_bit_atomic -#define ext3_test_bit ext2_test_bit -#define ext3_find_first_zero_bit ext2_find_first_zero_bit -#define ext3_find_next_zero_bit 
ext2_find_next_zero_bit +#define ext4_set_bit ext2_set_bit +#define ext4_set_bit_atomic ext2_set_bit_atomic +#define ext4_clear_bit ext2_clear_bit +#define ext4_clear_bit_atomic ext2_clear_bit_atomic +#define ext4_test_bit ext2_test_bit +#define ext4_find_first_zero_bit ext2_find_first_zero_bit +#define ext4_find_next_zero_bit ext2_find_next_zero_bit /* * Maximal mount counts between two filesystem checks */ -#define EXT3_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ -#define EXT3_DFL_CHECKINTERVAL 0 /* Don't use interval check */ +#define EXT4_DFL_MAX_MNT_COUNT 20 /* Allow 20 mounts */ +#define EXT4_DFL_CHECKINTERVAL 0 /* Don't use interval check */ /* * Behaviour when detecting errors */ -#define EXT3_ERRORS_CONTINUE 1 /* Continue execution */ -#define EXT3_ERRORS_RO 2 /* Remount fs read-only */ -#define EXT3_ERRORS_PANIC 3 /* Panic */ -#define EXT3_ERRORS_DEFAULT EXT3_ERRORS_CONTINUE +#define EXT4_ERRORS_CONTINUE 1 /* Continue execution */ +#define EXT4_ERRORS_RO 2 /* Remount fs read-only */ +#define EXT4_ERRORS_PANIC 3 /* Panic */ +#define EXT4_ERRORS_DEFAULT EXT4_ERRORS_CONTINUE /* * Structure of the super block */ -struct ext3_super_block { +struct ext4_super_block { /*00*/ __le32 s_inodes_count; /* Inodes count */ __le32 s_blocks_count; /* Blocks count */ __le32 s_r_blocks_count; /* Reserved blocks count */ @@ -449,7 +449,7 @@ struct ext3_super_block { /*50*/ __le16 s_def_resuid; /* Default uid for reserved blocks */ __le16 s_def_resgid; /* Default gid for reserved blocks */ /* - * These fields are for EXT3_DYNAMIC_REV superblocks only. + * These fields are for EXT4_DYNAMIC_REV superblocks only. * * Note: the difference between the compatible feature set and * the incompatible feature set is that if there is a bit set @@ -473,13 +473,13 @@ struct ext3_super_block { /*C8*/ __le32 s_algorithm_usage_bitmap; /* For compression */ /* * Performance hints. Directory preallocation should only - * happen if the EXT3_FEATURE_COMPAT_DIR_PREALLOC flag is on. + * happen if the EXT4_FEATURE_COMPAT_DIR_PREALLOC flag is on. */ __u8 s_prealloc_blocks; /* Nr of blocks to try to preallocate*/ __u8 s_prealloc_dir_blocks; /* Nr to preallocate for dirs */ __le16 s_reserved_gdt_blocks; /* Per group desc for online growth */ /* - * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set. + * Journaling support valid if EXT4_FEATURE_COMPAT_HAS_JOURNAL set. 
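 * (Whether a superblock carries this feature is tested with the
 * feature macros defined later in this header, e.g.
 * EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL).)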
*/ /*D0*/ __u8 s_journal_uuid[16]; /* uuid of journal superblock */ /*E0*/ __le32 s_journal_inum; /* inode number of journal file */ @@ -495,186 +495,186 @@ struct ext3_super_block { }; #ifdef __KERNEL__ -#include <linux/ext3_fs_i.h> -#include <linux/ext3_fs_sb.h> -static inline struct ext3_sb_info * EXT3_SB(struct super_block *sb) +#include <linux/ext4_fs_i.h> +#include <linux/ext4_fs_sb.h> +static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb) { return sb->s_fs_info; } -static inline struct ext3_inode_info *EXT3_I(struct inode *inode) +static inline struct ext4_inode_info *EXT4_I(struct inode *inode) { - return container_of(inode, struct ext3_inode_info, vfs_inode); + return container_of(inode, struct ext4_inode_info, vfs_inode); } -static inline int ext3_valid_inum(struct super_block *sb, unsigned long ino) +static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) { - return ino == EXT3_ROOT_INO || - ino == EXT3_JOURNAL_INO || - ino == EXT3_RESIZE_INO || - (ino >= EXT3_FIRST_INO(sb) && - ino <= le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count)); + return ino == EXT4_ROOT_INO || + ino == EXT4_JOURNAL_INO || + ino == EXT4_RESIZE_INO || + (ino >= EXT4_FIRST_INO(sb) && + ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count)); } #else -/* Assume that user mode programs are passing in an ext3fs superblock, not +/* Assume that user mode programs are passing in an ext4fs superblock, not * a kernel struct super_block. This will allow us to call the feature-test * macros from user land. */ -#define EXT3_SB(sb) (sb) +#define EXT4_SB(sb) (sb) #endif -#define NEXT_ORPHAN(inode) EXT3_I(inode)->i_dtime +#define NEXT_ORPHAN(inode) EXT4_I(inode)->i_dtime /* * Codes for operating systems */ -#define EXT3_OS_LINUX 0 -#define EXT3_OS_HURD 1 -#define EXT3_OS_MASIX 2 -#define EXT3_OS_FREEBSD 3 -#define EXT3_OS_LITES 4 +#define EXT4_OS_LINUX 0 +#define EXT4_OS_HURD 1 +#define EXT4_OS_MASIX 2 +#define EXT4_OS_FREEBSD 3 +#define EXT4_OS_LITES 4 /* * Revision levels */ -#define EXT3_GOOD_OLD_REV 0 /* The good old (original) format */ -#define EXT3_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ +#define EXT4_GOOD_OLD_REV 0 /* The good old (original) format */ +#define EXT4_DYNAMIC_REV 1 /* V2 format w/ dynamic inode sizes */ -#define EXT3_CURRENT_REV EXT3_GOOD_OLD_REV -#define EXT3_MAX_SUPP_REV EXT3_DYNAMIC_REV +#define EXT4_CURRENT_REV EXT4_GOOD_OLD_REV +#define EXT4_MAX_SUPP_REV EXT4_DYNAMIC_REV -#define EXT3_GOOD_OLD_INODE_SIZE 128 +#define EXT4_GOOD_OLD_INODE_SIZE 128 /* * Feature set definitions */ -#define EXT3_HAS_COMPAT_FEATURE(sb,mask) \ - ( EXT3_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) -#define EXT3_HAS_RO_COMPAT_FEATURE(sb,mask) \ - ( EXT3_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) -#define EXT3_HAS_INCOMPAT_FEATURE(sb,mask) \ - ( EXT3_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) -#define EXT3_SET_COMPAT_FEATURE(sb,mask) \ - EXT3_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) -#define EXT3_SET_RO_COMPAT_FEATURE(sb,mask) \ - EXT3_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) -#define EXT3_SET_INCOMPAT_FEATURE(sb,mask) \ - EXT3_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) -#define EXT3_CLEAR_COMPAT_FEATURE(sb,mask) \ - EXT3_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask) -#define EXT3_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ - EXT3_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) -#define EXT3_CLEAR_INCOMPAT_FEATURE(sb,mask) \ - EXT3_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) - -#define 
EXT3_FEATURE_COMPAT_DIR_PREALLOC 0x0001 -#define EXT3_FEATURE_COMPAT_IMAGIC_INODES 0x0002 -#define EXT3_FEATURE_COMPAT_HAS_JOURNAL 0x0004 -#define EXT3_FEATURE_COMPAT_EXT_ATTR 0x0008 -#define EXT3_FEATURE_COMPAT_RESIZE_INODE 0x0010 -#define EXT3_FEATURE_COMPAT_DIR_INDEX 0x0020 - -#define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 -#define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 -#define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 - -#define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 -#define EXT3_FEATURE_INCOMPAT_FILETYPE 0x0002 -#define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ -#define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ -#define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 - -#define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR -#define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER| \ - EXT3_FEATURE_INCOMPAT_META_BG) -#define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - EXT3_FEATURE_RO_COMPAT_BTREE_DIR) +#define EXT4_HAS_COMPAT_FEATURE(sb,mask) \ + ( EXT4_SB(sb)->s_es->s_feature_compat & cpu_to_le32(mask) ) +#define EXT4_HAS_RO_COMPAT_FEATURE(sb,mask) \ + ( EXT4_SB(sb)->s_es->s_feature_ro_compat & cpu_to_le32(mask) ) +#define EXT4_HAS_INCOMPAT_FEATURE(sb,mask) \ + ( EXT4_SB(sb)->s_es->s_feature_incompat & cpu_to_le32(mask) ) +#define EXT4_SET_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_compat |= cpu_to_le32(mask) +#define EXT4_SET_RO_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_ro_compat |= cpu_to_le32(mask) +#define EXT4_SET_INCOMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_incompat |= cpu_to_le32(mask) +#define EXT4_CLEAR_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_compat &= ~cpu_to_le32(mask) +#define EXT4_CLEAR_RO_COMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_ro_compat &= ~cpu_to_le32(mask) +#define EXT4_CLEAR_INCOMPAT_FEATURE(sb,mask) \ + EXT4_SB(sb)->s_es->s_feature_incompat &= ~cpu_to_le32(mask) + +#define EXT4_FEATURE_COMPAT_DIR_PREALLOC 0x0001 +#define EXT4_FEATURE_COMPAT_IMAGIC_INODES 0x0002 +#define EXT4_FEATURE_COMPAT_HAS_JOURNAL 0x0004 +#define EXT4_FEATURE_COMPAT_EXT_ATTR 0x0008 +#define EXT4_FEATURE_COMPAT_RESIZE_INODE 0x0010 +#define EXT4_FEATURE_COMPAT_DIR_INDEX 0x0020 + +#define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 +#define EXT4_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 +#define EXT4_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 + +#define EXT4_FEATURE_INCOMPAT_COMPRESSION 0x0001 +#define EXT4_FEATURE_INCOMPAT_FILETYPE 0x0002 +#define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ +#define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ +#define EXT4_FEATURE_INCOMPAT_META_BG 0x0010 + +#define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR +#define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ + EXT4_FEATURE_INCOMPAT_RECOVER| \ + EXT4_FEATURE_INCOMPAT_META_BG) +#define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_BTREE_DIR) /* * Default values for user and/or group using reserved blocks */ -#define EXT3_DEF_RESUID 0 -#define EXT3_DEF_RESGID 0 +#define EXT4_DEF_RESUID 0 +#define EXT4_DEF_RESGID 0 /* * Default mount options */ -#define EXT3_DEFM_DEBUG 0x0001 -#define EXT3_DEFM_BSDGROUPS 0x0002 -#define EXT3_DEFM_XATTR_USER 0x0004 -#define EXT3_DEFM_ACL 0x0008 -#define EXT3_DEFM_UID16 0x0010 -#define EXT3_DEFM_JMODE 0x0060 -#define 
EXT3_DEFM_JMODE_DATA 0x0020 -#define EXT3_DEFM_JMODE_ORDERED 0x0040 -#define EXT3_DEFM_JMODE_WBACK 0x0060 +#define EXT4_DEFM_DEBUG 0x0001 +#define EXT4_DEFM_BSDGROUPS 0x0002 +#define EXT4_DEFM_XATTR_USER 0x0004 +#define EXT4_DEFM_ACL 0x0008 +#define EXT4_DEFM_UID16 0x0010 +#define EXT4_DEFM_JMODE 0x0060 +#define EXT4_DEFM_JMODE_DATA 0x0020 +#define EXT4_DEFM_JMODE_ORDERED 0x0040 +#define EXT4_DEFM_JMODE_WBACK 0x0060 /* * Structure of a directory entry */ -#define EXT3_NAME_LEN 255 +#define EXT4_NAME_LEN 255 -struct ext3_dir_entry { +struct ext4_dir_entry { __le32 inode; /* Inode number */ __le16 rec_len; /* Directory entry length */ __le16 name_len; /* Name length */ - char name[EXT3_NAME_LEN]; /* File name */ + char name[EXT4_NAME_LEN]; /* File name */ }; /* - * The new version of the directory entry. Since EXT3 structures are + * The new version of the directory entry. Since EXT4 structures are * stored in intel byte order, and the name_len field could never be * bigger than 255 chars, it's safe to reclaim the extra byte for the * file_type field. */ -struct ext3_dir_entry_2 { +struct ext4_dir_entry_2 { __le32 inode; /* Inode number */ __le16 rec_len; /* Directory entry length */ __u8 name_len; /* Name length */ __u8 file_type; - char name[EXT3_NAME_LEN]; /* File name */ + char name[EXT4_NAME_LEN]; /* File name */ }; /* - * Ext3 directory file types. Only the low 3 bits are used. The + * Ext4 directory file types. Only the low 3 bits are used. The * other bits are reserved for now. */ -#define EXT3_FT_UNKNOWN 0 -#define EXT3_FT_REG_FILE 1 -#define EXT3_FT_DIR 2 -#define EXT3_FT_CHRDEV 3 -#define EXT3_FT_BLKDEV 4 -#define EXT3_FT_FIFO 5 -#define EXT3_FT_SOCK 6 -#define EXT3_FT_SYMLINK 7 +#define EXT4_FT_UNKNOWN 0 +#define EXT4_FT_REG_FILE 1 +#define EXT4_FT_DIR 2 +#define EXT4_FT_CHRDEV 3 +#define EXT4_FT_BLKDEV 4 +#define EXT4_FT_FIFO 5 +#define EXT4_FT_SOCK 6 +#define EXT4_FT_SYMLINK 7 -#define EXT3_FT_MAX 8 +#define EXT4_FT_MAX 8 /* - * EXT3_DIR_PAD defines the directory entries boundaries + * EXT4_DIR_PAD defines the directory entries boundaries * * NOTE: It must be a multiple of 4 */ -#define EXT3_DIR_PAD 4 -#define EXT3_DIR_ROUND (EXT3_DIR_PAD - 1) -#define EXT3_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT3_DIR_ROUND) & \ - ~EXT3_DIR_ROUND) +#define EXT4_DIR_PAD 4 +#define EXT4_DIR_ROUND (EXT4_DIR_PAD - 1) +#define EXT4_DIR_REC_LEN(name_len) (((name_len) + 8 + EXT4_DIR_ROUND) & \ + ~EXT4_DIR_ROUND) /* * Hash Tree Directory indexing * (c) Daniel Phillips, 2001 */ -#ifdef CONFIG_EXT3_INDEX - #define is_dx(dir) (EXT3_HAS_COMPAT_FEATURE(dir->i_sb, \ - EXT3_FEATURE_COMPAT_DIR_INDEX) && \ - (EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) -#define EXT3_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT3_LINK_MAX) -#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) +#ifdef CONFIG_EXT4_INDEX + #define is_dx(dir) (EXT4_HAS_COMPAT_FEATURE(dir->i_sb, \ + EXT4_FEATURE_COMPAT_DIR_INDEX) && \ + (EXT4_I(dir)->i_flags & EXT4_INDEX_FL)) +#define EXT4_DIR_LINK_MAX(dir) (!is_dx(dir) && (dir)->i_nlink >= EXT4_LINK_MAX) +#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2 || (dir)->i_nlink == 1) #else #define is_dx(dir) 0 -#define EXT3_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT3_LINK_MAX) -#define EXT3_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) +#define EXT4_DIR_LINK_MAX(dir) ((dir)->i_nlink >= EXT4_LINK_MAX) +#define EXT4_DIR_LINK_EMPTY(dir) ((dir)->i_nlink == 2) #endif /* Legal values for the dx_root hash_version field: */ @@ -694,10 +694,10 @@ struct dx_hash_info u32 *seed; }; 
-#define EXT3_HTREE_EOF 0x7fffffff +#define EXT4_HTREE_EOF 0x7fffffff /* - * Control parameters used by ext3_htree_next_block + * Control parameters used by ext4_htree_next_block */ #define HASH_NB_ALWAYS 1 @@ -705,16 +705,16 @@ struct dx_hash_info /* * Describe an inode's exact location on disk and in memory */ -struct ext3_iloc +struct ext4_iloc { struct buffer_head *bh; unsigned long offset; unsigned long block_group; }; -static inline struct ext3_inode *ext3_raw_inode(struct ext3_iloc *iloc) +static inline struct ext4_inode *ext4_raw_inode(struct ext4_iloc *iloc) { - return (struct ext3_inode *) (iloc->bh->b_data + iloc->offset); + return (struct ext4_inode *) (iloc->bh->b_data + iloc->offset); } /* @@ -733,11 +733,11 @@ struct dir_private_info { }; /* calculate the first block number of the group */ -static inline ext3_fsblk_t -ext3_group_first_block_no(struct super_block *sb, unsigned long group_no) +static inline ext4_fsblk_t +ext4_group_first_block_no(struct super_block *sb, unsigned long group_no) { - return group_no * (ext3_fsblk_t)EXT3_BLOCKS_PER_GROUP(sb) + - le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block); + return group_no * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); } /* @@ -751,113 +751,113 @@ ext3_group_first_block_no(struct super_block *sb, unsigned long group_no) /* * Ok, these declarations are also in <linux/kernel.h> but none of the - * ext3 source programs needs to include it so they are duplicated here. + * ext4 source programs needs to include it so they are duplicated here. */ # define NORET_TYPE /**/ # define ATTRIB_NORET __attribute__((noreturn)) # define NORET_AND noreturn, /* balloc.c */ -extern int ext3_bg_has_super(struct super_block *sb, int group); -extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); -extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode, - ext3_fsblk_t goal, int *errp); -extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode, - ext3_fsblk_t goal, unsigned long *count, int *errp); -extern void ext3_free_blocks (handle_t *handle, struct inode *inode, - ext3_fsblk_t block, unsigned long count); -extern void ext3_free_blocks_sb (handle_t *handle, struct super_block *sb, - ext3_fsblk_t block, unsigned long count, +extern int ext4_bg_has_super(struct super_block *sb, int group); +extern unsigned long ext4_bg_num_gdb(struct super_block *sb, int group); +extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, int *errp); +extern ext4_fsblk_t ext4_new_blocks (handle_t *handle, struct inode *inode, + ext4_fsblk_t goal, unsigned long *count, int *errp); +extern void ext4_free_blocks (handle_t *handle, struct inode *inode, + ext4_fsblk_t block, unsigned long count); +extern void ext4_free_blocks_sb (handle_t *handle, struct super_block *sb, + ext4_fsblk_t block, unsigned long count, unsigned long *pdquot_freed_blocks); -extern ext3_fsblk_t ext3_count_free_blocks (struct super_block *); -extern void ext3_check_blocks_bitmap (struct super_block *); -extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, +extern ext4_fsblk_t ext4_count_free_blocks (struct super_block *); +extern void ext4_check_blocks_bitmap (struct super_block *); +extern struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, unsigned int block_group, struct buffer_head ** bh); -extern int ext3_should_retry_alloc(struct super_block *sb, int *retries); -extern void 
ext3_init_block_alloc_info(struct inode *); -extern void ext3_rsv_window_add(struct super_block *sb, struct ext3_reserve_window_node *rsv); +extern int ext4_should_retry_alloc(struct super_block *sb, int *retries); +extern void ext4_init_block_alloc_info(struct inode *); +extern void ext4_rsv_window_add(struct super_block *sb, struct ext4_reserve_window_node *rsv); /* dir.c */ -extern int ext3_check_dir_entry(const char *, struct inode *, - struct ext3_dir_entry_2 *, +extern int ext4_check_dir_entry(const char *, struct inode *, + struct ext4_dir_entry_2 *, struct buffer_head *, unsigned long); -extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash, +extern int ext4_htree_store_dirent(struct file *dir_file, __u32 hash, __u32 minor_hash, - struct ext3_dir_entry_2 *dirent); -extern void ext3_htree_free_dir_info(struct dir_private_info *p); + struct ext4_dir_entry_2 *dirent); +extern void ext4_htree_free_dir_info(struct dir_private_info *p); /* fsync.c */ -extern int ext3_sync_file (struct file *, struct dentry *, int); +extern int ext4_sync_file (struct file *, struct dentry *, int); /* hash.c */ -extern int ext3fs_dirhash(const char *name, int len, struct +extern int ext4fs_dirhash(const char *name, int len, struct dx_hash_info *hinfo); /* ialloc.c */ -extern struct inode * ext3_new_inode (handle_t *, struct inode *, int); -extern void ext3_free_inode (handle_t *, struct inode *); -extern struct inode * ext3_orphan_get (struct super_block *, unsigned long); -extern unsigned long ext3_count_free_inodes (struct super_block *); -extern unsigned long ext3_count_dirs (struct super_block *); -extern void ext3_check_inodes_bitmap (struct super_block *); -extern unsigned long ext3_count_free (struct buffer_head *, unsigned); +extern struct inode * ext4_new_inode (handle_t *, struct inode *, int); +extern void ext4_free_inode (handle_t *, struct inode *); +extern struct inode * ext4_orphan_get (struct super_block *, unsigned long); +extern unsigned long ext4_count_free_inodes (struct super_block *); +extern unsigned long ext4_count_dirs (struct super_block *); +extern void ext4_check_inodes_bitmap (struct super_block *); +extern unsigned long ext4_count_free (struct buffer_head *, unsigned); /* inode.c */ -int ext3_forget(handle_t *handle, int is_metadata, struct inode *inode, - struct buffer_head *bh, ext3_fsblk_t blocknr); -struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); -struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -int ext3_get_blocks_handle(handle_t *handle, struct inode *inode, +int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode, + struct buffer_head *bh, ext4_fsblk_t blocknr); +struct buffer_head * ext4_getblk (handle_t *, struct inode *, long, int, int *); +struct buffer_head * ext4_bread (handle_t *, struct inode *, int, int, int *); +int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, sector_t iblock, unsigned long maxblocks, struct buffer_head *bh_result, int create, int extend_disksize); -extern void ext3_read_inode (struct inode *); -extern int ext3_write_inode (struct inode *, int); -extern int ext3_setattr (struct dentry *, struct iattr *); -extern void ext3_delete_inode (struct inode *); -extern int ext3_sync_inode (handle_t *, struct inode *); -extern void ext3_discard_reservation (struct inode *); -extern void ext3_dirty_inode(struct inode *); -extern int ext3_change_inode_journal_flag(struct inode *, int); -extern int ext3_get_inode_loc(struct inode *, struct 
ext3_iloc *); -extern void ext3_truncate (struct inode *); -extern void ext3_set_inode_flags(struct inode *); -extern void ext3_set_aops(struct inode *inode); +extern void ext4_read_inode (struct inode *); +extern int ext4_write_inode (struct inode *, int); +extern int ext4_setattr (struct dentry *, struct iattr *); +extern void ext4_delete_inode (struct inode *); +extern int ext4_sync_inode (handle_t *, struct inode *); +extern void ext4_discard_reservation (struct inode *); +extern void ext4_dirty_inode(struct inode *); +extern int ext4_change_inode_journal_flag(struct inode *, int); +extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); +extern void ext4_truncate (struct inode *); +extern void ext4_set_inode_flags(struct inode *); +extern void ext4_set_aops(struct inode *inode); /* ioctl.c */ -extern int ext3_ioctl (struct inode *, struct file *, unsigned int, +extern int ext4_ioctl (struct inode *, struct file *, unsigned int, unsigned long); -extern long ext3_compat_ioctl (struct file *, unsigned int, unsigned long); +extern long ext4_compat_ioctl (struct file *, unsigned int, unsigned long); /* namei.c */ -extern int ext3_orphan_add(handle_t *, struct inode *); -extern int ext3_orphan_del(handle_t *, struct inode *); -extern int ext3_htree_fill_tree(struct file *dir_file, __u32 start_hash, +extern int ext4_orphan_add(handle_t *, struct inode *); +extern int ext4_orphan_del(handle_t *, struct inode *); +extern int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash, __u32 start_minor_hash, __u32 *next_hash); /* resize.c */ -extern int ext3_group_add(struct super_block *sb, - struct ext3_new_group_data *input); -extern int ext3_group_extend(struct super_block *sb, - struct ext3_super_block *es, - ext3_fsblk_t n_blocks_count); +extern int ext4_group_add(struct super_block *sb, + struct ext4_new_group_data *input); +extern int ext4_group_extend(struct super_block *sb, + struct ext4_super_block *es, + ext4_fsblk_t n_blocks_count); /* super.c */ -extern void ext3_error (struct super_block *, const char *, const char *, ...) +extern void ext4_error (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); -extern void __ext3_std_error (struct super_block *, const char *, int); -extern void ext3_abort (struct super_block *, const char *, const char *, ...) +extern void __ext4_std_error (struct super_block *, const char *, int); +extern void ext4_abort (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); -extern void ext3_warning (struct super_block *, const char *, const char *, ...) +extern void ext4_warning (struct super_block *, const char *, const char *, ...) 
__attribute__ ((format (printf, 3, 4))); -extern void ext3_update_dynamic_rev (struct super_block *sb); +extern void ext4_update_dynamic_rev (struct super_block *sb); -#define ext3_std_error(sb, errno) \ +#define ext4_std_error(sb, errno) \ do { \ if ((errno)) \ - __ext3_std_error((sb), __FUNCTION__, (errno)); \ + __ext4_std_error((sb), __FUNCTION__, (errno)); \ } while (0) /* @@ -865,21 +865,21 @@ do { \ */ /* dir.c */ -extern const struct file_operations ext3_dir_operations; +extern const struct file_operations ext4_dir_operations; /* file.c */ -extern struct inode_operations ext3_file_inode_operations; -extern const struct file_operations ext3_file_operations; +extern struct inode_operations ext4_file_inode_operations; +extern const struct file_operations ext4_file_operations; /* namei.c */ -extern struct inode_operations ext3_dir_inode_operations; -extern struct inode_operations ext3_special_inode_operations; +extern struct inode_operations ext4_dir_inode_operations; +extern struct inode_operations ext4_special_inode_operations; /* symlink.c */ -extern struct inode_operations ext3_symlink_inode_operations; -extern struct inode_operations ext3_fast_symlink_inode_operations; +extern struct inode_operations ext4_symlink_inode_operations; +extern struct inode_operations ext4_fast_symlink_inode_operations; #endif /* __KERNEL__ */ -#endif /* _LINUX_EXT3_FS_H */ +#endif /* _LINUX_EXT4_FS_H */ diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h index 4395e5206746..18a6ce98537f 100644 --- a/include/linux/ext4_fs_i.h +++ b/include/linux/ext4_fs_i.h @@ -1,5 +1,5 @@ /* - * linux/include/linux/ext3_fs_i.h + * linux/include/linux/ext4_fs_i.h * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) @@ -13,8 +13,8 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ -#ifndef _LINUX_EXT3_FS_I -#define _LINUX_EXT3_FS_I +#ifndef _LINUX_EXT4_FS_I +#define _LINUX_EXT4_FS_I #include <linux/rwsem.h> #include <linux/rbtree.h> @@ -22,43 +22,43 @@ #include <linux/mutex.h> /* data type for block offset of block group */ -typedef int ext3_grpblk_t; +typedef int ext4_grpblk_t; /* data type for filesystem-wide blocks number */ -typedef unsigned long ext3_fsblk_t; +typedef unsigned long ext4_fsblk_t; #define E3FSBLK "%lu" -struct ext3_reserve_window { - ext3_fsblk_t _rsv_start; /* First byte reserved */ - ext3_fsblk_t _rsv_end; /* Last byte reserved or 0 */ +struct ext4_reserve_window { + ext4_fsblk_t _rsv_start; /* First byte reserved */ + ext4_fsblk_t _rsv_end; /* Last byte reserved or 0 */ }; -struct ext3_reserve_window_node { +struct ext4_reserve_window_node { struct rb_node rsv_node; __u32 rsv_goal_size; __u32 rsv_alloc_hit; - struct ext3_reserve_window rsv_window; + struct ext4_reserve_window rsv_window; }; -struct ext3_block_alloc_info { +struct ext4_block_alloc_info { /* information about reservation window */ - struct ext3_reserve_window_node rsv_window_node; + struct ext4_reserve_window_node rsv_window_node; /* - * was i_next_alloc_block in ext3_inode_info + * was i_next_alloc_block in ext4_inode_info * is the logical (file-relative) number of the * most-recently-allocated block in this file. * We use this for detecting linearly ascending allocation requests. */ __u32 last_alloc_logical_block; /* - * Was i_next_alloc_goal in ext3_inode_info + * Was i_next_alloc_goal in ext4_inode_info * is the *physical* companion to i_next_alloc_block. * it is the physical block number of the block which was most-recently * allocated to this file. This gives us the goal (target) for the next * allocation when we detect linearly ascending requests. */ - ext3_fsblk_t last_alloc_physical_block; + ext4_fsblk_t last_alloc_physical_block; }; #define rsv_start rsv_window._rsv_start @@ -67,15 +67,15 @@ struct ext3_block_alloc_info { /* * third extended file system inode data in memory */ -struct ext3_inode_info { +struct ext4_inode_info { __le32 i_data[15]; /* unconverted */ __u32 i_flags; -#ifdef EXT3_FRAGMENTS +#ifdef EXT4_FRAGMENTS __u32 i_faddr; __u8 i_frag_no; __u8 i_frag_size; #endif - ext3_fsblk_t i_file_acl; + ext4_fsblk_t i_file_acl; __u32 i_dir_acl; __u32 i_dtime; @@ -87,13 +87,13 @@ struct ext3_inode_info { * near to their parent directory's inode. */ __u32 i_block_group; - __u32 i_state; /* Dynamic state flags for ext3 */ + __u32 i_state; /* Dynamic state flags for ext4 */ /* block reservation info */ - struct ext3_block_alloc_info *i_block_alloc_info; + struct ext4_block_alloc_info *i_block_alloc_info; __u32 i_dir_start_lookup; -#ifdef CONFIG_EXT3_FS_XATTR +#ifdef CONFIG_EXT4DEV_FS_XATTR /* * Extended attributes can be read independently of the main file * data. Taking i_mutex even when reading would cause contention @@ -103,7 +103,7 @@ */ struct rw_semaphore xattr_sem; #endif -#ifdef CONFIG_EXT3_FS_POSIX_ACL +#ifdef CONFIG_EXT4DEV_FS_POSIX_ACL struct posix_acl *i_acl; struct posix_acl *i_default_acl; #endif @@ -113,7 +113,7 @@ struct ext3_inode_info { /* * i_disksize keeps track of what the inode size is ON DISK, not * in memory. During truncate, i_size is set to the new size by - * the VFS prior to calling ext3_truncate(), but the filesystem won't + * the VFS prior to calling ext4_truncate(), but the filesystem won't * set i_disksize to 0 until the truncate is actually under way. * * The intent is that i_disksize always represents the blocks which @@ -123,7 +123,7 @@ * * The only time when i_disksize and i_size may be different is when * a truncate is in progress. The only things which change i_disksize - * are ext3_get_block (growth) and ext3_truncate (shrinkth). + * are ext4_get_block (growth) and ext4_truncate (shrinkth). */ loff_t i_disksize; @@ -131,10 +131,10 @@ __u16 i_extra_isize; /* - * truncate_mutex is for serialising ext3_truncate() against - * ext3_getblock(). In the 2.4 ext2 design, great chunks of inode's + * truncate_mutex is for serialising ext4_truncate() against + * ext4_getblock(). In the 2.4 ext2 design, great chunks of inode's * data tree are chopped off during truncate. We can't do that in - * ext3 because whenever we perform intermediate commits during + * ext4 because whenever we perform intermediate commits during * truncate, the inode and all the metadata blocks *must* be in a * consistent state which allows truncation of the orphans to restart * during recovery. 
Hence we must fix the get_block-vs-truncate race @@ -144,4 +144,4 @@ struct ext3_inode_info { struct inode vfs_inode; }; -#endif /* _LINUX_EXT3_FS_I */ +#endif /* _LINUX_EXT4_FS_I */ diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h index f61309c81cc4..ce4856d70100 100644 --- a/include/linux/ext4_fs_sb.h +++ b/include/linux/ext4_fs_sb.h @@ -1,5 +1,5 @@ /* - * linux/include/linux/ext3_fs_sb.h + * linux/include/linux/ext4_fs_sb.h * * Copyright (C) 1992, 1993, 1994, 1995 * Remy Card (card@masi.ibp.fr) @@ -13,8 +13,8 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ -#ifndef _LINUX_EXT3_FS_SB -#define _LINUX_EXT3_FS_SB +#ifndef _LINUX_EXT4_FS_SB +#define _LINUX_EXT4_FS_SB #ifdef __KERNEL__ #include <linux/timer.h> @@ -27,7 +27,7 @@ /* * third extended-fs super-block data in memory */ -struct ext3_sb_info { +struct ext4_sb_info { unsigned long s_frag_size; /* Size of a fragment in bytes */ unsigned long s_frags_per_block;/* Number of fragments per block */ unsigned long s_inodes_per_block;/* Number of inodes per block */ @@ -39,7 +39,7 @@ struct ext3_sb_info { unsigned long s_desc_per_block; /* Number of group descriptors per block */ unsigned long s_groups_count; /* Number of groups in the fs */ struct buffer_head * s_sbh; /* Buffer containing the super block */ - struct ext3_super_block * s_es; /* Pointer to the super block in the buffer */ + struct ext4_super_block * s_es; /* Pointer to the super block in the buffer */ struct buffer_head ** s_group_desc; unsigned long s_mount_opt; uid_t s_resuid; @@ -62,7 +62,7 @@ struct ext3_sb_info { /* root of the per fs reservation window tree */ spinlock_t s_rsv_window_lock; struct rb_root s_rsv_window_root; - struct ext3_reserve_window_node s_rsv_window_head; + struct ext4_reserve_window_node s_rsv_window_head; /* Journaling */ struct inode * s_journal_inode; @@ -80,4 +80,4 @@ struct ext3_sb_info { #endif }; -#endif /* _LINUX_EXT3_FS_SB */ +#endif /* _LINUX_EXT4_FS_SB */ diff --git a/include/linux/ext4_jbd.h b/include/linux/ext4_jbd.h index ce0e6109aff0..3dbf6c779037 100644 --- a/include/linux/ext4_jbd.h +++ b/include/linux/ext4_jbd.h @@ -1,5 +1,5 @@ /* - * linux/include/linux/ext3_jbd.h + * linux/include/linux/ext4_jbd.h * * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 * @@ -9,17 +9,17 @@ * the terms of the GNU General Public License, version 2, or at your * option, any later version, incorporated herein by reference. * - * Ext3-specific journaling extensions. + * Ext4-specific journaling extensions. */ -#ifndef _LINUX_EXT3_JBD_H -#define _LINUX_EXT3_JBD_H +#ifndef _LINUX_EXT4_JBD_H +#define _LINUX_EXT4_JBD_H #include <linux/fs.h> #include <linux/jbd.h> -#include <linux/ext3_fs.h> +#include <linux/ext4_fs.h> -#define EXT3_JOURNAL(inode) (EXT3_SB((inode)->i_sb)->s_journal) +#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal) /* Define the number of blocks we need to account to a transaction to * modify one block of data. @@ -28,13 +28,13 @@ * indirection blocks, the group and superblock summaries, and the data * block to complete the transaction. */ -#define EXT3_SINGLEDATA_TRANS_BLOCKS 8U +#define EXT4_SINGLEDATA_TRANS_BLOCKS 8U /* Extended attribute operations touch at most two data buffers, * two bitmap buffers, and two group summaries, in addition to the inode * and the superblock, which are already accounted for. */ -#define EXT3_XATTR_TRANS_BLOCKS 6U +#define EXT4_XATTR_TRANS_BLOCKS 6U /* Define the minimum size for a transaction which modifies data. 
This * needs to take into account the fact that we may end up modifying two @@ -42,15 +42,15 @@ * superblock only gets updated once, of course, so don't bother * counting that again for the quota updates. */ -#define EXT3_DATA_TRANS_BLOCKS(sb) (EXT3_SINGLEDATA_TRANS_BLOCKS + \ - EXT3_XATTR_TRANS_BLOCKS - 2 + \ - 2*EXT3_QUOTA_TRANS_BLOCKS(sb)) +#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS + \ + EXT4_XATTR_TRANS_BLOCKS - 2 + \ + 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) /* Delete operations potentially hit one directory's namespace plus an * entire inode, plus arbitrary amounts of bitmap/indirection data. Be * generous. We can grow the delete transaction later if necessary. */ -#define EXT3_DELETE_TRANS_BLOCKS(sb) (2 * EXT3_DATA_TRANS_BLOCKS(sb) + 64) +#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64) /* Define an arbitrary limit for the amount of data we will anticipate * writing to any given transaction. For unbounded transactions such as @@ -58,7 +58,7 @@ * start off at the maximum transaction size and grow the transaction * optimistically as we go. */ -#define EXT3_MAX_TRANS_DATA 64U +#define EXT4_MAX_TRANS_DATA 64U /* We break up a large truncate or write transaction once the handle's * buffer credits gets this low, we need either to extend the @@ -67,202 +67,202 @@ * one block, plus two quota updates. Quota allocations are not * needed. */ -#define EXT3_RESERVE_TRANS_BLOCKS 12U +#define EXT4_RESERVE_TRANS_BLOCKS 12U -#define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 +#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8 #ifdef CONFIG_QUOTA /* Amount of blocks needed for quota update - we know that the structure was * allocated so we need to update only inode+data */ -#define EXT3_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) +#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) /* Amount of blocks needed for quota insert/delete - we do some block writes * but inode, sb and group updates are done only once */ -#define EXT3_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ - (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0) -#define EXT3_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ - (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0) +#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ + (EXT4_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0) +#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ + (EXT4_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0) #else -#define EXT3_QUOTA_TRANS_BLOCKS(sb) 0 -#define EXT3_QUOTA_INIT_BLOCKS(sb) 0 -#define EXT3_QUOTA_DEL_BLOCKS(sb) 0 +#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0 +#define EXT4_QUOTA_INIT_BLOCKS(sb) 0 +#define EXT4_QUOTA_DEL_BLOCKS(sb) 0 #endif int -ext3_mark_iloc_dirty(handle_t *handle, +ext4_mark_iloc_dirty(handle_t *handle, struct inode *inode, - struct ext3_iloc *iloc); + struct ext4_iloc *iloc); /* * On success, We end up with an outstanding reference count against * iloc->bh. This _must_ be cleaned up later. */ -int ext3_reserve_inode_write(handle_t *handle, struct inode *inode, - struct ext3_iloc *iloc); +int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, + struct ext4_iloc *iloc); -int ext3_mark_inode_dirty(handle_t *handle, struct inode *inode); +int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); /* - * Wrapper functions with which ext3 calls into JBD. 
The intent here is - * to allow these to be turned into appropriate stubs so ext3 can control - * ext2 filesystems, so ext2+ext3 systems only need one fs. This work hasn't + * Wrapper functions with which ext4 calls into JBD. The intent here is + * to allow these to be turned into appropriate stubs so ext4 can control + * ext2 filesystems, so ext2+ext4 systems only need one fs. This work hasn't * been done yet. */ -void ext3_journal_abort_handle(const char *caller, const char *err_fn, +void ext4_journal_abort_handle(const char *caller, const char *err_fn, struct buffer_head *bh, handle_t *handle, int err); static inline int -__ext3_journal_get_undo_access(const char *where, handle_t *handle, +__ext4_journal_get_undo_access(const char *where, handle_t *handle, struct buffer_head *bh) { int err = journal_get_undo_access(handle, bh); if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); return err; } static inline int -__ext3_journal_get_write_access(const char *where, handle_t *handle, +__ext4_journal_get_write_access(const char *where, handle_t *handle, struct buffer_head *bh) { int err = journal_get_write_access(handle, bh); if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); return err; } static inline void -ext3_journal_release_buffer(handle_t *handle, struct buffer_head *bh) +ext4_journal_release_buffer(handle_t *handle, struct buffer_head *bh) { journal_release_buffer(handle, bh); } static inline int -__ext3_journal_forget(const char *where, handle_t *handle, struct buffer_head *bh) +__ext4_journal_forget(const char *where, handle_t *handle, struct buffer_head *bh) { int err = journal_forget(handle, bh); if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); return err; } static inline int -__ext3_journal_revoke(const char *where, handle_t *handle, +__ext4_journal_revoke(const char *where, handle_t *handle, unsigned long blocknr, struct buffer_head *bh) { int err = journal_revoke(handle, blocknr, bh); if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); return err; } static inline int -__ext3_journal_get_create_access(const char *where, +__ext4_journal_get_create_access(const char *where, handle_t *handle, struct buffer_head *bh) { int err = journal_get_create_access(handle, bh); if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); return err; } static inline int -__ext3_journal_dirty_metadata(const char *where, +__ext4_journal_dirty_metadata(const char *where, handle_t *handle, struct buffer_head *bh) { int err = journal_dirty_metadata(handle, bh); if (err) - ext3_journal_abort_handle(where, __FUNCTION__, bh, handle,err); + ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); return err; } -#define ext3_journal_get_undo_access(handle, bh) \ - __ext3_journal_get_undo_access(__FUNCTION__, (handle), (bh)) -#define ext3_journal_get_write_access(handle, bh) \ - __ext3_journal_get_write_access(__FUNCTION__, (handle), (bh)) -#define ext3_journal_revoke(handle, blocknr, bh) \ - __ext3_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh)) -#define ext3_journal_get_create_access(handle, bh) \ - __ext3_journal_get_create_access(__FUNCTION__, (handle), 
(bh)) -#define ext3_journal_dirty_metadata(handle, bh) \ - __ext3_journal_dirty_metadata(__FUNCTION__, (handle), (bh)) -#define ext3_journal_forget(handle, bh) \ - __ext3_journal_forget(__FUNCTION__, (handle), (bh)) +#define ext4_journal_get_undo_access(handle, bh) \ + __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh)) +#define ext4_journal_get_write_access(handle, bh) \ + __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh)) +#define ext4_journal_revoke(handle, blocknr, bh) \ + __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh)) +#define ext4_journal_get_create_access(handle, bh) \ + __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh)) +#define ext4_journal_dirty_metadata(handle, bh) \ + __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh)) +#define ext4_journal_forget(handle, bh) \ + __ext4_journal_forget(__FUNCTION__, (handle), (bh)) -int ext3_journal_dirty_data(handle_t *handle, struct buffer_head *bh); +int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh); -handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks); -int __ext3_journal_stop(const char *where, handle_t *handle); +handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); +int __ext4_journal_stop(const char *where, handle_t *handle); -static inline handle_t *ext3_journal_start(struct inode *inode, int nblocks) +static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) { - return ext3_journal_start_sb(inode->i_sb, nblocks); + return ext4_journal_start_sb(inode->i_sb, nblocks); } -#define ext3_journal_stop(handle) \ - __ext3_journal_stop(__FUNCTION__, (handle)) +#define ext4_journal_stop(handle) \ + __ext4_journal_stop(__FUNCTION__, (handle)) -static inline handle_t *ext3_journal_current_handle(void) +static inline handle_t *ext4_journal_current_handle(void) { return journal_current_handle(); } -static inline int ext3_journal_extend(handle_t *handle, int nblocks) +static inline int ext4_journal_extend(handle_t *handle, int nblocks) { return journal_extend(handle, nblocks); } -static inline int ext3_journal_restart(handle_t *handle, int nblocks) +static inline int ext4_journal_restart(handle_t *handle, int nblocks) { return journal_restart(handle, nblocks); } -static inline int ext3_journal_blocks_per_page(struct inode *inode) +static inline int ext4_journal_blocks_per_page(struct inode *inode) { return journal_blocks_per_page(inode); } -static inline int ext3_journal_force_commit(journal_t *journal) +static inline int ext4_journal_force_commit(journal_t *journal) { return journal_force_commit(journal); } /* super.c */ -int ext3_force_commit(struct super_block *sb); +int ext4_force_commit(struct super_block *sb); -static inline int ext3_should_journal_data(struct inode *inode) +static inline int ext4_should_journal_data(struct inode *inode) { if (!S_ISREG(inode->i_mode)) return 1; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA) + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) return 1; - if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL) + if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) return 1; return 0; } -static inline int ext3_should_order_data(struct inode *inode) +static inline int ext4_should_order_data(struct inode *inode) { if (!S_ISREG(inode->i_mode)) return 0; - if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL) + if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) return 0; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA) + if (test_opt(inode->i_sb, 
DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) return 1; return 0; } -static inline int ext3_should_writeback_data(struct inode *inode) +static inline int ext4_should_writeback_data(struct inode *inode) { if (!S_ISREG(inode->i_mode)) return 0; - if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL) + if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) return 0; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA) + if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) return 1; return 0; } -#endif /* _LINUX_EXT3_JBD_H */ +#endif /* _LINUX_EXT4_JBD_H */ -- cgit v1.2.3 From 02ea2104c55b625cf5b5d9ba8586a4fc17920f5c Mon Sep 17 00:00:00 2001 From: Mingming Cao <cmm@us.ibm.com> Date: Wed, 11 Oct 2006 01:20:56 -0700 Subject: [PATCH] ext4: enable building of ext4 Originally part of a patch from Mingming Cao and Randy Dunlap. Reorganized by Shaggy. Signed-off-by: Randy Dunlap <rdunlap@xenotime.net> Signed-off-by: Mingming Cao<cmm@us.ibm.com> Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/Kconfig | 75 ++++++++++++++++++++++++++++++++++++++++++++++++--- fs/Makefile | 1 + include/linux/magic.h | 1 + 3 files changed, 73 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/fs/Kconfig b/fs/Kconfig index 599de54451af..ac9ba1c30935 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -140,6 +140,73 @@ config EXT3_FS_SECURITY If you are not using a security module that requires using extended attributes for file security labels, say N. +config EXT4DEV_FS + tristate "Ext4dev/ext4 extended fs support development (EXPERIMENTAL)" + depends on EXPERIMENTAL + select JBD + help + Ext4dev is a predecessor filesystem of the next generation + extended fs ext4, based on ext3 filesystem code. It will be + renamed ext4 fs later, once ext4dev is mature and stabilized. + + Unlike the change from ext2 filesystem to ext3 filesystem, + the on-disk format of ext4dev is not the same as ext3 any more: + it is based on extent maps and it supports 48-bit physical block + numbers. These combined on-disk format changes will allow + ext4dev/ext4 to handle more than 16 TB filesystem volumes -- + a hard limit that ext3 cannot overcome without changing the + on-disk format. + + Other than extent maps and 48-bit block numbers, ext4dev also is + likely to have other new features such as persistent preallocation, + high resolution time stamps, and larger file support etc. These + features will be added to ext4dev gradually. + + To compile this file system support as a module, choose M here. The + module will be called ext4dev. Be aware, however, that the filesystem + of your root partition (the one containing the directory /) cannot + be compiled as a module, and so this could be dangerous. + + If unsure, say N. + +config EXT4DEV_FS_XATTR + bool "Ext4dev extended attributes" + depends on EXT4DEV_FS + default y + help + Extended attributes are name:value pairs associated with inodes by + the kernel or by users (see the attr(5) manual page, or visit + <http://acl.bestbits.at/> for details). + + If unsure, say N. + + You need this for POSIX ACL support on ext4dev/ext4. + +config EXT4DEV_FS_POSIX_ACL + bool "Ext4dev POSIX Access Control Lists" + depends on EXT4DEV_FS_XATTR + select FS_POSIX_ACL + help + POSIX Access Control Lists (ACLs) support permissions for users and + groups beyond the owner/group/world scheme. 
+ + To learn more about Access Control Lists, visit the POSIX ACLs for + Linux website <http://acl.bestbits.at/>. + + If you don't know what Access Control Lists are, say N + +config EXT4DEV_FS_SECURITY + bool "Ext4dev Security Labels" + depends on EXT4DEV_FS_XATTR + help + Security labels support alternative access control models + implemented by security modules like SELinux. This option + enables an extended attribute handler for file security + labels in the ext4dev/ext4 filesystem. + + If you are not using a security module that requires using + extended attributes for file security labels, say N. + config JBD tristate help @@ -173,11 +240,11 @@ config JBD_DEBUG "echo 0 > /proc/sys/fs/jbd-debug". config FS_MBCACHE -# Meta block cache for Extended Attributes (ext2/ext3) +# Meta block cache for Extended Attributes (ext2/ext3/ext4) tristate - depends on EXT2_FS_XATTR || EXT3_FS_XATTR - default y if EXT2_FS=y || EXT3_FS=y - default m if EXT2_FS=m || EXT3_FS=m + depends on EXT2_FS_XATTR || EXT3_FS_XATTR || EXT4DEV_FS_XATTR + default y if EXT2_FS=y || EXT3_FS=y || EXT4DEV_FS=y + default m if EXT2_FS=m || EXT3_FS=m || EXT4DEV_FS=m config REISERFS_FS tristate "Reiserfs support" diff --git a/fs/Makefile b/fs/Makefile index df614eacee86..64396af37b2a 100644 --- a/fs/Makefile +++ b/fs/Makefile @@ -62,6 +62,7 @@ obj-$(CONFIG_DLM) += dlm/ # Do not add any filesystems before this line obj-$(CONFIG_REISERFS_FS) += reiserfs/ obj-$(CONFIG_EXT3_FS) += ext3/ # Before ext2 so root fs can be ext3 +obj-$(CONFIG_EXT4DEV_FS) += ext4/ # Before ext2 so root fs can be ext4dev obj-$(CONFIG_JBD) += jbd/ obj-$(CONFIG_EXT2_FS) += ext2/ obj-$(CONFIG_CRAMFS) += cramfs/ diff --git a/include/linux/magic.h b/include/linux/magic.h index 22036dd2ba36..156c40fc664e 100644 --- a/include/linux/magic.h +++ b/include/linux/magic.h @@ -8,6 +8,7 @@ #define EFS_SUPER_MAGIC 0x414A53 #define EXT2_SUPER_MAGIC 0xEF53 #define EXT3_SUPER_MAGIC 0xEF53 +#define EXT4_SUPER_MAGIC 0xEF53 #define HPFS_SUPER_MAGIC 0xf995e849 #define ISOFS_SUPER_MAGIC 0x9660 #define JFFS2_SUPER_MAGIC 0x72b6 -- cgit v1.2.3 From 470decc613ab2048b619a01028072d932d9086ee Mon Sep 17 00:00:00 2001 From: Dave Kleikamp <shaggy@austin.ibm.com> Date: Wed, 11 Oct 2006 01:20:57 -0700 Subject: [PATCH] jbd2: initial copy of files from jbd This is a simple copy of the files in fs/jbd to fs/jbd2 and /usr/incude/linux/[ext4_]jbd.h to /usr/include/[ext4_]jbd2.h Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/jbd2/Makefile | 7 + fs/jbd2/checkpoint.c | 697 +++++++++++++++ fs/jbd2/commit.c | 911 ++++++++++++++++++++ fs/jbd2/journal.c | 2072 ++++++++++++++++++++++++++++++++++++++++++++ fs/jbd2/recovery.c | 592 +++++++++++++ fs/jbd2/revoke.c | 703 +++++++++++++++ fs/jbd2/transaction.c | 2080 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/ext4_jbd2.h | 268 ++++++ include/linux/jbd2.h | 1098 ++++++++++++++++++++++++ 9 files changed, 8428 insertions(+) create mode 100644 fs/jbd2/Makefile create mode 100644 fs/jbd2/checkpoint.c create mode 100644 fs/jbd2/commit.c create mode 100644 fs/jbd2/journal.c create mode 100644 fs/jbd2/recovery.c create mode 100644 fs/jbd2/revoke.c create mode 100644 fs/jbd2/transaction.c create mode 100644 include/linux/ext4_jbd2.h create mode 100644 include/linux/jbd2.h (limited to 'include') diff --git a/fs/jbd2/Makefile b/fs/jbd2/Makefile new file mode 100644 index 000000000000..54aca4868a36 --- /dev/null +++ b/fs/jbd2/Makefile @@ -0,0 +1,7 
@@ +# +# Makefile for the linux journaling routines. +# + +obj-$(CONFIG_JBD) += jbd.o + +jbd-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c new file mode 100644 index 000000000000..0208cc7ac5d0 --- /dev/null +++ b/fs/jbd2/checkpoint.c @@ -0,0 +1,697 @@ +/* + * linux/fs/checkpoint.c + * + * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 + * + * Copyright 1999 Red Hat Software --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Checkpoint routines for the generic filesystem journaling code. + * Part of the ext2fs journaling system. + * + * Checkpointing is the process of ensuring that a section of the log is + * committed fully to disk, so that that portion of the log can be + * reused. + */ + +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/jbd.h> +#include <linux/errno.h> +#include <linux/slab.h> + +/* + * Unlink a buffer from a transaction checkpoint list. + * + * Called with j_list_lock held. + */ +static inline void __buffer_unlink_first(struct journal_head *jh) +{ + transaction_t *transaction = jh->b_cp_transaction; + + jh->b_cpnext->b_cpprev = jh->b_cpprev; + jh->b_cpprev->b_cpnext = jh->b_cpnext; + if (transaction->t_checkpoint_list == jh) { + transaction->t_checkpoint_list = jh->b_cpnext; + if (transaction->t_checkpoint_list == jh) + transaction->t_checkpoint_list = NULL; + } +} + +/* + * Unlink a buffer from a transaction checkpoint(io) list. + * + * Called with j_list_lock held. + */ +static inline void __buffer_unlink(struct journal_head *jh) +{ + transaction_t *transaction = jh->b_cp_transaction; + + __buffer_unlink_first(jh); + if (transaction->t_checkpoint_io_list == jh) { + transaction->t_checkpoint_io_list = jh->b_cpnext; + if (transaction->t_checkpoint_io_list == jh) + transaction->t_checkpoint_io_list = NULL; + } +} + +/* + * Move a buffer from the checkpoint list to the checkpoint io list + * + * Called with j_list_lock held + */ +static inline void __buffer_relink_io(struct journal_head *jh) +{ + transaction_t *transaction = jh->b_cp_transaction; + + __buffer_unlink_first(jh); + + if (!transaction->t_checkpoint_io_list) { + jh->b_cpnext = jh->b_cpprev = jh; + } else { + jh->b_cpnext = transaction->t_checkpoint_io_list; + jh->b_cpprev = transaction->t_checkpoint_io_list->b_cpprev; + jh->b_cpprev->b_cpnext = jh; + jh->b_cpnext->b_cpprev = jh; + } + transaction->t_checkpoint_io_list = jh; +} + +/* + * Try to release a checkpointed buffer from its transaction. + * Returns 1 if we released it and 2 if we also released the + * whole transaction. + * + * Requires j_list_lock + * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it + */ +static int __try_to_free_cp_buf(struct journal_head *jh) +{ + int ret = 0; + struct buffer_head *bh = jh2bh(jh); + + if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { + JBUFFER_TRACE(jh, "remove from checkpoint list"); + ret = __journal_remove_checkpoint(jh) + 1; + jbd_unlock_bh_state(bh); + journal_remove_journal_head(bh); + BUFFER_TRACE(bh, "release"); + __brelse(bh); + } else { + jbd_unlock_bh_state(bh); + } + return ret; +} + +/* + * __log_wait_for_space: wait until there is space in the journal. + * + * Called under j-state_lock *only*. 
It will be unlocked if we have to wait + * for a checkpoint to free up some space in the log. + */ +void __log_wait_for_space(journal_t *journal) +{ + int nblocks; + assert_spin_locked(&journal->j_state_lock); + + nblocks = jbd_space_needed(journal); + while (__log_space_left(journal) < nblocks) { + if (journal->j_flags & JFS_ABORT) + return; + spin_unlock(&journal->j_state_lock); + mutex_lock(&journal->j_checkpoint_mutex); + + /* + * Test again, another process may have checkpointed while we + * were waiting for the checkpoint lock + */ + spin_lock(&journal->j_state_lock); + nblocks = jbd_space_needed(journal); + if (__log_space_left(journal) < nblocks) { + spin_unlock(&journal->j_state_lock); + log_do_checkpoint(journal); + spin_lock(&journal->j_state_lock); + } + mutex_unlock(&journal->j_checkpoint_mutex); + } +} + +/* + * We were unable to perform jbd_trylock_bh_state() inside j_list_lock. + * The caller must restart a list walk. Wait for someone else to run + * jbd_unlock_bh_state(). + */ +static void jbd_sync_bh(journal_t *journal, struct buffer_head *bh) + __releases(journal->j_list_lock) +{ + get_bh(bh); + spin_unlock(&journal->j_list_lock); + jbd_lock_bh_state(bh); + jbd_unlock_bh_state(bh); + put_bh(bh); +} + +/* + * Clean up transaction's list of buffers submitted for io. + * We wait for any pending IO to complete and remove any clean + * buffers. Note that we take the buffers in the opposite ordering + * from the one in which they were submitted for IO. + * + * Called with j_list_lock held. + */ +static void __wait_cp_io(journal_t *journal, transaction_t *transaction) +{ + struct journal_head *jh; + struct buffer_head *bh; + tid_t this_tid; + int released = 0; + + this_tid = transaction->t_tid; +restart: + /* Did somebody clean up the transaction in the meanwhile? */ + if (journal->j_checkpoint_transactions != transaction || + transaction->t_tid != this_tid) + return; + while (!released && transaction->t_checkpoint_io_list) { + jh = transaction->t_checkpoint_io_list; + bh = jh2bh(jh); + if (!jbd_trylock_bh_state(bh)) { + jbd_sync_bh(journal, bh); + spin_lock(&journal->j_list_lock); + goto restart; + } + if (buffer_locked(bh)) { + atomic_inc(&bh->b_count); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + wait_on_buffer(bh); + /* the journal_head may have gone by now */ + BUFFER_TRACE(bh, "brelse"); + __brelse(bh); + spin_lock(&journal->j_list_lock); + goto restart; + } + /* + * Now in whatever state the buffer currently is, we know that + * it has been written out and so we can drop it from the list + */ + released = __journal_remove_checkpoint(jh); + jbd_unlock_bh_state(bh); + journal_remove_journal_head(bh); + __brelse(bh); + } +} + +#define NR_BATCH 64 + +static void +__flush_batch(journal_t *journal, struct buffer_head **bhs, int *batch_count) +{ + int i; + + ll_rw_block(SWRITE, *batch_count, bhs); + for (i = 0; i < *batch_count; i++) { + struct buffer_head *bh = bhs[i]; + clear_buffer_jwrite(bh); + BUFFER_TRACE(bh, "brelse"); + __brelse(bh); + } + *batch_count = 0; +} + +/* + * Try to flush one buffer from the checkpoint list to disk. + * + * Return 1 if something happened which requires us to abort the current + * scan of the checkpoint list. 
+ * + * Called with j_list_lock held and drops it if 1 is returned + * Called under jbd_lock_bh_state(jh2bh(jh)), and drops it + */ +static int __process_buffer(journal_t *journal, struct journal_head *jh, + struct buffer_head **bhs, int *batch_count) +{ + struct buffer_head *bh = jh2bh(jh); + int ret = 0; + + if (buffer_locked(bh)) { + atomic_inc(&bh->b_count); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + wait_on_buffer(bh); + /* the journal_head may have gone by now */ + BUFFER_TRACE(bh, "brelse"); + __brelse(bh); + ret = 1; + } else if (jh->b_transaction != NULL) { + transaction_t *t = jh->b_transaction; + tid_t tid = t->t_tid; + + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + log_start_commit(journal, tid); + log_wait_commit(journal, tid); + ret = 1; + } else if (!buffer_dirty(bh)) { + J_ASSERT_JH(jh, !buffer_jbddirty(bh)); + BUFFER_TRACE(bh, "remove from checkpoint"); + __journal_remove_checkpoint(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + journal_remove_journal_head(bh); + __brelse(bh); + ret = 1; + } else { + /* + * Important: we are about to write the buffer, and + * possibly block, while still holding the journal lock. + * We cannot afford to let the transaction logic start + * messing around with this buffer before we write it to + * disk, as that would break recoverability. + */ + BUFFER_TRACE(bh, "queue"); + get_bh(bh); + J_ASSERT_BH(bh, !buffer_jwrite(bh)); + set_buffer_jwrite(bh); + bhs[*batch_count] = bh; + __buffer_relink_io(jh); + jbd_unlock_bh_state(bh); + (*batch_count)++; + if (*batch_count == NR_BATCH) { + spin_unlock(&journal->j_list_lock); + __flush_batch(journal, bhs, batch_count); + ret = 1; + } + } + return ret; +} + +/* + * Perform an actual checkpoint. We take the first transaction on the + * list of transactions to be checkpointed and send all its buffers + * to disk. We submit larger chunks of data at once. + * + * The journal should be locked before calling this function. + */ +int log_do_checkpoint(journal_t *journal) +{ + transaction_t *transaction; + tid_t this_tid; + int result; + + jbd_debug(1, "Start checkpoint\n"); + + /* + * First thing: if there are any transactions in the log which + * don't need checkpointing, just eliminate them from the + * journal straight away. + */ + result = cleanup_journal_tail(journal); + jbd_debug(1, "cleanup_journal_tail returned %d\n", result); + if (result <= 0) + return result; + + /* + * OK, we need to start writing disk blocks. Take one transaction + * and write it. + */ + spin_lock(&journal->j_list_lock); + if (!journal->j_checkpoint_transactions) + goto out; + transaction = journal->j_checkpoint_transactions; + this_tid = transaction->t_tid; +restart: + /* + * If someone cleaned up this transaction while we slept, we're + * done (maybe it's a new transaction, but it fell at the same + * address). 
+ */ + if (journal->j_checkpoint_transactions == transaction && + transaction->t_tid == this_tid) { + int batch_count = 0; + struct buffer_head *bhs[NR_BATCH]; + struct journal_head *jh; + int retry = 0; + + while (!retry && transaction->t_checkpoint_list) { + struct buffer_head *bh; + + jh = transaction->t_checkpoint_list; + bh = jh2bh(jh); + if (!jbd_trylock_bh_state(bh)) { + jbd_sync_bh(journal, bh); + retry = 1; + break; + } + retry = __process_buffer(journal, jh, bhs,&batch_count); + if (!retry && lock_need_resched(&journal->j_list_lock)){ + spin_unlock(&journal->j_list_lock); + retry = 1; + break; + } + } + + if (batch_count) { + if (!retry) { + spin_unlock(&journal->j_list_lock); + retry = 1; + } + __flush_batch(journal, bhs, &batch_count); + } + + if (retry) { + spin_lock(&journal->j_list_lock); + goto restart; + } + /* + * Now we have cleaned up the first transaction's checkpoint + * list. Let's clean up the second one + */ + __wait_cp_io(journal, transaction); + } +out: + spin_unlock(&journal->j_list_lock); + result = cleanup_journal_tail(journal); + if (result < 0) + return result; + return 0; +} + +/* + * Check the list of checkpoint transactions for the journal to see if + * we have already got rid of any since the last update of the log tail + * in the journal superblock. If so, we can instantly roll the + * superblock forward to remove those transactions from the log. + * + * Return <0 on error, 0 on success, 1 if there was nothing to clean up. + * + * Called with the journal lock held. + * + * This is the only part of the journaling code which really needs to be + * aware of transaction aborts. Checkpointing involves writing to the + * main filesystem area rather than to the journal, so it can proceed + * even in abort state, but we must not update the journal superblock if + * we have an abort error outstanding. + */ + +int cleanup_journal_tail(journal_t *journal) +{ + transaction_t * transaction; + tid_t first_tid; + unsigned long blocknr, freed; + + /* OK, work out the oldest transaction remaining in the log, and + * the log block it starts at. + * + * If the log is now empty, we need to work out which is the + * next transaction ID we will write, and where it will + * start. */ + + spin_lock(&journal->j_state_lock); + spin_lock(&journal->j_list_lock); + transaction = journal->j_checkpoint_transactions; + if (transaction) { + first_tid = transaction->t_tid; + blocknr = transaction->t_log_start; + } else if ((transaction = journal->j_committing_transaction) != NULL) { + first_tid = transaction->t_tid; + blocknr = transaction->t_log_start; + } else if ((transaction = journal->j_running_transaction) != NULL) { + first_tid = transaction->t_tid; + blocknr = journal->j_head; + } else { + first_tid = journal->j_transaction_sequence; + blocknr = journal->j_head; + } + spin_unlock(&journal->j_list_lock); + J_ASSERT(blocknr != 0); + + /* If the oldest pinned transaction is at the tail of the log + already then there's not much we can do right now. */ + if (journal->j_tail_sequence == first_tid) { + spin_unlock(&journal->j_state_lock); + return 1; + } + + /* OK, update the superblock to recover the freed space. + * Physical blocks come first: have we wrapped beyond the end of + * the log? 
*/ + freed = blocknr - journal->j_tail; + if (blocknr < journal->j_tail) + freed = freed + journal->j_last - journal->j_first; + + jbd_debug(1, + "Cleaning journal tail from %d to %d (offset %lu), " + "freeing %lu\n", + journal->j_tail_sequence, first_tid, blocknr, freed); + + journal->j_free += freed; + journal->j_tail_sequence = first_tid; + journal->j_tail = blocknr; + spin_unlock(&journal->j_state_lock); + if (!(journal->j_flags & JFS_ABORT)) + journal_update_superblock(journal, 1); + return 0; +} + + +/* Checkpoint list management */ + +/* + * journal_clean_one_cp_list + * + * Find all the written-back checkpoint buffers in the given list and release them. + * + * Called with the journal locked. + * Called with j_list_lock held. + * Returns number of buffers reaped (for debug) + */ + +static int journal_clean_one_cp_list(struct journal_head *jh, int *released) +{ + struct journal_head *last_jh; + struct journal_head *next_jh = jh; + int ret, freed = 0; + + *released = 0; + if (!jh) + return 0; + + last_jh = jh->b_cpprev; + do { + jh = next_jh; + next_jh = jh->b_cpnext; + /* Use trylock because of the ranking */ + if (jbd_trylock_bh_state(jh2bh(jh))) { + ret = __try_to_free_cp_buf(jh); + if (ret) { + freed++; + if (ret == 2) { + *released = 1; + return freed; + } + } + } + /* + * This function only frees up some memory + * if possible so we don't have an obligation + * to finish processing. Bail out if preemption + * requested: + */ + if (need_resched()) + return freed; + } while (jh != last_jh); + + return freed; +} + +/* + * journal_clean_checkpoint_list + * + * Find all the written-back checkpoint buffers in the journal and release them. + * + * Called with the journal locked. + * Called with j_list_lock held. + * Returns number of buffers reaped (for debug) + */ + +int __journal_clean_checkpoint_list(journal_t *journal) +{ + transaction_t *transaction, *last_transaction, *next_transaction; + int ret = 0; + int released; + + transaction = journal->j_checkpoint_transactions; + if (!transaction) + goto out; + + last_transaction = transaction->t_cpprev; + next_transaction = transaction; + do { + transaction = next_transaction; + next_transaction = transaction->t_cpnext; + ret += journal_clean_one_cp_list(transaction-> + t_checkpoint_list, &released); + /* + * This function only frees up some memory if possible so we + * don't have an obligation to finish processing. Bail out if + * preemption requested: + */ + if (need_resched()) + goto out; + if (released) + continue; + /* + * It is essential that we are as careful as in the case of + * t_checkpoint_list with removing the buffer from the list as + * we can possibly see not yet submitted buffers on io_list + */ + ret += journal_clean_one_cp_list(transaction-> + t_checkpoint_io_list, &released); + if (need_resched()) + goto out; + } while (transaction != last_transaction); +out: + return ret; +} + +/* + * journal_remove_checkpoint: called after a buffer has been committed + * to disk (either by being write-back flushed to disk, or being + * committed to the log). + * + * We cannot safely clean a transaction out of the log until all of the + * buffer updates committed in that transaction have safely been stored + * elsewhere on disk. To achieve this, all of the buffers in a + * transaction need to be maintained on the transaction's checkpoint + * lists until they have been rewritten, at which point this function is + * called to remove the buffer from the existing transaction's + * checkpoint lists. 
+
+/*
+ * journal_remove_checkpoint: called after a buffer has been committed
+ * to disk (either by being write-back flushed to disk, or being
+ * committed to the log).
+ *
+ * We cannot safely clean a transaction out of the log until all of the
+ * buffer updates committed in that transaction have safely been stored
+ * elsewhere on disk.  To achieve this, all of the buffers in a
+ * transaction need to be maintained on the transaction's checkpoint
+ * lists until they have been rewritten, at which point this function is
+ * called to remove the buffer from the existing transaction's
+ * checkpoint lists.
+ *
+ * The function returns 1 if it frees the transaction, 0 otherwise.
+ *
+ * This function is called with the journal locked.
+ * This function is called with j_list_lock held.
+ * This function is called with jbd_lock_bh_state(jh2bh(jh)) held.
+ */
+
+int __journal_remove_checkpoint(struct journal_head *jh)
+{
+	transaction_t *transaction;
+	journal_t *journal;
+	int ret = 0;
+
+	JBUFFER_TRACE(jh, "entry");
+
+	if ((transaction = jh->b_cp_transaction) == NULL) {
+		JBUFFER_TRACE(jh, "not on transaction");
+		goto out;
+	}
+	journal = transaction->t_journal;
+
+	__buffer_unlink(jh);
+	jh->b_cp_transaction = NULL;
+
+	if (transaction->t_checkpoint_list != NULL ||
+	    transaction->t_checkpoint_io_list != NULL)
+		goto out;
+	JBUFFER_TRACE(jh, "transaction has no more buffers");
+
+	/*
+	 * There is one special case to worry about: if we have just pulled the
+	 * buffer off a committing transaction's forget list, then even if the
+	 * checkpoint list is empty, the transaction obviously cannot be
+	 * dropped!
+	 *
+	 * The locking here around j_committing_transaction is a bit sleazy.
+	 * See the comment at the end of journal_commit_transaction().
+	 */
+	if (transaction == journal->j_committing_transaction) {
+		JBUFFER_TRACE(jh, "belongs to committing transaction");
+		goto out;
+	}
+
+	/* OK, that was the last buffer for the transaction: we can now
+	   safely remove this transaction from the log */
+
+	__journal_drop_transaction(journal, transaction);
+
+	/* Just in case anybody was waiting for more transactions to be
+	   checkpointed... */
+	wake_up(&journal->j_wait_logspace);
+	ret = 1;
+out:
+	JBUFFER_TRACE(jh, "exit");
+	return ret;
+}
+
+/*
+ * journal_insert_checkpoint: put a committed buffer onto a checkpoint
+ * list so that we know when it is safe to clean the transaction out of
+ * the log.
+ *
+ * Called with the journal locked.
+ * Called with j_list_lock held.
+ */
+void __journal_insert_checkpoint(struct journal_head *jh,
+			       transaction_t *transaction)
+{
+	JBUFFER_TRACE(jh, "entry");
+	J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
+	J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
+
+	jh->b_cp_transaction = transaction;
+
+	if (!transaction->t_checkpoint_list) {
+		jh->b_cpnext = jh->b_cpprev = jh;
+	} else {
+		jh->b_cpnext = transaction->t_checkpoint_list;
+		jh->b_cpprev = transaction->t_checkpoint_list->b_cpprev;
+		jh->b_cpprev->b_cpnext = jh;
+		jh->b_cpnext->b_cpprev = jh;
+	}
+	transaction->t_checkpoint_list = jh;
+}
+
+/*
+ * We've finished with this transaction structure: adios...
+ *
+ * The transaction must have no links except for the checkpoint by this
+ * point.
+ *
+ * Called with the journal locked.
+ * Called with j_list_lock held.
+ */
+
+void __journal_drop_transaction(journal_t *journal, transaction_t *transaction)
+{
+	assert_spin_locked(&journal->j_list_lock);
+	if (transaction->t_cpnext) {
+		transaction->t_cpnext->t_cpprev = transaction->t_cpprev;
+		transaction->t_cpprev->t_cpnext = transaction->t_cpnext;
+		if (journal->j_checkpoint_transactions == transaction)
+			journal->j_checkpoint_transactions =
+				transaction->t_cpnext;
+		if (journal->j_checkpoint_transactions == transaction)
+			journal->j_checkpoint_transactions = NULL;
+	}
+
+	J_ASSERT(transaction->t_state == T_FINISHED);
+	J_ASSERT(transaction->t_buffers == NULL);
+	J_ASSERT(transaction->t_sync_datalist == NULL);
+	J_ASSERT(transaction->t_forget == NULL);
+	J_ASSERT(transaction->t_iobuf_list == NULL);
+	J_ASSERT(transaction->t_shadow_list == NULL);
+	J_ASSERT(transaction->t_log_list == NULL);
+	J_ASSERT(transaction->t_checkpoint_list == NULL);
+	J_ASSERT(transaction->t_checkpoint_io_list == NULL);
+	J_ASSERT(transaction->t_updates == 0);
+	J_ASSERT(journal->j_committing_transaction != transaction);
+	J_ASSERT(journal->j_running_transaction != transaction);
+
+	jbd_debug(1, "Dropping transaction %d, all done\n", transaction->t_tid);
+	kfree(transaction);
+}
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
new file mode 100644
index 000000000000..10be51290a27
--- /dev/null
+++ b/fs/jbd2/commit.c
@@ -0,0 +1,911 @@
+/*
+ * linux/fs/jbd/commit.c
+ *
+ * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
+ *
+ * Copyright 1998 Red Hat corp --- All Rights Reserved
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ *
+ * Journal commit routines for the generic filesystem journaling code;
+ * part of the ext2fs journaling system.
+ */
+
+#include <linux/time.h>
+#include <linux/fs.h>
+#include <linux/jbd.h>
+#include <linux/errno.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/smp_lock.h>
+
+/*
+ * Default IO end handler for temporary BJ_IO buffer_heads.
+ */
+static void journal_end_buffer_io_sync(struct buffer_head *bh, int uptodate)
+{
+	BUFFER_TRACE(bh, "");
+	if (uptodate)
+		set_buffer_uptodate(bh);
+	else
+		clear_buffer_uptodate(bh);
+	unlock_buffer(bh);
+}
+
+/*
+ * When an ext3-ordered file is truncated, it is possible that many pages are
+ * not successfully freed, because they are attached to a committing
+ * transaction.  After the transaction commits, these pages are left on the
+ * LRU, with no ->mapping, and with attached buffers.  These pages are
+ * trivially reclaimable by the VM, but their apparent absence upsets the VM
+ * accounting, and it makes the numbers in /proc/meminfo look odd.
+ *
+ * So here, we have a buffer which has just come off the forget list.  Look to
+ * see if we can strip all buffers from the backing page.
+ *
+ * Called under lock_journal(), and possibly under journal_datalist_lock.  The
+ * caller provided us with a ref against the buffer, and we drop that here.
+ */
+static void release_buffer_page(struct buffer_head *bh)
+{
+	struct page *page;
+
+	if (buffer_dirty(bh))
+		goto nope;
+	if (atomic_read(&bh->b_count) != 1)
+		goto nope;
+	page = bh->b_page;
+	if (!page)
+		goto nope;
+	if (page->mapping)
+		goto nope;
+
+	/* OK, it's a truncated page */
+	if (TestSetPageLocked(page))
+		goto nope;
+
+	page_cache_get(page);
+	__brelse(bh);
+	try_to_free_buffers(page);
+	unlock_page(page);
+	page_cache_release(page);
+	return;
+
+nope:
+	__brelse(bh);
+}
+
+/*
+ * Try to acquire jbd_lock_bh_state() against the buffer, when j_list_lock is
+ * held.  For ranking reasons we must trylock.  If we lose, schedule away and
+ * return 0.  j_list_lock is dropped in this case.
+ */
+static int inverted_lock(journal_t *journal, struct buffer_head *bh)
+{
+	if (!jbd_trylock_bh_state(bh)) {
+		spin_unlock(&journal->j_list_lock);
+		schedule();
+		return 0;
+	}
+	return 1;
+}
+
+/* Done it all: now write the commit record.  We should have
+ * cleaned up our previous buffers by now, so if we are in abort
+ * mode we can now just skip the rest of the journal write
+ * entirely.
+ *
+ * Returns 1 if the journal needs to be aborted or 0 on success
+ */
+static int journal_write_commit_record(journal_t *journal,
+					transaction_t *commit_transaction)
+{
+	struct journal_head *descriptor;
+	struct buffer_head *bh;
+	int i, ret;
+	int barrier_done = 0;
+
+	if (is_journal_aborted(journal))
+		return 0;
+
+	descriptor = journal_get_descriptor_buffer(journal);
+	if (!descriptor)
+		return 1;
+
+	bh = jh2bh(descriptor);
+
+	/* AKPM: buglet - add `i' to tmp! */
+	for (i = 0; i < bh->b_size; i += 512) {
+		journal_header_t *tmp = (journal_header_t*)bh->b_data;
+		tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER);
+		tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
+		tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
+	}
+
+	JBUFFER_TRACE(descriptor, "write commit block");
+	set_buffer_dirty(bh);
+	if (journal->j_flags & JFS_BARRIER) {
+		set_buffer_ordered(bh);
+		barrier_done = 1;
+	}
+	ret = sync_dirty_buffer(bh);
+	/* is it possible for another commit to fail at roughly
+	 * the same time as this one?  If so, we don't want to
+	 * trust the barrier flag in the super, but instead want
+	 * to remember if we sent a barrier request
+	 */
+	if (ret == -EOPNOTSUPP && barrier_done) {
+		char b[BDEVNAME_SIZE];
+
+		printk(KERN_WARNING
+		       "JBD: barrier-based sync failed on %s - "
+		       "disabling barriers\n",
+		       bdevname(journal->j_dev, b));
+		spin_lock(&journal->j_state_lock);
+		journal->j_flags &= ~JFS_BARRIER;
+		spin_unlock(&journal->j_state_lock);
+
+		/* And try again, without the barrier */
+		clear_buffer_ordered(bh);
+		set_buffer_uptodate(bh);
+		set_buffer_dirty(bh);
+		ret = sync_dirty_buffer(bh);
+	}
+	put_bh(bh);		/* One for getblk() */
+	journal_put_journal_head(descriptor);
+
+	return (ret == -EIO);
+}
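
The stamping loop in journal_write_commit_record() iterates over every 512-byte sector of the commit block, but, as the "AKPM: buglet" comment concedes, it never adds i to the destination pointer, so only the first sector is actually stamped. A hedged sketch of what the loop was presumably meant to do (this is a correction the patch itself does not ship):

	/*
	 * Sketch only: the per-sector stamping with the offset applied,
	 * per the "add `i' to tmp" note above.  Types come from
	 * <linux/jbd.h>.
	 */
	static void stamp_commit_sectors(struct buffer_head *bh, tid_t tid)
	{
		int i;

		for (i = 0; i < bh->b_size; i += 512) {
			journal_header_t *tmp =
				(journal_header_t *)(bh->b_data + i);

			tmp->h_magic     = cpu_to_be32(JFS_MAGIC_NUMBER);
			tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK);
			tmp->h_sequence  = cpu_to_be32(tid);
		}
	}
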
+
+static void journal_do_submit_data(struct buffer_head **wbuf, int bufs)
+{
+	int i;
+
+	for (i = 0; i < bufs; i++) {
+		wbuf[i]->b_end_io = end_buffer_write_sync;
+		/* We use-up our safety reference in submit_bh() */
+		submit_bh(WRITE, wbuf[i]);
+	}
+}
+
+/*
+ *  Submit all the data buffers to disk
+ */
+static void journal_submit_data_buffers(journal_t *journal,
+				transaction_t *commit_transaction)
+{
+	struct journal_head *jh;
+	struct buffer_head *bh;
+	int locked;
+	int bufs = 0;
+	struct buffer_head **wbuf = journal->j_wbuf;
+
+	/*
+	 * Whenever we unlock the journal and sleep, things can get added
+	 * onto ->t_sync_datalist, so we have to keep looping back to
+	 * write_out_data until we *know* that the list is empty.
+	 *
+	 * Cleanup any flushed data buffers from the data list.  Even in
+	 * abort mode, we want to flush this out as soon as possible.
+	 */
+write_out_data:
+	cond_resched();
+	spin_lock(&journal->j_list_lock);
+
+	while (commit_transaction->t_sync_datalist) {
+		jh = commit_transaction->t_sync_datalist;
+		bh = jh2bh(jh);
+		locked = 0;
+
+		/* Get reference just to make sure buffer does not disappear
+		 * when we are forced to drop various locks */
+		get_bh(bh);
+		/* If the buffer is dirty, we need to submit IO and hence
+		 * we need the buffer lock.  We try to lock the buffer without
+		 * blocking.  If we fail, we need to drop j_list_lock and do
+		 * blocking lock_buffer().
+		 */
+		if (buffer_dirty(bh)) {
+			if (test_set_buffer_locked(bh)) {
+				BUFFER_TRACE(bh, "needs blocking lock");
+				spin_unlock(&journal->j_list_lock);
+				/* Write out all data to prevent deadlocks */
+				journal_do_submit_data(wbuf, bufs);
+				bufs = 0;
+				lock_buffer(bh);
+				spin_lock(&journal->j_list_lock);
+			}
+			locked = 1;
+		}
+		/* We have to get bh_state lock. Again out of order, sigh. */
+		if (!inverted_lock(journal, bh)) {
+			jbd_lock_bh_state(bh);
+			spin_lock(&journal->j_list_lock);
+		}
+		/* Someone already cleaned up the buffer?
*/ + if (!buffer_jbd(bh) + || jh->b_transaction != commit_transaction + || jh->b_jlist != BJ_SyncData) { + jbd_unlock_bh_state(bh); + if (locked) + unlock_buffer(bh); + BUFFER_TRACE(bh, "already cleaned up"); + put_bh(bh); + continue; + } + if (locked && test_clear_buffer_dirty(bh)) { + BUFFER_TRACE(bh, "needs writeout, adding to array"); + wbuf[bufs++] = bh; + __journal_file_buffer(jh, commit_transaction, + BJ_Locked); + jbd_unlock_bh_state(bh); + if (bufs == journal->j_wbufsize) { + spin_unlock(&journal->j_list_lock); + journal_do_submit_data(wbuf, bufs); + bufs = 0; + goto write_out_data; + } + } + else { + BUFFER_TRACE(bh, "writeout complete: unfile"); + __journal_unfile_buffer(jh); + jbd_unlock_bh_state(bh); + if (locked) + unlock_buffer(bh); + journal_remove_journal_head(bh); + /* Once for our safety reference, once for + * journal_remove_journal_head() */ + put_bh(bh); + put_bh(bh); + } + + if (lock_need_resched(&journal->j_list_lock)) { + spin_unlock(&journal->j_list_lock); + goto write_out_data; + } + } + spin_unlock(&journal->j_list_lock); + journal_do_submit_data(wbuf, bufs); +} + +/* + * journal_commit_transaction + * + * The primary function for committing a transaction to the log. This + * function is called by the journal thread to begin a complete commit. + */ +void journal_commit_transaction(journal_t *journal) +{ + transaction_t *commit_transaction; + struct journal_head *jh, *new_jh, *descriptor; + struct buffer_head **wbuf = journal->j_wbuf; + int bufs; + int flags; + int err; + unsigned long blocknr; + char *tagp = NULL; + journal_header_t *header; + journal_block_tag_t *tag = NULL; + int space_left = 0; + int first_tag = 0; + int tag_flag; + int i; + + /* + * First job: lock down the current transaction and wait for + * all outstanding updates to complete. + */ + +#ifdef COMMIT_STATS + spin_lock(&journal->j_list_lock); + summarise_journal_usage(journal); + spin_unlock(&journal->j_list_lock); +#endif + + /* Do we need to erase the effects of a prior journal_flush? */ + if (journal->j_flags & JFS_FLUSHED) { + jbd_debug(3, "super block updated\n"); + journal_update_superblock(journal, 1); + } else { + jbd_debug(3, "superblock not updated\n"); + } + + J_ASSERT(journal->j_running_transaction != NULL); + J_ASSERT(journal->j_committing_transaction == NULL); + + commit_transaction = journal->j_running_transaction; + J_ASSERT(commit_transaction->t_state == T_RUNNING); + + jbd_debug(1, "JBD: starting commit of transaction %d\n", + commit_transaction->t_tid); + + spin_lock(&journal->j_state_lock); + commit_transaction->t_state = T_LOCKED; + + spin_lock(&commit_transaction->t_handle_lock); + while (commit_transaction->t_updates) { + DEFINE_WAIT(wait); + + prepare_to_wait(&journal->j_wait_updates, &wait, + TASK_UNINTERRUPTIBLE); + if (commit_transaction->t_updates) { + spin_unlock(&commit_transaction->t_handle_lock); + spin_unlock(&journal->j_state_lock); + schedule(); + spin_lock(&journal->j_state_lock); + spin_lock(&commit_transaction->t_handle_lock); + } + finish_wait(&journal->j_wait_updates, &wait); + } + spin_unlock(&commit_transaction->t_handle_lock); + + J_ASSERT (commit_transaction->t_outstanding_credits <= + journal->j_max_transaction_buffers); + + /* + * First thing we are allowed to do is to discard any remaining + * BJ_Reserved buffers. 
Note, it is _not_ permissible to assume
+ * that there are no such buffers: if a large filesystem
+ * operation like a truncate needs to split itself over multiple
+ * transactions, then it may try to do a journal_restart() while
+ * there are still BJ_Reserved buffers outstanding.  These must
+ * be released cleanly from the current transaction.
+ *
+ * In this case, the filesystem must still reserve write access
+ * again before modifying the buffer in the new transaction, but
+ * we do not require it to remember exactly which old buffers it
+ * has reserved.  This is consistent with the existing behaviour
+ * that multiple journal_get_write_access() calls to the same
+ * buffer are perfectly permissible.
+ */
+	while (commit_transaction->t_reserved_list) {
+		jh = commit_transaction->t_reserved_list;
+		JBUFFER_TRACE(jh, "reserved, unused: refile");
+		/*
+		 * A journal_get_undo_access()+journal_release_buffer() may
+		 * leave undo-committed data.
+		 */
+		if (jh->b_committed_data) {
+			struct buffer_head *bh = jh2bh(jh);
+
+			jbd_lock_bh_state(bh);
+			jbd_slab_free(jh->b_committed_data, bh->b_size);
+			jh->b_committed_data = NULL;
+			jbd_unlock_bh_state(bh);
+		}
+		journal_refile_buffer(journal, jh);
+	}
+
+	/*
+	 * Now try to drop any written-back buffers from the journal's
+	 * checkpoint lists.  We do this *before* commit because it potentially
+	 * frees some memory
+	 */
+	spin_lock(&journal->j_list_lock);
+	__journal_clean_checkpoint_list(journal);
+	spin_unlock(&journal->j_list_lock);
+
+	jbd_debug (3, "JBD: commit phase 1\n");
+
+	/*
+	 * Switch to a new revoke table.
+	 */
+	journal_switch_revoke_table(journal);
+
+	commit_transaction->t_state = T_FLUSH;
+	journal->j_committing_transaction = commit_transaction;
+	journal->j_running_transaction = NULL;
+	commit_transaction->t_log_start = journal->j_head;
+	wake_up(&journal->j_wait_transaction_locked);
+	spin_unlock(&journal->j_state_lock);
+
+	jbd_debug (3, "JBD: commit phase 2\n");
+
+	/*
+	 * First, drop modified flag: all accesses to the buffers
+	 * will be tracked for a new transaction only -bzzz
+	 */
+	spin_lock(&journal->j_list_lock);
+	if (commit_transaction->t_buffers) {
+		new_jh = jh = commit_transaction->t_buffers->b_tnext;
+		do {
+			J_ASSERT_JH(new_jh, new_jh->b_modified == 1 ||
+					new_jh->b_modified == 0);
+			new_jh->b_modified = 0;
+			new_jh = new_jh->b_tnext;
+		} while (new_jh != jh);
+	}
+	spin_unlock(&journal->j_list_lock);
+
+	/*
+	 * Now start flushing things to disk, in the order they appear
+	 * on the transaction lists.  Data blocks go first.
+	 */
+	err = 0;
+	journal_submit_data_buffers(journal, commit_transaction);
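
journal_submit_data_buffers() above accumulates dirty data buffers into the preallocated wbuf[] array and fires them at the block layer whenever the array fills or the scan must restart. Stripped of the list and lock juggling, the batching pattern looks like this (a sketch; the helper names and fixed array size stand in for journal->j_wbuf/j_wbufsize):

	#include <linux/fs.h>
	#include <linux/buffer_head.h>

	#define NBATCH 64	/* stands in for journal->j_wbufsize */

	/* Submit everything batched so far and reset the count. */
	static void flush_batch(struct buffer_head **wbuf, int *bufs)
	{
		int i;

		for (i = 0; i < *bufs; i++) {
			wbuf[i]->b_end_io = end_buffer_write_sync;
			submit_bh(WRITE, wbuf[i]);
		}
		*bufs = 0;
	}

	/* Queue one buffer, flushing when the batch is full. */
	static void batch_bh(struct buffer_head **wbuf, int *bufs,
			     struct buffer_head *bh)
	{
		wbuf[(*bufs)++] = bh;
		if (*bufs == NBATCH)
			flush_batch(wbuf, bufs);
	}
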
+
+	/*
+	 * Wait for all previously submitted IO to complete.
+	 */
+	spin_lock(&journal->j_list_lock);
+	while (commit_transaction->t_locked_list) {
+		struct buffer_head *bh;
+
+		jh = commit_transaction->t_locked_list->b_tprev;
+		bh = jh2bh(jh);
+		get_bh(bh);
+		if (buffer_locked(bh)) {
+			spin_unlock(&journal->j_list_lock);
+			wait_on_buffer(bh);
+			if (unlikely(!buffer_uptodate(bh)))
+				err = -EIO;
+			spin_lock(&journal->j_list_lock);
+		}
+		if (!inverted_lock(journal, bh)) {
+			put_bh(bh);
+			spin_lock(&journal->j_list_lock);
+			continue;
+		}
+		if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) {
+			__journal_unfile_buffer(jh);
+			jbd_unlock_bh_state(bh);
+			journal_remove_journal_head(bh);
+			put_bh(bh);
+		} else {
+			jbd_unlock_bh_state(bh);
+		}
+		put_bh(bh);
+		cond_resched_lock(&journal->j_list_lock);
+	}
+	spin_unlock(&journal->j_list_lock);
+
+	if (err)
+		__journal_abort_hard(journal);
+
+	journal_write_revoke_records(journal, commit_transaction);
+
+	jbd_debug(3, "JBD: commit phase 2\n");
+
+	/*
+	 * If we found any dirty or locked buffers, then we should have
+	 * looped back up to the write_out_data label.  If there weren't
+	 * any then journal_clean_data_list should have wiped the list
+	 * clean by now, so check that it is in fact empty.
+	 */
+	J_ASSERT (commit_transaction->t_sync_datalist == NULL);
+
+	jbd_debug (3, "JBD: commit phase 3\n");
+
+	/*
+	 * Way to go: we have now written out all of the data for a
+	 * transaction!  Now comes the tricky part: we need to write out
+	 * metadata.  Loop over the transaction's entire buffer list:
+	 */
+	commit_transaction->t_state = T_COMMIT;
+
+	descriptor = NULL;
+	bufs = 0;
+	while (commit_transaction->t_buffers) {
+
+		/* Find the next buffer to be journaled... */
+
+		jh = commit_transaction->t_buffers;
+
+		/* If we're in abort mode, we just un-journal the buffer and
+		   release it for background writing. */
+
+		if (is_journal_aborted(journal)) {
+			JBUFFER_TRACE(jh, "journal is aborting: refile");
+			journal_refile_buffer(journal, jh);
+			/* If that was the last one, we need to clean up
+			 * any descriptor buffers which may have been
+			 * already allocated, even if we are now
+			 * aborting. */
+			if (!commit_transaction->t_buffers)
+				goto start_journal_io;
+			continue;
+		}
+
+		/* Make sure we have a descriptor block in which to
+		   record the metadata buffer. */
+
+		if (!descriptor) {
+			struct buffer_head *bh;
+
+			J_ASSERT (bufs == 0);
+
+			jbd_debug(4, "JBD: get descriptor\n");
+
+			descriptor = journal_get_descriptor_buffer(journal);
+			if (!descriptor) {
+				__journal_abort_hard(journal);
+				continue;
+			}
+
+			bh = jh2bh(descriptor);
+			jbd_debug(4, "JBD: got buffer %llu (%p)\n",
+				(unsigned long long)bh->b_blocknr, bh->b_data);
+			header = (journal_header_t *)&bh->b_data[0];
+			header->h_magic     = cpu_to_be32(JFS_MAGIC_NUMBER);
+			header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK);
+			header->h_sequence  = cpu_to_be32(commit_transaction->t_tid);
+
+			tagp = &bh->b_data[sizeof(journal_header_t)];
+			space_left = bh->b_size - sizeof(journal_header_t);
+			first_tag = 1;
+			set_buffer_jwrite(bh);
+			set_buffer_dirty(bh);
+			wbuf[bufs++] = bh;
+
+			/* Record it so that we can wait for IO
+			   completion later */
+			BUFFER_TRACE(bh, "ph3: file as descriptor");
+			journal_file_buffer(descriptor, commit_transaction,
+					BJ_LogCtl);
+		}
+
+		/* Where is the buffer to be written? */
+
+		err = journal_next_log_block(journal, &blocknr);
+		/* If the block mapping failed, just abandon the buffer
+		   and repeat this loop: we'll fall into the
+		   refile-on-abort condition above.
*/ + if (err) { + __journal_abort_hard(journal); + continue; + } + + /* + * start_this_handle() uses t_outstanding_credits to determine + * the free space in the log, but this counter is changed + * by journal_next_log_block() also. + */ + commit_transaction->t_outstanding_credits--; + + /* Bump b_count to prevent truncate from stumbling over + the shadowed buffer! @@@ This can go if we ever get + rid of the BJ_IO/BJ_Shadow pairing of buffers. */ + atomic_inc(&jh2bh(jh)->b_count); + + /* Make a temporary IO buffer with which to write it out + (this will requeue both the metadata buffer and the + temporary IO buffer). new_bh goes on BJ_IO*/ + + set_bit(BH_JWrite, &jh2bh(jh)->b_state); + /* + * akpm: journal_write_metadata_buffer() sets + * new_bh->b_transaction to commit_transaction. + * We need to clean this up before we release new_bh + * (which is of type BJ_IO) + */ + JBUFFER_TRACE(jh, "ph3: write metadata"); + flags = journal_write_metadata_buffer(commit_transaction, + jh, &new_jh, blocknr); + set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); + wbuf[bufs++] = jh2bh(new_jh); + + /* Record the new block's tag in the current descriptor + buffer */ + + tag_flag = 0; + if (flags & 1) + tag_flag |= JFS_FLAG_ESCAPE; + if (!first_tag) + tag_flag |= JFS_FLAG_SAME_UUID; + + tag = (journal_block_tag_t *) tagp; + tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr); + tag->t_flags = cpu_to_be32(tag_flag); + tagp += sizeof(journal_block_tag_t); + space_left -= sizeof(journal_block_tag_t); + + if (first_tag) { + memcpy (tagp, journal->j_uuid, 16); + tagp += 16; + space_left -= 16; + first_tag = 0; + } + + /* If there's no more to do, or if the descriptor is full, + let the IO rip! */ + + if (bufs == journal->j_wbufsize || + commit_transaction->t_buffers == NULL || + space_left < sizeof(journal_block_tag_t) + 16) { + + jbd_debug(4, "JBD: Submit %d IOs\n", bufs); + + /* Write an end-of-descriptor marker before + submitting the IOs. "tag" still points to + the last tag we set up. */ + + tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG); + +start_journal_io: + for (i = 0; i < bufs; i++) { + struct buffer_head *bh = wbuf[i]; + lock_buffer(bh); + clear_buffer_dirty(bh); + set_buffer_uptodate(bh); + bh->b_end_io = journal_end_buffer_io_sync; + submit_bh(WRITE, bh); + } + cond_resched(); + + /* Force a new descriptor to be generated next + time round the loop. */ + descriptor = NULL; + bufs = 0; + } + } + + /* Lo and behold: we have just managed to send a transaction to + the log. Before we can commit it, wait for the IO so far to + complete. Control buffers being written are on the + transaction's t_log_list queue, and metadata buffers are on + the t_iobuf_list queue. + + Wait for the buffers in reverse order. That way we are + less likely to be woken up until all IOs have completed, and + so we incur less scheduling load. + */ + + jbd_debug(3, "JBD: commit phase 4\n"); + + /* + * akpm: these are BJ_IO, and j_list_lock is not needed. + * See __journal_try_to_free_buffer. 
+ */ +wait_for_iobuf: + while (commit_transaction->t_iobuf_list != NULL) { + struct buffer_head *bh; + + jh = commit_transaction->t_iobuf_list->b_tprev; + bh = jh2bh(jh); + if (buffer_locked(bh)) { + wait_on_buffer(bh); + goto wait_for_iobuf; + } + if (cond_resched()) + goto wait_for_iobuf; + + if (unlikely(!buffer_uptodate(bh))) + err = -EIO; + + clear_buffer_jwrite(bh); + + JBUFFER_TRACE(jh, "ph4: unfile after journal write"); + journal_unfile_buffer(journal, jh); + + /* + * ->t_iobuf_list should contain only dummy buffer_heads + * which were created by journal_write_metadata_buffer(). + */ + BUFFER_TRACE(bh, "dumping temporary bh"); + journal_put_journal_head(jh); + __brelse(bh); + J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); + free_buffer_head(bh); + + /* We also have to unlock and free the corresponding + shadowed buffer */ + jh = commit_transaction->t_shadow_list->b_tprev; + bh = jh2bh(jh); + clear_bit(BH_JWrite, &bh->b_state); + J_ASSERT_BH(bh, buffer_jbddirty(bh)); + + /* The metadata is now released for reuse, but we need + to remember it against this transaction so that when + we finally commit, we can do any checkpointing + required. */ + JBUFFER_TRACE(jh, "file as BJ_Forget"); + journal_file_buffer(jh, commit_transaction, BJ_Forget); + /* Wake up any transactions which were waiting for this + IO to complete */ + wake_up_bit(&bh->b_state, BH_Unshadow); + JBUFFER_TRACE(jh, "brelse shadowed buffer"); + __brelse(bh); + } + + J_ASSERT (commit_transaction->t_shadow_list == NULL); + + jbd_debug(3, "JBD: commit phase 5\n"); + + /* Here we wait for the revoke record and descriptor record buffers */ + wait_for_ctlbuf: + while (commit_transaction->t_log_list != NULL) { + struct buffer_head *bh; + + jh = commit_transaction->t_log_list->b_tprev; + bh = jh2bh(jh); + if (buffer_locked(bh)) { + wait_on_buffer(bh); + goto wait_for_ctlbuf; + } + if (cond_resched()) + goto wait_for_ctlbuf; + + if (unlikely(!buffer_uptodate(bh))) + err = -EIO; + + BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); + clear_buffer_jwrite(bh); + journal_unfile_buffer(journal, jh); + journal_put_journal_head(jh); + __brelse(bh); /* One for getblk */ + /* AKPM: bforget here */ + } + + jbd_debug(3, "JBD: commit phase 6\n"); + + if (journal_write_commit_record(journal, commit_transaction)) + err = -EIO; + + if (err) + __journal_abort_hard(journal); + + /* End of a transaction! Finally, we can do checkpoint + processing: any buffers committed as a result of this + transaction can be removed from any checkpoint list it was on + before. */ + + jbd_debug(3, "JBD: commit phase 7\n"); + + J_ASSERT(commit_transaction->t_sync_datalist == NULL); + J_ASSERT(commit_transaction->t_buffers == NULL); + J_ASSERT(commit_transaction->t_checkpoint_list == NULL); + J_ASSERT(commit_transaction->t_iobuf_list == NULL); + J_ASSERT(commit_transaction->t_shadow_list == NULL); + J_ASSERT(commit_transaction->t_log_list == NULL); + +restart_loop: + /* + * As there are other places (journal_unmap_buffer()) adding buffers + * to this list we have to be careful and hold the j_list_lock. 
+ */ + spin_lock(&journal->j_list_lock); + while (commit_transaction->t_forget) { + transaction_t *cp_transaction; + struct buffer_head *bh; + + jh = commit_transaction->t_forget; + spin_unlock(&journal->j_list_lock); + bh = jh2bh(jh); + jbd_lock_bh_state(bh); + J_ASSERT_JH(jh, jh->b_transaction == commit_transaction || + jh->b_transaction == journal->j_running_transaction); + + /* + * If there is undo-protected committed data against + * this buffer, then we can remove it now. If it is a + * buffer needing such protection, the old frozen_data + * field now points to a committed version of the + * buffer, so rotate that field to the new committed + * data. + * + * Otherwise, we can just throw away the frozen data now. + */ + if (jh->b_committed_data) { + jbd_slab_free(jh->b_committed_data, bh->b_size); + jh->b_committed_data = NULL; + if (jh->b_frozen_data) { + jh->b_committed_data = jh->b_frozen_data; + jh->b_frozen_data = NULL; + } + } else if (jh->b_frozen_data) { + jbd_slab_free(jh->b_frozen_data, bh->b_size); + jh->b_frozen_data = NULL; + } + + spin_lock(&journal->j_list_lock); + cp_transaction = jh->b_cp_transaction; + if (cp_transaction) { + JBUFFER_TRACE(jh, "remove from old cp transaction"); + __journal_remove_checkpoint(jh); + } + + /* Only re-checkpoint the buffer_head if it is marked + * dirty. If the buffer was added to the BJ_Forget list + * by journal_forget, it may no longer be dirty and + * there's no point in keeping a checkpoint record for + * it. */ + + /* A buffer which has been freed while still being + * journaled by a previous transaction may end up still + * being dirty here, but we want to avoid writing back + * that buffer in the future now that the last use has + * been committed. That's not only a performance gain, + * it also stops aliasing problems if the buffer is left + * behind for writeback and gets reallocated for another + * use in a different page. */ + if (buffer_freed(bh)) { + clear_buffer_freed(bh); + clear_buffer_jbddirty(bh); + } + + if (buffer_jbddirty(bh)) { + JBUFFER_TRACE(jh, "add to new checkpointing trans"); + __journal_insert_checkpoint(jh, commit_transaction); + JBUFFER_TRACE(jh, "refile for checkpoint writeback"); + __journal_refile_buffer(jh); + jbd_unlock_bh_state(bh); + } else { + J_ASSERT_BH(bh, !buffer_dirty(bh)); + /* The buffer on BJ_Forget list and not jbddirty means + * it has been freed by this transaction and hence it + * could not have been reallocated until this + * transaction has committed. *BUT* it could be + * reallocated once we have written all the data to + * disk and before we process the buffer on BJ_Forget + * list. */ + JBUFFER_TRACE(jh, "refile or unfile freed buffer"); + __journal_refile_buffer(jh); + if (!jh->b_transaction) { + jbd_unlock_bh_state(bh); + /* needs a brelse */ + journal_remove_journal_head(bh); + release_buffer_page(bh); + } else + jbd_unlock_bh_state(bh); + } + cond_resched_lock(&journal->j_list_lock); + } + spin_unlock(&journal->j_list_lock); + /* + * This is a bit sleazy. We borrow j_list_lock to protect + * journal->j_committing_transaction in __journal_remove_checkpoint. + * Really, __journal_remove_checkpoint should be using j_state_lock but + * it's a bit hassle to hold that across __journal_remove_checkpoint + */ + spin_lock(&journal->j_state_lock); + spin_lock(&journal->j_list_lock); + /* + * Now recheck if some buffers did not get attached to the transaction + * while the lock was dropped... 
+ */ + if (commit_transaction->t_forget) { + spin_unlock(&journal->j_list_lock); + spin_unlock(&journal->j_state_lock); + goto restart_loop; + } + + /* Done with this transaction! */ + + jbd_debug(3, "JBD: commit phase 8\n"); + + J_ASSERT(commit_transaction->t_state == T_COMMIT); + + commit_transaction->t_state = T_FINISHED; + J_ASSERT(commit_transaction == journal->j_committing_transaction); + journal->j_commit_sequence = commit_transaction->t_tid; + journal->j_committing_transaction = NULL; + spin_unlock(&journal->j_state_lock); + + if (commit_transaction->t_checkpoint_list == NULL) { + __journal_drop_transaction(journal, commit_transaction); + } else { + if (journal->j_checkpoint_transactions == NULL) { + journal->j_checkpoint_transactions = commit_transaction; + commit_transaction->t_cpnext = commit_transaction; + commit_transaction->t_cpprev = commit_transaction; + } else { + commit_transaction->t_cpnext = + journal->j_checkpoint_transactions; + commit_transaction->t_cpprev = + commit_transaction->t_cpnext->t_cpprev; + commit_transaction->t_cpnext->t_cpprev = + commit_transaction; + commit_transaction->t_cpprev->t_cpnext = + commit_transaction; + } + } + spin_unlock(&journal->j_list_lock); + + jbd_debug(1, "JBD: commit %d complete, head %d\n", + journal->j_commit_sequence, journal->j_tail_sequence); + + wake_up(&journal->j_wait_done_commit); +} diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c new file mode 100644 index 000000000000..c518dd8fe60a --- /dev/null +++ b/fs/jbd2/journal.c @@ -0,0 +1,2072 @@ +/* + * linux/fs/jbd/journal.c + * + * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 + * + * Copyright 1998 Red Hat corp --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Generic filesystem journal-writing code; part of the ext2fs + * journaling system. + * + * This file manages journals: areas of disk reserved for logging + * transactional updates. This includes the kernel journaling thread + * which is responsible for scheduling updates to the log. + * + * We do not actually manage the physical storage of the journal in this + * file: that is left to a per-journal policy function, which allows us + * to store the journal within a filesystem-specified area for ext2 + * journaling (ext2 can use a reserved inode for storing the log). 
+ */ + +#include <linux/module.h> +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/jbd.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/smp_lock.h> +#include <linux/init.h> +#include <linux/mm.h> +#include <linux/suspend.h> +#include <linux/pagemap.h> +#include <linux/kthread.h> +#include <linux/poison.h> +#include <linux/proc_fs.h> + +#include <asm/uaccess.h> +#include <asm/page.h> + +EXPORT_SYMBOL(journal_start); +EXPORT_SYMBOL(journal_restart); +EXPORT_SYMBOL(journal_extend); +EXPORT_SYMBOL(journal_stop); +EXPORT_SYMBOL(journal_lock_updates); +EXPORT_SYMBOL(journal_unlock_updates); +EXPORT_SYMBOL(journal_get_write_access); +EXPORT_SYMBOL(journal_get_create_access); +EXPORT_SYMBOL(journal_get_undo_access); +EXPORT_SYMBOL(journal_dirty_data); +EXPORT_SYMBOL(journal_dirty_metadata); +EXPORT_SYMBOL(journal_release_buffer); +EXPORT_SYMBOL(journal_forget); +#if 0 +EXPORT_SYMBOL(journal_sync_buffer); +#endif +EXPORT_SYMBOL(journal_flush); +EXPORT_SYMBOL(journal_revoke); + +EXPORT_SYMBOL(journal_init_dev); +EXPORT_SYMBOL(journal_init_inode); +EXPORT_SYMBOL(journal_update_format); +EXPORT_SYMBOL(journal_check_used_features); +EXPORT_SYMBOL(journal_check_available_features); +EXPORT_SYMBOL(journal_set_features); +EXPORT_SYMBOL(journal_create); +EXPORT_SYMBOL(journal_load); +EXPORT_SYMBOL(journal_destroy); +EXPORT_SYMBOL(journal_update_superblock); +EXPORT_SYMBOL(journal_abort); +EXPORT_SYMBOL(journal_errno); +EXPORT_SYMBOL(journal_ack_err); +EXPORT_SYMBOL(journal_clear_err); +EXPORT_SYMBOL(log_wait_commit); +EXPORT_SYMBOL(journal_start_commit); +EXPORT_SYMBOL(journal_force_commit_nested); +EXPORT_SYMBOL(journal_wipe); +EXPORT_SYMBOL(journal_blocks_per_page); +EXPORT_SYMBOL(journal_invalidatepage); +EXPORT_SYMBOL(journal_try_to_free_buffers); +EXPORT_SYMBOL(journal_force_commit); + +static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); +static void __journal_abort_soft (journal_t *journal, int errno); +static int journal_create_jbd_slab(size_t slab_size); + +/* + * Helper function used to manage commit timeouts + */ + +static void commit_timeout(unsigned long __data) +{ + struct task_struct * p = (struct task_struct *) __data; + + wake_up_process(p); +} + +/* + * kjournald: The main thread function used to manage a logging device + * journal. + * + * This kernel thread is responsible for two things: + * + * 1) COMMIT: Every so often we need to commit the current state of the + * filesystem to disk. The journal thread is responsible for writing + * all of the metadata buffers to disk. + * + * 2) CHECKPOINT: We cannot reuse a used section of the log file until all + * of the data in that part of the log has been rewritten elsewhere on + * the disk. Flushing these old buffers to reclaim space in the log is + * known as checkpointing, and this thread is responsible for that job. + */ + +static int kjournald(void *arg) +{ + journal_t *journal = arg; + transaction_t *transaction; + + /* + * Set up an interval timer which can be used to trigger a commit wakeup + * after the commit interval expires + */ + setup_timer(&journal->j_commit_timer, commit_timeout, + (unsigned long)current); + + /* Record that the journal thread is running */ + journal->j_task = current; + wake_up(&journal->j_wait_done_commit); + + printk(KERN_INFO "kjournald starting. Commit interval %ld seconds\n", + journal->j_commit_interval / HZ); + + /* + * And now, wait forever for commit wakeup events. 
+ */
+	spin_lock(&journal->j_state_lock);
+
+loop:
+	if (journal->j_flags & JFS_UNMOUNT)
+		goto end_loop;
+
+	jbd_debug(1, "commit_sequence=%d, commit_request=%d\n",
+		journal->j_commit_sequence, journal->j_commit_request);
+
+	if (journal->j_commit_sequence != journal->j_commit_request) {
+		jbd_debug(1, "OK, requests differ\n");
+		spin_unlock(&journal->j_state_lock);
+		del_timer_sync(&journal->j_commit_timer);
+		journal_commit_transaction(journal);
+		spin_lock(&journal->j_state_lock);
+		goto loop;
+	}
+
+	wake_up(&journal->j_wait_done_commit);
+	if (freezing(current)) {
+		/*
+		 * The simpler the better. Flushing journal isn't a
+		 * good idea, because that depends on threads that may
+		 * be already stopped.
+		 */
+		jbd_debug(1, "Now suspending kjournald\n");
+		spin_unlock(&journal->j_state_lock);
+		refrigerator();
+		spin_lock(&journal->j_state_lock);
+	} else {
+		/*
+		 * We assume on resume that commits are already there,
+		 * so we don't sleep
+		 */
+		DEFINE_WAIT(wait);
+		int should_sleep = 1;
+
+		prepare_to_wait(&journal->j_wait_commit, &wait,
+				TASK_INTERRUPTIBLE);
+		if (journal->j_commit_sequence != journal->j_commit_request)
+			should_sleep = 0;
+		transaction = journal->j_running_transaction;
+		if (transaction && time_after_eq(jiffies,
+						transaction->t_expires))
+			should_sleep = 0;
+		if (journal->j_flags & JFS_UNMOUNT)
+			should_sleep = 0;
+		if (should_sleep) {
+			spin_unlock(&journal->j_state_lock);
+			schedule();
+			spin_lock(&journal->j_state_lock);
+		}
+		finish_wait(&journal->j_wait_commit, &wait);
+	}
+
+	jbd_debug(1, "kjournald wakes\n");
+
+	/*
+	 * Were we woken up by a commit wakeup event?
+	 */
+	transaction = journal->j_running_transaction;
+	if (transaction && time_after_eq(jiffies, transaction->t_expires)) {
+		journal->j_commit_request = transaction->t_tid;
+		jbd_debug(1, "woke because of timeout\n");
+	}
+	goto loop;
+
+end_loop:
+	spin_unlock(&journal->j_state_lock);
+	del_timer_sync(&journal->j_commit_timer);
+	journal->j_task = NULL;
+	wake_up(&journal->j_wait_done_commit);
+	jbd_debug(1, "Journal thread exiting.\n");
+	return 0;
+}
+
+static void journal_start_thread(journal_t *journal)
+{
+	kthread_run(kjournald, journal, "kjournald");
+	wait_event(journal->j_wait_done_commit, journal->j_task != 0);
+}
+
+static void journal_kill_thread(journal_t *journal)
+{
+	spin_lock(&journal->j_state_lock);
+	journal->j_flags |= JFS_UNMOUNT;
+
+	while (journal->j_task) {
+		wake_up(&journal->j_wait_commit);
+		spin_unlock(&journal->j_state_lock);
+		wait_event(journal->j_wait_done_commit, journal->j_task == 0);
+		spin_lock(&journal->j_state_lock);
+	}
+	spin_unlock(&journal->j_state_lock);
+}
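
journal_write_metadata_buffer(), next, has to cope with one awkward case: a metadata block whose first word happens to equal JFS_MAGIC_NUMBER would be indistinguishable from a journal control block during recovery. The escape test at its core reduces to the following sketch (the standalone helper is illustrative; the real function works on mapped pages, and the magic value is the one defined in include/linux/jbd.h):

	#include <linux/types.h>
	#include <asm/byteorder.h>

	#define JFS_MAGIC_NUMBER 0xc03b3998U	/* from include/linux/jbd.h */

	/*
	 * Returns 1 and zeroes the first word if the block would
	 * masquerade as a journal block; the caller then sets
	 * JFS_FLAG_ESCAPE in the block's tag so recovery knows to
	 * restore the magic number when replaying.
	 */
	static int escape_if_needed(__be32 *first_word)
	{
		if (*first_word == cpu_to_be32(JFS_MAGIC_NUMBER)) {
			*first_word = 0;
			return 1;
		}
		return 0;
	}
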
+
+/*
+ * journal_write_metadata_buffer: write a metadata buffer to the journal.
+ *
+ * Writes a metadata buffer to a given disk block.  The actual IO is not
+ * performed but a new buffer_head is constructed which labels the data
+ * to be written with the correct destination disk block.
+ *
+ * Any magic-number escaping which needs to be done will cause a
+ * copy-out here.  If the buffer happens to start with the
+ * JFS_MAGIC_NUMBER, then we can't write it to the log directly: the
+ * magic number is only written to the log for descriptor blocks.  In
+ * this case, we copy the data and replace the first word with 0, and we
+ * return a result code which indicates that this buffer needs to be
+ * marked as an escaped buffer in the corresponding log descriptor
+ * block.  The missing word can then be restored when the block is read
+ * during recovery.
+ *
+ * If the source buffer has already been modified by a new transaction
+ * since we took the last commit snapshot, we use the frozen copy of
+ * that data for IO.  If we end up using the existing buffer_head's data
+ * for the write, then we *have* to lock the buffer to prevent anyone
+ * else from using and possibly modifying it while the IO is in
+ * progress.
+ *
+ * The function returns a pointer to the buffer_heads to be used for IO.
+ *
+ * We assume that the journal has already been locked in this function.
+ *
+ * Return value:
+ *  <0: Error
+ * >=0: Finished OK
+ *
+ * On success:
+ * Bit 0 set == escape performed on the data
+ * Bit 1 set == buffer copy-out performed (kfree the data after IO)
+ */
+
+int journal_write_metadata_buffer(transaction_t *transaction,
+				  struct journal_head  *jh_in,
+				  struct journal_head **jh_out,
+				  unsigned long blocknr)
+{
+	int need_copy_out = 0;
+	int done_copy_out = 0;
+	int do_escape = 0;
+	char *mapped_data;
+	struct buffer_head *new_bh;
+	struct journal_head *new_jh;
+	struct page *new_page;
+	unsigned int new_offset;
+	struct buffer_head *bh_in = jh2bh(jh_in);
+
+	/*
+	 * The buffer really shouldn't be locked: only the current committing
+	 * transaction is allowed to write it, so nobody else is allowed
+	 * to do any IO.
+	 *
+	 * akpm: except if we're journalling data, and write() output is
+	 * also part of a shared mapping, and another thread has
+	 * decided to launch a writepage() against this buffer.
+	 */
+	J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
+
+	new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
+
+	/*
+	 * If a new transaction has already done a buffer copy-out, then
+	 * we use that version of the data for the commit.
+	 */
+	jbd_lock_bh_state(bh_in);
+repeat:
+	if (jh_in->b_frozen_data) {
+		done_copy_out = 1;
+		new_page = virt_to_page(jh_in->b_frozen_data);
+		new_offset = offset_in_page(jh_in->b_frozen_data);
+	} else {
+		new_page = jh2bh(jh_in)->b_page;
+		new_offset = offset_in_page(jh2bh(jh_in)->b_data);
+	}
+
+	mapped_data = kmap_atomic(new_page, KM_USER0);
+	/*
+	 * Check for escaping
+	 */
+	if (*((__be32 *)(mapped_data + new_offset)) ==
+				cpu_to_be32(JFS_MAGIC_NUMBER)) {
+		need_copy_out = 1;
+		do_escape = 1;
+	}
+	kunmap_atomic(mapped_data, KM_USER0);
+
+	/*
+	 * Do we need to do a data copy?
+	 */
+	if (need_copy_out && !done_copy_out) {
+		char *tmp;
+
+		jbd_unlock_bh_state(bh_in);
+		tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS);
+		jbd_lock_bh_state(bh_in);
+		if (jh_in->b_frozen_data) {
+			jbd_slab_free(tmp, bh_in->b_size);
+			goto repeat;
+		}
+
+		jh_in->b_frozen_data = tmp;
+		mapped_data = kmap_atomic(new_page, KM_USER0);
+		memcpy(tmp, mapped_data + new_offset, jh2bh(jh_in)->b_size);
+		kunmap_atomic(mapped_data, KM_USER0);
+
+		new_page = virt_to_page(tmp);
+		new_offset = offset_in_page(tmp);
+		done_copy_out = 1;
+	}
+
+	/*
+	 * Did we need to do an escaping?  Now we've done all the
+	 * copying, we can finally do so.
+ */
+	if (do_escape) {
+		mapped_data = kmap_atomic(new_page, KM_USER0);
+		*((unsigned int *)(mapped_data + new_offset)) = 0;
+		kunmap_atomic(mapped_data, KM_USER0);
+	}
+
+	/* keep subsequent assertions sane */
+	new_bh->b_state = 0;
+	init_buffer(new_bh, NULL, NULL);
+	atomic_set(&new_bh->b_count, 1);
+	jbd_unlock_bh_state(bh_in);
+
+	new_jh = journal_add_journal_head(new_bh);	/* This sleeps */
+
+	set_bh_page(new_bh, new_page, new_offset);
+	new_jh->b_transaction = NULL;
+	new_bh->b_size = jh2bh(jh_in)->b_size;
+	new_bh->b_bdev = transaction->t_journal->j_dev;
+	new_bh->b_blocknr = blocknr;
+	set_buffer_mapped(new_bh);
+	set_buffer_dirty(new_bh);
+
+	*jh_out = new_jh;
+
+	/*
+	 * The to-be-written buffer needs to get moved to the io queue,
+	 * and the original buffer whose contents we are shadowing or
+	 * copying is moved to the transaction's shadow queue.
+	 */
+	JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
+	journal_file_buffer(jh_in, transaction, BJ_Shadow);
+	JBUFFER_TRACE(new_jh, "file as BJ_IO");
+	journal_file_buffer(new_jh, transaction, BJ_IO);
+
+	return do_escape | (done_copy_out << 1);
+}
+
+/*
+ * Allocation code for the journal file.  Manage the space left in the
+ * journal, so that we can begin checkpointing when appropriate.
+ */
+
+/*
+ * __log_space_left: Return the number of free blocks left in the journal.
+ *
+ * Called with the journal already locked.
+ *
+ * Called under j_state_lock
+ */
+
+int __log_space_left(journal_t *journal)
+{
+	int left = journal->j_free;
+
+	assert_spin_locked(&journal->j_state_lock);
+
+	/*
+	 * Be pessimistic here about the number of those free blocks which
+	 * might be required for log descriptor control blocks.
+	 */
+
+#define MIN_LOG_RESERVED_BLOCKS 32 /* Allow for rounding errors */
+
+	left -= MIN_LOG_RESERVED_BLOCKS;
+
+	if (left <= 0)
+		return 0;
+	left -= (left >> 3);
+	return left;
+}
+
+/*
+ * Called under j_state_lock.  Returns true if a transaction was started.
+ */
+int __log_start_commit(journal_t *journal, tid_t target)
+{
+	/*
+	 * Are we already doing a recent enough commit?
+	 */
+	if (!tid_geq(journal->j_commit_request, target)) {
+		/*
+		 * We want a new commit: OK, mark the request and wake up the
+		 * commit thread.  We do _not_ do the commit ourselves.
+		 */
+
+		journal->j_commit_request = target;
+		jbd_debug(1, "JBD: requesting commit %d/%d\n",
+			  journal->j_commit_request,
+			  journal->j_commit_sequence);
+		wake_up(&journal->j_wait_commit);
+		return 1;
+	}
+	return 0;
+}
+
+int log_start_commit(journal_t *journal, tid_t tid)
+{
+	int ret;
+
+	spin_lock(&journal->j_state_lock);
+	ret = __log_start_commit(journal, tid);
+	spin_unlock(&journal->j_state_lock);
+	return ret;
+}
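
The estimate in __log_space_left() above is deliberately conservative: it first sets aside a fixed reserve for descriptor overhead, then discounts a further eighth of whatever remains. As standalone arithmetic (the helper name is illustrative):

	/*
	 * Sketch of __log_space_left()'s pessimism: with 1000 free
	 * blocks it reports (1000 - 32) - (968 >> 3) = 968 - 121 = 847
	 * usable blocks.
	 */
	static int usable_log_blocks(int free)
	{
		int left = free - 32;		/* MIN_LOG_RESERVED_BLOCKS */

		if (left <= 0)
			return 0;
		return left - (left >> 3);	/* keep roughly 7/8 of the rest */
	}
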
+
+/*
+ * Force and wait upon a commit if the calling process is not within
+ * transaction.  This is used for forcing out undo-protected data which
+ * contains bitmaps, when the fs is running out of space.
+ *
+ * We can only force the running transaction if we don't have an active
+ * handle; otherwise, we will deadlock.
+ *
+ * Returns true if a transaction was started.
+ */
+int journal_force_commit_nested(journal_t *journal)
+{
+	transaction_t *transaction = NULL;
+	tid_t tid;
+
+	spin_lock(&journal->j_state_lock);
+	if (journal->j_running_transaction && !current->journal_info) {
+		transaction = journal->j_running_transaction;
+		__log_start_commit(journal, transaction->t_tid);
+	} else if (journal->j_committing_transaction)
+		transaction = journal->j_committing_transaction;
+
+	if (!transaction) {
+		spin_unlock(&journal->j_state_lock);
+		return 0;	/* Nothing to retry */
+	}
+
+	tid = transaction->t_tid;
+	spin_unlock(&journal->j_state_lock);
+	log_wait_commit(journal, tid);
+	return 1;
+}
+
+/*
+ * Start a commit of the current running transaction (if any).  Returns true
+ * if a transaction was started, and fills its tid in at *ptid
+ */
+int journal_start_commit(journal_t *journal, tid_t *ptid)
+{
+	int ret = 0;
+
+	spin_lock(&journal->j_state_lock);
+	if (journal->j_running_transaction) {
+		tid_t tid = journal->j_running_transaction->t_tid;
+
+		ret = __log_start_commit(journal, tid);
+		if (ret && ptid)
+			*ptid = tid;
+	} else if (journal->j_committing_transaction && ptid) {
+		/*
+		 * If ext3_write_super() recently started a commit, then we
+		 * have to wait for completion of that transaction
+		 */
+		*ptid = journal->j_committing_transaction->t_tid;
+		ret = 1;
+	}
+	spin_unlock(&journal->j_state_lock);
+	return ret;
+}
+
+/*
+ * Wait for a specified commit to complete.
+ * The caller may not hold the journal lock.
+ */
+int log_wait_commit(journal_t *journal, tid_t tid)
+{
+	int err = 0;
+
+#ifdef CONFIG_JBD_DEBUG
+	spin_lock(&journal->j_state_lock);
+	if (!tid_geq(journal->j_commit_request, tid)) {
+		printk(KERN_EMERG
+		       "%s: error: j_commit_request=%d, tid=%d\n",
+		       __FUNCTION__, journal->j_commit_request, tid);
+	}
+	spin_unlock(&journal->j_state_lock);
+#endif
+	spin_lock(&journal->j_state_lock);
+	while (tid_gt(tid, journal->j_commit_sequence)) {
+		jbd_debug(1, "JBD: want %d, j_commit_sequence=%d\n",
+				  tid, journal->j_commit_sequence);
+		wake_up(&journal->j_wait_commit);
+		spin_unlock(&journal->j_state_lock);
+		wait_event(journal->j_wait_done_commit,
+				!tid_gt(tid, journal->j_commit_sequence));
+		spin_lock(&journal->j_state_lock);
+	}
+	spin_unlock(&journal->j_state_lock);
+
+	if (unlikely(is_journal_aborted(journal))) {
+		printk(KERN_EMERG "journal commit I/O error\n");
+		err = -EIO;
+	}
+	return err;
+}
+
+/*
+ * Log buffer allocation routines:
+ */
+
+int journal_next_log_block(journal_t *journal, unsigned long *retp)
+{
+	unsigned long blocknr;
+
+	spin_lock(&journal->j_state_lock);
+	J_ASSERT(journal->j_free > 1);
+
+	blocknr = journal->j_head;
+	journal->j_head++;
+	journal->j_free--;
+	if (journal->j_head == journal->j_last)
+		journal->j_head = journal->j_first;
+	spin_unlock(&journal->j_state_lock);
+	return journal_bmap(journal, blocknr, retp);
+}
+
+/*
+ * Conversion of logical to physical block numbers for the journal
+ *
+ * On external journals the journal blocks are identity-mapped, so
+ * this is a no-op.  If needed, we can use j_blk_offset - everything is
+ * ready.
+ */
+int journal_bmap(journal_t *journal, unsigned long blocknr,
+		 unsigned long *retp)
+{
+	int err = 0;
+	unsigned long ret;
+
+	if (journal->j_inode) {
+		ret = bmap(journal->j_inode, blocknr);
+		if (ret)
+			*retp = ret;
+		else {
+			char b[BDEVNAME_SIZE];
+
+			printk(KERN_ALERT "%s: journal block not found "
+					"at offset %lu on %s\n",
+				__FUNCTION__,
+				blocknr,
+				bdevname(journal->j_dev, b));
+			err = -EIO;
+			__journal_abort_soft(journal, err);
+		}
+	} else {
+		*retp = blocknr; /* +journal->j_blk_offset */
+	}
+	return err;
+}
+
+/*
+ * We play buffer_head aliasing tricks to write data/metadata blocks to
+ * the journal without copying their contents, but for journal
+ * descriptor blocks we do need to generate bona fide buffers.
+ *
+ * After the caller of journal_get_descriptor_buffer() has finished modifying
+ * the buffer's contents they really should run flush_dcache_page(bh->b_page).
+ * But we don't bother doing that, so there will be coherency problems with
+ * mmaps of blockdevs which hold live JBD-controlled filesystems.
+ */
+struct journal_head *journal_get_descriptor_buffer(journal_t *journal)
+{
+	struct buffer_head *bh;
+	unsigned long blocknr;
+	int err;
+
+	err = journal_next_log_block(journal, &blocknr);
+
+	if (err)
+		return NULL;
+
+	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
+	lock_buffer(bh);
+	memset(bh->b_data, 0, journal->j_blocksize);
+	set_buffer_uptodate(bh);
+	unlock_buffer(bh);
+	BUFFER_TRACE(bh, "return this buffer");
+	return journal_add_journal_head(bh);
+}
+
+/*
+ * Management for journal control blocks: functions to create and
+ * destroy journal_t structures, and to initialise and read existing
+ * journal blocks from disk. */
+
+/* First: create and setup a journal_t object in memory.  We initialise
+ * very few fields yet: that has to wait until we have created the
+ * journal structures from scratch, or loaded them from disk. */
+
+static journal_t * journal_init_common (void)
+{
+	journal_t *journal;
+	int err;
+
+	journal = jbd_kmalloc(sizeof(*journal), GFP_KERNEL);
+	if (!journal)
+		goto fail;
+	memset(journal, 0, sizeof(*journal));
+
+	init_waitqueue_head(&journal->j_wait_transaction_locked);
+	init_waitqueue_head(&journal->j_wait_logspace);
+	init_waitqueue_head(&journal->j_wait_done_commit);
+	init_waitqueue_head(&journal->j_wait_checkpoint);
+	init_waitqueue_head(&journal->j_wait_commit);
+	init_waitqueue_head(&journal->j_wait_updates);
+	mutex_init(&journal->j_barrier);
+	mutex_init(&journal->j_checkpoint_mutex);
+	spin_lock_init(&journal->j_revoke_lock);
+	spin_lock_init(&journal->j_list_lock);
+	spin_lock_init(&journal->j_state_lock);
+
+	journal->j_commit_interval = (HZ * JBD_DEFAULT_MAX_COMMIT_AGE);
+
+	/* The journal is marked for error until we succeed with recovery! */
+	journal->j_flags = JFS_ABORT;
+
+	/* Set up a default-sized revoke table for the new mount. */
+	err = journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
+	if (err) {
+		kfree(journal);
+		goto fail;
+	}
+	return journal;
+fail:
+	return NULL;
+}
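
Both constructors below size the commit-time write batch, j_wbuf, from how many block tags one descriptor block can carry. A standalone sketch of that arithmetic (plain userspace C; the 12-byte header matches journal_header_t's three be32 fields, and the extra 16 bytes are the UUID that follows the first tag):

	#include <stdio.h>

	/* On-disk tag layout, as in journal_block_tag_t: two be32 fields. */
	struct tag { unsigned int t_blocknr, t_flags; };

	int main(void)
	{
		unsigned int blocksize = 4096;
		unsigned int n = blocksize / sizeof(struct tag);

		/* j_wbufsize: the upper bound used to size j_wbuf */
		printf("j_wbufsize = %u\n", n);		/* 512 for 4k blocks */

		/*
		 * The commit loop is slightly stricter: a descriptor
		 * carries a 12-byte header, and it stops filling when
		 * fewer than sizeof(tag) + 16 bytes remain.
		 */
		printf("tags per descriptor >= %u\n",
		       (blocksize - 12 - 16) / (unsigned int)sizeof(struct tag));
		return 0;
	}
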
+
+/* journal_init_dev and journal_init_inode:
+ *
+ * Create a journal structure assigned some fixed set of disk blocks to
+ * the journal.  We don't actually touch those disk blocks yet, but we
+ * need to set up all of the mapping information to tell the journaling
+ * system where the journal blocks are.
+ *
+ */
+
+/**
+ *  journal_t * journal_init_dev() - creates and initialises a journal structure
+ *  @bdev: Block device on which to create the journal
+ *  @fs_dev: Device which holds the journalled filesystem for this journal.
+ *  @start: Block nr Start of journal.
+ *  @len:  Length of the journal in blocks.
+ *  @blocksize: blocksize of journalling device
+ *  @returns: a newly created journal_t *
+ *
+ *  journal_init_dev creates a journal which maps a fixed contiguous
+ *  range of blocks on an arbitrary block device.
+ *
+ */
+journal_t * journal_init_dev(struct block_device *bdev,
+			struct block_device *fs_dev,
+			int start, int len, int blocksize)
+{
+	journal_t *journal = journal_init_common();
+	struct buffer_head *bh;
+	int n;
+
+	if (!journal)
+		return NULL;
+
+	/* journal descriptor can store up to n blocks -bzzz */
+	journal->j_blocksize = blocksize;
+	n = journal->j_blocksize / sizeof(journal_block_tag_t);
+	journal->j_wbufsize = n;
+	journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
+	if (!journal->j_wbuf) {
+		printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
+			__FUNCTION__);
+		kfree(journal);
+		journal = NULL;
+	}
+	journal->j_dev = bdev;
+	journal->j_fs_dev = fs_dev;
+	journal->j_blk_offset = start;
+	journal->j_maxlen = len;
+
+	bh = __getblk(journal->j_dev, start, journal->j_blocksize);
+	J_ASSERT(bh != NULL);
+	journal->j_sb_buffer = bh;
+	journal->j_superblock = (journal_superblock_t *)bh->b_data;
+
+	return journal;
+}
+
+/**
+ *  journal_t * journal_init_inode () - creates a journal which maps to an inode.
+ *  @inode: An inode to create the journal in
+ *
+ * journal_init_inode creates a journal which maps an on-disk inode as
+ * the journal.  The inode must exist already, must support bmap() and
+ * must have all data blocks preallocated.
+ */
+journal_t * journal_init_inode (struct inode *inode)
+{
+	struct buffer_head *bh;
+	journal_t *journal = journal_init_common();
+	int err;
+	int n;
+	unsigned long blocknr;
+
+	if (!journal)
+		return NULL;
+
+	journal->j_dev = journal->j_fs_dev = inode->i_sb->s_bdev;
+	journal->j_inode = inode;
+	jbd_debug(1,
+		  "journal %p: inode %s/%ld, size %Ld, bits %d, blksize %ld\n",
+		  journal, inode->i_sb->s_id, inode->i_ino,
+		  (long long) inode->i_size,
+		  inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
+
+	journal->j_maxlen = inode->i_size >> inode->i_sb->s_blocksize_bits;
+	journal->j_blocksize = inode->i_sb->s_blocksize;
+
+	/* journal descriptor can store up to n blocks -bzzz */
+	n = journal->j_blocksize / sizeof(journal_block_tag_t);
+	journal->j_wbufsize = n;
+	journal->j_wbuf = kmalloc(n * sizeof(struct buffer_head*), GFP_KERNEL);
+	if (!journal->j_wbuf) {
+		printk(KERN_ERR "%s: Can't allocate bhs for commit thread\n",
+			__FUNCTION__);
+		kfree(journal);
+		return NULL;
+	}
+
+	err = journal_bmap(journal, 0, &blocknr);
+	/* If that failed, give up */
+	if (err) {
+		printk(KERN_ERR "%s: Cannot locate journal superblock\n",
+		       __FUNCTION__);
+		kfree(journal);
+		return NULL;
+	}
+
+	bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
+	J_ASSERT(bh != NULL);
+	journal->j_sb_buffer = bh;
+	journal->j_superblock = (journal_superblock_t *)bh->b_data;
+
+	return journal;
+}
+
+/*
+ * If the journal init or create aborts, we need to mark the journal
+ * superblock as being NULL to prevent the journal destroy from writing
+ * back a bogus superblock.
+ */ +static void journal_fail_superblock (journal_t *journal) +{ + struct buffer_head *bh = journal->j_sb_buffer; + brelse(bh); + journal->j_sb_buffer = NULL; +} + +/* + * Given a journal_t structure, initialise the various fields for + * startup of a new journaling session. We use this both when creating + * a journal, and after recovering an old journal to reset it for + * subsequent use. + */ + +static int journal_reset(journal_t *journal) +{ + journal_superblock_t *sb = journal->j_superblock; + unsigned long first, last; + + first = be32_to_cpu(sb->s_first); + last = be32_to_cpu(sb->s_maxlen); + + journal->j_first = first; + journal->j_last = last; + + journal->j_head = first; + journal->j_tail = first; + journal->j_free = last - first; + + journal->j_tail_sequence = journal->j_transaction_sequence; + journal->j_commit_sequence = journal->j_transaction_sequence - 1; + journal->j_commit_request = journal->j_commit_sequence; + + journal->j_max_transaction_buffers = journal->j_maxlen / 4; + + /* Add the dynamic fields and write it to disk. */ + journal_update_superblock(journal, 1); + journal_start_thread(journal); + return 0; +} + +/** + * int journal_create() - Initialise the new journal file + * @journal: Journal to create. This structure must have been initialised + * + * Given a journal_t structure which tells us which disk blocks we can + * use, create a new journal superblock and initialise all of the + * journal fields from scratch. + **/ +int journal_create(journal_t *journal) +{ + unsigned long blocknr; + struct buffer_head *bh; + journal_superblock_t *sb; + int i, err; + + if (journal->j_maxlen < JFS_MIN_JOURNAL_BLOCKS) { + printk (KERN_ERR "Journal length (%d blocks) too short.\n", + journal->j_maxlen); + journal_fail_superblock(journal); + return -EINVAL; + } + + if (journal->j_inode == NULL) { + /* + * We don't know what block to start at! + */ + printk(KERN_EMERG + "%s: creation of journal on external device!\n", + __FUNCTION__); + BUG(); + } + + /* Zero out the entire journal on disk. We cannot afford to + have any blocks on disk beginning with JFS_MAGIC_NUMBER. */ + jbd_debug(1, "JBD: Zeroing out journal blocks...\n"); + for (i = 0; i < journal->j_maxlen; i++) { + err = journal_bmap(journal, i, &blocknr); + if (err) + return err; + bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + lock_buffer(bh); + memset (bh->b_data, 0, journal->j_blocksize); + BUFFER_TRACE(bh, "marking dirty"); + mark_buffer_dirty(bh); + BUFFER_TRACE(bh, "marking uptodate"); + set_buffer_uptodate(bh); + unlock_buffer(bh); + __brelse(bh); + } + + sync_blockdev(journal->j_dev); + jbd_debug(1, "JBD: journal cleared.\n"); + + /* OK, fill in the initial static fields in the new superblock */ + sb = journal->j_superblock; + + sb->s_header.h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); + sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2); + + sb->s_blocksize = cpu_to_be32(journal->j_blocksize); + sb->s_maxlen = cpu_to_be32(journal->j_maxlen); + sb->s_first = cpu_to_be32(1); + + journal->j_transaction_sequence = 1; + + journal->j_flags &= ~JFS_ABORT; + journal->j_format_version = 2; + + return journal_reset(journal); +} + +/** + * void journal_update_superblock() - Update journal sb on disk. + * @journal: The journal to update. + * @wait: Set to '0' if you don't want to wait for IO completion. + * + * Update a journal's dynamic superblock fields and write it to disk, + * optionally waiting for the IO to complete. 
+ */
+void journal_update_superblock(journal_t *journal, int wait)
+{
+	journal_superblock_t *sb = journal->j_superblock;
+	struct buffer_head *bh = journal->j_sb_buffer;
+
+	/*
+	 * As a special case, if the on-disk copy is already marked as needing
+	 * no recovery (s_start == 0) and there are no outstanding transactions
+	 * in the filesystem, then we can safely defer the superblock update
+	 * until the next commit by setting JFS_FLUSHED.  This avoids
+	 * attempting a write to a potentially read-only device.
+	 */
+	if (sb->s_start == 0 && journal->j_tail_sequence ==
+				journal->j_transaction_sequence) {
+		jbd_debug(1,"JBD: Skipping superblock update on recovered sb "
+			"(start %ld, seq %d, errno %d)\n",
+			journal->j_tail, journal->j_tail_sequence,
+			journal->j_errno);
+		goto out;
+	}
+
+	spin_lock(&journal->j_state_lock);
+	jbd_debug(1,"JBD: updating superblock (start %ld, seq %d, errno %d)\n",
+		  journal->j_tail, journal->j_tail_sequence, journal->j_errno);
+
+	sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
+	sb->s_start    = cpu_to_be32(journal->j_tail);
+	sb->s_errno    = cpu_to_be32(journal->j_errno);
+	spin_unlock(&journal->j_state_lock);
+
+	BUFFER_TRACE(bh, "marking dirty");
+	mark_buffer_dirty(bh);
+	if (wait)
+		sync_dirty_buffer(bh);
+	else
+		ll_rw_block(SWRITE, 1, &bh);
+
+out:
+	/* If we have just flushed the log (by marking s_start==0), then
+	 * any future commit will have to be careful to update the
+	 * superblock again to re-record the true start of the log. */
+
+	spin_lock(&journal->j_state_lock);
+	if (sb->s_start)
+		journal->j_flags &= ~JFS_FLUSHED;
+	else
+		journal->j_flags |= JFS_FLUSHED;
+	spin_unlock(&journal->j_state_lock);
+}
+
+/*
+ * Read the superblock for a given journal, performing initial
+ * validation of the format.
+ */
+
+static int journal_get_superblock(journal_t *journal)
+{
+	struct buffer_head *bh;
+	journal_superblock_t *sb;
+	int err = -EIO;
+
+	bh = journal->j_sb_buffer;
+
+	J_ASSERT(bh != NULL);
+	if (!buffer_uptodate(bh)) {
+		ll_rw_block(READ, 1, &bh);
+		wait_on_buffer(bh);
+		if (!buffer_uptodate(bh)) {
+			printk (KERN_ERR
+				"JBD: IO error reading journal superblock\n");
+			goto out;
+		}
+	}
+
+	sb = journal->j_superblock;
+
+	err = -EINVAL;
+
+	if (sb->s_header.h_magic != cpu_to_be32(JFS_MAGIC_NUMBER) ||
+	    sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
+		printk(KERN_WARNING "JBD: no valid journal superblock found\n");
+		goto out;
+	}
+
+	switch(be32_to_cpu(sb->s_header.h_blocktype)) {
+	case JFS_SUPERBLOCK_V1:
+		journal->j_format_version = 1;
+		break;
+	case JFS_SUPERBLOCK_V2:
+		journal->j_format_version = 2;
+		break;
+	default:
+		printk(KERN_WARNING "JBD: unrecognised superblock format ID\n");
+		goto out;
+	}
+
+	if (be32_to_cpu(sb->s_maxlen) < journal->j_maxlen)
+		journal->j_maxlen = be32_to_cpu(sb->s_maxlen);
+	else if (be32_to_cpu(sb->s_maxlen) > journal->j_maxlen) {
+		printk (KERN_WARNING "JBD: journal file too short\n");
+		goto out;
+	}
+
+	return 0;
+
+out:
+	journal_fail_superblock(journal);
+	return err;
+}
+
+/*
+ * Load the on-disk journal superblock and read the key fields into the
+ * journal_t.
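+ *
+ * For reference, the fields copied below are s_sequence, s_start,
+ * s_first, s_maxlen and s_errno, converted from big-endian into
+ * j_tail_sequence, j_tail, j_first, j_last and j_errno respectively.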
+ */
+
+static int load_superblock(journal_t *journal)
+{
+	int err;
+	journal_superblock_t *sb;
+
+	err = journal_get_superblock(journal);
+	if (err)
+		return err;
+
+	sb = journal->j_superblock;
+
+	journal->j_tail_sequence = be32_to_cpu(sb->s_sequence);
+	journal->j_tail = be32_to_cpu(sb->s_start);
+	journal->j_first = be32_to_cpu(sb->s_first);
+	journal->j_last = be32_to_cpu(sb->s_maxlen);
+	journal->j_errno = be32_to_cpu(sb->s_errno);
+
+	return 0;
+}
+
+
+/**
+ * int journal_load() - Read journal from disk.
+ * @journal: Journal to act on.
+ *
+ * Given a journal_t structure which tells us which disk blocks contain
+ * a journal, read the journal from disk to initialise the in-memory
+ * structures.
+ */
+int journal_load(journal_t *journal)
+{
+	int err;
+	journal_superblock_t *sb;
+
+	err = load_superblock(journal);
+	if (err)
+		return err;
+
+	sb = journal->j_superblock;
+	/* If this is a V2 superblock, then we have to check the
+	 * feature flags on it. */
+
+	if (journal->j_format_version >= 2) {
+		if ((sb->s_feature_ro_compat &
+		     ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) ||
+		    (sb->s_feature_incompat &
+		     ~cpu_to_be32(JFS_KNOWN_INCOMPAT_FEATURES))) {
+			printk (KERN_WARNING
+				"JBD: Unrecognised features on journal\n");
+			return -EINVAL;
+		}
+	}
+
+	/*
+	 * Create a slab for this blocksize
+	 */
+	err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize));
+	if (err)
+		return err;
+
+	/* Let the recovery code check whether it needs to recover any
+	 * data from the journal. */
+	if (journal_recover(journal))
+		goto recovery_error;
+
+	/* OK, we've finished with the dynamic journal bits:
+	 * reinitialise the dynamic contents of the superblock in memory
+	 * and reset them on disk. */
+	if (journal_reset(journal))
+		goto recovery_error;
+
+	journal->j_flags &= ~JFS_ABORT;
+	journal->j_flags |= JFS_LOADED;
+	return 0;
+
+recovery_error:
+	printk (KERN_WARNING "JBD: recovery failed\n");
+	return -EIO;
+}
+
+/**
+ * void journal_destroy() - Release a journal_t structure.
+ * @journal: Journal to act on.
+ *
+ * Release a journal_t structure once it is no longer in use by the
+ * journaled object.
+ */
+void journal_destroy(journal_t *journal)
+{
+	/* Wait for the commit thread to wake up and die. */
+	journal_kill_thread(journal);
+
+	/* Force a final log commit */
+	if (journal->j_running_transaction)
+		journal_commit_transaction(journal);
+
+	/* Force any old transactions to disk */
+
+	/* Totally anal locking here... */
+	spin_lock(&journal->j_list_lock);
+	while (journal->j_checkpoint_transactions != NULL) {
+		spin_unlock(&journal->j_list_lock);
+		log_do_checkpoint(journal);
+		spin_lock(&journal->j_list_lock);
+	}
+
+	J_ASSERT(journal->j_running_transaction == NULL);
+	J_ASSERT(journal->j_committing_transaction == NULL);
+	J_ASSERT(journal->j_checkpoint_transactions == NULL);
+	spin_unlock(&journal->j_list_lock);
+
+	/* We can now mark the journal as empty. */
+	journal->j_tail = 0;
+	journal->j_tail_sequence = ++journal->j_transaction_sequence;
+	if (journal->j_sb_buffer) {
+		journal_update_superblock(journal, 1);
+		brelse(journal->j_sb_buffer);
+	}
+
+	if (journal->j_inode)
+		iput(journal->j_inode);
+	if (journal->j_revoke)
+		journal_destroy_revoke(journal);
+	kfree(journal->j_wbuf);
+	kfree(journal);
+}
+
+
+/**
+ * int journal_check_used_features () - Check if features specified are used.
+ * @journal: Journal to check.
+ * @compat: bitmask of compatible features + * @ro: bitmask of features that force read-only mount + * @incompat: bitmask of incompatible features + * + * Check whether the journal uses all of a given set of + * features. Return true (non-zero) if it does. + **/ + +int journal_check_used_features (journal_t *journal, unsigned long compat, + unsigned long ro, unsigned long incompat) +{ + journal_superblock_t *sb; + + if (!compat && !ro && !incompat) + return 1; + if (journal->j_format_version == 1) + return 0; + + sb = journal->j_superblock; + + if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) && + ((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) && + ((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat)) + return 1; + + return 0; +} + +/** + * int journal_check_available_features() - Check feature set in journalling layer + * @journal: Journal to check. + * @compat: bitmask of compatible features + * @ro: bitmask of features that force read-only mount + * @incompat: bitmask of incompatible features + * + * Check whether the journaling code supports the use of + * all of a given set of features on this journal. Return true + * (non-zero) if it can. */ + +int journal_check_available_features (journal_t *journal, unsigned long compat, + unsigned long ro, unsigned long incompat) +{ + journal_superblock_t *sb; + + if (!compat && !ro && !incompat) + return 1; + + sb = journal->j_superblock; + + /* We can support any known requested features iff the + * superblock is in version 2. Otherwise we fail to support any + * extended sb features. */ + + if (journal->j_format_version != 2) + return 0; + + if ((compat & JFS_KNOWN_COMPAT_FEATURES) == compat && + (ro & JFS_KNOWN_ROCOMPAT_FEATURES) == ro && + (incompat & JFS_KNOWN_INCOMPAT_FEATURES) == incompat) + return 1; + + return 0; +} + +/** + * int journal_set_features () - Mark a given journal feature in the superblock + * @journal: Journal to act on. + * @compat: bitmask of compatible features + * @ro: bitmask of features that force read-only mount + * @incompat: bitmask of incompatible features + * + * Mark a given journal feature as present on the + * superblock. Returns true if the requested features could be set. + * + */ + +int journal_set_features (journal_t *journal, unsigned long compat, + unsigned long ro, unsigned long incompat) +{ + journal_superblock_t *sb; + + if (journal_check_used_features(journal, compat, ro, incompat)) + return 1; + + if (!journal_check_available_features(journal, compat, ro, incompat)) + return 0; + + jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n", + compat, ro, incompat); + + sb = journal->j_superblock; + + sb->s_feature_compat |= cpu_to_be32(compat); + sb->s_feature_ro_compat |= cpu_to_be32(ro); + sb->s_feature_incompat |= cpu_to_be32(incompat); + + return 1; +} + + +/** + * int journal_update_format () - Update on-disk journal structure. + * @journal: Journal to act on. + * + * Given an initialised but unloaded journal struct, poke about in the + * on-disk structure to update it to the most recent supported version. 
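+ *
+ * Illustrative use: a caller holding a version 1 journal can invoke
+ * journal_update_format(journal) on the initialised but unloaded
+ * journal; if the superblock is V1 it is converted in place to V2 by
+ * journal_convert_superblock_v1() below.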
+ */ +int journal_update_format (journal_t *journal) +{ + journal_superblock_t *sb; + int err; + + err = journal_get_superblock(journal); + if (err) + return err; + + sb = journal->j_superblock; + + switch (be32_to_cpu(sb->s_header.h_blocktype)) { + case JFS_SUPERBLOCK_V2: + return 0; + case JFS_SUPERBLOCK_V1: + return journal_convert_superblock_v1(journal, sb); + default: + break; + } + return -EINVAL; +} + +static int journal_convert_superblock_v1(journal_t *journal, + journal_superblock_t *sb) +{ + int offset, blocksize; + struct buffer_head *bh; + + printk(KERN_WARNING + "JBD: Converting superblock from version 1 to 2.\n"); + + /* Pre-initialise new fields to zero */ + offset = ((char *) &(sb->s_feature_compat)) - ((char *) sb); + blocksize = be32_to_cpu(sb->s_blocksize); + memset(&sb->s_feature_compat, 0, blocksize-offset); + + sb->s_nr_users = cpu_to_be32(1); + sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2); + journal->j_format_version = 2; + + bh = journal->j_sb_buffer; + BUFFER_TRACE(bh, "marking dirty"); + mark_buffer_dirty(bh); + sync_dirty_buffer(bh); + return 0; +} + + +/** + * int journal_flush () - Flush journal + * @journal: Journal to act on. + * + * Flush all data for a given journal to disk and empty the journal. + * Filesystems can use this when remounting readonly to ensure that + * recovery does not need to happen on remount. + */ + +int journal_flush(journal_t *journal) +{ + int err = 0; + transaction_t *transaction = NULL; + unsigned long old_tail; + + spin_lock(&journal->j_state_lock); + + /* Force everything buffered to the log... */ + if (journal->j_running_transaction) { + transaction = journal->j_running_transaction; + __log_start_commit(journal, transaction->t_tid); + } else if (journal->j_committing_transaction) + transaction = journal->j_committing_transaction; + + /* Wait for the log commit to complete... */ + if (transaction) { + tid_t tid = transaction->t_tid; + + spin_unlock(&journal->j_state_lock); + log_wait_commit(journal, tid); + } else { + spin_unlock(&journal->j_state_lock); + } + + /* ...and flush everything in the log out to disk. */ + spin_lock(&journal->j_list_lock); + while (!err && journal->j_checkpoint_transactions != NULL) { + spin_unlock(&journal->j_list_lock); + err = log_do_checkpoint(journal); + spin_lock(&journal->j_list_lock); + } + spin_unlock(&journal->j_list_lock); + cleanup_journal_tail(journal); + + /* Finally, mark the journal as really needing no recovery. + * This sets s_start==0 in the underlying superblock, which is + * the magic code for a fully-recovered superblock. Any future + * commits of data to the journal will restore the current + * s_start value. */ + spin_lock(&journal->j_state_lock); + old_tail = journal->j_tail; + journal->j_tail = 0; + spin_unlock(&journal->j_state_lock); + journal_update_superblock(journal, 1); + spin_lock(&journal->j_state_lock); + journal->j_tail = old_tail; + + J_ASSERT(!journal->j_running_transaction); + J_ASSERT(!journal->j_committing_transaction); + J_ASSERT(!journal->j_checkpoint_transactions); + J_ASSERT(journal->j_head == journal->j_tail); + J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence); + spin_unlock(&journal->j_state_lock); + return err; +} + +/** + * int journal_wipe() - Wipe journal contents + * @journal: Journal to act on. + * @write: flag (see below) + * + * Wipe out all of the contents of a journal, safely. This will produce + * a warning if the journal contains any valid recovery information. 
+ * Must be called between journal_init_*() and journal_load().
+ *
+ * If 'write' is non-zero, then we wipe out the journal on disk; otherwise
+ * we merely suppress recovery.
+ */
+
+int journal_wipe(journal_t *journal, int write)
+{
+	journal_superblock_t *sb;
+	int err = 0;
+
+	J_ASSERT (!(journal->j_flags & JFS_LOADED));
+
+	err = load_superblock(journal);
+	if (err)
+		return err;
+
+	sb = journal->j_superblock;
+
+	if (!journal->j_tail)
+		goto no_recovery;
+
+	printk (KERN_WARNING "JBD: %s recovery information on journal\n",
+		write ? "Clearing" : "Ignoring");
+
+	err = journal_skip_recovery(journal);
+	if (write)
+		journal_update_superblock(journal, 1);
+
+ no_recovery:
+	return err;
+}
+
+/*
+ * journal_dev_name: format a character string to describe on what
+ * device this journal is present.
+ */
+
+static const char *journal_dev_name(journal_t *journal, char *buffer)
+{
+	struct block_device *bdev;
+
+	if (journal->j_inode)
+		bdev = journal->j_inode->i_sb->s_bdev;
+	else
+		bdev = journal->j_dev;
+
+	return bdevname(bdev, buffer);
+}
+
+/*
+ * Journal abort has very specific semantics, which we describe in the
+ * documentation for journal_abort() below.
+ *
+ * Two internal functions, which provide abort to the jbd layer
+ * itself, are here.
+ */
+
+/*
+ * Quick version for internal journal use (doesn't lock the journal).
+ * Aborts hard --- we mark the abort as occurred, but do _nothing_ else,
+ * and don't attempt to make any other journal updates.
+ */
+void __journal_abort_hard(journal_t *journal)
+{
+	transaction_t *transaction;
+	char b[BDEVNAME_SIZE];
+
+	if (journal->j_flags & JFS_ABORT)
+		return;
+
+	printk(KERN_ERR "Aborting journal on device %s.\n",
+		journal_dev_name(journal, b));
+
+	spin_lock(&journal->j_state_lock);
+	journal->j_flags |= JFS_ABORT;
+	transaction = journal->j_running_transaction;
+	if (transaction)
+		__log_start_commit(journal, transaction->t_tid);
+	spin_unlock(&journal->j_state_lock);
+}
+
+/* Soft abort: record the abort error status in the journal superblock,
+ * but don't do any other IO. */
+static void __journal_abort_soft (journal_t *journal, int errno)
+{
+	if (journal->j_flags & JFS_ABORT)
+		return;
+
+	if (!journal->j_errno)
+		journal->j_errno = errno;
+
+	__journal_abort_hard(journal);
+
+	if (errno)
+		journal_update_superblock(journal, 1);
+}
+
+/**
+ * void journal_abort () - Shutdown the journal immediately.
+ * @journal: the journal to shutdown.
+ * @errno:   an error number to record in the journal indicating
+ *           the reason for the shutdown.
+ *
+ * Perform a complete, immediate shutdown of the ENTIRE
+ * journal (not of a single transaction).  This operation cannot be
+ * undone without closing and reopening the journal.
+ *
+ * The journal_abort function is intended to support higher level error
+ * recovery mechanisms such as the ext2/ext3 remount-readonly error
+ * mode.
+ *
+ * Journal abort has very specific semantics.  Any existing dirty,
+ * unjournaled buffers in the main filesystem will still be written to
+ * disk by bdflush, but the journaling mechanism will be suspended
+ * immediately and no further transaction commits will be honoured.
+ *
+ * Any dirty, journaled buffers will be written back to disk without
+ * hitting the journal.  Atomicity cannot be guaranteed on an aborted
+ * filesystem, but we _do_ attempt to leave as much data as possible
+ * behind for fsck to use for cleanup.
+ *
+ * Any attempt to get a new transaction handle on a journal which is in
+ * ABORT state will just result in an -EROFS error return.
+ * A journal_stop on an existing handle will return -EIO if we have
+ * entered abort state during the update.
+ *
+ * Recursive transactions are not disturbed by journal abort until the
+ * final journal_stop, which will receive the -EIO error.
+ *
+ * Finally, the journal_abort call allows the caller to supply an errno
+ * which will be recorded (if possible) in the journal superblock.  This
+ * allows a client to record failure conditions in the middle of a
+ * transaction without having to complete the transaction to record the
+ * failure to disk.  ext3_error, for example, now uses this
+ * functionality.
+ *
+ * Errors which originate from within the journaling layer will NOT
+ * supply an errno; a null errno implies that absolutely no further
+ * writes are done to the journal (unless there are any already in
+ * progress).
+ *
+ */
+
+void journal_abort(journal_t *journal, int errno)
+{
+	__journal_abort_soft(journal, errno);
+}
+
+/**
+ * int journal_errno () - returns the journal's error state.
+ * @journal: journal to examine.
+ *
+ * This is the errno number set with journal_abort(), the last
+ * time the journal was mounted - if the journal was stopped
+ * without calling abort this will be 0.
+ *
+ * If the journal has been aborted on this mount, -EROFS will
+ * be returned.
+ */
+int journal_errno(journal_t *journal)
+{
+	int err;
+
+	spin_lock(&journal->j_state_lock);
+	if (journal->j_flags & JFS_ABORT)
+		err = -EROFS;
+	else
+		err = journal->j_errno;
+	spin_unlock(&journal->j_state_lock);
+	return err;
+}
+
+/**
+ * int journal_clear_err () - clears the journal's error state
+ * @journal: journal to act on.
+ *
+ * An error must be cleared or acked to take a FS out of readonly
+ * mode.
+ */
+int journal_clear_err(journal_t *journal)
+{
+	int err = 0;
+
+	spin_lock(&journal->j_state_lock);
+	if (journal->j_flags & JFS_ABORT)
+		err = -EROFS;
+	else
+		journal->j_errno = 0;
+	spin_unlock(&journal->j_state_lock);
+	return err;
+}
+
+/**
+ * void journal_ack_err() - Ack journal err.
+ * @journal: journal to act on.
+ *
+ * An error must be cleared or acked to take a FS out of readonly
+ * mode.
+ */
+void journal_ack_err(journal_t *journal)
+{
+	spin_lock(&journal->j_state_lock);
+	if (journal->j_errno)
+		journal->j_flags |= JFS_ACK_ERR;
+	spin_unlock(&journal->j_state_lock);
+}
+
+int journal_blocks_per_page(struct inode *inode)
+{
+	return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+}
+
+/*
+ * Simple support for retrying memory allocations.  Introduced to help to
+ * debug different VM deadlock avoidance strategies.
+ */
+void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry)
+{
+	return kmalloc(size, flags | (retry ? __GFP_NOFAIL : 0));
+}
+
+/*
+ * jbd slab management: create 1k, 2k, 4k, 8k slabs as needed
+ * and allocate frozen and commit buffers from these slabs.
+ *
+ * The reason for doing this is to avoid SLAB_DEBUG, since it could
+ * cause a bh to cross a page boundary.
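+ *
+ * As a worked example of the JBD_SLAB_INDEX(size) mapping below
+ * (size >> 11): 1024 -> 0 ("jbd_1k"), 2048 -> 1 ("jbd_2k"),
+ * 4096 -> 2 ("jbd_4k") and 8192 -> 4 ("jbd_8k"); index 3 is unused,
+ * hence the NULL entry in jbd_slab_names.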
+ */ + +#define JBD_MAX_SLABS 5 +#define JBD_SLAB_INDEX(size) (size >> 11) + +static kmem_cache_t *jbd_slab[JBD_MAX_SLABS]; +static const char *jbd_slab_names[JBD_MAX_SLABS] = { + "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k" +}; + +static void journal_destroy_jbd_slabs(void) +{ + int i; + + for (i = 0; i < JBD_MAX_SLABS; i++) { + if (jbd_slab[i]) + kmem_cache_destroy(jbd_slab[i]); + jbd_slab[i] = NULL; + } +} + +static int journal_create_jbd_slab(size_t slab_size) +{ + int i = JBD_SLAB_INDEX(slab_size); + + BUG_ON(i >= JBD_MAX_SLABS); + + /* + * Check if we already have a slab created for this size + */ + if (jbd_slab[i]) + return 0; + + /* + * Create a slab and force alignment to be same as slabsize - + * this will make sure that allocations won't cross the page + * boundary. + */ + jbd_slab[i] = kmem_cache_create(jbd_slab_names[i], + slab_size, slab_size, 0, NULL, NULL); + if (!jbd_slab[i]) { + printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); + return -ENOMEM; + } + return 0; +} + +void * jbd_slab_alloc(size_t size, gfp_t flags) +{ + int idx; + + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); + return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); +} + +void jbd_slab_free(void *ptr, size_t size) +{ + int idx; + + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); + kmem_cache_free(jbd_slab[idx], ptr); +} + +/* + * Journal_head storage management + */ +static kmem_cache_t *journal_head_cache; +#ifdef CONFIG_JBD_DEBUG +static atomic_t nr_journal_heads = ATOMIC_INIT(0); +#endif + +static int journal_init_journal_head_cache(void) +{ + int retval; + + J_ASSERT(journal_head_cache == 0); + journal_head_cache = kmem_cache_create("journal_head", + sizeof(struct journal_head), + 0, /* offset */ + 0, /* flags */ + NULL, /* ctor */ + NULL); /* dtor */ + retval = 0; + if (journal_head_cache == 0) { + retval = -ENOMEM; + printk(KERN_EMERG "JBD: no memory for journal_head cache\n"); + } + return retval; +} + +static void journal_destroy_journal_head_cache(void) +{ + J_ASSERT(journal_head_cache != NULL); + kmem_cache_destroy(journal_head_cache); + journal_head_cache = NULL; +} + +/* + * journal_head splicing and dicing + */ +static struct journal_head *journal_alloc_journal_head(void) +{ + struct journal_head *ret; + static unsigned long last_warning; + +#ifdef CONFIG_JBD_DEBUG + atomic_inc(&nr_journal_heads); +#endif + ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); + if (ret == 0) { + jbd_debug(1, "out of memory for journal_head\n"); + if (time_after(jiffies, last_warning + 5*HZ)) { + printk(KERN_NOTICE "ENOMEM in %s, retrying.\n", + __FUNCTION__); + last_warning = jiffies; + } + while (ret == 0) { + yield(); + ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); + } + } + return ret; +} + +static void journal_free_journal_head(struct journal_head *jh) +{ +#ifdef CONFIG_JBD_DEBUG + atomic_dec(&nr_journal_heads); + memset(jh, JBD_POISON_FREE, sizeof(*jh)); +#endif + kmem_cache_free(journal_head_cache, jh); +} + +/* + * A journal_head is attached to a buffer_head whenever JBD has an + * interest in the buffer. + * + * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit + * is set. This bit is tested in core kernel code where we need to take + * JBD-specific actions. Testing the zeroness of ->b_private is not reliable + * there. + * + * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one. + * + * When a buffer has its BH_JBD bit set it is immune from being released by + * core kernel code, mainly via ->b_count. 
+ * + * A journal_head may be detached from its buffer_head when the journal_head's + * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. + * Various places in JBD call journal_remove_journal_head() to indicate that the + * journal_head can be dropped if needed. + * + * Various places in the kernel want to attach a journal_head to a buffer_head + * _before_ attaching the journal_head to a transaction. To protect the + * journal_head in this situation, journal_add_journal_head elevates the + * journal_head's b_jcount refcount by one. The caller must call + * journal_put_journal_head() to undo this. + * + * So the typical usage would be: + * + * (Attach a journal_head if needed. Increments b_jcount) + * struct journal_head *jh = journal_add_journal_head(bh); + * ... + * jh->b_transaction = xxx; + * journal_put_journal_head(jh); + * + * Now, the journal_head's b_jcount is zero, but it is safe from being released + * because it has a non-zero b_transaction. + */ + +/* + * Give a buffer_head a journal_head. + * + * Doesn't need the journal lock. + * May sleep. + */ +struct journal_head *journal_add_journal_head(struct buffer_head *bh) +{ + struct journal_head *jh; + struct journal_head *new_jh = NULL; + +repeat: + if (!buffer_jbd(bh)) { + new_jh = journal_alloc_journal_head(); + memset(new_jh, 0, sizeof(*new_jh)); + } + + jbd_lock_bh_journal_head(bh); + if (buffer_jbd(bh)) { + jh = bh2jh(bh); + } else { + J_ASSERT_BH(bh, + (atomic_read(&bh->b_count) > 0) || + (bh->b_page && bh->b_page->mapping)); + + if (!new_jh) { + jbd_unlock_bh_journal_head(bh); + goto repeat; + } + + jh = new_jh; + new_jh = NULL; /* We consumed it */ + set_buffer_jbd(bh); + bh->b_private = jh; + jh->b_bh = bh; + get_bh(bh); + BUFFER_TRACE(bh, "added journal_head"); + } + jh->b_jcount++; + jbd_unlock_bh_journal_head(bh); + if (new_jh) + journal_free_journal_head(new_jh); + return bh->b_private; +} + +/* + * Grab a ref against this buffer_head's journal_head. If it ended up not + * having a journal_head, return NULL + */ +struct journal_head *journal_grab_journal_head(struct buffer_head *bh) +{ + struct journal_head *jh = NULL; + + jbd_lock_bh_journal_head(bh); + if (buffer_jbd(bh)) { + jh = bh2jh(bh); + jh->b_jcount++; + } + jbd_unlock_bh_journal_head(bh); + return jh; +} + +static void __journal_remove_journal_head(struct buffer_head *bh) +{ + struct journal_head *jh = bh2jh(bh); + + J_ASSERT_JH(jh, jh->b_jcount >= 0); + + get_bh(bh); + if (jh->b_jcount == 0) { + if (jh->b_transaction == NULL && + jh->b_next_transaction == NULL && + jh->b_cp_transaction == NULL) { + J_ASSERT_JH(jh, jh->b_jlist == BJ_None); + J_ASSERT_BH(bh, buffer_jbd(bh)); + J_ASSERT_BH(bh, jh2bh(jh) == bh); + BUFFER_TRACE(bh, "remove journal_head"); + if (jh->b_frozen_data) { + printk(KERN_WARNING "%s: freeing " + "b_frozen_data\n", + __FUNCTION__); + jbd_slab_free(jh->b_frozen_data, bh->b_size); + } + if (jh->b_committed_data) { + printk(KERN_WARNING "%s: freeing " + "b_committed_data\n", + __FUNCTION__); + jbd_slab_free(jh->b_committed_data, bh->b_size); + } + bh->b_private = NULL; + jh->b_bh = NULL; /* debug, really */ + clear_buffer_jbd(bh); + __brelse(bh); + journal_free_journal_head(jh); + } else { + BUFFER_TRACE(bh, "journal_head was locked"); + } + } +} + +/* + * journal_remove_journal_head(): if the buffer isn't attached to a transaction + * and has a zero b_jcount then remove and release its journal_head. If we did + * see that the buffer is not used by any transaction we also "logically" + * decrement ->b_count. 
+ * + * We in fact take an additional increment on ->b_count as a convenience, + * because the caller usually wants to do additional things with the bh + * after calling here. + * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some + * time. Once the caller has run __brelse(), the buffer is eligible for + * reaping by try_to_free_buffers(). + */ +void journal_remove_journal_head(struct buffer_head *bh) +{ + jbd_lock_bh_journal_head(bh); + __journal_remove_journal_head(bh); + jbd_unlock_bh_journal_head(bh); +} + +/* + * Drop a reference on the passed journal_head. If it fell to zero then try to + * release the journal_head from the buffer_head. + */ +void journal_put_journal_head(struct journal_head *jh) +{ + struct buffer_head *bh = jh2bh(jh); + + jbd_lock_bh_journal_head(bh); + J_ASSERT_JH(jh, jh->b_jcount > 0); + --jh->b_jcount; + if (!jh->b_jcount && !jh->b_transaction) { + __journal_remove_journal_head(bh); + __brelse(bh); + } + jbd_unlock_bh_journal_head(bh); +} + +/* + * /proc tunables + */ +#if defined(CONFIG_JBD_DEBUG) +int journal_enable_debug; +EXPORT_SYMBOL(journal_enable_debug); +#endif + +#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS) + +static struct proc_dir_entry *proc_jbd_debug; + +static int read_jbd_debug(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + int ret; + + ret = sprintf(page + off, "%d\n", journal_enable_debug); + *eof = 1; + return ret; +} + +static int write_jbd_debug(struct file *file, const char __user *buffer, + unsigned long count, void *data) +{ + char buf[32]; + + if (count > ARRAY_SIZE(buf) - 1) + count = ARRAY_SIZE(buf) - 1; + if (copy_from_user(buf, buffer, count)) + return -EFAULT; + buf[ARRAY_SIZE(buf) - 1] = '\0'; + journal_enable_debug = simple_strtoul(buf, NULL, 10); + return count; +} + +#define JBD_PROC_NAME "sys/fs/jbd-debug" + +static void __init create_jbd_proc_entry(void) +{ + proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL); + if (proc_jbd_debug) { + /* Why is this so hard? 
*/ + proc_jbd_debug->read_proc = read_jbd_debug; + proc_jbd_debug->write_proc = write_jbd_debug; + } +} + +static void __exit remove_jbd_proc_entry(void) +{ + if (proc_jbd_debug) + remove_proc_entry(JBD_PROC_NAME, NULL); +} + +#else + +#define create_jbd_proc_entry() do {} while (0) +#define remove_jbd_proc_entry() do {} while (0) + +#endif + +kmem_cache_t *jbd_handle_cache; + +static int __init journal_init_handle_cache(void) +{ + jbd_handle_cache = kmem_cache_create("journal_handle", + sizeof(handle_t), + 0, /* offset */ + 0, /* flags */ + NULL, /* ctor */ + NULL); /* dtor */ + if (jbd_handle_cache == NULL) { + printk(KERN_EMERG "JBD: failed to create handle cache\n"); + return -ENOMEM; + } + return 0; +} + +static void journal_destroy_handle_cache(void) +{ + if (jbd_handle_cache) + kmem_cache_destroy(jbd_handle_cache); +} + +/* + * Module startup and shutdown + */ + +static int __init journal_init_caches(void) +{ + int ret; + + ret = journal_init_revoke_caches(); + if (ret == 0) + ret = journal_init_journal_head_cache(); + if (ret == 0) + ret = journal_init_handle_cache(); + return ret; +} + +static void journal_destroy_caches(void) +{ + journal_destroy_revoke_caches(); + journal_destroy_journal_head_cache(); + journal_destroy_handle_cache(); + journal_destroy_jbd_slabs(); +} + +static int __init journal_init(void) +{ + int ret; + + BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); + + ret = journal_init_caches(); + if (ret != 0) + journal_destroy_caches(); + create_jbd_proc_entry(); + return ret; +} + +static void __exit journal_exit(void) +{ +#ifdef CONFIG_JBD_DEBUG + int n = atomic_read(&nr_journal_heads); + if (n) + printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); +#endif + remove_jbd_proc_entry(); + journal_destroy_caches(); +} + +MODULE_LICENSE("GPL"); +module_init(journal_init); +module_exit(journal_exit); + diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c new file mode 100644 index 000000000000..11563fe2a52b --- /dev/null +++ b/fs/jbd2/recovery.c @@ -0,0 +1,592 @@ +/* + * linux/fs/recovery.c + * + * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 + * + * Copyright 1999-2000 Red Hat Software --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Journal recovery routines for the generic filesystem journaling code; + * part of the ext2fs journaling system. + */ + +#ifndef __KERNEL__ +#include "jfs_user.h" +#else +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/jbd.h> +#include <linux/errno.h> +#include <linux/slab.h> +#endif + +/* + * Maintain information about the progress of the recovery job, so that + * the different passes can carry information between them. 
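+ *
+ * For example (see do_one_pass below): PASS_SCAN fills in
+ * start_transaction and end_transaction, and the later PASS_REVOKE and
+ * PASS_REPLAY passes use end_transaction as their stopping point.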
+ */ +struct recovery_info +{ + tid_t start_transaction; + tid_t end_transaction; + + int nr_replays; + int nr_revokes; + int nr_revoke_hits; +}; + +enum passtype {PASS_SCAN, PASS_REVOKE, PASS_REPLAY}; +static int do_one_pass(journal_t *journal, + struct recovery_info *info, enum passtype pass); +static int scan_revoke_records(journal_t *, struct buffer_head *, + tid_t, struct recovery_info *); + +#ifdef __KERNEL__ + +/* Release readahead buffers after use */ +static void journal_brelse_array(struct buffer_head *b[], int n) +{ + while (--n >= 0) + brelse (b[n]); +} + + +/* + * When reading from the journal, we are going through the block device + * layer directly and so there is no readahead being done for us. We + * need to implement any readahead ourselves if we want it to happen at + * all. Recovery is basically one long sequential read, so make sure we + * do the IO in reasonably large chunks. + * + * This is not so critical that we need to be enormously clever about + * the readahead size, though. 128K is a purely arbitrary, good-enough + * fixed value. + */ + +#define MAXBUF 8 +static int do_readahead(journal_t *journal, unsigned int start) +{ + int err; + unsigned int max, nbufs, next; + unsigned long blocknr; + struct buffer_head *bh; + + struct buffer_head * bufs[MAXBUF]; + + /* Do up to 128K of readahead */ + max = start + (128 * 1024 / journal->j_blocksize); + if (max > journal->j_maxlen) + max = journal->j_maxlen; + + /* Do the readahead itself. We'll submit MAXBUF buffer_heads at + * a time to the block device IO layer. */ + + nbufs = 0; + + for (next = start; next < max; next++) { + err = journal_bmap(journal, next, &blocknr); + + if (err) { + printk (KERN_ERR "JBD: bad block at offset %u\n", + next); + goto failed; + } + + bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + if (!bh) { + err = -ENOMEM; + goto failed; + } + + if (!buffer_uptodate(bh) && !buffer_locked(bh)) { + bufs[nbufs++] = bh; + if (nbufs == MAXBUF) { + ll_rw_block(READ, nbufs, bufs); + journal_brelse_array(bufs, nbufs); + nbufs = 0; + } + } else + brelse(bh); + } + + if (nbufs) + ll_rw_block(READ, nbufs, bufs); + err = 0; + +failed: + if (nbufs) + journal_brelse_array(bufs, nbufs); + return err; +} + +#endif /* __KERNEL__ */ + + +/* + * Read a block from the journal + */ + +static int jread(struct buffer_head **bhp, journal_t *journal, + unsigned int offset) +{ + int err; + unsigned long blocknr; + struct buffer_head *bh; + + *bhp = NULL; + + if (offset >= journal->j_maxlen) { + printk(KERN_ERR "JBD: corrupted journal superblock\n"); + return -EIO; + } + + err = journal_bmap(journal, offset, &blocknr); + + if (err) { + printk (KERN_ERR "JBD: bad block at offset %u\n", + offset); + return err; + } + + bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); + if (!bh) + return -ENOMEM; + + if (!buffer_uptodate(bh)) { + /* If this is a brand new buffer, start readahead. + Otherwise, we assume we are already reading it. */ + if (!buffer_req(bh)) + do_readahead(journal, offset); + wait_on_buffer(bh); + } + + if (!buffer_uptodate(bh)) { + printk (KERN_ERR "JBD: Failed to read block at offset %u\n", + offset); + brelse(bh); + return -EIO; + } + + *bhp = bh; + return 0; +} + + +/* + * Count the number of in-use tags in a journal descriptor block. 
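+ *
+ * Tag layout, as walked below: each journal_block_tag_t may be
+ * followed by a 16-byte UUID unless JFS_FLAG_SAME_UUID is set, and
+ * JFS_FLAG_LAST_TAG marks the final tag in the block.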
+ */
+
+static int count_tags(struct buffer_head *bh, int size)
+{
+	char *			tagp;
+	journal_block_tag_t *	tag;
+	int			nr = 0;
+
+	tagp = &bh->b_data[sizeof(journal_header_t)];
+
+	while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) {
+		tag = (journal_block_tag_t *) tagp;
+
+		nr++;
+		tagp += sizeof(journal_block_tag_t);
+		if (!(tag->t_flags & cpu_to_be32(JFS_FLAG_SAME_UUID)))
+			tagp += 16;
+
+		if (tag->t_flags & cpu_to_be32(JFS_FLAG_LAST_TAG))
+			break;
+	}
+
+	return nr;
+}
+
+
+/* Make sure we wrap around the log correctly! */
+#define wrap(journal, var)						\
+do {									\
+	if (var >= (journal)->j_last)					\
+		var -= ((journal)->j_last - (journal)->j_first);	\
+} while (0)
+
+/**
+ * journal_recover - recovers an on-disk journal
+ * @journal: the journal to recover
+ *
+ * The primary function for recovering the log contents when mounting a
+ * journaled device.
+ *
+ * Recovery is done in three passes.  In the first pass, we look for the
+ * end of the log.  In the second, we assemble the list of revoke
+ * blocks.  In the third and final pass, we replay any un-revoked blocks
+ * in the log.
+ */
+int journal_recover(journal_t *journal)
+{
+	int			err;
+	journal_superblock_t *	sb;
+
+	struct recovery_info	info;
+
+	memset(&info, 0, sizeof(info));
+	sb = journal->j_superblock;
+
+	/*
+	 * The journal superblock's s_start field (the current log head)
+	 * is always zero if, and only if, the journal was cleanly
+	 * unmounted.
+	 */
+
+	if (!sb->s_start) {
+		jbd_debug(1, "No recovery required, last transaction %d\n",
+			  be32_to_cpu(sb->s_sequence));
+		journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
+		return 0;
+	}
+
+	err = do_one_pass(journal, &info, PASS_SCAN);
+	if (!err)
+		err = do_one_pass(journal, &info, PASS_REVOKE);
+	if (!err)
+		err = do_one_pass(journal, &info, PASS_REPLAY);
+
+	jbd_debug(0, "JBD: recovery, exit status %d, "
+		  "recovered transactions %u to %u\n",
+		  err, info.start_transaction, info.end_transaction);
+	jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n",
+		  info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
+
+	/* Restart the log at the next transaction ID, thus invalidating
+	 * any existing commit records in the log. */
+	journal->j_transaction_sequence = ++info.end_transaction;
+
+	journal_clear_revoke(journal);
+	sync_blockdev(journal->j_fs_dev);
+	return err;
+}
+
+/**
+ * journal_skip_recovery - Start journal and wipe existing records
+ * @journal: journal to startup
+ *
+ * Locate any valid recovery information from the journal and set up the
+ * journal structures in memory to ignore it (presumably because the
+ * caller has evidence that it is out of date).
+ * This function doesn't appear to be exported.
+ *
+ * We perform one pass over the journal to allow us to tell the user how
+ * much recovery information is being erased, and to let us initialise
+ * the journal transaction sequence numbers to the next unused ID.
+ */
+int journal_skip_recovery(journal_t *journal)
+{
+	int			err;
+	journal_superblock_t *	sb;
+
+	struct recovery_info	info;
+
+	memset (&info, 0, sizeof(info));
+	sb = journal->j_superblock;
+
+	err = do_one_pass(journal, &info, PASS_SCAN);
+
+	if (err) {
+		printk(KERN_ERR "JBD: error %d scanning journal\n", err);
+		++journal->j_transaction_sequence;
+	} else {
+#ifdef CONFIG_JBD_DEBUG
+		int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence);
+#endif
+		jbd_debug(0,
+			  "JBD: ignoring %d transaction%s from the journal.\n",
+			  dropped, (dropped == 1) ? "" : "s");
"" : "s"); + journal->j_transaction_sequence = ++info.end_transaction; + } + + journal->j_tail = 0; + return err; +} + +static int do_one_pass(journal_t *journal, + struct recovery_info *info, enum passtype pass) +{ + unsigned int first_commit_ID, next_commit_ID; + unsigned long next_log_block; + int err, success = 0; + journal_superblock_t * sb; + journal_header_t * tmp; + struct buffer_head * bh; + unsigned int sequence; + int blocktype; + + /* Precompute the maximum metadata descriptors in a descriptor block */ + int MAX_BLOCKS_PER_DESC; + MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) + / sizeof(journal_block_tag_t)); + + /* + * First thing is to establish what we expect to find in the log + * (in terms of transaction IDs), and where (in terms of log + * block offsets): query the superblock. + */ + + sb = journal->j_superblock; + next_commit_ID = be32_to_cpu(sb->s_sequence); + next_log_block = be32_to_cpu(sb->s_start); + + first_commit_ID = next_commit_ID; + if (pass == PASS_SCAN) + info->start_transaction = first_commit_ID; + + jbd_debug(1, "Starting recovery pass %d\n", pass); + + /* + * Now we walk through the log, transaction by transaction, + * making sure that each transaction has a commit block in the + * expected place. Each complete transaction gets replayed back + * into the main filesystem. + */ + + while (1) { + int flags; + char * tagp; + journal_block_tag_t * tag; + struct buffer_head * obh; + struct buffer_head * nbh; + + cond_resched(); /* We're under lock_kernel() */ + + /* If we already know where to stop the log traversal, + * check right now that we haven't gone past the end of + * the log. */ + + if (pass != PASS_SCAN) + if (tid_geq(next_commit_ID, info->end_transaction)) + break; + + jbd_debug(2, "Scanning for sequence ID %u at %lu/%lu\n", + next_commit_ID, next_log_block, journal->j_last); + + /* Skip over each chunk of the transaction looking + * either the next descriptor block or the final commit + * record. */ + + jbd_debug(3, "JBD: checking block %ld\n", next_log_block); + err = jread(&bh, journal, next_log_block); + if (err) + goto failed; + + next_log_block++; + wrap(journal, next_log_block); + + /* What kind of buffer is it? + * + * If it is a descriptor block, check that it has the + * expected sequence number. Otherwise, we're all done + * here. */ + + tmp = (journal_header_t *)bh->b_data; + + if (tmp->h_magic != cpu_to_be32(JFS_MAGIC_NUMBER)) { + brelse(bh); + break; + } + + blocktype = be32_to_cpu(tmp->h_blocktype); + sequence = be32_to_cpu(tmp->h_sequence); + jbd_debug(3, "Found magic %d, sequence %d\n", + blocktype, sequence); + + if (sequence != next_commit_ID) { + brelse(bh); + break; + } + + /* OK, we have a valid descriptor block which matches + * all of the sequence number checks. What are we going + * to do with it? That depends on the pass... */ + + switch(blocktype) { + case JFS_DESCRIPTOR_BLOCK: + /* If it is a valid descriptor block, replay it + * in pass REPLAY; otherwise, just skip over the + * blocks it describes. */ + if (pass != PASS_REPLAY) { + next_log_block += + count_tags(bh, journal->j_blocksize); + wrap(journal, next_log_block); + brelse(bh); + continue; + } + + /* A descriptor block: we can now write all of + * the data blocks. Yay, useful work is finally + * getting done here! 
+
+			tagp = &bh->b_data[sizeof(journal_header_t)];
+			while ((tagp - bh->b_data + sizeof(journal_block_tag_t))
+			       <= journal->j_blocksize) {
+				unsigned long io_block;
+
+				tag = (journal_block_tag_t *) tagp;
+				flags = be32_to_cpu(tag->t_flags);
+
+				io_block = next_log_block++;
+				wrap(journal, next_log_block);
+				err = jread(&obh, journal, io_block);
+				if (err) {
+					/* Recover what we can, but
+					 * report failure at the end. */
+					success = err;
+					printk (KERN_ERR
+						"JBD: IO error %d recovering "
+						"block %ld in log\n",
+						err, io_block);
+				} else {
+					unsigned long blocknr;
+
+					J_ASSERT(obh != NULL);
+					blocknr = be32_to_cpu(tag->t_blocknr);
+
+					/* If the block has been
+					 * revoked, then we're all done
+					 * here. */
+					if (journal_test_revoke
+					    (journal, blocknr,
+					     next_commit_ID)) {
+						brelse(obh);
+						++info->nr_revoke_hits;
+						goto skip_write;
+					}
+
+					/* Find a buffer for the new
+					 * data being restored */
+					nbh = __getblk(journal->j_fs_dev,
+						       blocknr,
+						       journal->j_blocksize);
+					if (nbh == NULL) {
+						printk(KERN_ERR
+						       "JBD: Out of memory "
+						       "during recovery.\n");
+						err = -ENOMEM;
+						brelse(bh);
+						brelse(obh);
+						goto failed;
+					}
+
+					lock_buffer(nbh);
+					memcpy(nbh->b_data, obh->b_data,
+							journal->j_blocksize);
+					if (flags & JFS_FLAG_ESCAPE) {
+						*((__be32 *)nbh->b_data) =
+						cpu_to_be32(JFS_MAGIC_NUMBER);
+					}
+
+					BUFFER_TRACE(nbh, "marking uptodate");
+					set_buffer_uptodate(nbh);
+					BUFFER_TRACE(nbh, "marking dirty");
+					mark_buffer_dirty(nbh);
+					++info->nr_replays;
+					/* ll_rw_block(WRITE, 1, &nbh); */
+					unlock_buffer(nbh);
+					brelse(obh);
+					brelse(nbh);
+				}
+
+			skip_write:
+				tagp += sizeof(journal_block_tag_t);
+				if (!(flags & JFS_FLAG_SAME_UUID))
+					tagp += 16;
+
+				if (flags & JFS_FLAG_LAST_TAG)
+					break;
+			}
+
+			brelse(bh);
+			continue;
+
+		case JFS_COMMIT_BLOCK:
+			/* Found an expected commit block: not much to
+			 * do other than move on to the next sequence
+			 * number. */
+			brelse(bh);
+			next_commit_ID++;
+			continue;
+
+		case JFS_REVOKE_BLOCK:
+			/* If we aren't in the REVOKE pass, then we can
+			 * just skip over this block. */
+			if (pass != PASS_REVOKE) {
+				brelse(bh);
+				continue;
+			}
+
+			err = scan_revoke_records(journal, bh,
+						  next_commit_ID, info);
+			brelse(bh);
+			if (err)
+				goto failed;
+			continue;
+
+		default:
+			jbd_debug(3, "Unrecognised magic %d, end of scan.\n",
+				  blocktype);
+			brelse(bh);
+			goto done;
+		}
+	}
+
+ done:
+	/*
+	 * We broke out of the log scan loop: either we came to the
+	 * known end of the log or we found an unexpected block in the
+	 * log.  If the latter happened, then we know that the "current"
+	 * transaction marks the end of the valid log.
+	 */
+
+	if (pass == PASS_SCAN)
+		info->end_transaction = next_commit_ID;
+	else {
+		/* It's really bad news if different passes end up at
+		 * different places (but possible due to IO errors). */
+		if (info->end_transaction != next_commit_ID) {
+			printk (KERN_ERR "JBD: recovery pass %d ended at "
+				"transaction %u, expected %u\n",
+				pass, next_commit_ID, info->end_transaction);
+			if (!success)
+				success = -EIO;
+		}
+	}
+
+	return success;
+
+ failed:
+	return err;
+}
+
+
+/* Scan a revoke record, marking all blocks mentioned as revoked.
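+ *
+ * Revoke block layout, as parsed below: a journal_revoke_header_t whose
+ * r_count field gives the number of bytes used in the block (header
+ * included), followed by an array of __be32 block numbers, four bytes
+ * per revoked block.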
*/ + +static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, + tid_t sequence, struct recovery_info *info) +{ + journal_revoke_header_t *header; + int offset, max; + + header = (journal_revoke_header_t *) bh->b_data; + offset = sizeof(journal_revoke_header_t); + max = be32_to_cpu(header->r_count); + + while (offset < max) { + unsigned long blocknr; + int err; + + blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); + offset += 4; + err = journal_set_revoke(journal, blocknr, sequence); + if (err) + return err; + ++info->nr_revokes; + } + return 0; +} diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c new file mode 100644 index 000000000000..c532429d8d9b --- /dev/null +++ b/fs/jbd2/revoke.c @@ -0,0 +1,703 @@ +/* + * linux/fs/revoke.c + * + * Written by Stephen C. Tweedie <sct@redhat.com>, 2000 + * + * Copyright 2000 Red Hat corp --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Journal revoke routines for the generic filesystem journaling code; + * part of the ext2fs journaling system. + * + * Revoke is the mechanism used to prevent old log records for deleted + * metadata from being replayed on top of newer data using the same + * blocks. The revoke mechanism is used in two separate places: + * + * + Commit: during commit we write the entire list of the current + * transaction's revoked blocks to the journal + * + * + Recovery: during recovery we record the transaction ID of all + * revoked blocks. If there are multiple revoke records in the log + * for a single block, only the last one counts, and if there is a log + * entry for a block beyond the last revoke, then that log entry still + * gets replayed. + * + * We can get interactions between revokes and new log data within a + * single transaction: + * + * Block is revoked and then journaled: + * The desired end result is the journaling of the new block, so we + * cancel the revoke before the transaction commits. + * + * Block is journaled and then revoked: + * The revoke must take precedence over the write of the block, so we + * need either to cancel the journal entry or to write the revoke + * later in the log than the log block. In this case, we choose the + * latter: journaling a block cancels any revoke record for that block + * in the current transaction, so any revoke for that block in the + * transaction must have happened after the block was journaled and so + * the revoke must take precedence. + * + * Block is revoked and then written as data: + * The data write is allowed to succeed, but the revoke is _not_ + * cancelled. We still need to prevent old log records from + * overwriting the new data. We don't even need to clear the revoke + * bit here. + * + * Revoke information on buffers is a tri-state value: + * + * RevokeValid clear: no cached revoke status, need to look it up + * RevokeValid set, Revoked clear: + * buffer has not been revoked, and cancel_revoke + * need do nothing. + * RevokeValid set, Revoked set: + * buffer has been revoked. 
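+ *
+ * Illustrative transitions between these states: journal_revoke() sets
+ * both RevokeValid and Revoked; journal_cancel_revoke() leaves
+ * RevokeValid set but clears Revoked; a buffer freshly read off disk
+ * starts with RevokeValid clear, forcing a lookup in the revoke hash
+ * table.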
+ */ + +#ifndef __KERNEL__ +#include "jfs_user.h" +#else +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/jbd.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/list.h> +#include <linux/smp_lock.h> +#include <linux/init.h> +#endif + +static kmem_cache_t *revoke_record_cache; +static kmem_cache_t *revoke_table_cache; + +/* Each revoke record represents one single revoked block. During + journal replay, this involves recording the transaction ID of the + last transaction to revoke this block. */ + +struct jbd_revoke_record_s +{ + struct list_head hash; + tid_t sequence; /* Used for recovery only */ + unsigned long blocknr; +}; + + +/* The revoke table is just a simple hash table of revoke records. */ +struct jbd_revoke_table_s +{ + /* It is conceivable that we might want a larger hash table + * for recovery. Must be a power of two. */ + int hash_size; + int hash_shift; + struct list_head *hash_table; +}; + + +#ifdef __KERNEL__ +static void write_one_revoke_record(journal_t *, transaction_t *, + struct journal_head **, int *, + struct jbd_revoke_record_s *); +static void flush_descriptor(journal_t *, struct journal_head *, int); +#endif + +/* Utility functions to maintain the revoke table */ + +/* Borrowed from buffer.c: this is a tried and tested block hash function */ +static inline int hash(journal_t *journal, unsigned long block) +{ + struct jbd_revoke_table_s *table = journal->j_revoke; + int hash_shift = table->hash_shift; + + return ((block << (hash_shift - 6)) ^ + (block >> 13) ^ + (block << (hash_shift - 12))) & (table->hash_size - 1); +} + +static int insert_revoke_hash(journal_t *journal, unsigned long blocknr, + tid_t seq) +{ + struct list_head *hash_list; + struct jbd_revoke_record_s *record; + +repeat: + record = kmem_cache_alloc(revoke_record_cache, GFP_NOFS); + if (!record) + goto oom; + + record->sequence = seq; + record->blocknr = blocknr; + hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; + spin_lock(&journal->j_revoke_lock); + list_add(&record->hash, hash_list); + spin_unlock(&journal->j_revoke_lock); + return 0; + +oom: + if (!journal_oom_retry) + return -ENOMEM; + jbd_debug(1, "ENOMEM in %s, retrying\n", __FUNCTION__); + yield(); + goto repeat; +} + +/* Find a revoke record in the journal's hash table. 
+ */
+
+static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal,
+						      unsigned long blocknr)
+{
+	struct list_head *hash_list;
+	struct jbd_revoke_record_s *record;
+
+	hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)];
+
+	spin_lock(&journal->j_revoke_lock);
+	record = (struct jbd_revoke_record_s *) hash_list->next;
+	while (&(record->hash) != hash_list) {
+		if (record->blocknr == blocknr) {
+			spin_unlock(&journal->j_revoke_lock);
+			return record;
+		}
+		record = (struct jbd_revoke_record_s *) record->hash.next;
+	}
+	spin_unlock(&journal->j_revoke_lock);
+	return NULL;
+}
+
+int __init journal_init_revoke_caches(void)
+{
+	revoke_record_cache = kmem_cache_create("revoke_record",
+					   sizeof(struct jbd_revoke_record_s),
+					   0, SLAB_HWCACHE_ALIGN, NULL, NULL);
+	if (revoke_record_cache == 0)
+		return -ENOMEM;
+
+	revoke_table_cache = kmem_cache_create("revoke_table",
+					   sizeof(struct jbd_revoke_table_s),
+					   0, 0, NULL, NULL);
+	if (revoke_table_cache == 0) {
+		kmem_cache_destroy(revoke_record_cache);
+		revoke_record_cache = NULL;
+		return -ENOMEM;
+	}
+	return 0;
+}
+
+void journal_destroy_revoke_caches(void)
+{
+	kmem_cache_destroy(revoke_record_cache);
+	revoke_record_cache = NULL;
+	kmem_cache_destroy(revoke_table_cache);
+	revoke_table_cache = NULL;
+}
+
+/* Initialise the revoke table for a given journal to a given size. */
+
+int journal_init_revoke(journal_t *journal, int hash_size)
+{
+	int shift, tmp;
+
+	J_ASSERT (journal->j_revoke_table[0] == NULL);
+
+	shift = 0;
+	tmp = hash_size;
+	while((tmp >>= 1UL) != 0UL)
+		shift++;
+
+	journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
+	if (!journal->j_revoke_table[0])
+		return -ENOMEM;
+	journal->j_revoke = journal->j_revoke_table[0];
+
+	/* Check that the hash_size is a power of two */
+	J_ASSERT ((hash_size & (hash_size-1)) == 0);
+
+	journal->j_revoke->hash_size = hash_size;
+
+	journal->j_revoke->hash_shift = shift;
+
+	journal->j_revoke->hash_table =
+		kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
+	if (!journal->j_revoke->hash_table) {
+		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
+		journal->j_revoke = NULL;
+		return -ENOMEM;
+	}
+
+	for (tmp = 0; tmp < hash_size; tmp++)
+		INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+
+	journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL);
+	if (!journal->j_revoke_table[1]) {
+		kfree(journal->j_revoke_table[0]->hash_table);
+		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
+		return -ENOMEM;
+	}
+
+	journal->j_revoke = journal->j_revoke_table[1];
+
+	/* Check that the hash_size is a power of two */
+	J_ASSERT ((hash_size & (hash_size-1)) == 0);
+
+	journal->j_revoke->hash_size = hash_size;
+
+	journal->j_revoke->hash_shift = shift;
+
+	journal->j_revoke->hash_table =
+		kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL);
+	if (!journal->j_revoke->hash_table) {
+		kfree(journal->j_revoke_table[0]->hash_table);
+		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]);
+		kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]);
+		journal->j_revoke = NULL;
+		return -ENOMEM;
+	}
+
+	for (tmp = 0; tmp < hash_size; tmp++)
+		INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]);
+
+	spin_lock_init(&journal->j_revoke_lock);
+
+	return 0;
+}
+
+/* Destroy a journal's revoke table.  The table must already be empty!
*/ + +void journal_destroy_revoke(journal_t *journal) +{ + struct jbd_revoke_table_s *table; + struct list_head *hash_list; + int i; + + table = journal->j_revoke_table[0]; + if (!table) + return; + + for (i=0; i<table->hash_size; i++) { + hash_list = &table->hash_table[i]; + J_ASSERT (list_empty(hash_list)); + } + + kfree(table->hash_table); + kmem_cache_free(revoke_table_cache, table); + journal->j_revoke = NULL; + + table = journal->j_revoke_table[1]; + if (!table) + return; + + for (i=0; i<table->hash_size; i++) { + hash_list = &table->hash_table[i]; + J_ASSERT (list_empty(hash_list)); + } + + kfree(table->hash_table); + kmem_cache_free(revoke_table_cache, table); + journal->j_revoke = NULL; +} + + +#ifdef __KERNEL__ + +/* + * journal_revoke: revoke a given buffer_head from the journal. This + * prevents the block from being replayed during recovery if we take a + * crash after this current transaction commits. Any subsequent + * metadata writes of the buffer in this transaction cancel the + * revoke. + * + * Note that this call may block --- it is up to the caller to make + * sure that there are no further calls to journal_write_metadata + * before the revoke is complete. In ext3, this implies calling the + * revoke before clearing the block bitmap when we are deleting + * metadata. + * + * Revoke performs a journal_forget on any buffer_head passed in as a + * parameter, but does _not_ forget the buffer_head if the bh was only + * found implicitly. + * + * bh_in may not be a journalled buffer - it may have come off + * the hash tables without an attached journal_head. + * + * If bh_in is non-zero, journal_revoke() will decrement its b_count + * by one. + */ + +int journal_revoke(handle_t *handle, unsigned long blocknr, + struct buffer_head *bh_in) +{ + struct buffer_head *bh = NULL; + journal_t *journal; + struct block_device *bdev; + int err; + + might_sleep(); + if (bh_in) + BUFFER_TRACE(bh_in, "enter"); + + journal = handle->h_transaction->t_journal; + if (!journal_set_features(journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)){ + J_ASSERT (!"Cannot set revoke feature!"); + return -EINVAL; + } + + bdev = journal->j_fs_dev; + bh = bh_in; + + if (!bh) { + bh = __find_get_block(bdev, blocknr, journal->j_blocksize); + if (bh) + BUFFER_TRACE(bh, "found on hash"); + } +#ifdef JBD_EXPENSIVE_CHECKING + else { + struct buffer_head *bh2; + + /* If there is a different buffer_head lying around in + * memory anywhere... */ + bh2 = __find_get_block(bdev, blocknr, journal->j_blocksize); + if (bh2) { + /* ... and it has RevokeValid status... */ + if (bh2 != bh && buffer_revokevalid(bh2)) + /* ...then it better be revoked too, + * since it's illegal to create a revoke + * record against a buffer_head which is + * not marked revoked --- that would + * risk missing a subsequent revoke + * cancel. */ + J_ASSERT_BH(bh2, buffer_revoked(bh2)); + put_bh(bh2); + } + } +#endif + + /* We really ought not ever to revoke twice in a row without + first having the revoke cancelled: it's illegal to free a + block twice without allocating it in between! 
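+	   For example: revoking the same block twice in one transaction,
+	   without journaling it (which cancels the revoke) in between,
+	   trips the J_EXPECT_BH() check below.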
*/ + if (bh) { + if (!J_EXPECT_BH(bh, !buffer_revoked(bh), + "inconsistent data on disk")) { + if (!bh_in) + brelse(bh); + return -EIO; + } + set_buffer_revoked(bh); + set_buffer_revokevalid(bh); + if (bh_in) { + BUFFER_TRACE(bh_in, "call journal_forget"); + journal_forget(handle, bh_in); + } else { + BUFFER_TRACE(bh, "call brelse"); + __brelse(bh); + } + } + + jbd_debug(2, "insert revoke for block %lu, bh_in=%p\n", blocknr, bh_in); + err = insert_revoke_hash(journal, blocknr, + handle->h_transaction->t_tid); + BUFFER_TRACE(bh_in, "exit"); + return err; +} + +/* + * Cancel an outstanding revoke. For use only internally by the + * journaling code (called from journal_get_write_access). + * + * We trust buffer_revoked() on the buffer if the buffer is already + * being journaled: if there is no revoke pending on the buffer, then we + * don't do anything here. + * + * This would break if it were possible for a buffer to be revoked and + * discarded, and then reallocated within the same transaction. In such + * a case we would have lost the revoked bit, but when we arrived here + * the second time we would still have a pending revoke to cancel. So, + * do not trust the Revoked bit on buffers unless RevokeValid is also + * set. + * + * The caller must have the journal locked. + */ +int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) +{ + struct jbd_revoke_record_s *record; + journal_t *journal = handle->h_transaction->t_journal; + int need_cancel; + int did_revoke = 0; /* akpm: debug */ + struct buffer_head *bh = jh2bh(jh); + + jbd_debug(4, "journal_head %p, cancelling revoke\n", jh); + + /* Is the existing Revoke bit valid? If so, we trust it, and + * only perform the full cancel if the revoke bit is set. If + * not, we can't trust the revoke bit, and we need to do the + * full search for a revoke record. */ + if (test_set_buffer_revokevalid(bh)) { + need_cancel = test_clear_buffer_revoked(bh); + } else { + need_cancel = 1; + clear_buffer_revoked(bh); + } + + if (need_cancel) { + record = find_revoke_record(journal, bh->b_blocknr); + if (record) { + jbd_debug(4, "cancelled existing revoke on " + "blocknr %llu\n", (unsigned long long)bh->b_blocknr); + spin_lock(&journal->j_revoke_lock); + list_del(&record->hash); + spin_unlock(&journal->j_revoke_lock); + kmem_cache_free(revoke_record_cache, record); + did_revoke = 1; + } + } + +#ifdef JBD_EXPENSIVE_CHECKING + /* There better not be one left behind by now! */ + record = find_revoke_record(journal, bh->b_blocknr); + J_ASSERT_JH(jh, record == NULL); +#endif + + /* Finally, have we just cleared revoke on an unhashed + * buffer_head? If so, we'd better make sure we clear the + * revoked status on any hashed alias too, otherwise the revoke + * state machine will get very upset later on. 
*/ + if (need_cancel) { + struct buffer_head *bh2; + bh2 = __find_get_block(bh->b_bdev, bh->b_blocknr, bh->b_size); + if (bh2) { + if (bh2 != bh) + clear_buffer_revoked(bh2); + __brelse(bh2); + } + } + return did_revoke; +} + +/* journal_switch_revoke table select j_revoke for next transaction + * we do not want to suspend any processing until all revokes are + * written -bzzz + */ +void journal_switch_revoke_table(journal_t *journal) +{ + int i; + + if (journal->j_revoke == journal->j_revoke_table[0]) + journal->j_revoke = journal->j_revoke_table[1]; + else + journal->j_revoke = journal->j_revoke_table[0]; + + for (i = 0; i < journal->j_revoke->hash_size; i++) + INIT_LIST_HEAD(&journal->j_revoke->hash_table[i]); +} + +/* + * Write revoke records to the journal for all entries in the current + * revoke hash, deleting the entries as we go. + * + * Called with the journal lock held. + */ + +void journal_write_revoke_records(journal_t *journal, + transaction_t *transaction) +{ + struct journal_head *descriptor; + struct jbd_revoke_record_s *record; + struct jbd_revoke_table_s *revoke; + struct list_head *hash_list; + int i, offset, count; + + descriptor = NULL; + offset = 0; + count = 0; + + /* select revoke table for committing transaction */ + revoke = journal->j_revoke == journal->j_revoke_table[0] ? + journal->j_revoke_table[1] : journal->j_revoke_table[0]; + + for (i = 0; i < revoke->hash_size; i++) { + hash_list = &revoke->hash_table[i]; + + while (!list_empty(hash_list)) { + record = (struct jbd_revoke_record_s *) + hash_list->next; + write_one_revoke_record(journal, transaction, + &descriptor, &offset, + record); + count++; + list_del(&record->hash); + kmem_cache_free(revoke_record_cache, record); + } + } + if (descriptor) + flush_descriptor(journal, descriptor, offset); + jbd_debug(1, "Wrote %d revoke records\n", count); +} + +/* + * Write out one revoke record. We need to create a new descriptor + * block if the old one is full or if we have not already created one. + */ + +static void write_one_revoke_record(journal_t *journal, + transaction_t *transaction, + struct journal_head **descriptorp, + int *offsetp, + struct jbd_revoke_record_s *record) +{ + struct journal_head *descriptor; + int offset; + journal_header_t *header; + + /* If we are already aborting, this all becomes a noop. We + still need to go round the loop in + journal_write_revoke_records in order to free all of the + revoke records: only the IO to the journal is omitted. 
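
For reference, a filled revoke descriptor block is roughly: a common journal header, then a packed array of big-endian 32-bit block numbers, with r_count recording the final byte offset (written by the flush below). A userspace sketch of that packing; the JFS_* constants are quoted from the jbd headers as I recall them, and the struct is a simplified stand-in for journal_revoke_header_t:

#include <stdio.h>
#include <string.h>
#include <stdint.h>
#include <arpa/inet.h>	/* htonl(): jbd on-disk fields are big-endian */

#define JFS_MAGIC_NUMBER 0xc03b3998U
#define JFS_REVOKE_BLOCK 5
#define BLOCKSIZE 4096

/* Simplified mirror of journal_revoke_header_t: the common journal
 * header followed by the byte count of valid data in the block. */
struct revoke_header {
	uint32_t h_magic;
	uint32_t h_blocktype;
	uint32_t h_sequence;
	uint32_t r_count;
};

int main(void)
{
	static union {
		struct revoke_header hdr;
		unsigned char bytes[BLOCKSIZE];
	} block;
	unsigned long revoked[] = { 812, 813, 9042 };
	int offset = sizeof(struct revoke_header);
	size_t i;

	memset(&block, 0, sizeof(block));
	block.hdr.h_magic = htonl(JFS_MAGIC_NUMBER);
	block.hdr.h_blocktype = htonl(JFS_REVOKE_BLOCK);
	block.hdr.h_sequence = htonl(42);	/* committing transaction's tid */

	/* write_one_revoke_record(): append one be32 block number each */
	for (i = 0; i < sizeof(revoked) / sizeof(revoked[0]); i++) {
		uint32_t be = htonl((uint32_t)revoked[i]);
		memcpy(block.bytes + offset, &be, 4);
		offset += 4;
	}

	/* flush_descriptor(): r_count is the final byte offset */
	block.hdr.r_count = htonl(offset);
	printf("%zu revoke records, r_count = %d bytes\n",
	       sizeof(revoked) / sizeof(revoked[0]), offset);
	return 0;
}
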
*/ + if (is_journal_aborted(journal)) + return; + + descriptor = *descriptorp; + offset = *offsetp; + + /* Make sure we have a descriptor with space left for the record */ + if (descriptor) { + if (offset == journal->j_blocksize) { + flush_descriptor(journal, descriptor, offset); + descriptor = NULL; + } + } + + if (!descriptor) { + descriptor = journal_get_descriptor_buffer(journal); + if (!descriptor) + return; + header = (journal_header_t *) &jh2bh(descriptor)->b_data[0]; + header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); + header->h_blocktype = cpu_to_be32(JFS_REVOKE_BLOCK); + header->h_sequence = cpu_to_be32(transaction->t_tid); + + /* Record it so that we can wait for IO completion later */ + JBUFFER_TRACE(descriptor, "file as BJ_LogCtl"); + journal_file_buffer(descriptor, transaction, BJ_LogCtl); + + offset = sizeof(journal_revoke_header_t); + *descriptorp = descriptor; + } + + * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = + cpu_to_be32(record->blocknr); + offset += 4; + *offsetp = offset; +} + +/* + * Flush a revoke descriptor out to the journal. If we are aborting, + * this is a noop; otherwise we are generating a buffer which needs to + * be waited for during commit, so it has to go onto the appropriate + * journal buffer list. + */ + +static void flush_descriptor(journal_t *journal, + struct journal_head *descriptor, + int offset) +{ + journal_revoke_header_t *header; + struct buffer_head *bh = jh2bh(descriptor); + + if (is_journal_aborted(journal)) { + put_bh(bh); + return; + } + + header = (journal_revoke_header_t *) jh2bh(descriptor)->b_data; + header->r_count = cpu_to_be32(offset); + set_buffer_jwrite(bh); + BUFFER_TRACE(bh, "write"); + set_buffer_dirty(bh); + ll_rw_block(SWRITE, 1, &bh); +} +#endif + +/* + * Revoke support for recovery. + * + * Recovery needs to be able to: + * + * record all revoke records, including the tid of the latest instance + * of each revoke in the journal + * + * check whether a given block in a given transaction should be replayed + * (ie. has not been revoked by a revoke record in that or a subsequent + * transaction) + * + * empty the revoke table after recovery. + */ + +/* + * First, setting revoke records. We create a new revoke record for + * every block ever revoked in the log as we scan it for recovery, and + * we update the existing records if we find multiple revokes for a + * single block. + */ + +int journal_set_revoke(journal_t *journal, + unsigned long blocknr, + tid_t sequence) +{ + struct jbd_revoke_record_s *record; + + record = find_revoke_record(journal, blocknr); + if (record) { + /* If we have multiple occurrences, only record the + * latest sequence number in the hashed record */ + if (tid_gt(sequence, record->sequence)) + record->sequence = sequence; + return 0; + } + return insert_revoke_hash(journal, blocknr, sequence); +} + +/* + * Test revoke records. For a given block referenced in the log, has + * that block been revoked? A revoke record with a given transaction + * sequence number revokes all blocks in that transaction and earlier + * ones, but later transactions still need replayed. + */ + +int journal_test_revoke(journal_t *journal, + unsigned long blocknr, + tid_t sequence) +{ + struct jbd_revoke_record_s *record; + + record = find_revoke_record(journal, blocknr); + if (!record) + return 0; + if (tid_gt(sequence, record->sequence)) + return 0; + return 1; +} + +/* + * Finally, once recovery is over, we need to clear the revoke table so + * that it can be reused by the running filesystem. 
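
The recovery-side rule above - a revoke with sequence number N suppresses replay of that block from transaction N and everything earlier - compresses to one wraparound-safe comparison. A small userspace model (names invented; tid_gt() is the usual signed-difference idiom):

#include <stdio.h>
#include <stdint.h>

typedef uint32_t tid_t;

/* Wraparound-safe "x is later than y". */
static int tid_gt(tid_t x, tid_t y)
{
	return (int32_t)(x - y) > 0;
}

/* One hashed record per block, as journal_set_revoke() maintains it. */
struct record {
	unsigned long blocknr;
	tid_t sequence;
};

/* Model of journal_test_revoke(), inverted for readability: should a
 * copy of @blocknr logged in transaction @sequence be replayed? */
static int should_replay(const struct record *rec, unsigned long blocknr,
			 tid_t sequence)
{
	if (!rec || rec->blocknr != blocknr)
		return 1;			/* no revoke record: replay */
	return tid_gt(sequence, rec->sequence);	/* later than revoke: replay */
}

int main(void)
{
	struct record rec = { 100, 8 };	/* block 100 revoked in tid 8 */

	printf("copy from tid 5: replay=%d\n", should_replay(&rec, 100, 5));
	printf("copy from tid 8: replay=%d\n", should_replay(&rec, 100, 8));
	printf("copy from tid 9: replay=%d\n", should_replay(&rec, 100, 9));
	return 0;
}
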
+ */ + +void journal_clear_revoke(journal_t *journal) +{ + int i; + struct list_head *hash_list; + struct jbd_revoke_record_s *record; + struct jbd_revoke_table_s *revoke; + + revoke = journal->j_revoke; + + for (i = 0; i < revoke->hash_size; i++) { + hash_list = &revoke->hash_table[i]; + while (!list_empty(hash_list)) { + record = (struct jbd_revoke_record_s*) hash_list->next; + list_del(&record->hash); + kmem_cache_free(revoke_record_cache, record); + } + } +} diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c new file mode 100644 index 000000000000..e1b3c8af4d17 --- /dev/null +++ b/fs/jbd2/transaction.c @@ -0,0 +1,2080 @@ +/* + * linux/fs/transaction.c + * + * Written by Stephen C. Tweedie <sct@redhat.com>, 1998 + * + * Copyright 1998 Red Hat corp --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Generic filesystem transaction handling code; part of the ext2fs + * journaling system. + * + * This file manages transactions (compound commits managed by the + * journaling code) and handles (individual atomic operations by the + * filesystem). + */ + +#include <linux/time.h> +#include <linux/fs.h> +#include <linux/jbd.h> +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/timer.h> +#include <linux/smp_lock.h> +#include <linux/mm.h> +#include <linux/highmem.h> + +/* + * get_transaction: obtain a new transaction_t object. + * + * Simply allocate and initialise a new transaction. Create it in + * RUNNING state and add it to the current journal (which should not + * have an existing running transaction: we only make a new transaction + * once we have started to commit the old one). + * + * Preconditions: + * The journal MUST be locked. We don't perform atomic mallocs on the + * new transaction and we can't block without protecting against other + * processes trying to touch the journal while it is in transition. + * + * Called under j_state_lock + */ + +static transaction_t * +get_transaction(journal_t *journal, transaction_t *transaction) +{ + transaction->t_journal = journal; + transaction->t_state = T_RUNNING; + transaction->t_tid = journal->j_transaction_sequence++; + transaction->t_expires = jiffies + journal->j_commit_interval; + spin_lock_init(&transaction->t_handle_lock); + + /* Set up the commit timer for the new transaction. */ + journal->j_commit_timer.expires = transaction->t_expires; + add_timer(&journal->j_commit_timer); + + J_ASSERT(journal->j_running_transaction == NULL); + journal->j_running_transaction = transaction; + + return transaction; +} + +/* + * Handle management. + * + * A handle_t is an object which represents a single atomic update to a + * filesystem, and which tracks all of the modifications which form part + * of that one update. + */ + +/* + * start_this_handle: Given a handle, deal with any locking or stalling + * needed to make sure that there is enough journal space for the handle + * to begin. Attach the handle to a transaction and set up the + * transaction's buffer credits. 
+ */ + +static int start_this_handle(journal_t *journal, handle_t *handle) +{ + transaction_t *transaction; + int needed; + int nblocks = handle->h_buffer_credits; + transaction_t *new_transaction = NULL; + int ret = 0; + + if (nblocks > journal->j_max_transaction_buffers) { + printk(KERN_ERR "JBD: %s wants too many credits (%d > %d)\n", + current->comm, nblocks, + journal->j_max_transaction_buffers); + ret = -ENOSPC; + goto out; + } + +alloc_transaction: + if (!journal->j_running_transaction) { + new_transaction = jbd_kmalloc(sizeof(*new_transaction), + GFP_NOFS); + if (!new_transaction) { + ret = -ENOMEM; + goto out; + } + memset(new_transaction, 0, sizeof(*new_transaction)); + } + + jbd_debug(3, "New handle %p going live.\n", handle); + +repeat: + + /* + * We need to hold j_state_lock until t_updates has been incremented, + * for proper journal barrier handling + */ + spin_lock(&journal->j_state_lock); +repeat_locked: + if (is_journal_aborted(journal) || + (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) { + spin_unlock(&journal->j_state_lock); + ret = -EROFS; + goto out; + } + + /* Wait on the journal's transaction barrier if necessary */ + if (journal->j_barrier_count) { + spin_unlock(&journal->j_state_lock); + wait_event(journal->j_wait_transaction_locked, + journal->j_barrier_count == 0); + goto repeat; + } + + if (!journal->j_running_transaction) { + if (!new_transaction) { + spin_unlock(&journal->j_state_lock); + goto alloc_transaction; + } + get_transaction(journal, new_transaction); + new_transaction = NULL; + } + + transaction = journal->j_running_transaction; + + /* + * If the current transaction is locked down for commit, wait for the + * lock to be released. + */ + if (transaction->t_state == T_LOCKED) { + DEFINE_WAIT(wait); + + prepare_to_wait(&journal->j_wait_transaction_locked, + &wait, TASK_UNINTERRUPTIBLE); + spin_unlock(&journal->j_state_lock); + schedule(); + finish_wait(&journal->j_wait_transaction_locked, &wait); + goto repeat; + } + + /* + * If there is not enough space left in the log to write all potential + * buffers requested by this operation, we need to stall pending a log + * checkpoint to free some more log space. + */ + spin_lock(&transaction->t_handle_lock); + needed = transaction->t_outstanding_credits + nblocks; + + if (needed > journal->j_max_transaction_buffers) { + /* + * If the current transaction is already too large, then start + * to commit it: we can then go back and attach this handle to + * a new transaction. + */ + DEFINE_WAIT(wait); + + jbd_debug(2, "Handle %p starting new commit...\n", handle); + spin_unlock(&transaction->t_handle_lock); + prepare_to_wait(&journal->j_wait_transaction_locked, &wait, + TASK_UNINTERRUPTIBLE); + __log_start_commit(journal, transaction->t_tid); + spin_unlock(&journal->j_state_lock); + schedule(); + finish_wait(&journal->j_wait_transaction_locked, &wait); + goto repeat; + } + + /* + * The commit code assumes that it can get enough log space + * without forcing a checkpoint. This is *critical* for + * correctness: a checkpoint of a buffer which is also + * associated with a committing transaction creates a deadlock, + * so commit simply cannot force through checkpoints. + * + * We must therefore ensure the necessary space in the journal + * *before* starting to dirty potentially checkpointed buffers + * in the new transaction. + * + * The worst part is, any transaction currently committing can + * reduce the free space arbitrarily. Be careful to account for + * those buffers when checkpointing. 
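
Stripped of the locking and retry loops, the admission test at the heart of start_this_handle() is simple bookkeeping. A toy model of just that test (the limit constant is made up; in the kernel it is j_max_transaction_buffers):

#include <stdio.h>

#define MAX_TRANSACTION_BUFFERS 32	/* stand-in for j_max_transaction_buffers */

struct transaction {
	int outstanding_credits;
};

/* Returns 0 if the handle may join, 1 if the caller should commit the
 * current transaction and retry against a fresh one. */
static int try_join(struct transaction *t, int nblocks)
{
	int needed = t->outstanding_credits + nblocks;

	if (needed > MAX_TRANSACTION_BUFFERS)
		return 1;		/* transaction full: start a commit */
	t->outstanding_credits = needed;
	return 0;
}

int main(void)
{
	struct transaction t = { 0 };
	int i;

	for (i = 0; i < 5; i++) {
		if (try_join(&t, 10))
			printf("handle %d: transaction full, commit first\n", i);
		else
			printf("handle %d: joined (outstanding=%d)\n",
			       i, t.outstanding_credits);
	}
	return 0;
}
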
+ */
+
+ /*
+ * @@@ AKPM: This seems rather over-defensive. We're giving commit
+ * a _lot_ of headroom: 1/4 of the journal plus the size of
+ * the committing transaction. Really, we only need to give it
+ * committing_transaction->t_outstanding_credits plus "enough" for
+ * the log control blocks.
+ * Also, this test is inconsistent with the matching one in
+ * journal_extend().
+ */
+ if (__log_space_left(journal) < jbd_space_needed(journal)) {
+ jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle);
+ spin_unlock(&transaction->t_handle_lock);
+ __log_wait_for_space(journal);
+ goto repeat_locked;
+ }
+
+ /* OK, account for the buffers that this operation expects to
+ * use and add the handle to the running transaction. */
+
+ handle->h_transaction = transaction;
+ transaction->t_outstanding_credits += nblocks;
+ transaction->t_updates++;
+ transaction->t_handle_count++;
+ jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n",
+ handle, nblocks, transaction->t_outstanding_credits,
+ __log_space_left(journal));
+ spin_unlock(&transaction->t_handle_lock);
+ spin_unlock(&journal->j_state_lock);
+out:
+ if (unlikely(new_transaction)) /* It's usually NULL */
+ kfree(new_transaction);
+ return ret;
+}
+
+/* Allocate a new handle. This should probably be in a slab... */
+static handle_t *new_handle(int nblocks)
+{
+ handle_t *handle = jbd_alloc_handle(GFP_NOFS);
+ if (!handle)
+ return NULL;
+ memset(handle, 0, sizeof(*handle));
+ handle->h_buffer_credits = nblocks;
+ handle->h_ref = 1;
+
+ return handle;
+}
+
+/**
+ * handle_t *journal_start() - Obtain a new handle.
+ * @journal: Journal to start transaction on.
+ * @nblocks: number of block buffers we might modify
+ *
+ * We make sure that the transaction can guarantee at least nblocks of
+ * modified buffers in the log. We block until the log can guarantee
+ * that much space.
+ *
+ * This function is visible to journal users (like ext3fs), so is not
+ * called with the journal already locked.
+ *
+ * Return a pointer to a newly allocated handle, or an ERR_PTR() value
+ * on failure.
+ */
+handle_t *journal_start(journal_t *journal, int nblocks)
+{
+ handle_t *handle = journal_current_handle();
+ int err;
+
+ if (!journal)
+ return ERR_PTR(-EROFS);
+
+ if (handle) {
+ J_ASSERT(handle->h_transaction->t_journal == journal);
+ handle->h_ref++;
+ return handle;
+ }
+
+ handle = new_handle(nblocks);
+ if (!handle)
+ return ERR_PTR(-ENOMEM);
+
+ current->journal_info = handle;
+
+ err = start_this_handle(journal, handle);
+ if (err < 0) {
+ jbd_free_handle(handle);
+ current->journal_info = NULL;
+ handle = ERR_PTR(err);
+ }
+ return handle;
+}
+
+/**
+ * int journal_extend() - extend buffer credits.
+ * @handle: handle to 'extend'
+ * @nblocks: nr blocks to try to extend by.
+ *
+ * Some transactions, such as large extends and truncates, can be done
+ * atomically all at once or in several stages. The operation requests
+ * a credit for a number of buffer modifications in advance, but can
+ * extend its credit if it needs more.
+ *
+ * journal_extend tries to give the running handle more buffer credits.
+ * It does not guarantee the allocation - this is best-effort only.
+ * The calling process MUST be able to deal cleanly with a failure to
+ * extend here.
+ *
+ * Return 0 on success, non-zero on failure.
+ *
+ * return code < 0 implies an error
+ * return code > 0 implies normal transaction-full status.
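
That tri-state return is easiest to see from the caller's side. Below is a schematic, self-contained sketch of the usual consume pattern; journal_extend() and journal_restart() are stubs, and the stub behaviour (extend always reports "transaction full") is invented purely so the fallback path runs:

#include <stdio.h>

typedef struct {
	int credits;
} handle_t;

/* Stubs standing in for the real journal calls. */
static int journal_extend(handle_t *h, int nblocks)
{
	(void)h;
	(void)nblocks;
	return 1;			/* > 0: normal transaction-full status */
}

static int journal_restart(handle_t *h, int nblocks)
{
	h->credits = nblocks;		/* reattached to a fresh transaction */
	return 0;
}

/* The usual caller pattern: try to extend in place; only if the running
 * transaction is full, fall back to a restart (which commits the work
 * done so far). A negative return is a hard error. */
static int ensure_credits(handle_t *h, int nblocks)
{
	int err = journal_extend(h, nblocks);

	if (err < 0)
		return err;		/* real error */
	if (err > 0)
		err = journal_restart(h, nblocks);
	return err;
}

int main(void)
{
	handle_t h = { 0 };

	printf("ensure_credits: %d (credits=%d)\n",
	       ensure_credits(&h, 8), h.credits);
	return 0;
}
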
+ */
+int journal_extend(handle_t *handle, int nblocks)
+{
+ transaction_t *transaction = handle->h_transaction;
+ journal_t *journal = transaction->t_journal;
+ int result;
+ int wanted;
+
+ result = -EIO;
+ if (is_handle_aborted(handle))
+ goto out;
+
+ result = 1;
+
+ spin_lock(&journal->j_state_lock);
+
+ /* Don't extend a locked-down transaction! */
+ if (handle->h_transaction->t_state != T_RUNNING) {
+ jbd_debug(3, "denied handle %p %d blocks: "
+ "transaction not running\n", handle, nblocks);
+ goto error_out;
+ }
+
+ spin_lock(&transaction->t_handle_lock);
+ wanted = transaction->t_outstanding_credits + nblocks;
+
+ if (wanted > journal->j_max_transaction_buffers) {
+ jbd_debug(3, "denied handle %p %d blocks: "
+ "transaction too large\n", handle, nblocks);
+ goto unlock;
+ }
+
+ if (wanted > __log_space_left(journal)) {
+ jbd_debug(3, "denied handle %p %d blocks: "
+ "insufficient log space\n", handle, nblocks);
+ goto unlock;
+ }
+
+ handle->h_buffer_credits += nblocks;
+ transaction->t_outstanding_credits += nblocks;
+ result = 0;
+
+ jbd_debug(3, "extended handle %p by %d\n", handle, nblocks);
+unlock:
+ spin_unlock(&transaction->t_handle_lock);
+error_out:
+ spin_unlock(&journal->j_state_lock);
+out:
+ return result;
+}
+
+
+/**
+ * int journal_restart() - restart a handle.
+ * @handle: handle to restart
+ * @nblocks: nr credits requested
+ *
+ * Restart a handle for a multi-transaction filesystem
+ * operation.
+ *
+ * If the journal_extend() call above fails to grant new buffer credits
+ * to a running handle, a call to journal_restart will commit the
+ * handle's transaction so far and reattach the handle to a new
+ * transaction capable of guaranteeing the requested number of
+ * credits.
+ */
+
+int journal_restart(handle_t *handle, int nblocks)
+{
+ transaction_t *transaction = handle->h_transaction;
+ journal_t *journal = transaction->t_journal;
+ int ret;
+
+ /* If we've had an abort of any type, don't even think about
+ * actually doing the restart! */
+ if (is_handle_aborted(handle))
+ return 0;
+
+ /*
+ * First unlink the handle from its current transaction, and start the
+ * commit on that.
+ */
+ J_ASSERT(transaction->t_updates > 0);
+ J_ASSERT(journal_current_handle() == handle);
+
+ spin_lock(&journal->j_state_lock);
+ spin_lock(&transaction->t_handle_lock);
+ transaction->t_outstanding_credits -= handle->h_buffer_credits;
+ transaction->t_updates--;
+
+ if (!transaction->t_updates)
+ wake_up(&journal->j_wait_updates);
+ spin_unlock(&transaction->t_handle_lock);
+
+ jbd_debug(2, "restarting handle %p\n", handle);
+ __log_start_commit(journal, transaction->t_tid);
+ spin_unlock(&journal->j_state_lock);
+
+ handle->h_buffer_credits = nblocks;
+ ret = start_this_handle(journal, handle);
+ return ret;
+}
+
+
+/**
+ * void journal_lock_updates() - establish a transaction barrier.
+ * @journal: Journal to establish a barrier on.
+ *
+ * This locks out any further updates from being started, and blocks
+ * until all existing updates have completed, returning only once the
+ * journal is in a quiescent state with no updates running.
+ *
+ * The journal lock should not be held on entry.
+ */ +void journal_lock_updates(journal_t *journal) +{ + DEFINE_WAIT(wait); + + spin_lock(&journal->j_state_lock); + ++journal->j_barrier_count; + + /* Wait until there are no running updates */ + while (1) { + transaction_t *transaction = journal->j_running_transaction; + + if (!transaction) + break; + + spin_lock(&transaction->t_handle_lock); + if (!transaction->t_updates) { + spin_unlock(&transaction->t_handle_lock); + break; + } + prepare_to_wait(&journal->j_wait_updates, &wait, + TASK_UNINTERRUPTIBLE); + spin_unlock(&transaction->t_handle_lock); + spin_unlock(&journal->j_state_lock); + schedule(); + finish_wait(&journal->j_wait_updates, &wait); + spin_lock(&journal->j_state_lock); + } + spin_unlock(&journal->j_state_lock); + + /* + * We have now established a barrier against other normal updates, but + * we also need to barrier against other journal_lock_updates() calls + * to make sure that we serialise special journal-locked operations + * too. + */ + mutex_lock(&journal->j_barrier); +} + +/** + * void journal_unlock_updates (journal_t* journal) - release barrier + * @journal: Journal to release the barrier on. + * + * Release a transaction barrier obtained with journal_lock_updates(). + * + * Should be called without the journal lock held. + */ +void journal_unlock_updates (journal_t *journal) +{ + J_ASSERT(journal->j_barrier_count != 0); + + mutex_unlock(&journal->j_barrier); + spin_lock(&journal->j_state_lock); + --journal->j_barrier_count; + spin_unlock(&journal->j_state_lock); + wake_up(&journal->j_wait_transaction_locked); +} + +/* + * Report any unexpected dirty buffers which turn up. Normally those + * indicate an error, but they can occur if the user is running (say) + * tune2fs to modify the live filesystem, so we need the option of + * continuing as gracefully as possible. # + * + * The caller should already hold the journal lock and + * j_list_lock spinlock: most callers will need those anyway + * in order to probe the buffer's journaling state safely. + */ +static void jbd_unexpected_dirty_buffer(struct journal_head *jh) +{ + int jlist; + + /* If this buffer is one which might reasonably be dirty + * --- ie. data, or not part of this journal --- then + * we're OK to leave it alone, but otherwise we need to + * move the dirty bit to the journal's own internal + * JBDDirty bit. */ + jlist = jh->b_jlist; + + if (jlist == BJ_Metadata || jlist == BJ_Reserved || + jlist == BJ_Shadow || jlist == BJ_Forget) { + struct buffer_head *bh = jh2bh(jh); + + if (test_clear_buffer_dirty(bh)) + set_buffer_jbddirty(bh); + } +} + +/* + * If the buffer is already part of the current transaction, then there + * is nothing we need to do. If it is already part of a prior + * transaction which we are still committing to disk, then we need to + * make sure that we do not overwrite the old copy: we do copy-out to + * preserve the copy going to disk. We also account the buffer against + * the handle's metadata buffer credits (unless the buffer is already + * part of the transaction, that is). 
+ * + */ +static int +do_get_write_access(handle_t *handle, struct journal_head *jh, + int force_copy) +{ + struct buffer_head *bh; + transaction_t *transaction; + journal_t *journal; + int error; + char *frozen_buffer = NULL; + int need_copy = 0; + + if (is_handle_aborted(handle)) + return -EROFS; + + transaction = handle->h_transaction; + journal = transaction->t_journal; + + jbd_debug(5, "buffer_head %p, force_copy %d\n", jh, force_copy); + + JBUFFER_TRACE(jh, "entry"); +repeat: + bh = jh2bh(jh); + + /* @@@ Need to check for errors here at some point. */ + + lock_buffer(bh); + jbd_lock_bh_state(bh); + + /* We now hold the buffer lock so it is safe to query the buffer + * state. Is the buffer dirty? + * + * If so, there are two possibilities. The buffer may be + * non-journaled, and undergoing a quite legitimate writeback. + * Otherwise, it is journaled, and we don't expect dirty buffers + * in that state (the buffers should be marked JBD_Dirty + * instead.) So either the IO is being done under our own + * control and this is a bug, or it's a third party IO such as + * dump(8) (which may leave the buffer scheduled for read --- + * ie. locked but not dirty) or tune2fs (which may actually have + * the buffer dirtied, ugh.) */ + + if (buffer_dirty(bh)) { + /* + * First question: is this buffer already part of the current + * transaction or the existing committing transaction? + */ + if (jh->b_transaction) { + J_ASSERT_JH(jh, + jh->b_transaction == transaction || + jh->b_transaction == + journal->j_committing_transaction); + if (jh->b_next_transaction) + J_ASSERT_JH(jh, jh->b_next_transaction == + transaction); + } + /* + * In any case we need to clean the dirty flag and we must + * do it under the buffer lock to be sure we don't race + * with running write-out. + */ + JBUFFER_TRACE(jh, "Unexpected dirty buffer"); + jbd_unexpected_dirty_buffer(jh); + } + + unlock_buffer(bh); + + error = -EROFS; + if (is_handle_aborted(handle)) { + jbd_unlock_bh_state(bh); + goto out; + } + error = 0; + + /* + * The buffer is already part of this transaction if b_transaction or + * b_next_transaction points to it + */ + if (jh->b_transaction == transaction || + jh->b_next_transaction == transaction) + goto done; + + /* + * If there is already a copy-out version of this buffer, then we don't + * need to make another one + */ + if (jh->b_frozen_data) { + JBUFFER_TRACE(jh, "has frozen data"); + J_ASSERT_JH(jh, jh->b_next_transaction == NULL); + jh->b_next_transaction = transaction; + goto done; + } + + /* Is there data here we need to preserve? */ + + if (jh->b_transaction && jh->b_transaction != transaction) { + JBUFFER_TRACE(jh, "owned by older transaction"); + J_ASSERT_JH(jh, jh->b_next_transaction == NULL); + J_ASSERT_JH(jh, jh->b_transaction == + journal->j_committing_transaction); + + /* There is one case we have to be very careful about. + * If the committing transaction is currently writing + * this buffer out to disk and has NOT made a copy-out, + * then we cannot modify the buffer contents at all + * right now. The essence of copy-out is that it is the + * extra copy, not the primary copy, which gets + * journaled. If the primary copy is already going to + * disk then we cannot do copy-out here. 
*/ + + if (jh->b_jlist == BJ_Shadow) { + DEFINE_WAIT_BIT(wait, &bh->b_state, BH_Unshadow); + wait_queue_head_t *wqh; + + wqh = bit_waitqueue(&bh->b_state, BH_Unshadow); + + JBUFFER_TRACE(jh, "on shadow: sleep"); + jbd_unlock_bh_state(bh); + /* commit wakes up all shadow buffers after IO */ + for ( ; ; ) { + prepare_to_wait(wqh, &wait.wait, + TASK_UNINTERRUPTIBLE); + if (jh->b_jlist != BJ_Shadow) + break; + schedule(); + } + finish_wait(wqh, &wait.wait); + goto repeat; + } + + /* Only do the copy if the currently-owning transaction + * still needs it. If it is on the Forget list, the + * committing transaction is past that stage. The + * buffer had better remain locked during the kmalloc, + * but that should be true --- we hold the journal lock + * still and the buffer is already on the BUF_JOURNAL + * list so won't be flushed. + * + * Subtle point, though: if this is a get_undo_access, + * then we will be relying on the frozen_data to contain + * the new value of the committed_data record after the + * transaction, so we HAVE to force the frozen_data copy + * in that case. */ + + if (jh->b_jlist != BJ_Forget || force_copy) { + JBUFFER_TRACE(jh, "generate frozen data"); + if (!frozen_buffer) { + JBUFFER_TRACE(jh, "allocate memory for buffer"); + jbd_unlock_bh_state(bh); + frozen_buffer = + jbd_slab_alloc(jh2bh(jh)->b_size, + GFP_NOFS); + if (!frozen_buffer) { + printk(KERN_EMERG + "%s: OOM for frozen_buffer\n", + __FUNCTION__); + JBUFFER_TRACE(jh, "oom!"); + error = -ENOMEM; + jbd_lock_bh_state(bh); + goto done; + } + goto repeat; + } + jh->b_frozen_data = frozen_buffer; + frozen_buffer = NULL; + need_copy = 1; + } + jh->b_next_transaction = transaction; + } + + + /* + * Finally, if the buffer is not journaled right now, we need to make + * sure it doesn't get written to disk before the caller actually + * commits the new data + */ + if (!jh->b_transaction) { + JBUFFER_TRACE(jh, "no transaction"); + J_ASSERT_JH(jh, !jh->b_next_transaction); + jh->b_transaction = transaction; + JBUFFER_TRACE(jh, "file as BJ_Reserved"); + spin_lock(&journal->j_list_lock); + __journal_file_buffer(jh, transaction, BJ_Reserved); + spin_unlock(&journal->j_list_lock); + } + +done: + if (need_copy) { + struct page *page; + int offset; + char *source; + + J_EXPECT_JH(jh, buffer_uptodate(jh2bh(jh)), + "Possible IO failure.\n"); + page = jh2bh(jh)->b_page; + offset = ((unsigned long) jh2bh(jh)->b_data) & ~PAGE_MASK; + source = kmap_atomic(page, KM_USER0); + memcpy(jh->b_frozen_data, source+offset, jh2bh(jh)->b_size); + kunmap_atomic(source, KM_USER0); + } + jbd_unlock_bh_state(bh); + + /* + * If we are about to journal a buffer, then any revoke pending on it is + * no longer valid + */ + journal_cancel_revoke(handle, jh); + +out: + if (unlikely(frozen_buffer)) /* It's usually NULL */ + jbd_slab_free(frozen_buffer, bh->b_size); + + JBUFFER_TRACE(jh, "exit"); + return error; +} + +/** + * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. + * @handle: transaction to add buffer modifications to + * @bh: bh to be used for metadata writes + * @credits: variable that will receive credits for the buffer + * + * Returns an error code or 0 on success. + * + * In full data journalling mode the buffer may be of type BJ_AsyncData, + * because we're write()ing a buffer which is also part of a shared mapping. 
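
The copy-out rule above reduces to: freeze the committing transaction's view of the buffer before the running transaction touches the primary copy. A minimal userspace model with toy sizes and invented names (b_frozen_data is the field being imitated):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define BLOCKSIZE 8	/* toy size */

/* "data" is what commit of the OLD transaction must write; once frozen,
 * new modifications may safely hit the primary copy. */
struct buf {
	char data[BLOCKSIZE];
	char *frozen_data;	/* jh->b_frozen_data analogue */
};

/* Model of the copy-out in do_get_write_access(): if an older,
 * committing transaction still owns the buffer, preserve its view
 * before the running transaction scribbles on the primary copy. */
static void get_write_access(struct buf *b, int owned_by_committing)
{
	if (owned_by_committing && !b->frozen_data) {
		b->frozen_data = malloc(BLOCKSIZE);
		if (!b->frozen_data)
			abort();
		memcpy(b->frozen_data, b->data, BLOCKSIZE);
	}
}

int main(void)
{
	struct buf b = { "old!!!", NULL };

	get_write_access(&b, 1);	/* owned by the committing transaction */
	memcpy(b.data, "new!!!", 7);	/* running transaction modifies it */

	printf("journal writes: %s\n", b.frozen_data);	/* still "old!!!" */
	printf("primary copy:   %s\n", b.data);
	free(b.frozen_data);
	return 0;
}
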
+ */ + +int journal_get_write_access(handle_t *handle, struct buffer_head *bh) +{ + struct journal_head *jh = journal_add_journal_head(bh); + int rc; + + /* We do not want to get caught playing with fields which the + * log thread also manipulates. Make sure that the buffer + * completes any outstanding IO before proceeding. */ + rc = do_get_write_access(handle, jh, 0); + journal_put_journal_head(jh); + return rc; +} + + +/* + * When the user wants to journal a newly created buffer_head + * (ie. getblk() returned a new buffer and we are going to populate it + * manually rather than reading off disk), then we need to keep the + * buffer_head locked until it has been completely filled with new + * data. In this case, we should be able to make the assertion that + * the bh is not already part of an existing transaction. + * + * The buffer should already be locked by the caller by this point. + * There is no lock ranking violation: it was a newly created, + * unlocked buffer beforehand. */ + +/** + * int journal_get_create_access () - notify intent to use newly created bh + * @handle: transaction to new buffer to + * @bh: new buffer. + * + * Call this if you create a new bh. + */ +int journal_get_create_access(handle_t *handle, struct buffer_head *bh) +{ + transaction_t *transaction = handle->h_transaction; + journal_t *journal = transaction->t_journal; + struct journal_head *jh = journal_add_journal_head(bh); + int err; + + jbd_debug(5, "journal_head %p\n", jh); + err = -EROFS; + if (is_handle_aborted(handle)) + goto out; + err = 0; + + JBUFFER_TRACE(jh, "entry"); + /* + * The buffer may already belong to this transaction due to pre-zeroing + * in the filesystem's new_block code. It may also be on the previous, + * committing transaction's lists, but it HAS to be in Forget state in + * that case: the transaction must have deleted the buffer for it to be + * reused here. + */ + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); + J_ASSERT_JH(jh, (jh->b_transaction == transaction || + jh->b_transaction == NULL || + (jh->b_transaction == journal->j_committing_transaction && + jh->b_jlist == BJ_Forget))); + + J_ASSERT_JH(jh, jh->b_next_transaction == NULL); + J_ASSERT_JH(jh, buffer_locked(jh2bh(jh))); + + if (jh->b_transaction == NULL) { + jh->b_transaction = transaction; + JBUFFER_TRACE(jh, "file as BJ_Reserved"); + __journal_file_buffer(jh, transaction, BJ_Reserved); + } else if (jh->b_transaction == journal->j_committing_transaction) { + JBUFFER_TRACE(jh, "set next transaction"); + jh->b_next_transaction = transaction; + } + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + + /* + * akpm: I added this. ext3_alloc_branch can pick up new indirect + * blocks which contain freed but then revoked metadata. We need + * to cancel the revoke in case we end up freeing it yet again + * and the reallocating as data - this would cause a second revoke, + * which hits an assertion error. + */ + JBUFFER_TRACE(jh, "cancelling revoke"); + journal_cancel_revoke(handle, jh); + journal_put_journal_head(jh); +out: + return err; +} + +/** + * int journal_get_undo_access() - Notify intent to modify metadata with + * non-rewindable consequences + * @handle: transaction + * @bh: buffer to undo + * @credits: store the number of taken credits here (if not NULL) + * + * Sometimes there is a need to distinguish between metadata which has + * been committed to disk and that which has not. 
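
Why the committed copy matters is clearest with a toy allocator. In the sketch below (invented names, eight-block "filesystem"), a block freed by the running transaction is still busy in the committed view, so it is not handed out again until that delete has committed - which is the guarantee the undo-access machinery preserves:

#include <stdio.h>

/* "current" is what the running transaction sees; "committed" models
 * the b_committed_data snapshot taken by journal_get_undo_access().
 * A set byte means the block is in use. */
#define NBLOCKS 8
static unsigned char current_map[NBLOCKS];
static unsigned char committed_map[NBLOCKS];

/* A block is only safe to hand out if it is free in BOTH views: a
 * block freed in the running transaction is still "in use" in the
 * committed view, and reusing it now could make the delete
 * un-rewindable after a crash. */
static int alloc_block(void)
{
	int i;

	for (i = 0; i < NBLOCKS; i++) {
		if (!current_map[i] && !committed_map[i]) {
			current_map[i] = 1;
			return i;
		}
	}
	return -1;
}

int main(void)
{
	committed_map[0] = 1;	/* in use when the last commit happened */
	current_map[0] = 0;	/* ...but freed by the running transaction */

	printf("allocated block %d\n", alloc_block());	/* 1, not 0 */
	return 0;
}
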
The ext3fs code uses + * this for freeing and allocating space, we have to make sure that we + * do not reuse freed space until the deallocation has been committed, + * since if we overwrote that space we would make the delete + * un-rewindable in case of a crash. + * + * To deal with that, journal_get_undo_access requests write access to a + * buffer for parts of non-rewindable operations such as delete + * operations on the bitmaps. The journaling code must keep a copy of + * the buffer's contents prior to the undo_access call until such time + * as we know that the buffer has definitely been committed to disk. + * + * We never need to know which transaction the committed data is part + * of, buffers touched here are guaranteed to be dirtied later and so + * will be committed to a new transaction in due course, at which point + * we can discard the old committed data pointer. + * + * Returns error number or 0 on success. + */ +int journal_get_undo_access(handle_t *handle, struct buffer_head *bh) +{ + int err; + struct journal_head *jh = journal_add_journal_head(bh); + char *committed_data = NULL; + + JBUFFER_TRACE(jh, "entry"); + + /* + * Do this first --- it can drop the journal lock, so we want to + * make sure that obtaining the committed_data is done + * atomically wrt. completion of any outstanding commits. + */ + err = do_get_write_access(handle, jh, 1); + if (err) + goto out; + +repeat: + if (!jh->b_committed_data) { + committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); + if (!committed_data) { + printk(KERN_EMERG "%s: No memory for committed data\n", + __FUNCTION__); + err = -ENOMEM; + goto out; + } + } + + jbd_lock_bh_state(bh); + if (!jh->b_committed_data) { + /* Copy out the current buffer contents into the + * preserved, committed copy. */ + JBUFFER_TRACE(jh, "generate b_committed data"); + if (!committed_data) { + jbd_unlock_bh_state(bh); + goto repeat; + } + + jh->b_committed_data = committed_data; + committed_data = NULL; + memcpy(jh->b_committed_data, bh->b_data, bh->b_size); + } + jbd_unlock_bh_state(bh); +out: + journal_put_journal_head(jh); + if (unlikely(committed_data)) + jbd_slab_free(committed_data, bh->b_size); + return err; +} + +/** + * int journal_dirty_data() - mark a buffer as containing dirty data which + * needs to be flushed before we can commit the + * current transaction. + * @handle: transaction + * @bh: bufferhead to mark + * + * The buffer is placed on the transaction's data list and is marked as + * belonging to the transaction. + * + * Returns error number or 0 on success. + * + * journal_dirty_data() can be called via page_launder->ext3_writepage + * by kswapd. + */ +int journal_dirty_data(handle_t *handle, struct buffer_head *bh) +{ + journal_t *journal = handle->h_transaction->t_journal; + int need_brelse = 0; + struct journal_head *jh; + + if (is_handle_aborted(handle)) + return 0; + + jh = journal_add_journal_head(bh); + JBUFFER_TRACE(jh, "entry"); + + /* + * The buffer could *already* be dirty. Writeout can start + * at any time. + */ + jbd_debug(4, "jh: %p, tid:%d\n", jh, handle->h_transaction->t_tid); + + /* + * What if the buffer is already part of a running transaction? + * + * There are two cases: + * 1) It is part of the current running transaction. Refile it, + * just in case we have allocated it as metadata, deallocated + * it, then reallocated it as data. + * 2) It is part of the previous, still-committing transaction. 
+ * If all we want to do is to guarantee that the buffer will be + * written to disk before this new transaction commits, then + * being sure that the *previous* transaction has this same + * property is sufficient for us! Just leave it on its old + * transaction. + * + * In case (2), the buffer must not already exist as metadata + * --- that would violate write ordering (a transaction is free + * to write its data at any point, even before the previous + * committing transaction has committed). The caller must + * never, ever allow this to happen: there's nothing we can do + * about it in this layer. + */ + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); + if (jh->b_transaction) { + JBUFFER_TRACE(jh, "has transaction"); + if (jh->b_transaction != handle->h_transaction) { + JBUFFER_TRACE(jh, "belongs to older transaction"); + J_ASSERT_JH(jh, jh->b_transaction == + journal->j_committing_transaction); + + /* @@@ IS THIS TRUE ? */ + /* + * Not any more. Scenario: someone does a write() + * in data=journal mode. The buffer's transaction has + * moved into commit. Then someone does another + * write() to the file. We do the frozen data copyout + * and set b_next_transaction to point to j_running_t. + * And while we're in that state, someone does a + * writepage() in an attempt to pageout the same area + * of the file via a shared mapping. At present that + * calls journal_dirty_data(), and we get right here. + * It may be too late to journal the data. Simply + * falling through to the next test will suffice: the + * data will be dirty and wil be checkpointed. The + * ordering comments in the next comment block still + * apply. + */ + //J_ASSERT_JH(jh, jh->b_next_transaction == NULL); + + /* + * If we're journalling data, and this buffer was + * subject to a write(), it could be metadata, forget + * or shadow against the committing transaction. Now, + * someone has dirtied the same darn page via a mapping + * and it is being writepage()'d. + * We *could* just steal the page from commit, with some + * fancy locking there. Instead, we just skip it - + * don't tie the page's buffers to the new transaction + * at all. + * Implication: if we crash before the writepage() data + * is written into the filesystem, recovery will replay + * the write() data. + */ + if (jh->b_jlist != BJ_None && + jh->b_jlist != BJ_SyncData && + jh->b_jlist != BJ_Locked) { + JBUFFER_TRACE(jh, "Not stealing"); + goto no_journal; + } + + /* + * This buffer may be undergoing writeout in commit. We + * can't return from here and let the caller dirty it + * again because that can cause the write-out loop in + * commit to never terminate. + */ + if (buffer_dirty(bh)) { + get_bh(bh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + need_brelse = 1; + sync_dirty_buffer(bh); + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); + /* The buffer may become locked again at any + time if it is redirtied */ + } + + /* journal_clean_data_list() may have got there first */ + if (jh->b_transaction != NULL) { + JBUFFER_TRACE(jh, "unfile from commit"); + __journal_temp_unlink_buffer(jh); + /* It still points to the committing + * transaction; move it to this one so + * that the refile assert checks are + * happy. 
*/ + jh->b_transaction = handle->h_transaction; + } + /* The buffer will be refiled below */ + + } + /* + * Special case --- the buffer might actually have been + * allocated and then immediately deallocated in the previous, + * committing transaction, so might still be left on that + * transaction's metadata lists. + */ + if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { + JBUFFER_TRACE(jh, "not on correct data list: unfile"); + J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); + __journal_temp_unlink_buffer(jh); + jh->b_transaction = handle->h_transaction; + JBUFFER_TRACE(jh, "file as data"); + __journal_file_buffer(jh, handle->h_transaction, + BJ_SyncData); + } + } else { + JBUFFER_TRACE(jh, "not on a transaction"); + __journal_file_buffer(jh, handle->h_transaction, BJ_SyncData); + } +no_journal: + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + if (need_brelse) { + BUFFER_TRACE(bh, "brelse"); + __brelse(bh); + } + JBUFFER_TRACE(jh, "exit"); + journal_put_journal_head(jh); + return 0; +} + +/** + * int journal_dirty_metadata() - mark a buffer as containing dirty metadata + * @handle: transaction to add buffer to. + * @bh: buffer to mark + * + * mark dirty metadata which needs to be journaled as part of the current + * transaction. + * + * The buffer is placed on the transaction's metadata list and is marked + * as belonging to the transaction. + * + * Returns error number or 0 on success. + * + * Special care needs to be taken if the buffer already belongs to the + * current committing transaction (in which case we should have frozen + * data present for that commit). In that case, we don't relink the + * buffer: that only gets done when the old transaction finally + * completes its commit. + */ +int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) +{ + transaction_t *transaction = handle->h_transaction; + journal_t *journal = transaction->t_journal; + struct journal_head *jh = bh2jh(bh); + + jbd_debug(5, "journal_head %p\n", jh); + JBUFFER_TRACE(jh, "entry"); + if (is_handle_aborted(handle)) + goto out; + + jbd_lock_bh_state(bh); + + if (jh->b_modified == 0) { + /* + * This buffer's got modified and becoming part + * of the transaction. This needs to be done + * once a transaction -bzzz + */ + jh->b_modified = 1; + J_ASSERT_JH(jh, handle->h_buffer_credits > 0); + handle->h_buffer_credits--; + } + + /* + * fastpath, to avoid expensive locking. If this buffer is already + * on the running transaction's metadata list there is nothing to do. + * Nobody can take it off again because there is a handle open. + * I _think_ we're OK here with SMP barriers - a mistaken decision will + * result in this test being false, so we go in and take the locks. + */ + if (jh->b_transaction == transaction && jh->b_jlist == BJ_Metadata) { + JBUFFER_TRACE(jh, "fastpath"); + J_ASSERT_JH(jh, jh->b_transaction == + journal->j_running_transaction); + goto out_unlock_bh; + } + + set_buffer_jbddirty(bh); + + /* + * Metadata already on the current transaction list doesn't + * need to be filed. Metadata on another transaction's list must + * be committing, and will be refiled once the commit completes: + * leave it alone for now. + */ + if (jh->b_transaction != transaction) { + JBUFFER_TRACE(jh, "already on other transaction"); + J_ASSERT_JH(jh, jh->b_transaction == + journal->j_committing_transaction); + J_ASSERT_JH(jh, jh->b_next_transaction == transaction); + /* And this case is illegal: we can't reuse another + * transaction's data buffer, ever. 
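
The b_modified logic above implements "one credit per buffer per transaction". A compact userspace model (names invented) shows the charge being made exactly once, however often the buffer is re-dirtied:

#include <stdio.h>
#include <assert.h>

struct handle {
	int buffer_credits;
};

struct jh {
	int modified;	/* jh->b_modified analogue */
};

/* Model of the credit charge at the top of journal_dirty_metadata():
 * a buffer consumes exactly one credit per transaction, no matter how
 * many times it is dirtied within that transaction. */
static void dirty_metadata(struct handle *h, struct jh *jh)
{
	if (!jh->modified) {
		jh->modified = 1;
		assert(h->buffer_credits > 0);
		h->buffer_credits--;
	}
	/* filing on the BJ_Metadata list omitted */
}

int main(void)
{
	struct handle h = { 2 };
	struct jh a = { 0 }, b = { 0 };

	dirty_metadata(&h, &a);
	dirty_metadata(&h, &a);	/* same buffer again: no extra charge */
	dirty_metadata(&h, &b);
	printf("credits left: %d\n", h.buffer_credits);	/* 0 */
	return 0;
}
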
*/ + goto out_unlock_bh; + } + + /* That test should have eliminated the following case: */ + J_ASSERT_JH(jh, jh->b_frozen_data == 0); + + JBUFFER_TRACE(jh, "file as BJ_Metadata"); + spin_lock(&journal->j_list_lock); + __journal_file_buffer(jh, handle->h_transaction, BJ_Metadata); + spin_unlock(&journal->j_list_lock); +out_unlock_bh: + jbd_unlock_bh_state(bh); +out: + JBUFFER_TRACE(jh, "exit"); + return 0; +} + +/* + * journal_release_buffer: undo a get_write_access without any buffer + * updates, if the update decided in the end that it didn't need access. + * + */ +void +journal_release_buffer(handle_t *handle, struct buffer_head *bh) +{ + BUFFER_TRACE(bh, "entry"); +} + +/** + * void journal_forget() - bforget() for potentially-journaled buffers. + * @handle: transaction handle + * @bh: bh to 'forget' + * + * We can only do the bforget if there are no commits pending against the + * buffer. If the buffer is dirty in the current running transaction we + * can safely unlink it. + * + * bh may not be a journalled buffer at all - it may be a non-JBD + * buffer which came off the hashtable. Check for this. + * + * Decrements bh->b_count by one. + * + * Allow this call even if the handle has aborted --- it may be part of + * the caller's cleanup after an abort. + */ +int journal_forget (handle_t *handle, struct buffer_head *bh) +{ + transaction_t *transaction = handle->h_transaction; + journal_t *journal = transaction->t_journal; + struct journal_head *jh; + int drop_reserve = 0; + int err = 0; + + BUFFER_TRACE(bh, "entry"); + + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); + + if (!buffer_jbd(bh)) + goto not_jbd; + jh = bh2jh(bh); + + /* Critical error: attempting to delete a bitmap buffer, maybe? + * Don't do any jbd operations, and return an error. */ + if (!J_EXPECT_JH(jh, !jh->b_committed_data, + "inconsistent data on disk")) { + err = -EIO; + goto not_jbd; + } + + /* + * The buffer's going from the transaction, we must drop + * all references -bzzz + */ + jh->b_modified = 0; + + if (jh->b_transaction == handle->h_transaction) { + J_ASSERT_JH(jh, !jh->b_frozen_data); + + /* If we are forgetting a buffer which is already part + * of this transaction, then we can just drop it from + * the transaction immediately. */ + clear_buffer_dirty(bh); + clear_buffer_jbddirty(bh); + + JBUFFER_TRACE(jh, "belongs to current transaction: unfile"); + + drop_reserve = 1; + + /* + * We are no longer going to journal this buffer. + * However, the commit of this transaction is still + * important to the buffer: the delete that we are now + * processing might obsolete an old log entry, so by + * committing, we can satisfy the buffer's checkpoint. + * + * So, if we have a checkpoint on the buffer, we should + * now refile the buffer on our BJ_Forget list so that + * we know to remove the checkpoint after we commit. + */ + + if (jh->b_cp_transaction) { + __journal_temp_unlink_buffer(jh); + __journal_file_buffer(jh, transaction, BJ_Forget); + } else { + __journal_unfile_buffer(jh); + journal_remove_journal_head(bh); + __brelse(bh); + if (!buffer_jbd(bh)) { + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + __bforget(bh); + goto drop; + } + } + } else if (jh->b_transaction) { + J_ASSERT_JH(jh, (jh->b_transaction == + journal->j_committing_transaction)); + /* However, if the buffer is still owned by a prior + * (committing) transaction, we can't drop it yet... */ + JBUFFER_TRACE(jh, "belongs to older transaction"); + /* ... 
but we CAN drop it from the new transaction if we
+ * have also modified it since the original commit. */
+
+ if (jh->b_next_transaction) {
+ J_ASSERT(jh->b_next_transaction == transaction);
+ jh->b_next_transaction = NULL;
+ drop_reserve = 1;
+ }
+ }
+
+not_jbd:
+ spin_unlock(&journal->j_list_lock);
+ jbd_unlock_bh_state(bh);
+ __brelse(bh);
+drop:
+ if (drop_reserve) {
+ /* no need to reserve log space for this block -bzzz */
+ handle->h_buffer_credits++;
+ }
+ return err;
+}
+
+/**
+ * int journal_stop() - complete a transaction
+ * @handle: transaction to complete.
+ *
+ * All done for a particular handle.
+ *
+ * There is not much action needed here. We just return any remaining
+ * buffer credits to the transaction and remove the handle. The only
+ * complication is that we need to start a commit operation if the
+ * filesystem is marked for synchronous update.
+ *
+ * journal_stop itself will not usually return an error, but it may
+ * do so in unusual circumstances. In particular, expect it to
+ * return -EIO if a journal_abort has been executed since the
+ * transaction began.
+ */
+int journal_stop(handle_t *handle)
+{
+ transaction_t *transaction = handle->h_transaction;
+ journal_t *journal = transaction->t_journal;
+ int old_handle_count, err;
+ pid_t pid;
+
+ J_ASSERT(transaction->t_updates > 0);
+ J_ASSERT(journal_current_handle() == handle);
+
+ if (is_handle_aborted(handle))
+ err = -EIO;
+ else
+ err = 0;
+
+ if (--handle->h_ref > 0) {
+ jbd_debug(4, "h_ref %d -> %d\n", handle->h_ref + 1,
+ handle->h_ref);
+ return err;
+ }
+
+ jbd_debug(4, "Handle %p going down\n", handle);
+
+ /*
+ * Implement synchronous transaction batching. If the handle
+ * was synchronous, don't force a commit immediately. Let's
+ * yield and let another thread piggyback onto this transaction.
+ * Keep doing that while new threads continue to arrive.
+ * It doesn't cost much - we're about to run a commit and sleep
+ * on IO anyway. Speeds up many-threaded, many-dir operations
+ * by 30x or more...
+ *
+ * But don't do this if this process was the most recent one to
+ * perform a synchronous write. We do this to detect the case where a
+ * single process is doing a stream of sync writes. No point in waiting
+ * for joiners in that case.
+ */
+ pid = current->pid;
+ if (handle->h_sync && journal->j_last_sync_writer != pid) {
+ journal->j_last_sync_writer = pid;
+ do {
+ old_handle_count = transaction->t_handle_count;
+ schedule_timeout_uninterruptible(1);
+ } while (old_handle_count != transaction->t_handle_count);
+ }
+
+ current->journal_info = NULL;
+ spin_lock(&journal->j_state_lock);
+ spin_lock(&transaction->t_handle_lock);
+ transaction->t_outstanding_credits -= handle->h_buffer_credits;
+ transaction->t_updates--;
+ if (!transaction->t_updates) {
+ wake_up(&journal->j_wait_updates);
+ if (journal->j_barrier_count)
+ wake_up(&journal->j_wait_transaction_locked);
+ }
+
+ /*
+ * If the handle is marked SYNC, we need to set another commit
+ * going! We also want to force a commit if the current
+ * transaction is occupying too much of the log, or if the
+ * transaction is too old now.
+ */
+ if (handle->h_sync ||
+ transaction->t_outstanding_credits >
+ journal->j_max_transaction_buffers ||
+ time_after_eq(jiffies, transaction->t_expires)) {
+ /* Do this even for aborted journals: an abort still
+ * completes the commit thread, it just doesn't write
+ * anything to disk.
 */
+ tid_t tid = transaction->t_tid;
+
+ spin_unlock(&transaction->t_handle_lock);
+ jbd_debug(2, "transaction too old, requesting commit for "
+ "handle %p\n", handle);
+ /* This is non-blocking */
+ __log_start_commit(journal, transaction->t_tid);
+ spin_unlock(&journal->j_state_lock);
+
+ /*
+ * Special case: JFS_SYNC synchronous updates require us
+ * to wait for the commit to complete.
+ */
+ if (handle->h_sync && !(current->flags & PF_MEMALLOC))
+ err = log_wait_commit(journal, tid);
+ } else {
+ spin_unlock(&transaction->t_handle_lock);
+ spin_unlock(&journal->j_state_lock);
+ }
+
+ jbd_free_handle(handle);
+ return err;
+}
+
+/**
+ * int journal_force_commit() - force any uncommitted transactions
+ * @journal: journal to force
+ *
+ * For synchronous operations: force any uncommitted transactions
+ * to disk. May seem kludgy, but it reuses all the handle batching
+ * code in a very simple manner.
+ */
+int journal_force_commit(journal_t *journal)
+{
+ handle_t *handle;
+ int ret;
+
+ handle = journal_start(journal, 1);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ } else {
+ handle->h_sync = 1;
+ ret = journal_stop(handle);
+ }
+ return ret;
+}
+
+/*
+ *
+ * List management code snippets: various functions for manipulating the
+ * transaction buffer lists.
+ *
+ */
+
+/*
+ * Append a buffer to a transaction list, given the transaction's list head
+ * pointer.
+ *
+ * j_list_lock is held.
+ *
+ * jbd_lock_bh_state(jh2bh(jh)) is held.
+ */
+
+static inline void
+__blist_add_buffer(struct journal_head **list, struct journal_head *jh)
+{
+ if (!*list) {
+ jh->b_tnext = jh->b_tprev = jh;
+ *list = jh;
+ } else {
+ /* Insert at the tail of the list to preserve order */
+ struct journal_head *first = *list, *last = first->b_tprev;
+ jh->b_tprev = last;
+ jh->b_tnext = first;
+ last->b_tnext = first->b_tprev = jh;
+ }
+}
+
+/*
+ * Remove a buffer from a transaction list, given the transaction's list
+ * head pointer.
+ *
+ * Called with j_list_lock held, and the journal may not be locked.
+ *
+ * jbd_lock_bh_state(jh2bh(jh)) is held.
+ */
+
+static inline void
+__blist_del_buffer(struct journal_head **list, struct journal_head *jh)
+{
+ if (*list == jh) {
+ *list = jh->b_tnext;
+ if (*list == jh)
+ *list = NULL;
+ }
+ jh->b_tprev->b_tnext = jh->b_tnext;
+ jh->b_tnext->b_tprev = jh->b_tprev;
+}
+
+/*
+ * Remove a buffer from the appropriate transaction list.
+ *
+ * Note that this function can *change* the value of
+ * bh->b_transaction->t_sync_datalist, t_buffers, t_forget,
+ * t_iobuf_list, t_shadow_list, t_log_list or t_reserved_list. If the caller
+ * is holding onto a copy of one of these pointers, it could go bad.
+ * Generally the caller needs to re-read the pointer from the transaction_t.
+ *
+ * Called under j_list_lock. The journal may not be locked.
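
The two list helpers above use a head-only circular scheme: the head points at the first element and first->b_tprev is the tail, so tail insertion is O(1) without a separate tail pointer. The same shape, extracted into a standalone userspace sketch (invented names):

#include <stdio.h>

struct node {
	int id;
	struct node *next, *prev;	/* b_tnext/b_tprev analogues */
};

/* Mirrors __blist_add_buffer(): append at the tail of the circle. */
static void list_add_tail_circ(struct node **list, struct node *n)
{
	if (!*list) {
		n->next = n->prev = n;
		*list = n;
	} else {
		struct node *first = *list, *last = first->prev;
		n->prev = last;
		n->next = first;
		last->next = first->prev = n;
	}
}

/* Mirrors __blist_del_buffer(): unlink, fixing up the head. */
static void list_del_circ(struct node **list, struct node *n)
{
	if (*list == n) {
		*list = n->next;
		if (*list == n)
			*list = NULL;	/* n was the only element */
	}
	n->prev->next = n->next;
	n->next->prev = n->prev;
}

int main(void)
{
	struct node a = { 1, NULL, NULL }, b = { 2, NULL, NULL },
		    c = { 3, NULL, NULL };
	struct node *list = NULL, *p;

	list_add_tail_circ(&list, &a);
	list_add_tail_circ(&list, &b);
	list_add_tail_circ(&list, &c);
	list_del_circ(&list, &b);

	p = list;
	do {
		printf("%d ", p->id);	/* prints: 1 3 */
		p = p->next;
	} while (p != list);
	printf("\n");
	return 0;
}
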
+ */ +void __journal_temp_unlink_buffer(struct journal_head *jh) +{ + struct journal_head **list = NULL; + transaction_t *transaction; + struct buffer_head *bh = jh2bh(jh); + + J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); + transaction = jh->b_transaction; + if (transaction) + assert_spin_locked(&transaction->t_journal->j_list_lock); + + J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); + if (jh->b_jlist != BJ_None) + J_ASSERT_JH(jh, transaction != 0); + + switch (jh->b_jlist) { + case BJ_None: + return; + case BJ_SyncData: + list = &transaction->t_sync_datalist; + break; + case BJ_Metadata: + transaction->t_nr_buffers--; + J_ASSERT_JH(jh, transaction->t_nr_buffers >= 0); + list = &transaction->t_buffers; + break; + case BJ_Forget: + list = &transaction->t_forget; + break; + case BJ_IO: + list = &transaction->t_iobuf_list; + break; + case BJ_Shadow: + list = &transaction->t_shadow_list; + break; + case BJ_LogCtl: + list = &transaction->t_log_list; + break; + case BJ_Reserved: + list = &transaction->t_reserved_list; + break; + case BJ_Locked: + list = &transaction->t_locked_list; + break; + } + + __blist_del_buffer(list, jh); + jh->b_jlist = BJ_None; + if (test_clear_buffer_jbddirty(bh)) + mark_buffer_dirty(bh); /* Expose it to the VM */ +} + +void __journal_unfile_buffer(struct journal_head *jh) +{ + __journal_temp_unlink_buffer(jh); + jh->b_transaction = NULL; +} + +void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) +{ + jbd_lock_bh_state(jh2bh(jh)); + spin_lock(&journal->j_list_lock); + __journal_unfile_buffer(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(jh2bh(jh)); +} + +/* + * Called from journal_try_to_free_buffers(). + * + * Called under jbd_lock_bh_state(bh) + */ +static void +__journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) +{ + struct journal_head *jh; + + jh = bh2jh(bh); + + if (buffer_locked(bh) || buffer_dirty(bh)) + goto out; + + if (jh->b_next_transaction != 0) + goto out; + + spin_lock(&journal->j_list_lock); + if (jh->b_transaction != 0 && jh->b_cp_transaction == 0) { + if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) { + /* A written-back ordered data buffer */ + JBUFFER_TRACE(jh, "release data"); + __journal_unfile_buffer(jh); + journal_remove_journal_head(bh); + __brelse(bh); + } + } else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) { + /* written-back checkpointed metadata buffer */ + if (jh->b_jlist == BJ_None) { + JBUFFER_TRACE(jh, "remove from checkpoint list"); + __journal_remove_checkpoint(jh); + journal_remove_journal_head(bh); + __brelse(bh); + } + } + spin_unlock(&journal->j_list_lock); +out: + return; +} + + +/** + * int journal_try_to_free_buffers() - try to free page buffers. + * @journal: journal for operation + * @page: to try and free + * @unused_gfp_mask: unused + * + * + * For all the buffers on this page, + * if they are fully written out ordered data, move them onto BUF_CLEAN + * so try_to_free_buffers() can reap them. + * + * This function returns non-zero if we wish try_to_free_buffers() + * to be called. We do this if the page is releasable by try_to_free_buffers(). + * We also do it if the page has locked or dirty buffers and the caller wants + * us to perform sync or async writeout. + * + * This complicates JBD locking somewhat. We aren't protected by the + * BKL here. We wish to remove the buffer from its committing or + * running transaction's ->t_datalist via __journal_unfile_buffer. 
+ * + * This may *change* the value of transaction_t->t_datalist, so anyone + * who looks at t_datalist needs to lock against this function. + * + * Even worse, someone may be doing a journal_dirty_data on this + * buffer. So we need to lock against that. journal_dirty_data() + * will come out of the lock with the buffer dirty, which makes it + * ineligible for release here. + * + * Who else is affected by this? hmm... Really the only contender + * is do_get_write_access() - it could be looking at the buffer while + * journal_try_to_free_buffer() is changing its state. But that + * cannot happen because we never reallocate freed data as metadata + * while the data is part of a transaction. Yes? + */ +int journal_try_to_free_buffers(journal_t *journal, + struct page *page, gfp_t unused_gfp_mask) +{ + struct buffer_head *head; + struct buffer_head *bh; + int ret = 0; + + J_ASSERT(PageLocked(page)); + + head = page_buffers(page); + bh = head; + do { + struct journal_head *jh; + + /* + * We take our own ref against the journal_head here to avoid + * having to add tons of locking around each instance of + * journal_remove_journal_head() and journal_put_journal_head(). + */ + jh = journal_grab_journal_head(bh); + if (!jh) + continue; + + jbd_lock_bh_state(bh); + __journal_try_to_free_buffer(journal, bh); + journal_put_journal_head(jh); + jbd_unlock_bh_state(bh); + if (buffer_jbd(bh)) + goto busy; + } while ((bh = bh->b_this_page) != head); + ret = try_to_free_buffers(page); +busy: + return ret; +} + +/* + * This buffer is no longer needed. If it is on an older transaction's + * checkpoint list we need to record it on this transaction's forget list + * to pin this buffer (and hence its checkpointing transaction) down until + * this transaction commits. If the buffer isn't on a checkpoint list, we + * release it. + * Returns non-zero if JBD no longer has an interest in the buffer. + * + * Called under j_list_lock. + * + * Called under jbd_lock_bh_state(bh). + */ +static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) +{ + int may_free = 1; + struct buffer_head *bh = jh2bh(jh); + + __journal_unfile_buffer(jh); + + if (jh->b_cp_transaction) { + JBUFFER_TRACE(jh, "on running+cp transaction"); + __journal_file_buffer(jh, transaction, BJ_Forget); + clear_buffer_jbddirty(bh); + may_free = 0; + } else { + JBUFFER_TRACE(jh, "on running transaction"); + journal_remove_journal_head(bh); + __brelse(bh); + } + return may_free; +} + +/* + * journal_invalidatepage + * + * This code is tricky. It has a number of cases to deal with. + * + * There are two invariants which this code relies on: + * + * i_size must be updated on disk before we start calling invalidatepage on the + * data. + * + * This is done in ext3 by defining an ext3_setattr method which + * updates i_size before truncate gets going. By maintaining this + * invariant, we can be sure that it is safe to throw away any buffers + * attached to the current transaction: once the transaction commits, + * we know that the data will not be needed. + * + * Note however that we can *not* throw away data belonging to the + * previous, committing transaction! 
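
The first invariant makes partial-page invalidation a pure offset walk: a buffer is unmapped only when it lies wholly beyond the truncation point. A toy rendering of the loop that journal_invalidatepage() below performs (page and block sizes are illustrative):

#include <stdio.h>

#define PAGE_SZ 4096
#define BLOCK_SZ 1024

int main(void)
{
	unsigned int offset = 2048;	/* new EOF within this page */
	unsigned int curr_off = 0;

	while (curr_off < PAGE_SZ) {
		unsigned int next_off = curr_off + BLOCK_SZ;

		/* same test as the real walk: only buffers wholly
		 * outside the truncation point are unmapped */
		if (offset <= curr_off)
			printf("buffer at %4u: unmap\n", curr_off);
		else
			printf("buffer at %4u: keep\n", curr_off);
		curr_off = next_off;
	}
	return 0;
}
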
+ * + * Any disk blocks which *are* part of the previous, committing + * transaction (and which therefore cannot be discarded immediately) are + * not going to be reused in the new running transaction + * + * The bitmap committed_data images guarantee this: any block which is + * allocated in one transaction and removed in the next will be marked + * as in-use in the committed_data bitmap, so cannot be reused until + * the next transaction to delete the block commits. This means that + * leaving committing buffers dirty is quite safe: the disk blocks + * cannot be reallocated to a different file and so buffer aliasing is + * not possible. + * + * + * The above applies mainly to ordered data mode. In writeback mode we + * don't make guarantees about the order in which data hits disk --- in + * particular we don't guarantee that new dirty data is flushed before + * transaction commit --- so it is always safe just to discard data + * immediately in that mode. --sct + */ + +/* + * The journal_unmap_buffer helper function returns zero if the buffer + * concerned remains pinned as an anonymous buffer belonging to an older + * transaction. + * + * We're outside-transaction here. Either or both of j_running_transaction + * and j_committing_transaction may be NULL. + */ +static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) +{ + transaction_t *transaction; + struct journal_head *jh; + int may_free = 1; + int ret; + + BUFFER_TRACE(bh, "entry"); + + /* + * It is safe to proceed here without the j_list_lock because the + * buffers cannot be stolen by try_to_free_buffers as long as we are + * holding the page lock. --sct + */ + + if (!buffer_jbd(bh)) + goto zap_buffer_unlocked; + + spin_lock(&journal->j_state_lock); + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); + + jh = journal_grab_journal_head(bh); + if (!jh) + goto zap_buffer_no_jh; + + transaction = jh->b_transaction; + if (transaction == NULL) { + /* First case: not on any transaction. If it + * has no checkpoint link, then we can zap it: + * it's a writeback-mode buffer so we don't care + * if it hits disk safely. */ + if (!jh->b_cp_transaction) { + JBUFFER_TRACE(jh, "not on any transaction: zap"); + goto zap_buffer; + } + + if (!buffer_dirty(bh)) { + /* bdflush has written it. We can drop it now */ + goto zap_buffer; + } + + /* OK, it must be in the journal but still not + * written fully to disk: it's metadata or + * journaled data... */ + + if (journal->j_running_transaction) { + /* ... and once the current transaction has + * committed, the buffer won't be needed any + * longer. */ + JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); + ret = __dispose_buffer(jh, + journal->j_running_transaction); + journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + spin_unlock(&journal->j_state_lock); + return ret; + } else { + /* There is no currently-running transaction. So the + * orphan record which we wrote for this file must have + * passed into commit. We must attach this buffer to + * the committing transaction, if it exists. */ + if (journal->j_committing_transaction) { + JBUFFER_TRACE(jh, "give to committing trans"); + ret = __dispose_buffer(jh, + journal->j_committing_transaction); + journal_put_journal_head(jh); + spin_unlock(&journal->j_list_lock); + jbd_unlock_bh_state(bh); + spin_unlock(&journal->j_state_lock); + return ret; + } else { + /* The orphan record's transaction has + * committed. 
We can cleanse this buffer */
+				clear_buffer_jbddirty(bh);
+				goto zap_buffer;
+			}
+		}
+	} else if (transaction == journal->j_committing_transaction) {
+		if (jh->b_jlist == BJ_Locked) {
+			/*
+			 * The buffer is on the committing transaction's locked
+			 * list.  We have the buffer locked, so I/O has
+			 * completed.  So we can nail the buffer now.
+			 */
+			may_free = __dispose_buffer(jh, transaction);
+			goto zap_buffer;
+		}
+		/*
+		 * If it is committing, we simply cannot touch it.  We
+		 * can remove its next_transaction pointer from the
+		 * running transaction if that is set, but nothing
+		 * else. */
+		JBUFFER_TRACE(jh, "on committing transaction");
+		set_buffer_freed(bh);
+		if (jh->b_next_transaction) {
+			J_ASSERT(jh->b_next_transaction ==
+					journal->j_running_transaction);
+			jh->b_next_transaction = NULL;
+		}
+		journal_put_journal_head(jh);
+		spin_unlock(&journal->j_list_lock);
+		jbd_unlock_bh_state(bh);
+		spin_unlock(&journal->j_state_lock);
+		return 0;
+	} else {
+		/* Good, the buffer belongs to the running transaction.
+		 * We are writing our own transaction's data, not any
+		 * previous one's, so it is safe to throw it away
+		 * (remember that we expect the filesystem to have set
+		 * i_size already for this truncate so recovery will not
+		 * expose the disk blocks we are discarding here.) */
+		J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
+		may_free = __dispose_buffer(jh, transaction);
+	}
+
+zap_buffer:
+	journal_put_journal_head(jh);
+zap_buffer_no_jh:
+	spin_unlock(&journal->j_list_lock);
+	jbd_unlock_bh_state(bh);
+	spin_unlock(&journal->j_state_lock);
+zap_buffer_unlocked:
+	clear_buffer_dirty(bh);
+	J_ASSERT_BH(bh, !buffer_jbddirty(bh));
+	clear_buffer_mapped(bh);
+	clear_buffer_req(bh);
+	clear_buffer_new(bh);
+	bh->b_bdev = NULL;
+	return may_free;
+}
+
+/**
+ * void journal_invalidatepage()
+ * @journal: journal to use for flush...
+ * @page:    page to flush
+ * @offset:  offset within the page from which to invalidate (bytes)
+ *
+ * Reap page buffers containing data after offset in page.
+ *
+ */
+void journal_invalidatepage(journal_t *journal,
+		      struct page *page,
+		      unsigned long offset)
+{
+	struct buffer_head *head, *bh, *next;
+	unsigned int curr_off = 0;
+	int may_free = 1;
+
+	if (!PageLocked(page))
+		BUG();
+	if (!page_has_buffers(page))
+		return;
+
+	/* We will potentially be playing with lists other than just the
+	 * data lists (especially for journaled data mode), so be
+	 * cautious in our locking. */
+
+	head = bh = page_buffers(page);
+	do {
+		unsigned int next_off = curr_off + bh->b_size;
+		next = bh->b_this_page;
+
+		if (offset <= curr_off) {
+			/* This block is wholly outside the truncation point */
+			lock_buffer(bh);
+			may_free &= journal_unmap_buffer(journal, bh);
+			unlock_buffer(bh);
+		}
+		curr_off = next_off;
+		bh = next;
+
+	} while (bh != head);
+
+	if (!offset) {
+		if (may_free && try_to_free_buffers(page))
+			J_ASSERT(!page_has_buffers(page));
+	}
+}
+
+/*
+ * File a buffer on the given transaction list.
+ */ +void __journal_file_buffer(struct journal_head *jh, + transaction_t *transaction, int jlist) +{ + struct journal_head **list = NULL; + int was_dirty = 0; + struct buffer_head *bh = jh2bh(jh); + + J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); + assert_spin_locked(&transaction->t_journal->j_list_lock); + + J_ASSERT_JH(jh, jh->b_jlist < BJ_Types); + J_ASSERT_JH(jh, jh->b_transaction == transaction || + jh->b_transaction == 0); + + if (jh->b_transaction && jh->b_jlist == jlist) + return; + + /* The following list of buffer states needs to be consistent + * with __jbd_unexpected_dirty_buffer()'s handling of dirty + * state. */ + + if (jlist == BJ_Metadata || jlist == BJ_Reserved || + jlist == BJ_Shadow || jlist == BJ_Forget) { + if (test_clear_buffer_dirty(bh) || + test_clear_buffer_jbddirty(bh)) + was_dirty = 1; + } + + if (jh->b_transaction) + __journal_temp_unlink_buffer(jh); + jh->b_transaction = transaction; + + switch (jlist) { + case BJ_None: + J_ASSERT_JH(jh, !jh->b_committed_data); + J_ASSERT_JH(jh, !jh->b_frozen_data); + return; + case BJ_SyncData: + list = &transaction->t_sync_datalist; + break; + case BJ_Metadata: + transaction->t_nr_buffers++; + list = &transaction->t_buffers; + break; + case BJ_Forget: + list = &transaction->t_forget; + break; + case BJ_IO: + list = &transaction->t_iobuf_list; + break; + case BJ_Shadow: + list = &transaction->t_shadow_list; + break; + case BJ_LogCtl: + list = &transaction->t_log_list; + break; + case BJ_Reserved: + list = &transaction->t_reserved_list; + break; + case BJ_Locked: + list = &transaction->t_locked_list; + break; + } + + __blist_add_buffer(list, jh); + jh->b_jlist = jlist; + + if (was_dirty) + set_buffer_jbddirty(bh); +} + +void journal_file_buffer(struct journal_head *jh, + transaction_t *transaction, int jlist) +{ + jbd_lock_bh_state(jh2bh(jh)); + spin_lock(&transaction->t_journal->j_list_lock); + __journal_file_buffer(jh, transaction, jlist); + spin_unlock(&transaction->t_journal->j_list_lock); + jbd_unlock_bh_state(jh2bh(jh)); +} + +/* + * Remove a buffer from its current buffer list in preparation for + * dropping it from its current transaction entirely. If the buffer has + * already started to be used by a subsequent transaction, refile the + * buffer on that transaction's metadata list. + * + * Called under journal->j_list_lock + * + * Called under jbd_lock_bh_state(jh2bh(jh)) + */ +void __journal_refile_buffer(struct journal_head *jh) +{ + int was_dirty; + struct buffer_head *bh = jh2bh(jh); + + J_ASSERT_JH(jh, jbd_is_locked_bh_state(bh)); + if (jh->b_transaction) + assert_spin_locked(&jh->b_transaction->t_journal->j_list_lock); + + /* If the buffer is now unused, just drop it. */ + if (jh->b_next_transaction == NULL) { + __journal_unfile_buffer(jh); + return; + } + + /* + * It has been modified by a later transaction: add it to the new + * transaction's metadata list. + */ + + was_dirty = test_clear_buffer_jbddirty(bh); + __journal_temp_unlink_buffer(jh); + jh->b_transaction = jh->b_next_transaction; + jh->b_next_transaction = NULL; + __journal_file_buffer(jh, jh->b_transaction, + was_dirty ? BJ_Metadata : BJ_Reserved); + J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); + + if (was_dirty) + set_buffer_jbddirty(bh); +} + +/* + * For the unlocked version of this call, also make sure that any + * hanging journal_head is cleaned up if necessary. 
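+ *
+ * To illustrate the handoff performed above: a buffer that transaction
+ * T1 is committing and that T2 has since modified starts out with
+ *
+ *	jh->b_transaction == T1, jh->b_next_transaction == T2
+ *
+ * and __journal_refile_buffer() leaves it with
+ *
+ *	jh->b_transaction == T2, jh->b_next_transaction == NULL
+ *
+ * filed on T2's BJ_Metadata list if it was jbddirty, else on BJ_Reserved.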
+ * + * __journal_refile_buffer is usually called as part of a single locked + * operation on a buffer_head, in which the caller is probably going to + * be hooking the journal_head onto other lists. In that case it is up + * to the caller to remove the journal_head if necessary. For the + * unlocked journal_refile_buffer call, the caller isn't going to be + * doing anything else to the buffer so we need to do the cleanup + * ourselves to avoid a jh leak. + * + * *** The journal_head may be freed by this call! *** + */ +void journal_refile_buffer(journal_t *journal, struct journal_head *jh) +{ + struct buffer_head *bh = jh2bh(jh); + + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); + + __journal_refile_buffer(jh); + jbd_unlock_bh_state(bh); + journal_remove_journal_head(bh); + + spin_unlock(&journal->j_list_lock); + __brelse(bh); +} diff --git a/include/linux/ext4_jbd2.h b/include/linux/ext4_jbd2.h new file mode 100644 index 000000000000..3dbf6c779037 --- /dev/null +++ b/include/linux/ext4_jbd2.h @@ -0,0 +1,268 @@ +/* + * linux/include/linux/ext4_jbd.h + * + * Written by Stephen C. Tweedie <sct@redhat.com>, 1999 + * + * Copyright 1998--1999 Red Hat corp --- All Rights Reserved + * + * This file is part of the Linux kernel and is made available under + * the terms of the GNU General Public License, version 2, or at your + * option, any later version, incorporated herein by reference. + * + * Ext4-specific journaling extensions. + */ + +#ifndef _LINUX_EXT4_JBD_H +#define _LINUX_EXT4_JBD_H + +#include <linux/fs.h> +#include <linux/jbd.h> +#include <linux/ext4_fs.h> + +#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal) + +/* Define the number of blocks we need to account to a transaction to + * modify one block of data. + * + * We may have to touch one inode, one bitmap buffer, up to three + * indirection blocks, the group and superblock summaries, and the data + * block to complete the transaction. */ + +#define EXT4_SINGLEDATA_TRANS_BLOCKS 8U + +/* Extended attribute operations touch at most two data buffers, + * two bitmap buffers, and two group summaries, in addition to the inode + * and the superblock, which are already accounted for. */ + +#define EXT4_XATTR_TRANS_BLOCKS 6U + +/* Define the minimum size for a transaction which modifies data. This + * needs to take into account the fact that we may end up modifying two + * quota files too (one for the group, one for the user quota). The + * superblock only gets updated once, of course, so don't bother + * counting that again for the quota updates. */ + +#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS + \ + EXT4_XATTR_TRANS_BLOCKS - 2 + \ + 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) + +/* Delete operations potentially hit one directory's namespace plus an + * entire inode, plus arbitrary amounts of bitmap/indirection data. Be + * generous. We can grow the delete transaction later if necessary. */ + +#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64) + +/* Define an arbitrary limit for the amount of data we will anticipate + * writing to any given transaction. For unbounded transactions such as + * write(2) and truncate(2) we can write more than this, but we always + * start off at the maximum transaction size and grow the transaction + * optimistically as we go. */ + +#define EXT4_MAX_TRANS_DATA 64U + +/* We break up a large truncate or write transaction once the handle's + * buffer credits gets this low, we need either to extend the + * transaction or to start a new one. 
Reserve enough space here for
+ * inode, bitmap, superblock, group and indirection updates for at least
+ * one block, plus two quota updates.  Quota allocations are not
+ * needed. */
+
+#define EXT4_RESERVE_TRANS_BLOCKS	12U
+
+#define EXT4_INDEX_EXTRA_TRANS_BLOCKS	8
+
+#ifdef CONFIG_QUOTA
+/* Amount of blocks needed for quota update - we know that the structure was
+ * allocated so we need to update only inode+data */
+#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
+/* Amount of blocks needed for quota insert/delete - we do some block writes
+ * but inode, sb and group updates are done only once */
+#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
+		(EXT4_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0)
+#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
+		(EXT4_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0)
+#else
+#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0
+#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
+#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
+#endif
+
+int
+ext4_mark_iloc_dirty(handle_t *handle,
+		     struct inode *inode,
+		     struct ext4_iloc *iloc);
+
+/*
+ * On success, we end up with an outstanding reference count against
+ * iloc->bh.  This _must_ be cleaned up later.
+ */
+
+int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
+			struct ext4_iloc *iloc);
+
+int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
+
+/*
+ * Wrapper functions with which ext4 calls into JBD.  The intent here is
+ * to allow these to be turned into appropriate stubs so ext4 can control
+ * ext2 filesystems, so ext2+ext4 systems only need one fs.  This work hasn't
+ * been done yet.
+ */
+
+void ext4_journal_abort_handle(const char *caller, const char *err_fn,
+		struct buffer_head *bh, handle_t *handle, int err);
+
+static inline int
+__ext4_journal_get_undo_access(const char *where, handle_t *handle,
+				struct buffer_head *bh)
+{
+	int err = journal_get_undo_access(handle, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle, err);
+	return err;
+}
+
+static inline int
+__ext4_journal_get_write_access(const char *where, handle_t *handle,
+				struct buffer_head *bh)
+{
+	int err = journal_get_write_access(handle, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle, err);
+	return err;
+}
+
+static inline void
+ext4_journal_release_buffer(handle_t *handle, struct buffer_head *bh)
+{
+	journal_release_buffer(handle, bh);
+}
+
+static inline int
+__ext4_journal_forget(const char *where, handle_t *handle, struct buffer_head *bh)
+{
+	int err = journal_forget(handle, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle, err);
+	return err;
+}
+
+static inline int
+__ext4_journal_revoke(const char *where, handle_t *handle,
+		unsigned long blocknr, struct buffer_head *bh)
+{
+	int err = journal_revoke(handle, blocknr, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle, err);
+	return err;
+}
+
+static inline int
+__ext4_journal_get_create_access(const char *where,
+				handle_t *handle, struct buffer_head *bh)
+{
+	int err = journal_get_create_access(handle, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle, err);
+	return err;
+}
+
+static inline int
+__ext4_journal_dirty_metadata(const char *where,
+				handle_t *handle, struct buffer_head *bh)
+{
+	int err = journal_dirty_metadata(handle, bh);
+	if (err)
+		ext4_journal_abort_handle(where, __FUNCTION__, bh, handle, err);
+	return err;
+}
+
+
+#define ext4_journal_get_undo_access(handle, bh) \
+	__ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh))
+#define ext4_journal_get_write_access(handle, bh) \
+	__ext4_journal_get_write_access(__FUNCTION__, (handle), (bh))
+#define ext4_journal_revoke(handle, blocknr, bh) \
+	__ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh))
+#define ext4_journal_get_create_access(handle, bh) \
+	__ext4_journal_get_create_access(__FUNCTION__, (handle), (bh))
+#define ext4_journal_dirty_metadata(handle, bh) \
+	__ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh))
+#define ext4_journal_forget(handle, bh) \
+	__ext4_journal_forget(__FUNCTION__, (handle), (bh))
+
+int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh);
+
+handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks);
+int __ext4_journal_stop(const char *where, handle_t *handle);
+
+static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
+{
+	return ext4_journal_start_sb(inode->i_sb, nblocks);
+}
+
+#define ext4_journal_stop(handle) \
+	__ext4_journal_stop(__FUNCTION__, (handle))
+
+static inline handle_t *ext4_journal_current_handle(void)
+{
+	return journal_current_handle();
+}
+
+static inline int ext4_journal_extend(handle_t *handle, int nblocks)
+{
+	return journal_extend(handle, nblocks);
+}
+
+static inline int ext4_journal_restart(handle_t *handle, int nblocks)
+{
+	return journal_restart(handle, nblocks);
+}
+
+static inline int ext4_journal_blocks_per_page(struct inode *inode)
+{
+	return journal_blocks_per_page(inode);
+}
+
+static inline int ext4_journal_force_commit(journal_t *journal)
+{
+	return journal_force_commit(journal);
+}
+
+/* super.c */
+int ext4_force_commit(struct super_block *sb);
+
+static inline int ext4_should_journal_data(struct inode *inode)
+{
+	if (!S_ISREG(inode->i_mode))
+		return 1;
+	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA)
+		return 1;
+	if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+		return 1;
+	return 0;
+}
+
+static inline int ext4_should_order_data(struct inode *inode)
+{
+	if (!S_ISREG(inode->i_mode))
+		return 0;
+	if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+		return 0;
+	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA)
+		return 1;
+	return 0;
+}
+
+static inline int ext4_should_writeback_data(struct inode *inode)
+{
+	if (!S_ISREG(inode->i_mode))
+		return 0;
+	if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL)
+		return 0;
+	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA)
+		return 1;
+	return 0;
+}
+
+#endif	/* _LINUX_EXT4_JBD_H */
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
new file mode 100644
index 000000000000..fe89444b1c6f
--- /dev/null
+++ b/include/linux/jbd2.h
@@ -0,0 +1,1098 @@
+/*
+ * linux/include/linux/jbd2.h
+ *
+ * Written by Stephen C. Tweedie <sct@redhat.com>
+ *
+ * Copyright 1998-2000 Red Hat, Inc --- All Rights Reserved
+ *
+ * This file is part of the Linux kernel and is made available under
+ * the terms of the GNU General Public License, version 2, or at your
+ * option, any later version, incorporated herein by reference.
+ *
+ * Definitions for transaction data structures for the buffer cache
+ * filesystem journaling support.
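+ *
+ * A minimal mount-time usage sketch, in the style of ext3 (illustrative
+ * only; error handling elided):
+ *
+ *	journal = journal_init_inode(journal_inode);
+ *	err = journal_load(journal);	<- replays the log if necessary
+ *	...
+ *	journal_destroy(journal);	<- at unmount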
+ */
+
+#ifndef _LINUX_JBD_H
+#define _LINUX_JBD_H
+
+/* Allow this file to be included directly into e2fsprogs */
+#ifndef __KERNEL__
+#include "jfs_compat.h"
+#define JFS_DEBUG
+#define jfs_debug jbd_debug
+#else
+
+#include <linux/types.h>
+#include <linux/buffer_head.h>
+#include <linux/journal-head.h>
+#include <linux/stddef.h>
+#include <linux/bit_spinlock.h>
+#include <linux/mutex.h>
+#include <linux/timer.h>
+
+#include <asm/semaphore.h>
+#endif
+
+#define journal_oom_retry 1
+
+/*
+ * Define JBD_PARANOID_IOFAIL to cause a kernel BUG() if ext3 finds
+ * certain classes of error which can occur due to failed IOs.  Under
+ * normal use we want ext3 to continue after such errors, because
+ * hardware _can_ fail, but for debugging purposes when running tests on
+ * known-good hardware we may want to trap these errors.
+ */
+#undef JBD_PARANOID_IOFAIL
+
+/*
+ * The default maximum commit age, in seconds.
+ */
+#define JBD_DEFAULT_MAX_COMMIT_AGE 5
+
+#ifdef CONFIG_JBD_DEBUG
+/*
+ * Define JBD_EXPENSIVE_CHECKING to enable more expensive internal
+ * consistency checks.  By default we don't do this unless
+ * CONFIG_JBD_DEBUG is on.
+ */
+#define JBD_EXPENSIVE_CHECKING
+extern int journal_enable_debug;
+
+#define jbd_debug(n, f, a...)						\
+	do {								\
+		if ((n) <= journal_enable_debug) {			\
+			printk (KERN_DEBUG "(%s, %d): %s: ",		\
+				__FILE__, __LINE__, __FUNCTION__);	\
+			printk (f, ## a);				\
+		}							\
+	} while (0)
+#else
+#define jbd_debug(f, a...)	/**/
+#endif
+
+extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry);
+extern void * jbd_slab_alloc(size_t size, gfp_t flags);
+extern void jbd_slab_free(void *ptr, size_t size);
+
+#define jbd_kmalloc(size, flags) \
+	__jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry)
+#define jbd_rep_kmalloc(size, flags) \
+	__jbd_kmalloc(__FUNCTION__, (size), (flags), 1)
+
+#define JFS_MIN_JOURNAL_BLOCKS 1024
+
+#ifdef __KERNEL__
+
+/**
+ * typedef handle_t - The handle_t type represents a single atomic update being performed by some process.
+ *
+ * All filesystem modifications made by the process go
+ * through this handle.  Recursive operations (such as quota operations)
+ * are gathered into a single update.
+ *
+ * The buffer credits field is used to account for journaled buffers
+ * being modified by the running process.  To ensure that there is
+ * enough log space for all outstanding operations, we need to limit the
+ * number of outstanding buffers possible at any time.  When the
+ * operation completes, any buffer credits not used are credited back to
+ * the transaction, so that at all times we know how many buffers the
+ * outstanding updates on a transaction might possibly touch.
+ *
+ * This is an opaque datatype.
+ **/
+typedef struct handle_s		handle_t;	/* Atomic operation type */
+
+
+/**
+ * typedef journal_t - The journal_t maintains all of the journaling state information for a single filesystem.
+ *
+ * journal_t is linked to from the fs superblock structure.
+ *
+ * We use the journal_t to keep track of all outstanding transaction
+ * activity on the filesystem, and to manage the state of the log
+ * writing process.
+ *
+ * This is an opaque datatype.
+ **/
+typedef struct journal_s	journal_t;	/* Journal control structure */
+#endif
+
+/*
+ * Internal structures used by the logging mechanism:
+ */
+
+#define JFS_MAGIC_NUMBER 0xc03b3998U /* The first 4 bytes of /dev/random!
*/ + +/* + * On-disk structures + */ + +/* + * Descriptor block types: + */ + +#define JFS_DESCRIPTOR_BLOCK 1 +#define JFS_COMMIT_BLOCK 2 +#define JFS_SUPERBLOCK_V1 3 +#define JFS_SUPERBLOCK_V2 4 +#define JFS_REVOKE_BLOCK 5 + +/* + * Standard header for all descriptor blocks: + */ +typedef struct journal_header_s +{ + __be32 h_magic; + __be32 h_blocktype; + __be32 h_sequence; +} journal_header_t; + + +/* + * The block tag: used to describe a single buffer in the journal + */ +typedef struct journal_block_tag_s +{ + __be32 t_blocknr; /* The on-disk block number */ + __be32 t_flags; /* See below */ +} journal_block_tag_t; + +/* + * The revoke descriptor: used on disk to describe a series of blocks to + * be revoked from the log + */ +typedef struct journal_revoke_header_s +{ + journal_header_t r_header; + __be32 r_count; /* Count of bytes used in the block */ +} journal_revoke_header_t; + + +/* Definitions for the journal tag flags word: */ +#define JFS_FLAG_ESCAPE 1 /* on-disk block is escaped */ +#define JFS_FLAG_SAME_UUID 2 /* block has same uuid as previous */ +#define JFS_FLAG_DELETED 4 /* block deleted by this transaction */ +#define JFS_FLAG_LAST_TAG 8 /* last tag in this descriptor block */ + + +/* + * The journal superblock. All fields are in big-endian byte order. + */ +typedef struct journal_superblock_s +{ +/* 0x0000 */ + journal_header_t s_header; + +/* 0x000C */ + /* Static information describing the journal */ + __be32 s_blocksize; /* journal device blocksize */ + __be32 s_maxlen; /* total blocks in journal file */ + __be32 s_first; /* first block of log information */ + +/* 0x0018 */ + /* Dynamic information describing the current state of the log */ + __be32 s_sequence; /* first commit ID expected in log */ + __be32 s_start; /* blocknr of start of log */ + +/* 0x0020 */ + /* Error value, as set by journal_abort(). */ + __be32 s_errno; + +/* 0x0024 */ + /* Remaining fields are only valid in a version-2 superblock */ + __be32 s_feature_compat; /* compatible feature set */ + __be32 s_feature_incompat; /* incompatible feature set */ + __be32 s_feature_ro_compat; /* readonly-compatible feature set */ +/* 0x0030 */ + __u8 s_uuid[16]; /* 128-bit uuid for journal */ + +/* 0x0040 */ + __be32 s_nr_users; /* Nr of filesystems sharing log */ + + __be32 s_dynsuper; /* Blocknr of dynamic superblock copy*/ + +/* 0x0048 */ + __be32 s_max_transaction; /* Limit of journal blocks per trans.*/ + __be32 s_max_trans_data; /* Limit of data blocks per trans. 
*/ + +/* 0x0050 */ + __u32 s_padding[44]; + +/* 0x0100 */ + __u8 s_users[16*48]; /* ids of all fs'es sharing the log */ +/* 0x0400 */ +} journal_superblock_t; + +#define JFS_HAS_COMPAT_FEATURE(j,mask) \ + ((j)->j_format_version >= 2 && \ + ((j)->j_superblock->s_feature_compat & cpu_to_be32((mask)))) +#define JFS_HAS_RO_COMPAT_FEATURE(j,mask) \ + ((j)->j_format_version >= 2 && \ + ((j)->j_superblock->s_feature_ro_compat & cpu_to_be32((mask)))) +#define JFS_HAS_INCOMPAT_FEATURE(j,mask) \ + ((j)->j_format_version >= 2 && \ + ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) + +#define JFS_FEATURE_INCOMPAT_REVOKE 0x00000001 + +/* Features known to this kernel version: */ +#define JFS_KNOWN_COMPAT_FEATURES 0 +#define JFS_KNOWN_ROCOMPAT_FEATURES 0 +#define JFS_KNOWN_INCOMPAT_FEATURES JFS_FEATURE_INCOMPAT_REVOKE + +#ifdef __KERNEL__ + +#include <linux/fs.h> +#include <linux/sched.h> + +#define JBD_ASSERTIONS +#ifdef JBD_ASSERTIONS +#define J_ASSERT(assert) \ +do { \ + if (!(assert)) { \ + printk (KERN_EMERG \ + "Assertion failure in %s() at %s:%d: \"%s\"\n", \ + __FUNCTION__, __FILE__, __LINE__, # assert); \ + BUG(); \ + } \ +} while (0) + +#if defined(CONFIG_BUFFER_DEBUG) +void buffer_assertion_failure(struct buffer_head *bh); +#define J_ASSERT_BH(bh, expr) \ + do { \ + if (!(expr)) \ + buffer_assertion_failure(bh); \ + J_ASSERT(expr); \ + } while (0) +#define J_ASSERT_JH(jh, expr) J_ASSERT_BH(jh2bh(jh), expr) +#else +#define J_ASSERT_BH(bh, expr) J_ASSERT(expr) +#define J_ASSERT_JH(jh, expr) J_ASSERT(expr) +#endif + +#else +#define J_ASSERT(assert) do { } while (0) +#endif /* JBD_ASSERTIONS */ + +#if defined(JBD_PARANOID_IOFAIL) +#define J_EXPECT(expr, why...) J_ASSERT(expr) +#define J_EXPECT_BH(bh, expr, why...) J_ASSERT_BH(bh, expr) +#define J_EXPECT_JH(jh, expr, why...) J_ASSERT_JH(jh, expr) +#else +#define __journal_expect(expr, why...) \ + ({ \ + int val = (expr); \ + if (!val) { \ + printk(KERN_ERR \ + "EXT3-fs unexpected failure: %s;\n",# expr); \ + printk(KERN_ERR why "\n"); \ + } \ + val; \ + }) +#define J_EXPECT(expr, why...) __journal_expect(expr, ## why) +#define J_EXPECT_BH(bh, expr, why...) __journal_expect(expr, ## why) +#define J_EXPECT_JH(jh, expr, why...) 
__journal_expect(expr, ## why) +#endif + +enum jbd_state_bits { + BH_JBD /* Has an attached ext3 journal_head */ + = BH_PrivateStart, + BH_JWrite, /* Being written to log (@@@ DEBUGGING) */ + BH_Freed, /* Has been freed (truncated) */ + BH_Revoked, /* Has been revoked from the log */ + BH_RevokeValid, /* Revoked flag is valid */ + BH_JBDDirty, /* Is dirty but journaled */ + BH_State, /* Pins most journal_head state */ + BH_JournalHead, /* Pins bh->b_private and jh->b_bh */ + BH_Unshadow, /* Dummy bit, for BJ_Shadow wakeup filtering */ +}; + +BUFFER_FNS(JBD, jbd) +BUFFER_FNS(JWrite, jwrite) +BUFFER_FNS(JBDDirty, jbddirty) +TAS_BUFFER_FNS(JBDDirty, jbddirty) +BUFFER_FNS(Revoked, revoked) +TAS_BUFFER_FNS(Revoked, revoked) +BUFFER_FNS(RevokeValid, revokevalid) +TAS_BUFFER_FNS(RevokeValid, revokevalid) +BUFFER_FNS(Freed, freed) + +static inline struct buffer_head *jh2bh(struct journal_head *jh) +{ + return jh->b_bh; +} + +static inline struct journal_head *bh2jh(struct buffer_head *bh) +{ + return bh->b_private; +} + +static inline void jbd_lock_bh_state(struct buffer_head *bh) +{ + bit_spin_lock(BH_State, &bh->b_state); +} + +static inline int jbd_trylock_bh_state(struct buffer_head *bh) +{ + return bit_spin_trylock(BH_State, &bh->b_state); +} + +static inline int jbd_is_locked_bh_state(struct buffer_head *bh) +{ + return bit_spin_is_locked(BH_State, &bh->b_state); +} + +static inline void jbd_unlock_bh_state(struct buffer_head *bh) +{ + bit_spin_unlock(BH_State, &bh->b_state); +} + +static inline void jbd_lock_bh_journal_head(struct buffer_head *bh) +{ + bit_spin_lock(BH_JournalHead, &bh->b_state); +} + +static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) +{ + bit_spin_unlock(BH_JournalHead, &bh->b_state); +} + +struct jbd_revoke_table_s; + +/** + * struct handle_s - The handle_s type is the concrete type associated with + * handle_t. + * @h_transaction: Which compound transaction is this update a part of? + * @h_buffer_credits: Number of remaining buffers we are allowed to dirty. + * @h_ref: Reference count on this handle + * @h_err: Field for caller's use to track errors through large fs operations + * @h_sync: flag for sync-on-close + * @h_jdata: flag to force data journaling + * @h_aborted: flag indicating fatal error on handle + **/ + +/* Docbook can't yet cope with the bit fields, but will leave the documentation + * in so it can be fixed later. + */ + +struct handle_s +{ + /* Which compound transaction is this update a part of? */ + transaction_t *h_transaction; + + /* Number of remaining buffers we are allowed to dirty: */ + int h_buffer_credits; + + /* Reference count on this handle */ + int h_ref; + + /* Field for caller's use to track errors through large fs */ + /* operations */ + int h_err; + + /* Flags [no locking] */ + unsigned int h_sync: 1; /* sync-on-close */ + unsigned int h_jdata: 1; /* force data journaling */ + unsigned int h_aborted: 1; /* fatal error on handle */ +}; + + +/* The transaction_t type is the guts of the journaling mechanism. It + * tracks a compound transaction through its various states: + * + * RUNNING: accepting new updates + * LOCKED: Updates still running but we don't accept new ones + * RUNDOWN: Updates are tidying up but have finished requesting + * new buffers to modify (state not used for now) + * FLUSH: All updates complete, but we are still writing to disk + * COMMIT: All data on disk, writing commit record + * FINISHED: We still have to keep the transaction for checkpointing. 
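+ *
+ * A transaction only ever moves forward through these states (T_RUNDOWN
+ * is currently unused), i.e. schematically:
+ *
+ *	T_RUNNING -> T_LOCKED -> T_FLUSH -> T_COMMIT -> T_FINISHED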
+ * + * The transaction keeps track of all of the buffers modified by a + * running transaction, and all of the buffers committed but not yet + * flushed to home for finished transactions. + */ + +/* + * Lock ranking: + * + * j_list_lock + * ->jbd_lock_bh_journal_head() (This is "innermost") + * + * j_state_lock + * ->jbd_lock_bh_state() + * + * jbd_lock_bh_state() + * ->j_list_lock + * + * j_state_lock + * ->t_handle_lock + * + * j_state_lock + * ->j_list_lock (journal_unmap_buffer) + * + */ + +struct transaction_s +{ + /* Pointer to the journal for this transaction. [no locking] */ + journal_t *t_journal; + + /* Sequence number for this transaction [no locking] */ + tid_t t_tid; + + /* + * Transaction's current state + * [no locking - only kjournald alters this] + * FIXME: needs barriers + * KLUDGE: [use j_state_lock] + */ + enum { + T_RUNNING, + T_LOCKED, + T_RUNDOWN, + T_FLUSH, + T_COMMIT, + T_FINISHED + } t_state; + + /* + * Where in the log does this transaction's commit start? [no locking] + */ + unsigned long t_log_start; + + /* Number of buffers on the t_buffers list [j_list_lock] */ + int t_nr_buffers; + + /* + * Doubly-linked circular list of all buffers reserved but not yet + * modified by this transaction [j_list_lock] + */ + struct journal_head *t_reserved_list; + + /* + * Doubly-linked circular list of all buffers under writeout during + * commit [j_list_lock] + */ + struct journal_head *t_locked_list; + + /* + * Doubly-linked circular list of all metadata buffers owned by this + * transaction [j_list_lock] + */ + struct journal_head *t_buffers; + + /* + * Doubly-linked circular list of all data buffers still to be + * flushed before this transaction can be committed [j_list_lock] + */ + struct journal_head *t_sync_datalist; + + /* + * Doubly-linked circular list of all forget buffers (superseded + * buffers which we can un-checkpoint once this transaction commits) + * [j_list_lock] + */ + struct journal_head *t_forget; + + /* + * Doubly-linked circular list of all buffers still to be flushed before + * this transaction can be checkpointed. [j_list_lock] + */ + struct journal_head *t_checkpoint_list; + + /* + * Doubly-linked circular list of all buffers submitted for IO while + * checkpointing. [j_list_lock] + */ + struct journal_head *t_checkpoint_io_list; + + /* + * Doubly-linked circular list of temporary buffers currently undergoing + * IO in the log [j_list_lock] + */ + struct journal_head *t_iobuf_list; + + /* + * Doubly-linked circular list of metadata buffers being shadowed by log + * IO. The IO buffers on the iobuf list and the shadow buffers on this + * list match each other one for one at all times. [j_list_lock] + */ + struct journal_head *t_shadow_list; + + /* + * Doubly-linked circular list of control buffers being written to the + * log. [j_list_lock] + */ + struct journal_head *t_log_list; + + /* + * Protects info related to handles + */ + spinlock_t t_handle_lock; + + /* + * Number of outstanding updates running on this transaction + * [t_handle_lock] + */ + int t_updates; + + /* + * Number of buffers reserved for use by all handles in this transaction + * handle but not yet modified. [t_handle_lock] + */ + int t_outstanding_credits; + + /* + * Forward and backward links for the circular list of all transactions + * awaiting checkpoint. [j_list_lock] + */ + transaction_t *t_cpnext, *t_cpprev; + + /* + * When will the transaction expire (become due for commit), in jiffies? 
+ * [no locking] + */ + unsigned long t_expires; + + /* + * How many handles used this transaction? [t_handle_lock] + */ + int t_handle_count; + +}; + +/** + * struct journal_s - The journal_s type is the concrete type associated with + * journal_t. + * @j_flags: General journaling state flags + * @j_errno: Is there an outstanding uncleared error on the journal (from a + * prior abort)? + * @j_sb_buffer: First part of superblock buffer + * @j_superblock: Second part of superblock buffer + * @j_format_version: Version of the superblock format + * @j_state_lock: Protect the various scalars in the journal + * @j_barrier_count: Number of processes waiting to create a barrier lock + * @j_barrier: The barrier lock itself + * @j_running_transaction: The current running transaction.. + * @j_committing_transaction: the transaction we are pushing to disk + * @j_checkpoint_transactions: a linked circular list of all transactions + * waiting for checkpointing + * @j_wait_transaction_locked: Wait queue for waiting for a locked transaction + * to start committing, or for a barrier lock to be released + * @j_wait_logspace: Wait queue for waiting for checkpointing to complete + * @j_wait_done_commit: Wait queue for waiting for commit to complete + * @j_wait_checkpoint: Wait queue to trigger checkpointing + * @j_wait_commit: Wait queue to trigger commit + * @j_wait_updates: Wait queue to wait for updates to complete + * @j_checkpoint_mutex: Mutex for locking against concurrent checkpoints + * @j_head: Journal head - identifies the first unused block in the journal + * @j_tail: Journal tail - identifies the oldest still-used block in the + * journal. + * @j_free: Journal free - how many free blocks are there in the journal? + * @j_first: The block number of the first usable block + * @j_last: The block number one beyond the last usable block + * @j_dev: Device where we store the journal + * @j_blocksize: blocksize for the location where we store the journal. + * @j_blk_offset: starting block offset for into the device where we store the + * journal + * @j_fs_dev: Device which holds the client fs. For internal journal this will + * be equal to j_dev + * @j_maxlen: Total maximum capacity of the journal region on disk. + * @j_list_lock: Protects the buffer lists and internal buffer state. + * @j_inode: Optional inode where we store the journal. If present, all journal + * block numbers are mapped into this inode via bmap(). + * @j_tail_sequence: Sequence number of the oldest transaction in the log + * @j_transaction_sequence: Sequence number of the next transaction to grant + * @j_commit_sequence: Sequence number of the most recently committed + * transaction + * @j_commit_request: Sequence number of the most recent transaction wanting + * commit + * @j_uuid: Uuid of client object. + * @j_task: Pointer to the current commit thread for this journal + * @j_max_transaction_buffers: Maximum number of metadata buffers to allow in a + * single compound commit transaction + * @j_commit_interval: What is the maximum transaction lifetime before we begin + * a commit? + * @j_commit_timer: The timer used to wakeup the commit thread + * @j_revoke_lock: Protect the revoke table + * @j_revoke: The revoke table - maintains the list of revoked blocks in the + * current transaction. 
+ * @j_revoke_table: alternate revoke tables for j_revoke + * @j_wbuf: array of buffer_heads for journal_commit_transaction + * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the + * number that will fit in j_blocksize + * @j_last_sync_writer: most recent pid which did a synchronous write + * @j_private: An opaque pointer to fs-private information. + */ + +struct journal_s +{ + /* General journaling state flags [j_state_lock] */ + unsigned long j_flags; + + /* + * Is there an outstanding uncleared error on the journal (from a prior + * abort)? [j_state_lock] + */ + int j_errno; + + /* The superblock buffer */ + struct buffer_head *j_sb_buffer; + journal_superblock_t *j_superblock; + + /* Version of the superblock format */ + int j_format_version; + + /* + * Protect the various scalars in the journal + */ + spinlock_t j_state_lock; + + /* + * Number of processes waiting to create a barrier lock [j_state_lock] + */ + int j_barrier_count; + + /* The barrier lock itself */ + struct mutex j_barrier; + + /* + * Transactions: The current running transaction... + * [j_state_lock] [caller holding open handle] + */ + transaction_t *j_running_transaction; + + /* + * the transaction we are pushing to disk + * [j_state_lock] [caller holding open handle] + */ + transaction_t *j_committing_transaction; + + /* + * ... and a linked circular list of all transactions waiting for + * checkpointing. [j_list_lock] + */ + transaction_t *j_checkpoint_transactions; + + /* + * Wait queue for waiting for a locked transaction to start committing, + * or for a barrier lock to be released + */ + wait_queue_head_t j_wait_transaction_locked; + + /* Wait queue for waiting for checkpointing to complete */ + wait_queue_head_t j_wait_logspace; + + /* Wait queue for waiting for commit to complete */ + wait_queue_head_t j_wait_done_commit; + + /* Wait queue to trigger checkpointing */ + wait_queue_head_t j_wait_checkpoint; + + /* Wait queue to trigger commit */ + wait_queue_head_t j_wait_commit; + + /* Wait queue to wait for updates to complete */ + wait_queue_head_t j_wait_updates; + + /* Semaphore for locking against concurrent checkpoints */ + struct mutex j_checkpoint_mutex; + + /* + * Journal head: identifies the first unused block in the journal. + * [j_state_lock] + */ + unsigned long j_head; + + /* + * Journal tail: identifies the oldest still-used block in the journal. + * [j_state_lock] + */ + unsigned long j_tail; + + /* + * Journal free: how many free blocks are there in the journal? + * [j_state_lock] + */ + unsigned long j_free; + + /* + * Journal start and end: the block numbers of the first usable block + * and one beyond the last usable block in the journal. [j_state_lock] + */ + unsigned long j_first; + unsigned long j_last; + + /* + * Device, blocksize and starting block offset for the location where we + * store the journal. + */ + struct block_device *j_dev; + int j_blocksize; + unsigned long j_blk_offset; + + /* + * Device which holds the client fs. For internal journal this will be + * equal to j_dev. + */ + struct block_device *j_fs_dev; + + /* Total maximum capacity of the journal region on disk. */ + unsigned int j_maxlen; + + /* + * Protects the buffer lists and internal buffer state. + */ + spinlock_t j_list_lock; + + /* Optional inode where we store the journal. If present, all */ + /* journal block numbers are mapped into this inode via */ + /* bmap(). 
*/ + struct inode *j_inode; + + /* + * Sequence number of the oldest transaction in the log [j_state_lock] + */ + tid_t j_tail_sequence; + + /* + * Sequence number of the next transaction to grant [j_state_lock] + */ + tid_t j_transaction_sequence; + + /* + * Sequence number of the most recently committed transaction + * [j_state_lock]. + */ + tid_t j_commit_sequence; + + /* + * Sequence number of the most recent transaction wanting commit + * [j_state_lock] + */ + tid_t j_commit_request; + + /* + * Journal uuid: identifies the object (filesystem, LVM volume etc) + * backed by this journal. This will eventually be replaced by an array + * of uuids, allowing us to index multiple devices within a single + * journal and to perform atomic updates across them. + */ + __u8 j_uuid[16]; + + /* Pointer to the current commit thread for this journal */ + struct task_struct *j_task; + + /* + * Maximum number of metadata buffers to allow in a single compound + * commit transaction + */ + int j_max_transaction_buffers; + + /* + * What is the maximum transaction lifetime before we begin a commit? + */ + unsigned long j_commit_interval; + + /* The timer used to wakeup the commit thread: */ + struct timer_list j_commit_timer; + + /* + * The revoke table: maintains the list of revoked blocks in the + * current transaction. [j_revoke_lock] + */ + spinlock_t j_revoke_lock; + struct jbd_revoke_table_s *j_revoke; + struct jbd_revoke_table_s *j_revoke_table[2]; + + /* + * array of bhs for journal_commit_transaction + */ + struct buffer_head **j_wbuf; + int j_wbufsize; + + pid_t j_last_sync_writer; + + /* + * An opaque pointer to fs-private information. ext3 puts its + * superblock pointer here + */ + void *j_private; +}; + +/* + * Journal flag definitions + */ +#define JFS_UNMOUNT 0x001 /* Journal thread is being destroyed */ +#define JFS_ABORT 0x002 /* Journaling has been aborted for errors. 
*/ +#define JFS_ACK_ERR 0x004 /* The errno in the sb has been acked */ +#define JFS_FLUSHED 0x008 /* The journal superblock has been flushed */ +#define JFS_LOADED 0x010 /* The journal superblock has been loaded */ +#define JFS_BARRIER 0x020 /* Use IDE barriers */ + +/* + * Function declarations for the journaling transaction and buffer + * management + */ + +/* Filing buffers */ +extern void __journal_temp_unlink_buffer(struct journal_head *jh); +extern void journal_unfile_buffer(journal_t *, struct journal_head *); +extern void __journal_unfile_buffer(struct journal_head *); +extern void __journal_refile_buffer(struct journal_head *); +extern void journal_refile_buffer(journal_t *, struct journal_head *); +extern void __journal_file_buffer(struct journal_head *, transaction_t *, int); +extern void __journal_free_buffer(struct journal_head *bh); +extern void journal_file_buffer(struct journal_head *, transaction_t *, int); +extern void __journal_clean_data_list(transaction_t *transaction); + +/* Log buffer allocation */ +extern struct journal_head * journal_get_descriptor_buffer(journal_t *); +int journal_next_log_block(journal_t *, unsigned long *); + +/* Commit management */ +extern void journal_commit_transaction(journal_t *); + +/* Checkpoint list management */ +int __journal_clean_checkpoint_list(journal_t *journal); +int __journal_remove_checkpoint(struct journal_head *); +void __journal_insert_checkpoint(struct journal_head *, transaction_t *); + +/* Buffer IO */ +extern int +journal_write_metadata_buffer(transaction_t *transaction, + struct journal_head *jh_in, + struct journal_head **jh_out, + unsigned long blocknr); + +/* Transaction locking */ +extern void __wait_on_journal (journal_t *); + +/* + * Journal locking. + * + * We need to lock the journal during transaction state changes so that nobody + * ever tries to take a handle on the running transaction while we are in the + * middle of moving it to the commit phase. j_state_lock does this. + * + * Note that the locking is completely interrupt unsafe. We never touch + * journal structures from interrupts. + */ + +static inline handle_t *journal_current_handle(void) +{ + return current->journal_info; +} + +/* The journaling code user interface: + * + * Create and destroy handles + * Register buffer modifications against the current transaction. 
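+ *
+ * A typical update, schematically (illustrative only; error handling
+ * elided, "bh" being whatever metadata buffer the caller modifies):
+ *
+ *	handle = journal_start(journal, credits);
+ *	journal_get_write_access(handle, bh);
+ *	... modify the buffer contents ...
+ *	journal_dirty_metadata(handle, bh);
+ *	journal_stop(handle);
+ *
+ * A long-running operation that drains its credits can try
+ * journal_extend(), falling back to journal_restart() when the current
+ * transaction cannot grow any further.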
+ */ + +extern handle_t *journal_start(journal_t *, int nblocks); +extern int journal_restart (handle_t *, int nblocks); +extern int journal_extend (handle_t *, int nblocks); +extern int journal_get_write_access(handle_t *, struct buffer_head *); +extern int journal_get_create_access (handle_t *, struct buffer_head *); +extern int journal_get_undo_access(handle_t *, struct buffer_head *); +extern int journal_dirty_data (handle_t *, struct buffer_head *); +extern int journal_dirty_metadata (handle_t *, struct buffer_head *); +extern void journal_release_buffer (handle_t *, struct buffer_head *); +extern int journal_forget (handle_t *, struct buffer_head *); +extern void journal_sync_buffer (struct buffer_head *); +extern void journal_invalidatepage(journal_t *, + struct page *, unsigned long); +extern int journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); +extern int journal_stop(handle_t *); +extern int journal_flush (journal_t *); +extern void journal_lock_updates (journal_t *); +extern void journal_unlock_updates (journal_t *); + +extern journal_t * journal_init_dev(struct block_device *bdev, + struct block_device *fs_dev, + int start, int len, int bsize); +extern journal_t * journal_init_inode (struct inode *); +extern int journal_update_format (journal_t *); +extern int journal_check_used_features + (journal_t *, unsigned long, unsigned long, unsigned long); +extern int journal_check_available_features + (journal_t *, unsigned long, unsigned long, unsigned long); +extern int journal_set_features + (journal_t *, unsigned long, unsigned long, unsigned long); +extern int journal_create (journal_t *); +extern int journal_load (journal_t *journal); +extern void journal_destroy (journal_t *); +extern int journal_recover (journal_t *journal); +extern int journal_wipe (journal_t *, int); +extern int journal_skip_recovery (journal_t *); +extern void journal_update_superblock (journal_t *, int); +extern void __journal_abort_hard (journal_t *); +extern void journal_abort (journal_t *, int); +extern int journal_errno (journal_t *); +extern void journal_ack_err (journal_t *); +extern int journal_clear_err (journal_t *); +extern int journal_bmap(journal_t *, unsigned long, unsigned long *); +extern int journal_force_commit(journal_t *); + +/* + * journal_head management + */ +struct journal_head *journal_add_journal_head(struct buffer_head *bh); +struct journal_head *journal_grab_journal_head(struct buffer_head *bh); +void journal_remove_journal_head(struct buffer_head *bh); +void journal_put_journal_head(struct journal_head *jh); + +/* + * handle management + */ +extern kmem_cache_t *jbd_handle_cache; + +static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags) +{ + return kmem_cache_alloc(jbd_handle_cache, gfp_flags); +} + +static inline void jbd_free_handle(handle_t *handle) +{ + kmem_cache_free(jbd_handle_cache, handle); +} + +/* Primary revoke support */ +#define JOURNAL_REVOKE_DEFAULT_HASH 256 +extern int journal_init_revoke(journal_t *, int); +extern void journal_destroy_revoke_caches(void); +extern int journal_init_revoke_caches(void); + +extern void journal_destroy_revoke(journal_t *); +extern int journal_revoke (handle_t *, + unsigned long, struct buffer_head *); +extern int journal_cancel_revoke(handle_t *, struct journal_head *); +extern void journal_write_revoke_records(journal_t *, transaction_t *); + +/* Recovery revoke support */ +extern int journal_set_revoke(journal_t *, unsigned long, tid_t); +extern int journal_test_revoke(journal_t *, unsigned long, tid_t); +extern 
void journal_clear_revoke(journal_t *);
+extern void journal_switch_revoke_table(journal_t *journal);
+
+/*
+ * The log thread user interface:
+ *
+ * Request space in the current transaction, and force transaction commit
+ * transitions on demand.
+ */
+
+int __log_space_left(journal_t *); /* Called with journal locked */
+int log_start_commit(journal_t *journal, tid_t tid);
+int __log_start_commit(journal_t *journal, tid_t tid);
+int journal_start_commit(journal_t *journal, tid_t *tid);
+int journal_force_commit_nested(journal_t *journal);
+int log_wait_commit(journal_t *journal, tid_t tid);
+int log_do_checkpoint(journal_t *journal);
+
+void __log_wait_for_space(journal_t *journal);
+extern void __journal_drop_transaction(journal_t *, transaction_t *);
+extern int cleanup_journal_tail(journal_t *);
+
+/* Debugging code only: */
+
+#define jbd_ENOSYS() \
+do { \
+	printk (KERN_ERR "JBD unimplemented function %s\n", __FUNCTION__); \
+	current->state = TASK_UNINTERRUPTIBLE; \
+	schedule(); \
+} while (1)
+
+/*
+ * is_journal_aborted
+ *
+ * Simple test wrapper function to test the JFS_ABORT state flag.  This
+ * bit, when set, indicates that we have had a fatal error somewhere,
+ * either inside the journaling layer or indicated to us by the client
+ * (e.g. ext3), and that we should not commit any further
+ * transactions.
+ */
+
+static inline int is_journal_aborted(journal_t *journal)
+{
+	return journal->j_flags & JFS_ABORT;
+}
+
+static inline int is_handle_aborted(handle_t *handle)
+{
+	if (handle->h_aborted)
+		return 1;
+	return is_journal_aborted(handle->h_transaction->t_journal);
+}
+
+static inline void journal_abort_handle(handle_t *handle)
+{
+	handle->h_aborted = 1;
+}
+
+#endif /* __KERNEL__ */
+
+/* Comparison functions for transaction IDs: perform comparisons using
+ * modulo arithmetic so that they work over sequence number wraps. */
+
+static inline int tid_gt(tid_t x, tid_t y)
+{
+	int difference = (x - y);
+	return (difference > 0);
+}
+
+static inline int tid_geq(tid_t x, tid_t y)
+{
+	int difference = (x - y);
+	return (difference >= 0);
+}
+
+extern int journal_blocks_per_page(struct inode *inode);
+
+/*
+ * Return the minimum number of blocks which must be free in the journal
+ * before a new transaction may be started.  Must be called under j_state_lock.
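+ *
+ * An illustrative caller, mirroring what start_this_handle() does (note
+ * that __log_wait_for_space() may temporarily drop the lock):
+ *
+ *	spin_lock(&journal->j_state_lock);
+ *	if (__log_space_left(journal) < jbd_space_needed(journal))
+ *		__log_wait_for_space(journal);
+ *	spin_unlock(&journal->j_state_lock);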
+ */ +static inline int jbd_space_needed(journal_t *journal) +{ + int nblocks = journal->j_max_transaction_buffers; + if (journal->j_committing_transaction) + nblocks += journal->j_committing_transaction-> + t_outstanding_credits; + return nblocks; +} + +/* + * Definitions which augment the buffer_head layer + */ + +/* journaling buffer types */ +#define BJ_None 0 /* Not journaled */ +#define BJ_SyncData 1 /* Normal data: flush before commit */ +#define BJ_Metadata 2 /* Normal journaled metadata */ +#define BJ_Forget 3 /* Buffer superseded by this transaction */ +#define BJ_IO 4 /* Buffer is for temporary IO use */ +#define BJ_Shadow 5 /* Buffer contents being shadowed to the log */ +#define BJ_LogCtl 6 /* Buffer contains log descriptors */ +#define BJ_Reserved 7 /* Buffer is reserved for access by journal */ +#define BJ_Locked 8 /* Locked for I/O during commit */ +#define BJ_Types 9 + +extern int jbd_blocks_per_page(struct inode *inode); + +#ifdef __KERNEL__ + +#define buffer_trace_init(bh) do {} while (0) +#define print_buffer_fields(bh) do {} while (0) +#define print_buffer_trace(bh) do {} while (0) +#define BUFFER_TRACE(bh, info) do {} while (0) +#define BUFFER_TRACE2(bh, bh2, info) do {} while (0) +#define JBUFFER_TRACE(jh, info) do {} while (0) + +#endif /* __KERNEL__ */ + +#endif /* _LINUX_JBD_H */ -- cgit v1.2.3 From f7f4bccb729844a0fa873e224e3a6f7eeed095bb Mon Sep 17 00:00:00 2001 From: Mingming Cao <cmm@us.ibm.com> Date: Wed, 11 Oct 2006 01:20:59 -0700 Subject: [PATCH] jbd2: rename jbd2 symbols to avoid duplication of jbd symbols Mingming Cao originally did this work, and Shaggy reproduced it using some scripts from her. Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/jbd2/Makefile | 4 +- fs/jbd2/checkpoint.c | 54 +++--- fs/jbd2/commit.c | 122 ++++++------- fs/jbd2/journal.c | 454 +++++++++++++++++++++++----------------------- fs/jbd2/recovery.c | 46 ++--- fs/jbd2/revoke.c | 146 +++++++-------- fs/jbd2/transaction.c | 244 ++++++++++++------------- include/linux/ext4_jbd2.h | 26 +-- include/linux/jbd2.h | 248 ++++++++++++------------- 9 files changed, 672 insertions(+), 672 deletions(-) (limited to 'include') diff --git a/fs/jbd2/Makefile b/fs/jbd2/Makefile index 54aca4868a36..802a3413872a 100644 --- a/fs/jbd2/Makefile +++ b/fs/jbd2/Makefile @@ -2,6 +2,6 @@ # Makefile for the linux journaling routines. 
# -obj-$(CONFIG_JBD) += jbd.o +obj-$(CONFIG_JBD2) += jbd2.o -jbd-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o +jbd2-objs := transaction.o commit.o recovery.o checkpoint.o revoke.o journal.o diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 0208cc7ac5d0..68039fa9a566 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -19,7 +19,7 @@ #include <linux/time.h> #include <linux/fs.h> -#include <linux/jbd.h> +#include <linux/jbd2.h> #include <linux/errno.h> #include <linux/slab.h> @@ -95,9 +95,9 @@ static int __try_to_free_cp_buf(struct journal_head *jh) if (jh->b_jlist == BJ_None && !buffer_locked(bh) && !buffer_dirty(bh)) { JBUFFER_TRACE(jh, "remove from checkpoint list"); - ret = __journal_remove_checkpoint(jh) + 1; + ret = __jbd2_journal_remove_checkpoint(jh) + 1; jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); + jbd2_journal_remove_journal_head(bh); BUFFER_TRACE(bh, "release"); __brelse(bh); } else { @@ -107,19 +107,19 @@ static int __try_to_free_cp_buf(struct journal_head *jh) } /* - * __log_wait_for_space: wait until there is space in the journal. + * __jbd2_log_wait_for_space: wait until there is space in the journal. * * Called under j-state_lock *only*. It will be unlocked if we have to wait * for a checkpoint to free up some space in the log. */ -void __log_wait_for_space(journal_t *journal) +void __jbd2_log_wait_for_space(journal_t *journal) { int nblocks; assert_spin_locked(&journal->j_state_lock); nblocks = jbd_space_needed(journal); - while (__log_space_left(journal) < nblocks) { - if (journal->j_flags & JFS_ABORT) + while (__jbd2_log_space_left(journal) < nblocks) { + if (journal->j_flags & JBD2_ABORT) return; spin_unlock(&journal->j_state_lock); mutex_lock(&journal->j_checkpoint_mutex); @@ -130,9 +130,9 @@ void __log_wait_for_space(journal_t *journal) */ spin_lock(&journal->j_state_lock); nblocks = jbd_space_needed(journal); - if (__log_space_left(journal) < nblocks) { + if (__jbd2_log_space_left(journal) < nblocks) { spin_unlock(&journal->j_state_lock); - log_do_checkpoint(journal); + jbd2_log_do_checkpoint(journal); spin_lock(&journal->j_state_lock); } mutex_unlock(&journal->j_checkpoint_mutex); @@ -198,9 +198,9 @@ restart: * Now in whatever state the buffer currently is, we know that * it has been written out and so we can drop it from the list */ - released = __journal_remove_checkpoint(jh); + released = __jbd2_journal_remove_checkpoint(jh); jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); + jbd2_journal_remove_journal_head(bh); __brelse(bh); } } @@ -252,16 +252,16 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); - log_start_commit(journal, tid); - log_wait_commit(journal, tid); + jbd2_log_start_commit(journal, tid); + jbd2_log_wait_commit(journal, tid); ret = 1; } else if (!buffer_dirty(bh)) { J_ASSERT_JH(jh, !buffer_jbddirty(bh)); BUFFER_TRACE(bh, "remove from checkpoint"); - __journal_remove_checkpoint(jh); + __jbd2_journal_remove_checkpoint(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); + jbd2_journal_remove_journal_head(bh); __brelse(bh); ret = 1; } else { @@ -296,7 +296,7 @@ static int __process_buffer(journal_t *journal, struct journal_head *jh, * * The journal should be locked before calling this function. 
*/ -int log_do_checkpoint(journal_t *journal) +int jbd2_log_do_checkpoint(journal_t *journal) { transaction_t *transaction; tid_t this_tid; @@ -309,7 +309,7 @@ int log_do_checkpoint(journal_t *journal) * don't need checkpointing, just eliminate them from the * journal straight away. */ - result = cleanup_journal_tail(journal); + result = jbd2_cleanup_journal_tail(journal); jbd_debug(1, "cleanup_journal_tail returned %d\n", result); if (result <= 0) return result; @@ -374,7 +374,7 @@ restart: } out: spin_unlock(&journal->j_list_lock); - result = cleanup_journal_tail(journal); + result = jbd2_cleanup_journal_tail(journal); if (result < 0) return result; return 0; @@ -397,7 +397,7 @@ out: * we have an abort error outstanding. */ -int cleanup_journal_tail(journal_t *journal) +int jbd2_cleanup_journal_tail(journal_t *journal) { transaction_t * transaction; tid_t first_tid; @@ -452,8 +452,8 @@ int cleanup_journal_tail(journal_t *journal) journal->j_tail_sequence = first_tid; journal->j_tail = blocknr; spin_unlock(&journal->j_state_lock); - if (!(journal->j_flags & JFS_ABORT)) - journal_update_superblock(journal, 1); + if (!(journal->j_flags & JBD2_ABORT)) + jbd2_journal_update_superblock(journal, 1); return 0; } @@ -518,7 +518,7 @@ static int journal_clean_one_cp_list(struct journal_head *jh, int *released) * Returns number of buffers reaped (for debug) */ -int __journal_clean_checkpoint_list(journal_t *journal) +int __jbd2_journal_clean_checkpoint_list(journal_t *journal) { transaction_t *transaction, *last_transaction, *next_transaction; int ret = 0; @@ -578,7 +578,7 @@ out: * This function is called with jbd_lock_bh_state(jh2bh(jh)) */ -int __journal_remove_checkpoint(struct journal_head *jh) +int __jbd2_journal_remove_checkpoint(struct journal_head *jh) { transaction_t *transaction; journal_t *journal; @@ -607,7 +607,7 @@ int __journal_remove_checkpoint(struct journal_head *jh) * dropped! * * The locking here around j_committing_transaction is a bit sleazy. - * See the comment at the end of journal_commit_transaction(). + * See the comment at the end of jbd2_journal_commit_transaction(). */ if (transaction == journal->j_committing_transaction) { JBUFFER_TRACE(jh, "belongs to committing transaction"); @@ -617,7 +617,7 @@ int __journal_remove_checkpoint(struct journal_head *jh) /* OK, that was the last buffer for the transaction: we can now safely remove this transaction from the log */ - __journal_drop_transaction(journal, transaction); + __jbd2_journal_drop_transaction(journal, transaction); /* Just in case anybody was waiting for more transactions to be checkpointed... */ @@ -636,7 +636,7 @@ out: * Called with the journal locked. * Called with j_list_lock held. */ -void __journal_insert_checkpoint(struct journal_head *jh, +void __jbd2_journal_insert_checkpoint(struct journal_head *jh, transaction_t *transaction) { JBUFFER_TRACE(jh, "entry"); @@ -666,7 +666,7 @@ void __journal_insert_checkpoint(struct journal_head *jh, * Called with j_list_lock held. */ -void __journal_drop_transaction(journal_t *journal, transaction_t *transaction) +void __jbd2_journal_drop_transaction(journal_t *journal, transaction_t *transaction) { assert_spin_locked(&journal->j_list_lock); if (transaction->t_cpnext) { diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 10be51290a27..b1a4eafc1541 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -1,5 +1,5 @@ /* - * linux/fs/jbd/commit.c + * linux/fs/jbd2/commit.c * * Written by Stephen C. 
Tweedie <sct@redhat.com>, 1998 * @@ -15,7 +15,7 @@ #include <linux/time.h> #include <linux/fs.h> -#include <linux/jbd.h> +#include <linux/jbd2.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/mm.h> @@ -111,7 +111,7 @@ static int journal_write_commit_record(journal_t *journal, if (is_journal_aborted(journal)) return 0; - descriptor = journal_get_descriptor_buffer(journal); + descriptor = jbd2_journal_get_descriptor_buffer(journal); if (!descriptor) return 1; @@ -120,14 +120,14 @@ static int journal_write_commit_record(journal_t *journal, /* AKPM: buglet - add `i' to tmp! */ for (i = 0; i < bh->b_size; i += 512) { journal_header_t *tmp = (journal_header_t*)bh->b_data; - tmp->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); - tmp->h_blocktype = cpu_to_be32(JFS_COMMIT_BLOCK); + tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); + tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK); tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid); } JBUFFER_TRACE(descriptor, "write commit block"); set_buffer_dirty(bh); - if (journal->j_flags & JFS_BARRIER) { + if (journal->j_flags & JBD2_BARRIER) { set_buffer_ordered(bh); barrier_done = 1; } @@ -145,7 +145,7 @@ static int journal_write_commit_record(journal_t *journal, "disabling barriers\n", bdevname(journal->j_dev, b)); spin_lock(&journal->j_state_lock); - journal->j_flags &= ~JFS_BARRIER; + journal->j_flags &= ~JBD2_BARRIER; spin_unlock(&journal->j_state_lock); /* And try again, without the barrier */ @@ -155,7 +155,7 @@ static int journal_write_commit_record(journal_t *journal, ret = sync_dirty_buffer(bh); } put_bh(bh); /* One for getblk() */ - journal_put_journal_head(descriptor); + jbd2_journal_put_journal_head(descriptor); return (ret == -EIO); } @@ -239,7 +239,7 @@ write_out_data: if (locked && test_clear_buffer_dirty(bh)) { BUFFER_TRACE(bh, "needs writeout, adding to array"); wbuf[bufs++] = bh; - __journal_file_buffer(jh, commit_transaction, + __jbd2_journal_file_buffer(jh, commit_transaction, BJ_Locked); jbd_unlock_bh_state(bh); if (bufs == journal->j_wbufsize) { @@ -251,13 +251,13 @@ write_out_data: } else { BUFFER_TRACE(bh, "writeout complete: unfile"); - __journal_unfile_buffer(jh); + __jbd2_journal_unfile_buffer(jh); jbd_unlock_bh_state(bh); if (locked) unlock_buffer(bh); - journal_remove_journal_head(bh); + jbd2_journal_remove_journal_head(bh); /* Once for our safety reference, once for - * journal_remove_journal_head() */ + * jbd2_journal_remove_journal_head() */ put_bh(bh); put_bh(bh); } @@ -272,12 +272,12 @@ write_out_data: } /* - * journal_commit_transaction + * jbd2_journal_commit_transaction * * The primary function for committing a transaction to the log. This * function is called by the journal thread to begin a complete commit. */ -void journal_commit_transaction(journal_t *journal) +void jbd2_journal_commit_transaction(journal_t *journal) { transaction_t *commit_transaction; struct journal_head *jh, *new_jh, *descriptor; @@ -305,10 +305,10 @@ void journal_commit_transaction(journal_t *journal) spin_unlock(&journal->j_list_lock); #endif - /* Do we need to erase the effects of a prior journal_flush? */ - if (journal->j_flags & JFS_FLUSHED) { + /* Do we need to erase the effects of a prior jbd2_journal_flush? */ + if (journal->j_flags & JBD2_FLUSHED) { jbd_debug(3, "super block updated\n"); - journal_update_superblock(journal, 1); + jbd2_journal_update_superblock(journal, 1); } else { jbd_debug(3, "superblock not updated\n"); } @@ -350,7 +350,7 @@ void journal_commit_transaction(journal_t *journal) * BJ_Reserved buffers. 
Note, it is _not_ permissible to assume * that there are no such buffers: if a large filesystem * operation like a truncate needs to split itself over multiple - * transactions, then it may try to do a journal_restart() while + * transactions, then it may try to do a jbd2_journal_restart() while * there are still BJ_Reserved buffers outstanding. These must * be released cleanly from the current transaction. * @@ -358,25 +358,25 @@ void journal_commit_transaction(journal_t *journal) * again before modifying the buffer in the new transaction, but * we do not require it to remember exactly which old buffers it * has reserved. This is consistent with the existing behaviour - * that multiple journal_get_write_access() calls to the same + * that multiple jbd2_journal_get_write_access() calls to the same * buffer are perfectly permissable. */ while (commit_transaction->t_reserved_list) { jh = commit_transaction->t_reserved_list; JBUFFER_TRACE(jh, "reserved, unused: refile"); /* - * A journal_get_undo_access()+journal_release_buffer() may + * A jbd2_journal_get_undo_access()+jbd2_journal_release_buffer() may * leave undo-committed data. */ if (jh->b_committed_data) { struct buffer_head *bh = jh2bh(jh); jbd_lock_bh_state(bh); - jbd_slab_free(jh->b_committed_data, bh->b_size); + jbd2_slab_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; jbd_unlock_bh_state(bh); } - journal_refile_buffer(journal, jh); + jbd2_journal_refile_buffer(journal, jh); } /* @@ -385,7 +385,7 @@ void journal_commit_transaction(journal_t *journal) * frees some memory */ spin_lock(&journal->j_list_lock); - __journal_clean_checkpoint_list(journal); + __jbd2_journal_clean_checkpoint_list(journal); spin_unlock(&journal->j_list_lock); jbd_debug (3, "JBD: commit phase 1\n"); @@ -393,7 +393,7 @@ void journal_commit_transaction(journal_t *journal) /* * Switch to a new revoke table. 
*/ - journal_switch_revoke_table(journal); + jbd2_journal_switch_revoke_table(journal); commit_transaction->t_state = T_FLUSH; journal->j_committing_transaction = commit_transaction; @@ -450,9 +450,9 @@ void journal_commit_transaction(journal_t *journal) continue; } if (buffer_jbd(bh) && jh->b_jlist == BJ_Locked) { - __journal_unfile_buffer(jh); + __jbd2_journal_unfile_buffer(jh); jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); + jbd2_journal_remove_journal_head(bh); put_bh(bh); } else { jbd_unlock_bh_state(bh); @@ -463,9 +463,9 @@ void journal_commit_transaction(journal_t *journal) spin_unlock(&journal->j_list_lock); if (err) - __journal_abort_hard(journal); + __jbd2_journal_abort_hard(journal); - journal_write_revoke_records(journal, commit_transaction); + jbd2_journal_write_revoke_records(journal, commit_transaction); jbd_debug(3, "JBD: commit phase 2\n"); @@ -499,7 +499,7 @@ void journal_commit_transaction(journal_t *journal) if (is_journal_aborted(journal)) { JBUFFER_TRACE(jh, "journal is aborting: refile"); - journal_refile_buffer(journal, jh); + jbd2_journal_refile_buffer(journal, jh); /* If that was the last one, we need to clean up * any descriptor buffers which may have been * already allocated, even if we are now @@ -519,9 +519,9 @@ void journal_commit_transaction(journal_t *journal) jbd_debug(4, "JBD: get descriptor\n"); - descriptor = journal_get_descriptor_buffer(journal); + descriptor = jbd2_journal_get_descriptor_buffer(journal); if (!descriptor) { - __journal_abort_hard(journal); + __jbd2_journal_abort_hard(journal); continue; } @@ -529,8 +529,8 @@ void journal_commit_transaction(journal_t *journal) jbd_debug(4, "JBD: got buffer %llu (%p)\n", (unsigned long long)bh->b_blocknr, bh->b_data); header = (journal_header_t *)&bh->b_data[0]; - header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); - header->h_blocktype = cpu_to_be32(JFS_DESCRIPTOR_BLOCK); + header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); + header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK); header->h_sequence = cpu_to_be32(commit_transaction->t_tid); tagp = &bh->b_data[sizeof(journal_header_t)]; @@ -543,25 +543,25 @@ void journal_commit_transaction(journal_t *journal) /* Record it so that we can wait for IO completion later */ BUFFER_TRACE(bh, "ph3: file as descriptor"); - journal_file_buffer(descriptor, commit_transaction, + jbd2_journal_file_buffer(descriptor, commit_transaction, BJ_LogCtl); } /* Where is the buffer to be written? */ - err = journal_next_log_block(journal, &blocknr); + err = jbd2_journal_next_log_block(journal, &blocknr); /* If the block mapping failed, just abandon the buffer and repeat this loop: we'll fall into the refile-on-abort condition above. */ if (err) { - __journal_abort_hard(journal); + __jbd2_journal_abort_hard(journal); continue; } /* * start_this_handle() uses t_outstanding_credits to determine * the free space in the log, but this counter is changed - * by journal_next_log_block() also. + * by jbd2_journal_next_log_block() also. */ commit_transaction->t_outstanding_credits--; @@ -576,13 +576,13 @@ void journal_commit_transaction(journal_t *journal) set_bit(BH_JWrite, &jh2bh(jh)->b_state); /* - * akpm: journal_write_metadata_buffer() sets + * akpm: jbd2_journal_write_metadata_buffer() sets * new_bh->b_transaction to commit_transaction. 
* We need to clean this up before we release new_bh * (which is of type BJ_IO) */ JBUFFER_TRACE(jh, "ph3: write metadata"); - flags = journal_write_metadata_buffer(commit_transaction, + flags = jbd2_journal_write_metadata_buffer(commit_transaction, jh, &new_jh, blocknr); set_bit(BH_JWrite, &jh2bh(new_jh)->b_state); wbuf[bufs++] = jh2bh(new_jh); @@ -592,9 +592,9 @@ void journal_commit_transaction(journal_t *journal) tag_flag = 0; if (flags & 1) - tag_flag |= JFS_FLAG_ESCAPE; + tag_flag |= JBD2_FLAG_ESCAPE; if (!first_tag) - tag_flag |= JFS_FLAG_SAME_UUID; + tag_flag |= JBD2_FLAG_SAME_UUID; tag = (journal_block_tag_t *) tagp; tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr); @@ -622,7 +622,7 @@ void journal_commit_transaction(journal_t *journal) submitting the IOs. "tag" still points to the last tag we set up. */ - tag->t_flags |= cpu_to_be32(JFS_FLAG_LAST_TAG); + tag->t_flags |= cpu_to_be32(JBD2_FLAG_LAST_TAG); start_journal_io: for (i = 0; i < bufs; i++) { @@ -678,14 +678,14 @@ wait_for_iobuf: clear_buffer_jwrite(bh); JBUFFER_TRACE(jh, "ph4: unfile after journal write"); - journal_unfile_buffer(journal, jh); + jbd2_journal_unfile_buffer(journal, jh); /* * ->t_iobuf_list should contain only dummy buffer_heads - * which were created by journal_write_metadata_buffer(). + * which were created by jbd2_journal_write_metadata_buffer(). */ BUFFER_TRACE(bh, "dumping temporary bh"); - journal_put_journal_head(jh); + jbd2_journal_put_journal_head(jh); __brelse(bh); J_ASSERT_BH(bh, atomic_read(&bh->b_count) == 0); free_buffer_head(bh); @@ -702,7 +702,7 @@ wait_for_iobuf: we finally commit, we can do any checkpointing required. */ JBUFFER_TRACE(jh, "file as BJ_Forget"); - journal_file_buffer(jh, commit_transaction, BJ_Forget); + jbd2_journal_file_buffer(jh, commit_transaction, BJ_Forget); /* Wake up any transactions which were waiting for this IO to complete */ wake_up_bit(&bh->b_state, BH_Unshadow); @@ -733,8 +733,8 @@ wait_for_iobuf: BUFFER_TRACE(bh, "ph5: control buffer writeout done: unfile"); clear_buffer_jwrite(bh); - journal_unfile_buffer(journal, jh); - journal_put_journal_head(jh); + jbd2_journal_unfile_buffer(journal, jh); + jbd2_journal_put_journal_head(jh); __brelse(bh); /* One for getblk */ /* AKPM: bforget here */ } @@ -745,7 +745,7 @@ wait_for_iobuf: err = -EIO; if (err) - __journal_abort_hard(journal); + __jbd2_journal_abort_hard(journal); /* End of a transaction! Finally, we can do checkpoint processing: any buffers committed as a result of this @@ -789,14 +789,14 @@ restart_loop: * Otherwise, we can just throw away the frozen data now. */ if (jh->b_committed_data) { - jbd_slab_free(jh->b_committed_data, bh->b_size); + jbd2_slab_free(jh->b_committed_data, bh->b_size); jh->b_committed_data = NULL; if (jh->b_frozen_data) { jh->b_committed_data = jh->b_frozen_data; jh->b_frozen_data = NULL; } } else if (jh->b_frozen_data) { - jbd_slab_free(jh->b_frozen_data, bh->b_size); + jbd2_slab_free(jh->b_frozen_data, bh->b_size); jh->b_frozen_data = NULL; } @@ -804,12 +804,12 @@ restart_loop: cp_transaction = jh->b_cp_transaction; if (cp_transaction) { JBUFFER_TRACE(jh, "remove from old cp transaction"); - __journal_remove_checkpoint(jh); + __jbd2_journal_remove_checkpoint(jh); } /* Only re-checkpoint the buffer_head if it is marked * dirty. If the buffer was added to the BJ_Forget list - * by journal_forget, it may no longer be dirty and + * by jbd2_journal_forget, it may no longer be dirty and * there's no point in keeping a checkpoint record for * it. 
*/ @@ -828,9 +828,9 @@ restart_loop: if (buffer_jbddirty(bh)) { JBUFFER_TRACE(jh, "add to new checkpointing trans"); - __journal_insert_checkpoint(jh, commit_transaction); + __jbd2_journal_insert_checkpoint(jh, commit_transaction); JBUFFER_TRACE(jh, "refile for checkpoint writeback"); - __journal_refile_buffer(jh); + __jbd2_journal_refile_buffer(jh); jbd_unlock_bh_state(bh); } else { J_ASSERT_BH(bh, !buffer_dirty(bh)); @@ -842,11 +842,11 @@ restart_loop: * disk and before we process the buffer on BJ_Forget * list. */ JBUFFER_TRACE(jh, "refile or unfile freed buffer"); - __journal_refile_buffer(jh); + __jbd2_journal_refile_buffer(jh); if (!jh->b_transaction) { jbd_unlock_bh_state(bh); /* needs a brelse */ - journal_remove_journal_head(bh); + jbd2_journal_remove_journal_head(bh); release_buffer_page(bh); } else jbd_unlock_bh_state(bh); @@ -856,9 +856,9 @@ restart_loop: spin_unlock(&journal->j_list_lock); /* * This is a bit sleazy. We borrow j_list_lock to protect - * journal->j_committing_transaction in __journal_remove_checkpoint. - * Really, __journal_remove_checkpoint should be using j_state_lock but - * it's a bit hassle to hold that across __journal_remove_checkpoint + * journal->j_committing_transaction in __jbd2_journal_remove_checkpoint. + * Really, __jbd2_journal_remove_checkpoint should be using j_state_lock but + * it's a bit hassle to hold that across __jbd2_journal_remove_checkpoint */ spin_lock(&journal->j_state_lock); spin_lock(&journal->j_list_lock); @@ -885,7 +885,7 @@ restart_loop: spin_unlock(&journal->j_state_lock); if (commit_transaction->t_checkpoint_list == NULL) { - __journal_drop_transaction(journal, commit_transaction); + __jbd2_journal_drop_transaction(journal, commit_transaction); } else { if (journal->j_checkpoint_transactions == NULL) { journal->j_checkpoint_transactions = commit_transaction; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index c518dd8fe60a..3fbbba20a516 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1,5 +1,5 @@ /* - * linux/fs/jbd/journal.c + * linux/fs/jbd2/journal.c * * Written by Stephen C. 
Tweedie <sct@redhat.com>, 1998 * @@ -25,7 +25,7 @@ #include <linux/module.h> #include <linux/time.h> #include <linux/fs.h> -#include <linux/jbd.h> +#include <linux/jbd2.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/smp_lock.h> @@ -40,51 +40,51 @@ #include <asm/uaccess.h> #include <asm/page.h> -EXPORT_SYMBOL(journal_start); -EXPORT_SYMBOL(journal_restart); -EXPORT_SYMBOL(journal_extend); -EXPORT_SYMBOL(journal_stop); -EXPORT_SYMBOL(journal_lock_updates); -EXPORT_SYMBOL(journal_unlock_updates); -EXPORT_SYMBOL(journal_get_write_access); -EXPORT_SYMBOL(journal_get_create_access); -EXPORT_SYMBOL(journal_get_undo_access); -EXPORT_SYMBOL(journal_dirty_data); -EXPORT_SYMBOL(journal_dirty_metadata); -EXPORT_SYMBOL(journal_release_buffer); -EXPORT_SYMBOL(journal_forget); +EXPORT_SYMBOL(jbd2_journal_start); +EXPORT_SYMBOL(jbd2_journal_restart); +EXPORT_SYMBOL(jbd2_journal_extend); +EXPORT_SYMBOL(jbd2_journal_stop); +EXPORT_SYMBOL(jbd2_journal_lock_updates); +EXPORT_SYMBOL(jbd2_journal_unlock_updates); +EXPORT_SYMBOL(jbd2_journal_get_write_access); +EXPORT_SYMBOL(jbd2_journal_get_create_access); +EXPORT_SYMBOL(jbd2_journal_get_undo_access); +EXPORT_SYMBOL(jbd2_journal_dirty_data); +EXPORT_SYMBOL(jbd2_journal_dirty_metadata); +EXPORT_SYMBOL(jbd2_journal_release_buffer); +EXPORT_SYMBOL(jbd2_journal_forget); #if 0 EXPORT_SYMBOL(journal_sync_buffer); #endif -EXPORT_SYMBOL(journal_flush); -EXPORT_SYMBOL(journal_revoke); - -EXPORT_SYMBOL(journal_init_dev); -EXPORT_SYMBOL(journal_init_inode); -EXPORT_SYMBOL(journal_update_format); -EXPORT_SYMBOL(journal_check_used_features); -EXPORT_SYMBOL(journal_check_available_features); -EXPORT_SYMBOL(journal_set_features); -EXPORT_SYMBOL(journal_create); -EXPORT_SYMBOL(journal_load); -EXPORT_SYMBOL(journal_destroy); -EXPORT_SYMBOL(journal_update_superblock); -EXPORT_SYMBOL(journal_abort); -EXPORT_SYMBOL(journal_errno); -EXPORT_SYMBOL(journal_ack_err); -EXPORT_SYMBOL(journal_clear_err); -EXPORT_SYMBOL(log_wait_commit); -EXPORT_SYMBOL(journal_start_commit); -EXPORT_SYMBOL(journal_force_commit_nested); -EXPORT_SYMBOL(journal_wipe); -EXPORT_SYMBOL(journal_blocks_per_page); -EXPORT_SYMBOL(journal_invalidatepage); -EXPORT_SYMBOL(journal_try_to_free_buffers); -EXPORT_SYMBOL(journal_force_commit); +EXPORT_SYMBOL(jbd2_journal_flush); +EXPORT_SYMBOL(jbd2_journal_revoke); + +EXPORT_SYMBOL(jbd2_journal_init_dev); +EXPORT_SYMBOL(jbd2_journal_init_inode); +EXPORT_SYMBOL(jbd2_journal_update_format); +EXPORT_SYMBOL(jbd2_journal_check_used_features); +EXPORT_SYMBOL(jbd2_journal_check_available_features); +EXPORT_SYMBOL(jbd2_journal_set_features); +EXPORT_SYMBOL(jbd2_journal_create); +EXPORT_SYMBOL(jbd2_journal_load); +EXPORT_SYMBOL(jbd2_journal_destroy); +EXPORT_SYMBOL(jbd2_journal_update_superblock); +EXPORT_SYMBOL(jbd2_journal_abort); +EXPORT_SYMBOL(jbd2_journal_errno); +EXPORT_SYMBOL(jbd2_journal_ack_err); +EXPORT_SYMBOL(jbd2_journal_clear_err); +EXPORT_SYMBOL(jbd2_log_wait_commit); +EXPORT_SYMBOL(jbd2_journal_start_commit); +EXPORT_SYMBOL(jbd2_journal_force_commit_nested); +EXPORT_SYMBOL(jbd2_journal_wipe); +EXPORT_SYMBOL(jbd2_journal_blocks_per_page); +EXPORT_SYMBOL(jbd2_journal_invalidatepage); +EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers); +EXPORT_SYMBOL(jbd2_journal_force_commit); static int journal_convert_superblock_v1(journal_t *, journal_superblock_t *); static void __journal_abort_soft (journal_t *journal, int errno); -static int journal_create_jbd_slab(size_t slab_size); +static int jbd2_journal_create_jbd_slab(size_t slab_size); /* * Helper 
function used to manage commit timeouts @@ -98,7 +98,7 @@ static void commit_timeout(unsigned long __data) } /* - * kjournald: The main thread function used to manage a logging device + * kjournald2: The main thread function used to manage a logging device * journal. * * This kernel thread is responsible for two things: @@ -113,7 +113,7 @@ static void commit_timeout(unsigned long __data) * known as checkpointing, and this thread is responsible for that job. */ -static int kjournald(void *arg) +static int kjournald2(void *arg) { journal_t *journal = arg; transaction_t *transaction; @@ -129,7 +129,7 @@ static int kjournald(void *arg) journal->j_task = current; wake_up(&journal->j_wait_done_commit); - printk(KERN_INFO "kjournald starting. Commit interval %ld seconds\n", + printk(KERN_INFO "kjournald2 starting. Commit interval %ld seconds\n", journal->j_commit_interval / HZ); /* @@ -138,7 +138,7 @@ static int kjournald(void *arg) spin_lock(&journal->j_state_lock); loop: - if (journal->j_flags & JFS_UNMOUNT) + if (journal->j_flags & JBD2_UNMOUNT) goto end_loop; jbd_debug(1, "commit_sequence=%d, commit_request=%d\n", @@ -148,7 +148,7 @@ loop: jbd_debug(1, "OK, requests differ\n"); spin_unlock(&journal->j_state_lock); del_timer_sync(&journal->j_commit_timer); - journal_commit_transaction(journal); + jbd2_journal_commit_transaction(journal); spin_lock(&journal->j_state_lock); goto loop; } @@ -160,7 +160,7 @@ loop: * good idea, because that depends on threads that may * be already stopped. */ - jbd_debug(1, "Now suspending kjournald\n"); + jbd_debug(1, "Now suspending kjournald2\n"); spin_unlock(&journal->j_state_lock); refrigerator(); spin_lock(&journal->j_state_lock); @@ -180,7 +180,7 @@ loop: if (transaction && time_after_eq(jiffies, transaction->t_expires)) should_sleep = 0; - if (journal->j_flags & JFS_UNMOUNT) + if (journal->j_flags & JBD2_UNMOUNT) should_sleep = 0; if (should_sleep) { spin_unlock(&journal->j_state_lock); @@ -190,7 +190,7 @@ loop: finish_wait(&journal->j_wait_commit, &wait); } - jbd_debug(1, "kjournald wakes\n"); + jbd_debug(1, "kjournald2 wakes\n"); /* * Were we woken up by a commit wakeup event? @@ -211,16 +211,16 @@ end_loop: return 0; } -static void journal_start_thread(journal_t *journal) +static void jbd2_journal_start_thread(journal_t *journal) { - kthread_run(kjournald, journal, "kjournald"); + kthread_run(kjournald2, journal, "kjournald2"); wait_event(journal->j_wait_done_commit, journal->j_task != 0); } static void journal_kill_thread(journal_t *journal) { spin_lock(&journal->j_state_lock); - journal->j_flags |= JFS_UNMOUNT; + journal->j_flags |= JBD2_UNMOUNT; while (journal->j_task) { wake_up(&journal->j_wait_commit); @@ -232,7 +232,7 @@ static void journal_kill_thread(journal_t *journal) } /* - * journal_write_metadata_buffer: write a metadata buffer to the journal. + * jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal. * * Writes a metadata buffer to a given disk block. The actual IO is not * performed but a new buffer_head is constructed which labels the data @@ -240,7 +240,7 @@ static void journal_kill_thread(journal_t *journal) * * Any magic-number escaping which needs to be done will cause a * copy-out here. If the buffer happens to start with the - * JFS_MAGIC_NUMBER, then we can't write it to the log directly: the + * JBD2_MAGIC_NUMBER, then we can't write it to the log directly: the * magic number is only written to the log for descripter blocks. 
In * this case, we copy the data and replace the first word with 0, and we * return a result code which indicates that this buffer needs to be @@ -268,7 +268,7 @@ static void journal_kill_thread(journal_t *journal) * Bit 1 set == buffer copy-out performed (kfree the data after IO) */ -int journal_write_metadata_buffer(transaction_t *transaction, +int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, struct journal_head **jh_out, unsigned long blocknr) @@ -316,7 +316,7 @@ repeat: * Check for escaping */ if (*((__be32 *)(mapped_data + new_offset)) == - cpu_to_be32(JFS_MAGIC_NUMBER)) { + cpu_to_be32(JBD2_MAGIC_NUMBER)) { need_copy_out = 1; do_escape = 1; } @@ -329,10 +329,10 @@ repeat: char *tmp; jbd_unlock_bh_state(bh_in); - tmp = jbd_slab_alloc(bh_in->b_size, GFP_NOFS); + tmp = jbd2_slab_alloc(bh_in->b_size, GFP_NOFS); jbd_lock_bh_state(bh_in); if (jh_in->b_frozen_data) { - jbd_slab_free(tmp, bh_in->b_size); + jbd2_slab_free(tmp, bh_in->b_size); goto repeat; } @@ -362,7 +362,7 @@ repeat: atomic_set(&new_bh->b_count, 1); jbd_unlock_bh_state(bh_in); - new_jh = journal_add_journal_head(new_bh); /* This sleeps */ + new_jh = jbd2_journal_add_journal_head(new_bh); /* This sleeps */ set_bh_page(new_bh, new_page, new_offset); new_jh->b_transaction = NULL; @@ -380,9 +380,9 @@ repeat: * copying is moved to the transaction's shadow queue. */ JBUFFER_TRACE(jh_in, "file as BJ_Shadow"); - journal_file_buffer(jh_in, transaction, BJ_Shadow); + jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow); JBUFFER_TRACE(new_jh, "file as BJ_IO"); - journal_file_buffer(new_jh, transaction, BJ_IO); + jbd2_journal_file_buffer(new_jh, transaction, BJ_IO); return do_escape | (done_copy_out << 1); } @@ -393,14 +393,14 @@ repeat: */ /* - * __log_space_left: Return the number of free blocks left in the journal. + * __jbd2_log_space_left: Return the number of free blocks left in the journal. * * Called with the journal already locked. * * Called under j_state_lock */ -int __log_space_left(journal_t *journal) +int __jbd2_log_space_left(journal_t *journal) { int left = journal->j_free; @@ -424,7 +424,7 @@ int __log_space_left(journal_t *journal) /* * Called under j_state_lock. Returns true if a transaction was started. */ -int __log_start_commit(journal_t *journal, tid_t target) +int __jbd2_log_start_commit(journal_t *journal, tid_t target) { /* * Are we already doing a recent enough commit? @@ -445,12 +445,12 @@ int __log_start_commit(journal_t *journal, tid_t target) return 0; } -int log_start_commit(journal_t *journal, tid_t tid) +int jbd2_log_start_commit(journal_t *journal, tid_t tid) { int ret; spin_lock(&journal->j_state_lock); - ret = __log_start_commit(journal, tid); + ret = __jbd2_log_start_commit(journal, tid); spin_unlock(&journal->j_state_lock); return ret; } @@ -465,7 +465,7 @@ int log_start_commit(journal_t *journal, tid_t tid) * * Returns true if a transaction was started. 
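 *
 * For ordinary callers the start-then-wait pattern looks like this
 * (sketch of a typical client, e.g. a filesystem's sync path, using the
 * renamed pair below):
 *
 *	tid_t tid;
 *
 *	if (jbd2_journal_start_commit(journal, &tid))
 *		jbd2_log_wait_commit(journal, tid);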
*/ -int journal_force_commit_nested(journal_t *journal) +int jbd2_journal_force_commit_nested(journal_t *journal) { transaction_t *transaction = NULL; tid_t tid; @@ -473,7 +473,7 @@ int journal_force_commit_nested(journal_t *journal) spin_lock(&journal->j_state_lock); if (journal->j_running_transaction && !current->journal_info) { transaction = journal->j_running_transaction; - __log_start_commit(journal, transaction->t_tid); + __jbd2_log_start_commit(journal, transaction->t_tid); } else if (journal->j_committing_transaction) transaction = journal->j_committing_transaction; @@ -484,7 +484,7 @@ int journal_force_commit_nested(journal_t *journal) tid = transaction->t_tid; spin_unlock(&journal->j_state_lock); - log_wait_commit(journal, tid); + jbd2_log_wait_commit(journal, tid); return 1; } @@ -492,7 +492,7 @@ int journal_force_commit_nested(journal_t *journal) * Start a commit of the current running transaction (if any). Returns true * if a transaction was started, and fills its tid in at *ptid */ -int journal_start_commit(journal_t *journal, tid_t *ptid) +int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid) { int ret = 0; @@ -500,7 +500,7 @@ int journal_start_commit(journal_t *journal, tid_t *ptid) if (journal->j_running_transaction) { tid_t tid = journal->j_running_transaction->t_tid; - ret = __log_start_commit(journal, tid); + ret = __jbd2_log_start_commit(journal, tid); if (ret && ptid) *ptid = tid; } else if (journal->j_committing_transaction && ptid) { @@ -519,7 +519,7 @@ int journal_start_commit(journal_t *journal, tid_t *ptid) * Wait for a specified commit to complete. * The caller may not hold the journal lock. */ -int log_wait_commit(journal_t *journal, tid_t tid) +int jbd2_log_wait_commit(journal_t *journal, tid_t tid) { int err = 0; @@ -555,7 +555,7 @@ int log_wait_commit(journal_t *journal, tid_t tid) * Log buffer allocation routines: */ -int journal_next_log_block(journal_t *journal, unsigned long *retp) +int jbd2_journal_next_log_block(journal_t *journal, unsigned long *retp) { unsigned long blocknr; @@ -568,7 +568,7 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp) if (journal->j_head == journal->j_last) journal->j_head = journal->j_first; spin_unlock(&journal->j_state_lock); - return journal_bmap(journal, blocknr, retp); + return jbd2_journal_bmap(journal, blocknr, retp); } /* @@ -578,7 +578,7 @@ int journal_next_log_block(journal_t *journal, unsigned long *retp) * this is a no-op. If needed, we can use j_blk_offset - everything is * ready. */ -int journal_bmap(journal_t *journal, unsigned long blocknr, +int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, unsigned long *retp) { int err = 0; @@ -610,18 +610,18 @@ int journal_bmap(journal_t *journal, unsigned long blocknr, * the journal without copying their contents, but for journal * descriptor blocks we do need to generate bona fide buffers. * - * After the caller of journal_get_descriptor_buffer() has finished modifying + * After the caller of jbd2_journal_get_descriptor_buffer() has finished modifying * the buffer's contents they really should run flush_dcache_page(bh->b_page). * But we don't bother doing that, so there will be coherency problems with * mmaps of blockdevs which hold live JBD-controlled filesystems. 
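 *
 * Caller-side sketch (illustrative; this is how the commit path above
 * uses it):
 *
 *	struct journal_head *descriptor;
 *	struct buffer_head *dbh;
 *	journal_header_t *header;
 *
 *	descriptor = jbd2_journal_get_descriptor_buffer(journal);
 *	if (!descriptor)
 *		return;			(journal is aborting)
 *	dbh = jh2bh(descriptor);
 *	header = (journal_header_t *)&dbh->b_data[0];
 *	header->h_magic     = cpu_to_be32(JBD2_MAGIC_NUMBER);
 *	header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK);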
*/ -struct journal_head *journal_get_descriptor_buffer(journal_t *journal) +struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) { struct buffer_head *bh; unsigned long blocknr; int err; - err = journal_next_log_block(journal, &blocknr); + err = jbd2_journal_next_log_block(journal, &blocknr); if (err) return NULL; @@ -632,7 +632,7 @@ struct journal_head *journal_get_descriptor_buffer(journal_t *journal) set_buffer_uptodate(bh); unlock_buffer(bh); BUFFER_TRACE(bh, "return this buffer"); - return journal_add_journal_head(bh); + return jbd2_journal_add_journal_head(bh); } /* @@ -669,10 +669,10 @@ static journal_t * journal_init_common (void) journal->j_commit_interval = (HZ * JBD_DEFAULT_MAX_COMMIT_AGE); /* The journal is marked for error until we succeed with recovery! */ - journal->j_flags = JFS_ABORT; + journal->j_flags = JBD2_ABORT; /* Set up a default-sized revoke table for the new mount. */ - err = journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH); + err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH); if (err) { kfree(journal); goto fail; @@ -682,7 +682,7 @@ fail: return NULL; } -/* journal_init_dev and journal_init_inode: +/* jbd2_journal_init_dev and jbd2_journal_init_inode: * * Create a journal structure assigned some fixed set of disk blocks to * the journal. We don't actually touch those disk blocks yet, but we @@ -692,7 +692,7 @@ fail: */ /** - * journal_t * journal_init_dev() - creates an initialises a journal structure + * journal_t * jbd2_journal_init_dev() - creates an initialises a journal structure * @bdev: Block device on which to create the journal * @fs_dev: Device which hold journalled filesystem for this journal. * @start: Block nr Start of journal. @@ -700,11 +700,11 @@ fail: * @blocksize: blocksize of journalling device * @returns: a newly created journal_t * * - * journal_init_dev creates a journal which maps a fixed contiguous + * jbd2_journal_init_dev creates a journal which maps a fixed contiguous * range of blocks on an arbitrary block device. * */ -journal_t * journal_init_dev(struct block_device *bdev, +journal_t * jbd2_journal_init_dev(struct block_device *bdev, struct block_device *fs_dev, int start, int len, int blocksize) { @@ -740,14 +740,14 @@ journal_t * journal_init_dev(struct block_device *bdev, } /** - * journal_t * journal_init_inode () - creates a journal which maps to a inode. + * journal_t * jbd2_journal_init_inode () - creates a journal which maps to a inode. * @inode: An inode to create the journal in * - * journal_init_inode creates a journal which maps an on-disk inode as + * jbd2_journal_init_inode creates a journal which maps an on-disk inode as * the journal. The inode must exist already, must support bmap() and * must have all data blocks preallocated. */ -journal_t * journal_init_inode (struct inode *inode) +journal_t * jbd2_journal_init_inode (struct inode *inode) { struct buffer_head *bh; journal_t *journal = journal_init_common(); @@ -780,7 +780,7 @@ journal_t * journal_init_inode (struct inode *inode) return NULL; } - err = journal_bmap(journal, 0, &blocknr); + err = jbd2_journal_bmap(journal, 0, &blocknr); /* If that failed, give up */ if (err) { printk(KERN_ERR "%s: Cannnot locate journal superblock\n", @@ -838,27 +838,27 @@ static int journal_reset(journal_t *journal) journal->j_max_transaction_buffers = journal->j_maxlen / 4; /* Add the dynamic fields and write it to disk. 
*/ - journal_update_superblock(journal, 1); - journal_start_thread(journal); + jbd2_journal_update_superblock(journal, 1); + jbd2_journal_start_thread(journal); return 0; } /** - * int journal_create() - Initialise the new journal file + * int jbd2_journal_create() - Initialise the new journal file * @journal: Journal to create. This structure must have been initialised * * Given a journal_t structure which tells us which disk blocks we can * use, create a new journal superblock and initialise all of the * journal fields from scratch. **/ -int journal_create(journal_t *journal) +int jbd2_journal_create(journal_t *journal) { unsigned long blocknr; struct buffer_head *bh; journal_superblock_t *sb; int i, err; - if (journal->j_maxlen < JFS_MIN_JOURNAL_BLOCKS) { + if (journal->j_maxlen < JBD2_MIN_JOURNAL_BLOCKS) { printk (KERN_ERR "Journal length (%d blocks) too short.\n", journal->j_maxlen); journal_fail_superblock(journal); @@ -876,10 +876,10 @@ int journal_create(journal_t *journal) } /* Zero out the entire journal on disk. We cannot afford to - have any blocks on disk beginning with JFS_MAGIC_NUMBER. */ + have any blocks on disk beginning with JBD2_MAGIC_NUMBER. */ jbd_debug(1, "JBD: Zeroing out journal blocks...\n"); for (i = 0; i < journal->j_maxlen; i++) { - err = journal_bmap(journal, i, &blocknr); + err = jbd2_journal_bmap(journal, i, &blocknr); if (err) return err; bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize); @@ -899,8 +899,8 @@ int journal_create(journal_t *journal) /* OK, fill in the initial static fields in the new superblock */ sb = journal->j_superblock; - sb->s_header.h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); - sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2); + sb->s_header.h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); + sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2); sb->s_blocksize = cpu_to_be32(journal->j_blocksize); sb->s_maxlen = cpu_to_be32(journal->j_maxlen); @@ -908,21 +908,21 @@ int journal_create(journal_t *journal) journal->j_transaction_sequence = 1; - journal->j_flags &= ~JFS_ABORT; + journal->j_flags &= ~JBD2_ABORT; journal->j_format_version = 2; return journal_reset(journal); } /** - * void journal_update_superblock() - Update journal sb on disk. + * void jbd2_journal_update_superblock() - Update journal sb on disk. * @journal: The journal to update. * @wait: Set to '0' if you don't want to wait for IO completion. * * Update a journal's dynamic superblock fields and write it to disk, * optionally waiting for the IO to complete. */ -void journal_update_superblock(journal_t *journal, int wait) +void jbd2_journal_update_superblock(journal_t *journal, int wait) { journal_superblock_t *sb = journal->j_superblock; struct buffer_head *bh = journal->j_sb_buffer; @@ -931,7 +931,7 @@ void journal_update_superblock(journal_t *journal, int wait) * As a special case, if the on-disk copy is already marked as needing * no recovery (s_start == 0) and there are no outstanding transactions * in the filesystem, then we can safely defer the superblock update - * until the next commit by setting JFS_FLUSHED. This avoids + * until the next commit by setting JBD2_FLUSHED. This avoids * attempting a write to a potential-readonly device. 
*/ if (sb->s_start == 0 && journal->j_tail_sequence == @@ -966,9 +966,9 @@ out: spin_lock(&journal->j_state_lock); if (sb->s_start) - journal->j_flags &= ~JFS_FLUSHED; + journal->j_flags &= ~JBD2_FLUSHED; else - journal->j_flags |= JFS_FLUSHED; + journal->j_flags |= JBD2_FLUSHED; spin_unlock(&journal->j_state_lock); } @@ -1000,17 +1000,17 @@ static int journal_get_superblock(journal_t *journal) err = -EINVAL; - if (sb->s_header.h_magic != cpu_to_be32(JFS_MAGIC_NUMBER) || + if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) || sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) { printk(KERN_WARNING "JBD: no valid journal superblock found\n"); goto out; } switch(be32_to_cpu(sb->s_header.h_blocktype)) { - case JFS_SUPERBLOCK_V1: + case JBD2_SUPERBLOCK_V1: journal->j_format_version = 1; break; - case JFS_SUPERBLOCK_V2: + case JBD2_SUPERBLOCK_V2: journal->j_format_version = 2; break; default: @@ -1059,14 +1059,14 @@ static int load_superblock(journal_t *journal) /** - * int journal_load() - Read journal from disk. + * int jbd2_journal_load() - Read journal from disk. * @journal: Journal to act on. * * Given a journal_t structure which tells us which disk blocks contain * a journal, read the journal from disk to initialise the in-memory * structures. */ -int journal_load(journal_t *journal) +int jbd2_journal_load(journal_t *journal) { int err; journal_superblock_t *sb; @@ -1081,9 +1081,9 @@ int journal_load(journal_t *journal) if (journal->j_format_version >= 2) { if ((sb->s_feature_ro_compat & - ~cpu_to_be32(JFS_KNOWN_ROCOMPAT_FEATURES)) || + ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) || (sb->s_feature_incompat & - ~cpu_to_be32(JFS_KNOWN_INCOMPAT_FEATURES))) { + ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) { printk (KERN_WARNING "JBD: Unrecognised features on journal\n"); return -EINVAL; @@ -1093,13 +1093,13 @@ int journal_load(journal_t *journal) /* * Create a slab for this blocksize */ - err = journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize)); + err = jbd2_journal_create_jbd_slab(be32_to_cpu(sb->s_blocksize)); if (err) return err; /* Let the recovery code check whether it needs to recover any * data from the journal. */ - if (journal_recover(journal)) + if (jbd2_journal_recover(journal)) goto recovery_error; /* OK, we've finished with the dynamic journal bits: @@ -1108,8 +1108,8 @@ int journal_load(journal_t *journal) if (journal_reset(journal)) goto recovery_error; - journal->j_flags &= ~JFS_ABORT; - journal->j_flags |= JFS_LOADED; + journal->j_flags &= ~JBD2_ABORT; + journal->j_flags |= JBD2_LOADED; return 0; recovery_error: @@ -1118,20 +1118,20 @@ recovery_error: } /** - * void journal_destroy() - Release a journal_t structure. + * void jbd2_journal_destroy() - Release a journal_t structure. * @journal: Journal to act on. * * Release a journal_t structure once it is no longer in use by the * journaled object. */ -void journal_destroy(journal_t *journal) +void jbd2_journal_destroy(journal_t *journal) { /* Wait for the commit thread to wake up and die. 
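 *
 * Teardown order, for orientation (matches the body below):
 *
 *	1. journal_kill_thread()            sets JBD2_UNMOUNT, stops kjournald2
 *	2. jbd2_journal_commit_transaction() for any running transaction
 *	3. jbd2_log_do_checkpoint() until no checkpoint transactions remain
 *	4. jbd2_journal_update_superblock(journal, 1), then release the
 *	   revoke table, wbuf and the journal_t itself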
*/ journal_kill_thread(journal); /* Force a final log commit */ if (journal->j_running_transaction) - journal_commit_transaction(journal); + jbd2_journal_commit_transaction(journal); /* Force any old transactions to disk */ @@ -1139,7 +1139,7 @@ void journal_destroy(journal_t *journal) spin_lock(&journal->j_list_lock); while (journal->j_checkpoint_transactions != NULL) { spin_unlock(&journal->j_list_lock); - log_do_checkpoint(journal); + jbd2_log_do_checkpoint(journal); spin_lock(&journal->j_list_lock); } @@ -1152,21 +1152,21 @@ void journal_destroy(journal_t *journal) journal->j_tail = 0; journal->j_tail_sequence = ++journal->j_transaction_sequence; if (journal->j_sb_buffer) { - journal_update_superblock(journal, 1); + jbd2_journal_update_superblock(journal, 1); brelse(journal->j_sb_buffer); } if (journal->j_inode) iput(journal->j_inode); if (journal->j_revoke) - journal_destroy_revoke(journal); + jbd2_journal_destroy_revoke(journal); kfree(journal->j_wbuf); kfree(journal); } /** - *int journal_check_used_features () - Check if features specified are used. + *int jbd2_journal_check_used_features () - Check if features specified are used. * @journal: Journal to check. * @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount @@ -1176,7 +1176,7 @@ void journal_destroy(journal_t *journal) * features. Return true (non-zero) if it does. **/ -int journal_check_used_features (journal_t *journal, unsigned long compat, +int jbd2_journal_check_used_features (journal_t *journal, unsigned long compat, unsigned long ro, unsigned long incompat) { journal_superblock_t *sb; @@ -1197,7 +1197,7 @@ int journal_check_used_features (journal_t *journal, unsigned long compat, } /** - * int journal_check_available_features() - Check feature set in journalling layer + * int jbd2_journal_check_available_features() - Check feature set in journalling layer * @journal: Journal to check. * @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount @@ -1207,7 +1207,7 @@ int journal_check_used_features (journal_t *journal, unsigned long compat, * all of a given set of features on this journal. Return true * (non-zero) if it can. */ -int journal_check_available_features (journal_t *journal, unsigned long compat, +int jbd2_journal_check_available_features (journal_t *journal, unsigned long compat, unsigned long ro, unsigned long incompat) { journal_superblock_t *sb; @@ -1224,16 +1224,16 @@ int journal_check_available_features (journal_t *journal, unsigned long compat, if (journal->j_format_version != 2) return 0; - if ((compat & JFS_KNOWN_COMPAT_FEATURES) == compat && - (ro & JFS_KNOWN_ROCOMPAT_FEATURES) == ro && - (incompat & JFS_KNOWN_INCOMPAT_FEATURES) == incompat) + if ((compat & JBD2_KNOWN_COMPAT_FEATURES) == compat && + (ro & JBD2_KNOWN_ROCOMPAT_FEATURES) == ro && + (incompat & JBD2_KNOWN_INCOMPAT_FEATURES) == incompat) return 1; return 0; } /** - * int journal_set_features () - Mark a given journal feature in the superblock + * int jbd2_journal_set_features () - Mark a given journal feature in the superblock * @journal: Journal to act on. 
* @compat: bitmask of compatible features * @ro: bitmask of features that force read-only mount @@ -1244,15 +1244,15 @@ int journal_check_available_features (journal_t *journal, unsigned long compat, * */ -int journal_set_features (journal_t *journal, unsigned long compat, +int jbd2_journal_set_features (journal_t *journal, unsigned long compat, unsigned long ro, unsigned long incompat) { journal_superblock_t *sb; - if (journal_check_used_features(journal, compat, ro, incompat)) + if (jbd2_journal_check_used_features(journal, compat, ro, incompat)) return 1; - if (!journal_check_available_features(journal, compat, ro, incompat)) + if (!jbd2_journal_check_available_features(journal, compat, ro, incompat)) return 0; jbd_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n", @@ -1269,13 +1269,13 @@ int journal_set_features (journal_t *journal, unsigned long compat, /** - * int journal_update_format () - Update on-disk journal structure. + * int jbd2_journal_update_format () - Update on-disk journal structure. * @journal: Journal to act on. * * Given an initialised but unloaded journal struct, poke about in the * on-disk structure to update it to the most recent supported version. */ -int journal_update_format (journal_t *journal) +int jbd2_journal_update_format (journal_t *journal) { journal_superblock_t *sb; int err; @@ -1287,9 +1287,9 @@ int journal_update_format (journal_t *journal) sb = journal->j_superblock; switch (be32_to_cpu(sb->s_header.h_blocktype)) { - case JFS_SUPERBLOCK_V2: + case JBD2_SUPERBLOCK_V2: return 0; - case JFS_SUPERBLOCK_V1: + case JBD2_SUPERBLOCK_V1: return journal_convert_superblock_v1(journal, sb); default: break; @@ -1312,7 +1312,7 @@ static int journal_convert_superblock_v1(journal_t *journal, memset(&sb->s_feature_compat, 0, blocksize-offset); sb->s_nr_users = cpu_to_be32(1); - sb->s_header.h_blocktype = cpu_to_be32(JFS_SUPERBLOCK_V2); + sb->s_header.h_blocktype = cpu_to_be32(JBD2_SUPERBLOCK_V2); journal->j_format_version = 2; bh = journal->j_sb_buffer; @@ -1324,7 +1324,7 @@ static int journal_convert_superblock_v1(journal_t *journal, /** - * int journal_flush () - Flush journal + * int jbd2_journal_flush () - Flush journal * @journal: Journal to act on. * * Flush all data for a given journal to disk and empty the journal. @@ -1332,7 +1332,7 @@ static int journal_convert_superblock_v1(journal_t *journal, * recovery does not need to happen on remount. */ -int journal_flush(journal_t *journal) +int jbd2_journal_flush(journal_t *journal) { int err = 0; transaction_t *transaction = NULL; @@ -1343,7 +1343,7 @@ int journal_flush(journal_t *journal) /* Force everything buffered to the log... 
*/ if (journal->j_running_transaction) { transaction = journal->j_running_transaction; - __log_start_commit(journal, transaction->t_tid); + __jbd2_log_start_commit(journal, transaction->t_tid); } else if (journal->j_committing_transaction) transaction = journal->j_committing_transaction; @@ -1352,7 +1352,7 @@ int journal_flush(journal_t *journal) tid_t tid = transaction->t_tid; spin_unlock(&journal->j_state_lock); - log_wait_commit(journal, tid); + jbd2_log_wait_commit(journal, tid); } else { spin_unlock(&journal->j_state_lock); } @@ -1361,11 +1361,11 @@ int journal_flush(journal_t *journal) spin_lock(&journal->j_list_lock); while (!err && journal->j_checkpoint_transactions != NULL) { spin_unlock(&journal->j_list_lock); - err = log_do_checkpoint(journal); + err = jbd2_log_do_checkpoint(journal); spin_lock(&journal->j_list_lock); } spin_unlock(&journal->j_list_lock); - cleanup_journal_tail(journal); + jbd2_cleanup_journal_tail(journal); /* Finally, mark the journal as really needing no recovery. * This sets s_start==0 in the underlying superblock, which is @@ -1376,7 +1376,7 @@ int journal_flush(journal_t *journal) old_tail = journal->j_tail; journal->j_tail = 0; spin_unlock(&journal->j_state_lock); - journal_update_superblock(journal, 1); + jbd2_journal_update_superblock(journal, 1); spin_lock(&journal->j_state_lock); journal->j_tail = old_tail; @@ -1390,24 +1390,24 @@ int journal_flush(journal_t *journal) } /** - * int journal_wipe() - Wipe journal contents + * int jbd2_journal_wipe() - Wipe journal contents * @journal: Journal to act on. * @write: flag (see below) * * Wipe out all of the contents of a journal, safely. This will produce * a warning if the journal contains any valid recovery information. - * Must be called between journal_init_*() and journal_load(). + * Must be called between journal_init_*() and jbd2_journal_load(). * * If 'write' is non-zero, then we wipe out the journal on disk; otherwise * we merely suppress recovery. */ -int journal_wipe(journal_t *journal, int write) +int jbd2_journal_wipe(journal_t *journal, int write) { journal_superblock_t *sb; int err = 0; - J_ASSERT (!(journal->j_flags & JFS_LOADED)); + J_ASSERT (!(journal->j_flags & JBD2_LOADED)); err = load_superblock(journal); if (err) @@ -1421,9 +1421,9 @@ int journal_wipe(journal_t *journal, int write) printk (KERN_WARNING "JBD: %s recovery information on journal\n", write ? "Clearing" : "Ignoring"); - err = journal_skip_recovery(journal); + err = jbd2_journal_skip_recovery(journal); if (write) - journal_update_superblock(journal, 1); + jbd2_journal_update_superblock(journal, 1); no_recovery: return err; @@ -1459,22 +1459,22 @@ static const char *journal_dev_name(journal_t *journal, char *buffer) * Aborts hard --- we mark the abort as occurred, but do _nothing_ else, * and don't attempt to make any other journal updates. 
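 *
 * Call-chain sketch (as wired up in this file):
 *
 *	jbd2_journal_abort(journal, errno)
 *	  -> __journal_abort_soft()         records j_errno, then writes the
 *	                                    superblock if errno != 0
 *	    -> __jbd2_journal_abort_hard()  sets JBD2_ABORT and starts a
 *	                                    final commit of the running
 *	                                    transaction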
*/ -void __journal_abort_hard(journal_t *journal) +void __jbd2_journal_abort_hard(journal_t *journal) { transaction_t *transaction; char b[BDEVNAME_SIZE]; - if (journal->j_flags & JFS_ABORT) + if (journal->j_flags & JBD2_ABORT) return; printk(KERN_ERR "Aborting journal on device %s.\n", journal_dev_name(journal, b)); spin_lock(&journal->j_state_lock); - journal->j_flags |= JFS_ABORT; + journal->j_flags |= JBD2_ABORT; transaction = journal->j_running_transaction; if (transaction) - __log_start_commit(journal, transaction->t_tid); + __jbd2_log_start_commit(journal, transaction->t_tid); spin_unlock(&journal->j_state_lock); } @@ -1482,20 +1482,20 @@ void __journal_abort_hard(journal_t *journal) * but don't do any other IO. */ static void __journal_abort_soft (journal_t *journal, int errno) { - if (journal->j_flags & JFS_ABORT) + if (journal->j_flags & JBD2_ABORT) return; if (!journal->j_errno) journal->j_errno = errno; - __journal_abort_hard(journal); + __jbd2_journal_abort_hard(journal); if (errno) - journal_update_superblock(journal, 1); + jbd2_journal_update_superblock(journal, 1); } /** - * void journal_abort () - Shutdown the journal immediately. + * void jbd2_journal_abort () - Shutdown the journal immediately. * @journal: the journal to shutdown. * @errno: an error number to record in the journal indicating * the reason for the shutdown. @@ -1504,7 +1504,7 @@ static void __journal_abort_soft (journal_t *journal, int errno) * journal (not of a single transaction). This operation cannot be * undone without closing and reopening the journal. * - * The journal_abort function is intended to support higher level error + * The jbd2_journal_abort function is intended to support higher level error * recovery mechanisms such as the ext2/ext3 remount-readonly error * mode. * @@ -1520,13 +1520,13 @@ static void __journal_abort_soft (journal_t *journal, int errno) * * Any attempt to get a new transaction handle on a journal which is in * ABORT state will just result in an -EROFS error return. A - * journal_stop on an existing handle will return -EIO if we have + * jbd2_journal_stop on an existing handle will return -EIO if we have * entered abort state during the update. * * Recursive transactions are not disturbed by journal abort until the - * final journal_stop, which will receive the -EIO error. + * final jbd2_journal_stop, which will receive the -EIO error. * - * Finally, the journal_abort call allows the caller to supply an errno + * Finally, the jbd2_journal_abort call allows the caller to supply an errno * which will be recorded (if possible) in the journal superblock. This * allows a client to record failure conditions in the middle of a * transaction without having to complete the transaction to record the @@ -1540,28 +1540,28 @@ static void __journal_abort_soft (journal_t *journal, int errno) * */ -void journal_abort(journal_t *journal, int errno) +void jbd2_journal_abort(journal_t *journal, int errno) { __journal_abort_soft(journal, errno); } /** - * int journal_errno () - returns the journal's error state. + * int jbd2_journal_errno () - returns the journal's error state. * @journal: journal to examine. * - * This is the errno numbet set with journal_abort(), the last + * This is the errno numbet set with jbd2_journal_abort(), the last * time the journal was mounted - if the journal was stopped * without calling abort this will be 0. * * If the journal has been aborted on this mount time -EROFS will * be returned. 
*/ -int journal_errno(journal_t *journal) +int jbd2_journal_errno(journal_t *journal) { int err; spin_lock(&journal->j_state_lock); - if (journal->j_flags & JFS_ABORT) + if (journal->j_flags & JBD2_ABORT) err = -EROFS; else err = journal->j_errno; @@ -1570,18 +1570,18 @@ int journal_errno(journal_t *journal) } /** - * int journal_clear_err () - clears the journal's error state + * int jbd2_journal_clear_err () - clears the journal's error state * @journal: journal to act on. * * An error must be cleared or Acked to take a FS out of readonly * mode. */ -int journal_clear_err(journal_t *journal) +int jbd2_journal_clear_err(journal_t *journal) { int err = 0; spin_lock(&journal->j_state_lock); - if (journal->j_flags & JFS_ABORT) + if (journal->j_flags & JBD2_ABORT) err = -EROFS; else journal->j_errno = 0; @@ -1590,21 +1590,21 @@ int journal_clear_err(journal_t *journal) } /** - * void journal_ack_err() - Ack journal err. + * void jbd2_journal_ack_err() - Ack journal err. * @journal: journal to act on. * * An error must be cleared or Acked to take a FS out of readonly * mode. */ -void journal_ack_err(journal_t *journal) +void jbd2_journal_ack_err(journal_t *journal) { spin_lock(&journal->j_state_lock); if (journal->j_errno) - journal->j_flags |= JFS_ACK_ERR; + journal->j_flags |= JBD2_ACK_ERR; spin_unlock(&journal->j_state_lock); } -int journal_blocks_per_page(struct inode *inode) +int jbd2_journal_blocks_per_page(struct inode *inode) { return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); } @@ -1613,7 +1613,7 @@ int journal_blocks_per_page(struct inode *inode) * Simple support for retrying memory allocations. Introduced to help to * debug different VM deadlock avoidance strategies. */ -void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry) +void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry) { return kmalloc(size, flags | (retry ? 
__GFP_NOFAIL : 0)); } @@ -1634,7 +1634,7 @@ static const char *jbd_slab_names[JBD_MAX_SLABS] = { "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k" }; -static void journal_destroy_jbd_slabs(void) +static void jbd2_journal_destroy_jbd_slabs(void) { int i; @@ -1645,7 +1645,7 @@ static void journal_destroy_jbd_slabs(void) } } -static int journal_create_jbd_slab(size_t slab_size) +static int jbd2_journal_create_jbd_slab(size_t slab_size) { int i = JBD_SLAB_INDEX(slab_size); @@ -1671,7 +1671,7 @@ static int journal_create_jbd_slab(size_t slab_size) return 0; } -void * jbd_slab_alloc(size_t size, gfp_t flags) +void * jbd2_slab_alloc(size_t size, gfp_t flags) { int idx; @@ -1680,7 +1680,7 @@ void * jbd_slab_alloc(size_t size, gfp_t flags) return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); } -void jbd_slab_free(void *ptr, size_t size) +void jbd2_slab_free(void *ptr, size_t size) { int idx; @@ -1692,35 +1692,35 @@ void jbd_slab_free(void *ptr, size_t size) /* * Journal_head storage management */ -static kmem_cache_t *journal_head_cache; +static kmem_cache_t *jbd2_journal_head_cache; #ifdef CONFIG_JBD_DEBUG static atomic_t nr_journal_heads = ATOMIC_INIT(0); #endif -static int journal_init_journal_head_cache(void) +static int journal_init_jbd2_journal_head_cache(void) { int retval; - J_ASSERT(journal_head_cache == 0); - journal_head_cache = kmem_cache_create("journal_head", + J_ASSERT(jbd2_journal_head_cache == 0); + jbd2_journal_head_cache = kmem_cache_create("journal_head", sizeof(struct journal_head), 0, /* offset */ 0, /* flags */ NULL, /* ctor */ NULL); /* dtor */ retval = 0; - if (journal_head_cache == 0) { + if (jbd2_journal_head_cache == 0) { retval = -ENOMEM; printk(KERN_EMERG "JBD: no memory for journal_head cache\n"); } return retval; } -static void journal_destroy_journal_head_cache(void) +static void jbd2_journal_destroy_jbd2_journal_head_cache(void) { - J_ASSERT(journal_head_cache != NULL); - kmem_cache_destroy(journal_head_cache); - journal_head_cache = NULL; + J_ASSERT(jbd2_journal_head_cache != NULL); + kmem_cache_destroy(jbd2_journal_head_cache); + jbd2_journal_head_cache = NULL; } /* @@ -1734,7 +1734,7 @@ static struct journal_head *journal_alloc_journal_head(void) #ifdef CONFIG_JBD_DEBUG atomic_inc(&nr_journal_heads); #endif - ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); + ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); if (ret == 0) { jbd_debug(1, "out of memory for journal_head\n"); if (time_after(jiffies, last_warning + 5*HZ)) { @@ -1744,7 +1744,7 @@ static struct journal_head *journal_alloc_journal_head(void) } while (ret == 0) { yield(); - ret = kmem_cache_alloc(journal_head_cache, GFP_NOFS); + ret = kmem_cache_alloc(jbd2_journal_head_cache, GFP_NOFS); } } return ret; @@ -1756,7 +1756,7 @@ static void journal_free_journal_head(struct journal_head *jh) atomic_dec(&nr_journal_heads); memset(jh, JBD_POISON_FREE, sizeof(*jh)); #endif - kmem_cache_free(journal_head_cache, jh); + kmem_cache_free(jbd2_journal_head_cache, jh); } /* @@ -1775,22 +1775,22 @@ static void journal_free_journal_head(struct journal_head *jh) * * A journal_head may be detached from its buffer_head when the journal_head's * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL. - * Various places in JBD call journal_remove_journal_head() to indicate that the + * Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the * journal_head can be dropped if needed. 
* * Various places in the kernel want to attach a journal_head to a buffer_head * _before_ attaching the journal_head to a transaction. To protect the - * journal_head in this situation, journal_add_journal_head elevates the + * journal_head in this situation, jbd2_journal_add_journal_head elevates the * journal_head's b_jcount refcount by one. The caller must call - * journal_put_journal_head() to undo this. + * jbd2_journal_put_journal_head() to undo this. * * So the typical usage would be: * * (Attach a journal_head if needed. Increments b_jcount) - * struct journal_head *jh = journal_add_journal_head(bh); + * struct journal_head *jh = jbd2_journal_add_journal_head(bh); * ... * jh->b_transaction = xxx; - * journal_put_journal_head(jh); + * jbd2_journal_put_journal_head(jh); * * Now, the journal_head's b_jcount is zero, but it is safe from being released * because it has a non-zero b_transaction. @@ -1802,7 +1802,7 @@ static void journal_free_journal_head(struct journal_head *jh) * Doesn't need the journal lock. * May sleep. */ -struct journal_head *journal_add_journal_head(struct buffer_head *bh) +struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) { struct journal_head *jh; struct journal_head *new_jh = NULL; @@ -1845,7 +1845,7 @@ repeat: * Grab a ref against this buffer_head's journal_head. If it ended up not * having a journal_head, return NULL */ -struct journal_head *journal_grab_journal_head(struct buffer_head *bh) +struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh) { struct journal_head *jh = NULL; @@ -1877,13 +1877,13 @@ static void __journal_remove_journal_head(struct buffer_head *bh) printk(KERN_WARNING "%s: freeing " "b_frozen_data\n", __FUNCTION__); - jbd_slab_free(jh->b_frozen_data, bh->b_size); + jbd2_slab_free(jh->b_frozen_data, bh->b_size); } if (jh->b_committed_data) { printk(KERN_WARNING "%s: freeing " "b_committed_data\n", __FUNCTION__); - jbd_slab_free(jh->b_committed_data, bh->b_size); + jbd2_slab_free(jh->b_committed_data, bh->b_size); } bh->b_private = NULL; jh->b_bh = NULL; /* debug, really */ @@ -1897,7 +1897,7 @@ static void __journal_remove_journal_head(struct buffer_head *bh) } /* - * journal_remove_journal_head(): if the buffer isn't attached to a transaction + * jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction * and has a zero b_jcount then remove and release its journal_head. If we did * see that the buffer is not used by any transaction we also "logically" * decrement ->b_count. @@ -1905,11 +1905,11 @@ static void __journal_remove_journal_head(struct buffer_head *bh) * We in fact take an additional increment on ->b_count as a convenience, * because the caller usually wants to do additional things with the bh * after calling here. - * The caller of journal_remove_journal_head() *must* run __brelse(bh) at some + * The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some * time. Once the caller has run __brelse(), the buffer is eligible for * reaping by try_to_free_buffers(). */ -void journal_remove_journal_head(struct buffer_head *bh) +void jbd2_journal_remove_journal_head(struct buffer_head *bh) { jbd_lock_bh_journal_head(bh); __journal_remove_journal_head(bh); @@ -1920,7 +1920,7 @@ void journal_remove_journal_head(struct buffer_head *bh) * Drop a reference on the passed journal_head. If it fell to zero then try to * release the journal_head from the buffer_head. 
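The refcounting contract described above can be made concrete with a short sketch. The wrapper below is hypothetical (it is not part of this patch, and locking is elided); only the jbd2_journal_add_journal_head()/jbd2_journal_put_journal_head() calls and the b_jcount/b_transaction semantics come from the code being renamed here:

static void attach_buffer_to_transaction(struct buffer_head *bh,
					 transaction_t *transaction)
{
	/* Attaches a journal_head if needed and elevates b_jcount. */
	struct journal_head *jh = jbd2_journal_add_journal_head(bh);

	/* A non-NULL b_transaction keeps jh alive after b_jcount drops. */
	jh->b_transaction = transaction;

	/* Undo the b_jcount elevation taken by the add above. */
	jbd2_journal_put_journal_head(jh);
}
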
*/ -void journal_put_journal_head(struct journal_head *jh) +void jbd2_journal_put_journal_head(struct journal_head *jh) { struct buffer_head *bh = jh2bh(jh); @@ -1938,8 +1938,8 @@ void journal_put_journal_head(struct journal_head *jh) * /proc tunables */ #if defined(CONFIG_JBD_DEBUG) -int journal_enable_debug; -EXPORT_SYMBOL(journal_enable_debug); +int jbd2_journal_enable_debug; +EXPORT_SYMBOL(jbd2_journal_enable_debug); #endif #if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS) @@ -1951,7 +1951,7 @@ static int read_jbd_debug(char *page, char **start, off_t off, { int ret; - ret = sprintf(page + off, "%d\n", journal_enable_debug); + ret = sprintf(page + off, "%d\n", jbd2_journal_enable_debug); *eof = 1; return ret; } @@ -1966,11 +1966,11 @@ static int write_jbd_debug(struct file *file, const char __user *buffer, if (copy_from_user(buf, buffer, count)) return -EFAULT; buf[ARRAY_SIZE(buf) - 1] = '\0'; - journal_enable_debug = simple_strtoul(buf, NULL, 10); + jbd2_journal_enable_debug = simple_strtoul(buf, NULL, 10); return count; } -#define JBD_PROC_NAME "sys/fs/jbd-debug" +#define JBD_PROC_NAME "sys/fs/jbd2-debug" static void __init create_jbd_proc_entry(void) { @@ -1982,7 +1982,7 @@ static void __init create_jbd_proc_entry(void) } } -static void __exit remove_jbd_proc_entry(void) +static void __exit jbd2_remove_jbd_proc_entry(void) { if (proc_jbd_debug) remove_proc_entry(JBD_PROC_NAME, NULL); @@ -1991,31 +1991,31 @@ static void __exit remove_jbd_proc_entry(void) #else #define create_jbd_proc_entry() do {} while (0) -#define remove_jbd_proc_entry() do {} while (0) +#define jbd2_remove_jbd_proc_entry() do {} while (0) #endif -kmem_cache_t *jbd_handle_cache; +kmem_cache_t *jbd2_handle_cache; static int __init journal_init_handle_cache(void) { - jbd_handle_cache = kmem_cache_create("journal_handle", + jbd2_handle_cache = kmem_cache_create("journal_handle", sizeof(handle_t), 0, /* offset */ 0, /* flags */ NULL, /* ctor */ NULL); /* dtor */ - if (jbd_handle_cache == NULL) { + if (jbd2_handle_cache == NULL) { printk(KERN_EMERG "JBD: failed to create handle cache\n"); return -ENOMEM; } return 0; } -static void journal_destroy_handle_cache(void) +static void jbd2_journal_destroy_handle_cache(void) { - if (jbd_handle_cache) - kmem_cache_destroy(jbd_handle_cache); + if (jbd2_handle_cache) + kmem_cache_destroy(jbd2_handle_cache); } /* @@ -2026,20 +2026,20 @@ static int __init journal_init_caches(void) { int ret; - ret = journal_init_revoke_caches(); + ret = jbd2_journal_init_revoke_caches(); if (ret == 0) - ret = journal_init_journal_head_cache(); + ret = journal_init_jbd2_journal_head_cache(); if (ret == 0) ret = journal_init_handle_cache(); return ret; } -static void journal_destroy_caches(void) +static void jbd2_journal_destroy_caches(void) { - journal_destroy_revoke_caches(); - journal_destroy_journal_head_cache(); - journal_destroy_handle_cache(); - journal_destroy_jbd_slabs(); + jbd2_journal_destroy_revoke_caches(); + jbd2_journal_destroy_jbd2_journal_head_cache(); + jbd2_journal_destroy_handle_cache(); + jbd2_journal_destroy_jbd_slabs(); } static int __init journal_init(void) @@ -2050,7 +2050,7 @@ static int __init journal_init(void) ret = journal_init_caches(); if (ret != 0) - journal_destroy_caches(); + jbd2_journal_destroy_caches(); create_jbd_proc_entry(); return ret; } @@ -2062,8 +2062,8 @@ static void __exit journal_exit(void) if (n) printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); #endif - remove_jbd_proc_entry(); - journal_destroy_caches(); + jbd2_remove_jbd_proc_entry(); + 
jbd2_journal_destroy_caches(); } MODULE_LICENSE("GPL"); diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 11563fe2a52b..b2012d112432 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -18,7 +18,7 @@ #else #include <linux/time.h> #include <linux/fs.h> -#include <linux/jbd.h> +#include <linux/jbd2.h> #include <linux/errno.h> #include <linux/slab.h> #endif @@ -86,7 +86,7 @@ static int do_readahead(journal_t *journal, unsigned int start) nbufs = 0; for (next = start; next < max; next++) { - err = journal_bmap(journal, next, &blocknr); + err = jbd2_journal_bmap(journal, next, &blocknr); if (err) { printk (KERN_ERR "JBD: bad block at offset %u\n", @@ -142,7 +142,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal, return -EIO; } - err = journal_bmap(journal, offset, &blocknr); + err = jbd2_journal_bmap(journal, offset, &blocknr); if (err) { printk (KERN_ERR "JBD: bad block at offset %u\n", @@ -191,10 +191,10 @@ static int count_tags(struct buffer_head *bh, int size) nr++; tagp += sizeof(journal_block_tag_t); - if (!(tag->t_flags & cpu_to_be32(JFS_FLAG_SAME_UUID))) + if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID))) tagp += 16; - if (tag->t_flags & cpu_to_be32(JFS_FLAG_LAST_TAG)) + if (tag->t_flags & cpu_to_be32(JBD2_FLAG_LAST_TAG)) break; } @@ -210,7 +210,7 @@ do { \ } while (0) /** - * journal_recover - recovers a on-disk journal + * jbd2_journal_recover - recovers an on-disk journal * @journal: the journal to recover * * The primary function for recovering the log contents when mounting a @@ -221,7 +221,7 @@ * blocks. In the third and final pass, we replay any un-revoked blocks * in the log. */ -int journal_recover(journal_t *journal) +int jbd2_journal_recover(journal_t *journal) { int err; journal_superblock_t * sb; @@ -260,13 +260,13 @@ int journal_recover(journal_t *journal) * any existing commit records in the log. */ journal->j_transaction_sequence = ++info.end_transaction; - journal_clear_revoke(journal); + jbd2_journal_clear_revoke(journal); sync_blockdev(journal->j_fs_dev); return err; } /** - * journal_skip_recovery - Start journal and wipe exiting records + * jbd2_journal_skip_recovery - Start journal and wipe existing records * @journal: journal to startup * * Locate any valid recovery information from the journal and set up the @@ -278,7 +278,7 @@ * much recovery information is being erased, and to let us initialise * the journal transaction sequence numbers to the next unused ID. */ -int journal_skip_recovery(journal_t *journal) +int jbd2_journal_skip_recovery(journal_t *journal) { int err; journal_superblock_t * sb; @@ -387,7 +387,7 @@ static int do_one_pass(journal_t *journal, tmp = (journal_header_t *)bh->b_data; - if (tmp->h_magic != cpu_to_be32(JFS_MAGIC_NUMBER)) { + if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) { brelse(bh); break; } @@ -407,7 +407,7 @@ static int do_one_pass(journal_t *journal, * to do with it? That depends on the pass... */ switch(blocktype) { - case JFS_DESCRIPTOR_BLOCK: + case JBD2_DESCRIPTOR_BLOCK: /* If it is a valid descriptor block, replay it * in pass REPLAY; otherwise, just skip over the * blocks it describes. */ @@ -451,7 +451,7 @@ static int do_one_pass(journal_t *journal, /* If the block has been * revoked, then we're all done * here.
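As a hedged sketch of how a mount path would drive the two renamed recovery entry points above (the wrapper and its wipe flag are illustrative, not part of this patch):

static int recover_journal_at_mount(journal_t *journal, int wipe)
{
	if (wipe)
		/* Discard the log; only report what is being erased. */
		return jbd2_journal_skip_recovery(journal);

	/* Scan for log end, collect revokes, replay un-revoked blocks. */
	return jbd2_journal_recover(journal);
}
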
*/ - if (journal_test_revoke + if (jbd2_journal_test_revoke (journal, blocknr, next_commit_ID)) { brelse(obh); @@ -477,9 +477,9 @@ static int do_one_pass(journal_t *journal, lock_buffer(nbh); memcpy(nbh->b_data, obh->b_data, journal->j_blocksize); - if (flags & JFS_FLAG_ESCAPE) { + if (flags & JBD2_FLAG_ESCAPE) { *((__be32 *)bh->b_data) = - cpu_to_be32(JFS_MAGIC_NUMBER); + cpu_to_be32(JBD2_MAGIC_NUMBER); } BUFFER_TRACE(nbh, "marking dirty"); @@ -495,17 +495,17 @@ static int do_one_pass(journal_t *journal, skip_write: tagp += sizeof(journal_block_tag_t); - if (!(flags & JFS_FLAG_SAME_UUID)) + if (!(flags & JBD2_FLAG_SAME_UUID)) tagp += 16; - if (flags & JFS_FLAG_LAST_TAG) + if (flags & JBD2_FLAG_LAST_TAG) break; } brelse(bh); continue; - case JFS_COMMIT_BLOCK: + case JBD2_COMMIT_BLOCK: /* Found an expected commit block: not much to * do other than move on to the next sequence * number. */ @@ -513,7 +513,7 @@ static int do_one_pass(journal_t *journal, next_commit_ID++; continue; - case JFS_REVOKE_BLOCK: + case JBD2_REVOKE_BLOCK: /* If we aren't in the REVOKE pass, then we can * just skip over this block. */ if (pass != PASS_REVOKE) { @@ -570,11 +570,11 @@ static int do_one_pass(journal_t *journal, static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, tid_t sequence, struct recovery_info *info) { - journal_revoke_header_t *header; + jbd2_journal_revoke_header_t *header; int offset, max; - header = (journal_revoke_header_t *) bh->b_data; - offset = sizeof(journal_revoke_header_t); + header = (jbd2_journal_revoke_header_t *) bh->b_data; + offset = sizeof(jbd2_journal_revoke_header_t); max = be32_to_cpu(header->r_count); while (offset < max) { @@ -583,7 +583,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); offset += 4; - err = journal_set_revoke(journal, blocknr, sequence); + err = jbd2_journal_set_revoke(journal, blocknr, sequence); if (err) return err; ++info->nr_revokes; diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index c532429d8d9b..2fccddc7acad 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -62,7 +62,7 @@ #else #include <linux/time.h> #include <linux/fs.h> -#include <linux/jbd.h> +#include <linux/jbd2.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/list.h> @@ -70,14 +70,14 @@ #include <linux/init.h> #endif -static kmem_cache_t *revoke_record_cache; -static kmem_cache_t *revoke_table_cache; +static kmem_cache_t *jbd2_revoke_record_cache; +static kmem_cache_t *jbd2_revoke_table_cache; /* Each revoke record represents one single revoked block. During journal replay, this involves recording the transaction ID of the last transaction to revoke this block. */ -struct jbd_revoke_record_s +struct jbd2_revoke_record_s { struct list_head hash; tid_t sequence; /* Used for recovery only */ @@ -86,7 +86,7 @@ struct jbd_revoke_record_s /* The revoke table is just a simple hash table of revoke records. */ -struct jbd_revoke_table_s +struct jbd2_revoke_table_s { /* It is conceivable that we might want a larger hash table * for recovery. Must be a power of two. 
*/ @@ -99,7 +99,7 @@ struct jbd_revoke_table_s #ifdef __KERNEL__ static void write_one_revoke_record(journal_t *, transaction_t *, struct journal_head **, int *, - struct jbd_revoke_record_s *); + struct jbd2_revoke_record_s *); static void flush_descriptor(journal_t *, struct journal_head *, int); #endif @@ -108,7 +108,7 @@ static void flush_descriptor(journal_t *, struct journal_head *, int); /* Borrowed from buffer.c: this is a tried and tested block hash function */ static inline int hash(journal_t *journal, unsigned long block) { - struct jbd_revoke_table_s *table = journal->j_revoke; + struct jbd2_revoke_table_s *table = journal->j_revoke; int hash_shift = table->hash_shift; return ((block << (hash_shift - 6)) ^ @@ -120,10 +120,10 @@ static int insert_revoke_hash(journal_t *journal, unsigned long blocknr, tid_t seq) { struct list_head *hash_list; - struct jbd_revoke_record_s *record; + struct jbd2_revoke_record_s *record; repeat: - record = kmem_cache_alloc(revoke_record_cache, GFP_NOFS); + record = kmem_cache_alloc(jbd2_revoke_record_cache, GFP_NOFS); if (!record) goto oom; @@ -145,57 +145,57 @@ oom: /* Find a revoke record in the journal's hash table. */ -static struct jbd_revoke_record_s *find_revoke_record(journal_t *journal, +static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal, unsigned long blocknr) { struct list_head *hash_list; - struct jbd_revoke_record_s *record; + struct jbd2_revoke_record_s *record; hash_list = &journal->j_revoke->hash_table[hash(journal, blocknr)]; spin_lock(&journal->j_revoke_lock); - record = (struct jbd_revoke_record_s *) hash_list->next; + record = (struct jbd2_revoke_record_s *) hash_list->next; while (&(record->hash) != hash_list) { if (record->blocknr == blocknr) { spin_unlock(&journal->j_revoke_lock); return record; } - record = (struct jbd_revoke_record_s *) record->hash.next; + record = (struct jbd2_revoke_record_s *) record->hash.next; } spin_unlock(&journal->j_revoke_lock); return NULL; } -int __init journal_init_revoke_caches(void) +int __init jbd2_journal_init_revoke_caches(void) { - revoke_record_cache = kmem_cache_create("revoke_record", - sizeof(struct jbd_revoke_record_s), + jbd2_revoke_record_cache = kmem_cache_create("revoke_record", + sizeof(struct jbd2_revoke_record_s), 0, SLAB_HWCACHE_ALIGN, NULL, NULL); - if (revoke_record_cache == 0) + if (jbd2_revoke_record_cache == 0) return -ENOMEM; - revoke_table_cache = kmem_cache_create("revoke_table", - sizeof(struct jbd_revoke_table_s), + jbd2_revoke_table_cache = kmem_cache_create("revoke_table", + sizeof(struct jbd2_revoke_table_s), 0, 0, NULL, NULL); - if (revoke_table_cache == 0) { - kmem_cache_destroy(revoke_record_cache); - revoke_record_cache = NULL; + if (jbd2_revoke_table_cache == 0) { + kmem_cache_destroy(jbd2_revoke_record_cache); + jbd2_revoke_record_cache = NULL; return -ENOMEM; } return 0; } -void journal_destroy_revoke_caches(void) +void jbd2_journal_destroy_revoke_caches(void) { - kmem_cache_destroy(revoke_record_cache); - revoke_record_cache = NULL; - kmem_cache_destroy(revoke_table_cache); - revoke_table_cache = NULL; + kmem_cache_destroy(jbd2_revoke_record_cache); + jbd2_revoke_record_cache = NULL; + kmem_cache_destroy(jbd2_revoke_table_cache); + jbd2_revoke_table_cache = NULL; } /* Initialise the revoke table for a given journal to a given size. 
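jbd2_journal_init_revoke() below derives its hash shift from the table size with a simple loop. Pulled out as a standalone illustration (the helper name is invented), it is just an integer log2 of a power-of-two hash_size:

static int revoke_hash_shift(int hash_size)
{
	int shift = 0;
	int tmp = hash_size;

	/* hash_size must be a power of two; shift ends up as log2 of it. */
	while ((tmp >>= 1UL) != 0UL)
		shift++;

	return shift;	/* e.g. hash_size == 256 gives shift == 8 */
}
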
*/ -int journal_init_revoke(journal_t *journal, int hash_size) +int jbd2_journal_init_revoke(journal_t *journal, int hash_size) { int shift, tmp; @@ -206,7 +206,7 @@ int journal_init_revoke(journal_t *journal, int hash_size) while((tmp >>= 1UL) != 0UL) shift++; - journal->j_revoke_table[0] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); + journal->j_revoke_table[0] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); if (!journal->j_revoke_table[0]) return -ENOMEM; journal->j_revoke = journal->j_revoke_table[0]; @@ -221,7 +221,7 @@ int journal_init_revoke(journal_t *journal, int hash_size) journal->j_revoke->hash_table = kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); if (!journal->j_revoke->hash_table) { - kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); + kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); journal->j_revoke = NULL; return -ENOMEM; } @@ -229,10 +229,10 @@ int journal_init_revoke(journal_t *journal, int hash_size) for (tmp = 0; tmp < hash_size; tmp++) INIT_LIST_HEAD(&journal->j_revoke->hash_table[tmp]); - journal->j_revoke_table[1] = kmem_cache_alloc(revoke_table_cache, GFP_KERNEL); + journal->j_revoke_table[1] = kmem_cache_alloc(jbd2_revoke_table_cache, GFP_KERNEL); if (!journal->j_revoke_table[1]) { kfree(journal->j_revoke_table[0]->hash_table); - kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); + kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); return -ENOMEM; } @@ -249,8 +249,8 @@ int journal_init_revoke(journal_t *journal, int hash_size) kmalloc(hash_size * sizeof(struct list_head), GFP_KERNEL); if (!journal->j_revoke->hash_table) { kfree(journal->j_revoke_table[0]->hash_table); - kmem_cache_free(revoke_table_cache, journal->j_revoke_table[0]); - kmem_cache_free(revoke_table_cache, journal->j_revoke_table[1]); + kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[0]); + kmem_cache_free(jbd2_revoke_table_cache, journal->j_revoke_table[1]); journal->j_revoke = NULL; return -ENOMEM; } @@ -265,9 +265,9 @@ int journal_init_revoke(journal_t *journal, int hash_size) /* Destroy a journal's revoke table. The table must already be empty! */ -void journal_destroy_revoke(journal_t *journal) +void jbd2_journal_destroy_revoke(journal_t *journal) { - struct jbd_revoke_table_s *table; + struct jbd2_revoke_table_s *table; struct list_head *hash_list; int i; @@ -281,7 +281,7 @@ void journal_destroy_revoke(journal_t *journal) } kfree(table->hash_table); - kmem_cache_free(revoke_table_cache, table); + kmem_cache_free(jbd2_revoke_table_cache, table); journal->j_revoke = NULL; table = journal->j_revoke_table[1]; @@ -294,7 +294,7 @@ void journal_destroy_revoke(journal_t *journal) } kfree(table->hash_table); - kmem_cache_free(revoke_table_cache, table); + kmem_cache_free(jbd2_revoke_table_cache, table); journal->j_revoke = NULL; } @@ -302,7 +302,7 @@ void journal_destroy_revoke(journal_t *journal) #ifdef __KERNEL__ /* - * journal_revoke: revoke a given buffer_head from the journal. This + * jbd2_journal_revoke: revoke a given buffer_head from the journal. This * prevents the block from being replayed during recovery if we take a * crash after this current transaction commits. Any subsequent * metadata writes of the buffer in this transaction cancel the @@ -314,18 +314,18 @@ void journal_destroy_revoke(journal_t *journal) * revoke before clearing the block bitmap when we are deleting * metadata.
* - * Revoke performs a journal_forget on any buffer_head passed in as a + * Revoke performs a jbd2_journal_forget on any buffer_head passed in as a * parameter, but does _not_ forget the buffer_head if the bh was only * found implicitly. * * bh_in may not be a journalled buffer - it may have come off * the hash tables without an attached journal_head. * - * If bh_in is non-zero, journal_revoke() will decrement its b_count + * If bh_in is non-zero, jbd2_journal_revoke() will decrement its b_count * by one. */ -int journal_revoke(handle_t *handle, unsigned long blocknr, +int jbd2_journal_revoke(handle_t *handle, unsigned long blocknr, struct buffer_head *bh_in) { struct buffer_head *bh = NULL; @@ -338,7 +338,7 @@ int journal_revoke(handle_t *handle, unsigned long blocknr, BUFFER_TRACE(bh_in, "enter"); journal = handle->h_transaction->t_journal; - if (!journal_set_features(journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)){ + if (!jbd2_journal_set_features(journal, 0, 0, JBD2_FEATURE_INCOMPAT_REVOKE)){ J_ASSERT (!"Cannot set revoke feature!"); return -EINVAL; } @@ -386,8 +386,8 @@ int journal_revoke(handle_t *handle, unsigned long blocknr, set_buffer_revoked(bh); set_buffer_revokevalid(bh); if (bh_in) { - BUFFER_TRACE(bh_in, "call journal_forget"); - journal_forget(handle, bh_in); + BUFFER_TRACE(bh_in, "call jbd2_journal_forget"); + jbd2_journal_forget(handle, bh_in); } else { BUFFER_TRACE(bh, "call brelse"); __brelse(bh); @@ -403,7 +403,7 @@ int journal_revoke(handle_t *handle, unsigned long blocknr, /* * Cancel an outstanding revoke. For use only internally by the - * journaling code (called from journal_get_write_access). + * journaling code (called from jbd2_journal_get_write_access). * * We trust buffer_revoked() on the buffer if the buffer is already * being journaled: if there is no revoke pending on the buffer, then we @@ -418,9 +418,9 @@ int journal_revoke(handle_t *handle, unsigned long blocknr, * * The caller must have the journal locked. */ -int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) +int jbd2_journal_cancel_revoke(handle_t *handle, struct journal_head *jh) { - struct jbd_revoke_record_s *record; + struct jbd2_revoke_record_s *record; journal_t *journal = handle->h_transaction->t_journal; int need_cancel; int did_revoke = 0; /* akpm: debug */ @@ -447,7 +447,7 @@ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) spin_lock(&journal->j_revoke_lock); list_del(&record->hash); spin_unlock(&journal->j_revoke_lock); - kmem_cache_free(revoke_record_cache, record); + kmem_cache_free(jbd2_revoke_record_cache, record); did_revoke = 1; } } @@ -478,7 +478,7 @@ int journal_cancel_revoke(handle_t *handle, struct journal_head *jh) * we do not want to suspend any processing until all revokes are * written -bzzz */ -void journal_switch_revoke_table(journal_t *journal) +void jbd2_journal_switch_revoke_table(journal_t *journal) { int i; @@ -498,12 +498,12 @@ void journal_switch_revoke_table(journal_t *journal) * Called with the journal lock held. 
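A sketch of the delete-path convention just described, with a hypothetical caller: because bh is passed in, jbd2_journal_revoke() records the revoke and performs the jbd2_journal_forget() on our behalf.

static int discard_metadata_block(handle_t *handle, struct buffer_head *bh,
				  unsigned long blocknr)
{
	/* Must run before the block bitmap bit for blocknr is cleared. */
	return jbd2_journal_revoke(handle, blocknr, bh);
}
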
*/ -void journal_write_revoke_records(journal_t *journal, +void jbd2_journal_write_revoke_records(journal_t *journal, transaction_t *transaction) { struct journal_head *descriptor; - struct jbd_revoke_record_s *record; - struct jbd_revoke_table_s *revoke; + struct jbd2_revoke_record_s *record; + struct jbd2_revoke_table_s *revoke; struct list_head *hash_list; int i, offset, count; @@ -519,14 +519,14 @@ void journal_write_revoke_records(journal_t *journal, hash_list = &revoke->hash_table[i]; while (!list_empty(hash_list)) { - record = (struct jbd_revoke_record_s *) + record = (struct jbd2_revoke_record_s *) hash_list->next; write_one_revoke_record(journal, transaction, &descriptor, &offset, record); count++; list_del(&record->hash); - kmem_cache_free(revoke_record_cache, record); + kmem_cache_free(jbd2_revoke_record_cache, record); } } if (descriptor) @@ -543,7 +543,7 @@ static void write_one_revoke_record(journal_t *journal, transaction_t *transaction, struct journal_head **descriptorp, int *offsetp, - struct jbd_revoke_record_s *record) + struct jbd2_revoke_record_s *record) { struct journal_head *descriptor; int offset; @@ -551,7 +551,7 @@ static void write_one_revoke_record(journal_t *journal, /* If we are already aborting, this all becomes a noop. We still need to go round the loop in - journal_write_revoke_records in order to free all of the + jbd2_journal_write_revoke_records in order to free all of the revoke records: only the IO to the journal is omitted. */ if (is_journal_aborted(journal)) return; @@ -568,19 +568,19 @@ static void write_one_revoke_record(journal_t *journal, } if (!descriptor) { - descriptor = journal_get_descriptor_buffer(journal); + descriptor = jbd2_journal_get_descriptor_buffer(journal); if (!descriptor) return; header = (journal_header_t *) &jh2bh(descriptor)->b_data[0]; - header->h_magic = cpu_to_be32(JFS_MAGIC_NUMBER); - header->h_blocktype = cpu_to_be32(JFS_REVOKE_BLOCK); + header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER); + header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK); header->h_sequence = cpu_to_be32(transaction->t_tid); /* Record it so that we can wait for IO completion later */ JBUFFER_TRACE(descriptor, "file as BJ_LogCtl"); - journal_file_buffer(descriptor, transaction, BJ_LogCtl); + jbd2_journal_file_buffer(descriptor, transaction, BJ_LogCtl); - offset = sizeof(journal_revoke_header_t); + offset = sizeof(jbd2_journal_revoke_header_t); *descriptorp = descriptor; } @@ -601,7 +601,7 @@ static void flush_descriptor(journal_t *journal, struct journal_head *descriptor, int offset) { - journal_revoke_header_t *header; + jbd2_journal_revoke_header_t *header; struct buffer_head *bh = jh2bh(descriptor); if (is_journal_aborted(journal)) { @@ -609,7 +609,7 @@ static void flush_descriptor(journal_t *journal, return; } - header = (journal_revoke_header_t *) jh2bh(descriptor)->b_data; + header = (jbd2_journal_revoke_header_t *) jh2bh(descriptor)->b_data; header->r_count = cpu_to_be32(offset); set_buffer_jwrite(bh); BUFFER_TRACE(bh, "write"); @@ -640,11 +640,11 @@ static void flush_descriptor(journal_t *journal, * single block. */ -int journal_set_revoke(journal_t *journal, +int jbd2_journal_set_revoke(journal_t *journal, unsigned long blocknr, tid_t sequence) { - struct jbd_revoke_record_s *record; + struct jbd2_revoke_record_s *record; record = find_revoke_record(journal, blocknr); if (record) { @@ -664,11 +664,11 @@ int journal_set_revoke(journal_t *journal, * ones, but later transactions still need replayed. 
*/ -int journal_test_revoke(journal_t *journal, +int jbd2_journal_test_revoke(journal_t *journal, unsigned long blocknr, tid_t sequence) { - struct jbd_revoke_record_s *record; + struct jbd2_revoke_record_s *record; record = find_revoke_record(journal, blocknr); if (!record) @@ -683,21 +683,21 @@ int journal_test_revoke(journal_t *journal, * that it can be reused by the running filesystem. */ -void journal_clear_revoke(journal_t *journal) +void jbd2_journal_clear_revoke(journal_t *journal) { int i; struct list_head *hash_list; - struct jbd_revoke_record_s *record; - struct jbd_revoke_table_s *revoke; + struct jbd2_revoke_record_s *record; + struct jbd2_revoke_table_s *revoke; revoke = journal->j_revoke; for (i = 0; i < revoke->hash_size; i++) { hash_list = &revoke->hash_table[i]; while (!list_empty(hash_list)) { - record = (struct jbd_revoke_record_s*) hash_list->next; + record = (struct jbd2_revoke_record_s*) hash_list->next; list_del(&record->hash); - kmem_cache_free(revoke_record_cache, record); + kmem_cache_free(jbd2_revoke_record_cache, record); } } } diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index e1b3c8af4d17..149957bef907 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -19,7 +19,7 @@ #include <linux/time.h> #include <linux/fs.h> -#include <linux/jbd.h> +#include <linux/jbd2.h> #include <linux/errno.h> #include <linux/slab.h> #include <linux/timer.h> @@ -28,7 +28,7 @@ #include <linux/highmem.h> /* - * get_transaction: obtain a new transaction_t object. + * jbd2_get_transaction: obtain a new transaction_t object. * * Simply allocate and initialise a new transaction. Create it in * RUNNING state and add it to the current journal (which should not @@ -44,7 +44,7 @@ */ static transaction_t * -get_transaction(journal_t *journal, transaction_t *transaction) +jbd2_get_transaction(journal_t *journal, transaction_t *transaction) { transaction->t_journal = journal; transaction->t_state = T_RUNNING; @@ -115,7 +115,7 @@ repeat: spin_lock(&journal->j_state_lock); repeat_locked: if (is_journal_aborted(journal) || - (journal->j_errno != 0 && !(journal->j_flags & JFS_ACK_ERR))) { + (journal->j_errno != 0 && !(journal->j_flags & JBD2_ACK_ERR))) { spin_unlock(&journal->j_state_lock); ret = -EROFS; goto out; @@ -134,7 +134,7 @@ repeat_locked: spin_unlock(&journal->j_state_lock); goto alloc_transaction; } - get_transaction(journal, new_transaction); + jbd2_get_transaction(journal, new_transaction); new_transaction = NULL; } @@ -175,7 +175,7 @@ repeat_locked: spin_unlock(&transaction->t_handle_lock); prepare_to_wait(&journal->j_wait_transaction_locked, &wait, TASK_UNINTERRUPTIBLE); - __log_start_commit(journal, transaction->t_tid); + __jbd2_log_start_commit(journal, transaction->t_tid); spin_unlock(&journal->j_state_lock); schedule(); finish_wait(&journal->j_wait_transaction_locked, &wait); @@ -205,12 +205,12 @@ repeat_locked: * committing_transaction->t_outstanding_credits plus "enough" for * the log control blocks. * Also, this test is inconsitent with the matching one in - * journal_extend(). + * jbd2_journal_extend(). 
*/ - if (__log_space_left(journal) < jbd_space_needed(journal)) { + if (__jbd2_log_space_left(journal) < jbd_space_needed(journal)) { jbd_debug(2, "Handle %p waiting for checkpoint...\n", handle); spin_unlock(&transaction->t_handle_lock); - __log_wait_for_space(journal); + __jbd2_log_wait_for_space(journal); goto repeat_locked; } @@ -223,7 +223,7 @@ repeat_locked: transaction->t_handle_count++; jbd_debug(4, "Handle %p given %d credits (total %d, free %d)\n", handle, nblocks, transaction->t_outstanding_credits, - __log_space_left(journal)); + __jbd2_log_space_left(journal)); spin_unlock(&transaction->t_handle_lock); spin_unlock(&journal->j_state_lock); out: @@ -246,7 +246,7 @@ static handle_t *new_handle(int nblocks) } /** - * handle_t *journal_start() - Obtain a new handle. + * handle_t *jbd2_journal_start() - Obtain a new handle. * @journal: Journal to start transaction on. * @nblocks: number of block buffer we might modify * @@ -259,7 +259,7 @@ static handle_t *new_handle(int nblocks) * * Return a pointer to a newly allocated handle, or NULL on failure */ -handle_t *journal_start(journal_t *journal, int nblocks) +handle_t *jbd2_journal_start(journal_t *journal, int nblocks) { handle_t *handle = journal_current_handle(); int err; @@ -289,7 +289,7 @@ handle_t *journal_start(journal_t *journal, int nblocks) } /** - * int journal_extend() - extend buffer credits. + * int jbd2_journal_extend() - extend buffer credits. * @handle: handle to 'extend' * @nblocks: nr blocks to try to extend by. * * Some transactions, such as large extends and truncates, can be done * atomically all at once or in several stages. The operation requests * a credit for a number of buffer modifications in advance, but can * extend its credit if it needs more. * - * journal_extend tries to give the running handle more buffer credits. + * jbd2_journal_extend tries to give the running handle more buffer credits. * It does not guarantee that the allocation will succeed - this is a best-effort only. * The calling process MUST be able to deal cleanly with a failure to * extend here. * @@ -308,7 +308,7 @@ * return code < 0 implies an error * return code > 0 implies normal transaction-full status. */ -int journal_extend(handle_t *handle, int nblocks) +int jbd2_journal_extend(handle_t *handle, int nblocks) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; @@ -339,7 +339,7 @@ int journal_extend(handle_t *handle, int nblocks) goto unlock; } - if (wanted > __log_space_left(journal)) { + if (wanted > __jbd2_log_space_left(journal)) { jbd_debug(3, "denied handle %p %d blocks: " "insufficient log space\n", handle, nblocks); goto unlock; } @@ -360,21 +360,21 @@ out: /** - * int journal_restart() - restart a handle . + * int jbd2_journal_restart() - restart a handle. * @handle: handle to restart * @nblocks: nr credits requested * * Restart a handle for a multi-transaction filesystem * operation. * - * If the journal_extend() call above fails to grant new buffer credits - * to a running handle, a call to journal_restart will commit the + * If the jbd2_journal_extend() call above fails to grant new buffer credits + * to a running handle, a call to jbd2_journal_restart will commit the * handle's transaction so far and reattach the handle to a new * transaction capable of guaranteeing the requested number of * credits.
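The comments above describe the standard credit-extension idiom. A minimal sketch under the documented return conventions (0 on success, > 0 when the transaction is full, < 0 on error); the helper itself is hypothetical:

static int ensure_credits(handle_t *handle, int nblocks)
{
	int err = jbd2_journal_extend(handle, nblocks);

	if (err > 0)
		/* Transaction full: commit it and reattach the handle. */
		err = jbd2_journal_restart(handle, nblocks);

	return err;
}
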
*/ -int journal_restart(handle_t *handle, int nblocks) +int jbd2_journal_restart(handle_t *handle, int nblocks) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; @@ -402,7 +402,7 @@ int journal_restart(handle_t *handle, int nblocks) spin_unlock(&transaction->t_handle_lock); jbd_debug(2, "restarting handle %p\n", handle); - __log_start_commit(journal, transaction->t_tid); + __jbd2_log_start_commit(journal, transaction->t_tid); spin_unlock(&journal->j_state_lock); handle->h_buffer_credits = nblocks; @@ -412,7 +412,7 @@ int journal_restart(handle_t *handle, int nblocks) /** - * void journal_lock_updates () - establish a transaction barrier. + * void jbd2_journal_lock_updates () - establish a transaction barrier. * @journal: Journal to establish a barrier on. * * This locks out any further updates from being started, and blocks @@ -421,7 +421,7 @@ int journal_restart(handle_t *handle, int nblocks) * * The journal lock should not be held on entry. */ -void journal_lock_updates(journal_t *journal) +void jbd2_journal_lock_updates(journal_t *journal) { DEFINE_WAIT(wait); @@ -452,7 +452,7 @@ void journal_lock_updates(journal_t *journal) /* * We have now established a barrier against other normal updates, but - * we also need to barrier against other journal_lock_updates() calls + * we also need to barrier against other jbd2_journal_lock_updates() calls * to make sure that we serialise special journal-locked operations * too. */ @@ -460,14 +460,14 @@ void journal_lock_updates(journal_t *journal) } /** - * void journal_unlock_updates (journal_t* journal) - release barrier + * void jbd2_journal_unlock_updates (journal_t* journal) - release barrier * @journal: Journal to release the barrier on. * - * Release a transaction barrier obtained with journal_lock_updates(). + * Release a transaction barrier obtained with jbd2_journal_lock_updates(). * * Should be called without the journal lock held. */ -void journal_unlock_updates (journal_t *journal) +void jbd2_journal_unlock_updates (journal_t *journal) { J_ASSERT(journal->j_barrier_count != 0); @@ -667,7 +667,7 @@ repeat: JBUFFER_TRACE(jh, "allocate memory for buffer"); jbd_unlock_bh_state(bh); frozen_buffer = - jbd_slab_alloc(jh2bh(jh)->b_size, + jbd2_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!frozen_buffer) { printk(KERN_EMERG @@ -699,7 +699,7 @@ repeat: jh->b_transaction = transaction; JBUFFER_TRACE(jh, "file as BJ_Reserved"); spin_lock(&journal->j_list_lock); - __journal_file_buffer(jh, transaction, BJ_Reserved); + __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); spin_unlock(&journal->j_list_lock); } @@ -723,18 +723,18 @@ done: * If we are about to journal a buffer, then any revoke pending on it is * no longer valid */ - journal_cancel_revoke(handle, jh); + jbd2_journal_cancel_revoke(handle, jh); out: if (unlikely(frozen_buffer)) /* It's usually NULL */ - jbd_slab_free(frozen_buffer, bh->b_size); + jbd2_slab_free(frozen_buffer, bh->b_size); JBUFFER_TRACE(jh, "exit"); return error; } /** - * int journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. + * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update. * @handle: transaction to add buffer modifications to * @bh: bh to be used for metadata writes * @credits: variable that will receive credits for the buffer @@ -745,16 +745,16 @@ out: * because we're write()ing a buffer which is also part of a shared mapping. 
*/ -int journal_get_write_access(handle_t *handle, struct buffer_head *bh) +int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) { - struct journal_head *jh = journal_add_journal_head(bh); + struct journal_head *jh = jbd2_journal_add_journal_head(bh); int rc; /* We do not want to get caught playing with fields which the * log thread also manipulates. Make sure that the buffer * completes any outstanding IO before proceeding. */ rc = do_get_write_access(handle, jh, 0); - journal_put_journal_head(jh); + jbd2_journal_put_journal_head(jh); return rc; } @@ -772,17 +772,17 @@ int journal_get_write_access(handle_t *handle, struct buffer_head *bh) * unlocked buffer beforehand. */ /** - * int journal_get_create_access () - notify intent to use newly created bh + * int jbd2_journal_get_create_access () - notify intent to use newly created bh * @handle: transaction to new buffer to * @bh: new buffer. * * Call this if you create a new bh. */ -int journal_get_create_access(handle_t *handle, struct buffer_head *bh) +int jbd2_journal_get_create_access(handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; - struct journal_head *jh = journal_add_journal_head(bh); + struct journal_head *jh = jbd2_journal_add_journal_head(bh); int err; jbd_debug(5, "journal_head %p\n", jh); @@ -812,7 +812,7 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh) if (jh->b_transaction == NULL) { jh->b_transaction = transaction; JBUFFER_TRACE(jh, "file as BJ_Reserved"); - __journal_file_buffer(jh, transaction, BJ_Reserved); + __jbd2_journal_file_buffer(jh, transaction, BJ_Reserved); } else if (jh->b_transaction == journal->j_committing_transaction) { JBUFFER_TRACE(jh, "set next transaction"); jh->b_next_transaction = transaction; @@ -828,14 +828,14 @@ int journal_get_create_access(handle_t *handle, struct buffer_head *bh) * which hits an assertion error. */ JBUFFER_TRACE(jh, "cancelling revoke"); - journal_cancel_revoke(handle, jh); - journal_put_journal_head(jh); + jbd2_journal_cancel_revoke(handle, jh); + jbd2_journal_put_journal_head(jh); out: return err; } /** - * int journal_get_undo_access() - Notify intent to modify metadata with + * int jbd2_journal_get_undo_access() - Notify intent to modify metadata with * non-rewindable consequences * @handle: transaction * @bh: buffer to undo @@ -848,7 +848,7 @@ out: * since if we overwrote that space we would make the delete * un-rewindable in case of a crash. * - * To deal with that, journal_get_undo_access requests write access to a + * To deal with that, jbd2_journal_get_undo_access requests write access to a * buffer for parts of non-rewindable operations such as delete * operations on the bitmaps. The journaling code must keep a copy of * the buffer's contents prior to the undo_access call until such time @@ -861,10 +861,10 @@ out: * * Returns error number or 0 on success. 
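Together with jbd2_journal_dirty_metadata() further down, the access functions above imply a fixed update cycle. A hedged sketch of that cycle (the modification step is a placeholder):

static int journaled_update(handle_t *handle, struct buffer_head *bh)
{
	int err = jbd2_journal_get_write_access(handle, bh);

	if (err)
		return err;

	/* ... modify bh->b_data under the handle's protection ... */

	return jbd2_journal_dirty_metadata(handle, bh);
}
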
*/ -int journal_get_undo_access(handle_t *handle, struct buffer_head *bh) +int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) { int err; - struct journal_head *jh = journal_add_journal_head(bh); + struct journal_head *jh = jbd2_journal_add_journal_head(bh); char *committed_data = NULL; JBUFFER_TRACE(jh, "entry"); @@ -880,7 +880,7 @@ int journal_get_undo_access(handle_t *handle, struct buffer_head *bh) repeat: if (!jh->b_committed_data) { - committed_data = jbd_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); + committed_data = jbd2_slab_alloc(jh2bh(jh)->b_size, GFP_NOFS); if (!committed_data) { printk(KERN_EMERG "%s: No memory for committed data\n", __FUNCTION__); @@ -905,14 +905,14 @@ repeat: } jbd_unlock_bh_state(bh); out: - journal_put_journal_head(jh); + jbd2_journal_put_journal_head(jh); if (unlikely(committed_data)) - jbd_slab_free(committed_data, bh->b_size); + jbd2_slab_free(committed_data, bh->b_size); return err; } /** - * int journal_dirty_data() - mark a buffer as containing dirty data which + * int jbd2_journal_dirty_data() - mark a buffer as containing dirty data which * needs to be flushed before we can commit the * current transaction. * @handle: transaction @@ -923,10 +923,10 @@ out: * * Returns error number or 0 on success. * - * journal_dirty_data() can be called via page_launder->ext3_writepage + * jbd2_journal_dirty_data() can be called via page_launder->ext3_writepage * by kswapd. */ -int journal_dirty_data(handle_t *handle, struct buffer_head *bh) +int jbd2_journal_dirty_data(handle_t *handle, struct buffer_head *bh) { journal_t *journal = handle->h_transaction->t_journal; int need_brelse = 0; @@ -935,7 +935,7 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) if (is_handle_aborted(handle)) return 0; - jh = journal_add_journal_head(bh); + jh = jbd2_journal_add_journal_head(bh); JBUFFER_TRACE(jh, "entry"); /* @@ -984,7 +984,7 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) * And while we're in that state, someone does a * writepage() in an attempt to pageout the same area * of the file via a shared mapping. At present that - * calls journal_dirty_data(), and we get right here. + * calls jbd2_journal_dirty_data(), and we get right here. * It may be too late to journal the data. Simply * falling through to the next test will suffice: the * data will be dirty and wil be checkpointed. 
The @@ -1035,7 +1035,7 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) /* journal_clean_data_list() may have got there first */ if (jh->b_transaction != NULL) { JBUFFER_TRACE(jh, "unfile from commit"); - __journal_temp_unlink_buffer(jh); + __jbd2_journal_temp_unlink_buffer(jh); /* It still points to the committing * transaction; move it to this one so * that the refile assert checks are @@ -1054,15 +1054,15 @@ int journal_dirty_data(handle_t *handle, struct buffer_head *bh) if (jh->b_jlist != BJ_SyncData && jh->b_jlist != BJ_Locked) { JBUFFER_TRACE(jh, "not on correct data list: unfile"); J_ASSERT_JH(jh, jh->b_jlist != BJ_Shadow); - __journal_temp_unlink_buffer(jh); + __jbd2_journal_temp_unlink_buffer(jh); jh->b_transaction = handle->h_transaction; JBUFFER_TRACE(jh, "file as data"); - __journal_file_buffer(jh, handle->h_transaction, + __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_SyncData); } } else { JBUFFER_TRACE(jh, "not on a transaction"); - __journal_file_buffer(jh, handle->h_transaction, BJ_SyncData); + __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_SyncData); } no_journal: spin_unlock(&journal->j_list_lock); @@ -1072,12 +1072,12 @@ no_journal: __brelse(bh); } JBUFFER_TRACE(jh, "exit"); - journal_put_journal_head(jh); + jbd2_journal_put_journal_head(jh); return 0; } /** - * int journal_dirty_metadata() - mark a buffer as containing dirty metadata + * int jbd2_journal_dirty_metadata() - mark a buffer as containing dirty metadata * @handle: transaction to add buffer to. * @bh: buffer to mark * @@ -1095,7 +1095,7 @@ no_journal: * buffer: that only gets done when the old transaction finally * completes its commit. */ -int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) +int jbd2_journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; @@ -1156,7 +1156,7 @@ int journal_dirty_metadata(handle_t *handle, struct buffer_head *bh) JBUFFER_TRACE(jh, "file as BJ_Metadata"); spin_lock(&journal->j_list_lock); - __journal_file_buffer(jh, handle->h_transaction, BJ_Metadata); + __jbd2_journal_file_buffer(jh, handle->h_transaction, BJ_Metadata); spin_unlock(&journal->j_list_lock); out_unlock_bh: jbd_unlock_bh_state(bh); @@ -1166,18 +1166,18 @@ out: } /* - * journal_release_buffer: undo a get_write_access without any buffer + * jbd2_journal_release_buffer: undo a get_write_access without any buffer * updates, if the update decided in the end that it didn't need access. * */ void -journal_release_buffer(handle_t *handle, struct buffer_head *bh) +jbd2_journal_release_buffer(handle_t *handle, struct buffer_head *bh) { BUFFER_TRACE(bh, "entry"); } /** - * void journal_forget() - bforget() for potentially-journaled buffers. + * void jbd2_journal_forget() - bforget() for potentially-journaled buffers. * @handle: transaction handle * @bh: bh to 'forget' * @@ -1193,7 +1193,7 @@ journal_release_buffer(handle_t *handle, struct buffer_head *bh) * Allow this call even if the handle has aborted --- it may be part of * the caller's cleanup after an abort. 
*/ -int journal_forget (handle_t *handle, struct buffer_head *bh) +int jbd2_journal_forget (handle_t *handle, struct buffer_head *bh) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; @@ -1250,11 +1250,11 @@ int journal_forget (handle_t *handle, struct buffer_head *bh) */ if (jh->b_cp_transaction) { - __journal_temp_unlink_buffer(jh); - __journal_file_buffer(jh, transaction, BJ_Forget); + __jbd2_journal_temp_unlink_buffer(jh); + __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); } else { - __journal_unfile_buffer(jh); - journal_remove_journal_head(bh); + __jbd2_journal_unfile_buffer(jh); + jbd2_journal_remove_journal_head(bh); __brelse(bh); if (!buffer_jbd(bh)) { spin_unlock(&journal->j_list_lock); @@ -1292,7 +1292,7 @@ drop: } /** - * int journal_stop() - complete a transaction + * int jbd2_journal_stop() - complete a transaction * @handle: transaction to complete. * * All done for a particular handle. * * There is not much action needed before we kill off a running * @@ -1302,12 +1302,12 @@ drop: * complication is that we need to start a commit operation if the * filesystem is marked for synchronous update. * - * journal_stop itself will not usually return an error, but it may + * jbd2_journal_stop itself will not usually return an error, but it may * do so in unusual circumstances. In particular, expect it to - * return -EIO if a journal_abort has been executed since the + * return -EIO if a jbd2_journal_abort has been executed since the * transaction began. */ -int journal_stop(handle_t *handle) +int jbd2_journal_stop(handle_t *handle) { transaction_t *transaction = handle->h_transaction; journal_t *journal = transaction->t_journal; @@ -1383,15 +1383,15 @@ int journal_stop(handle_t *handle) jbd_debug(2, "transaction too old, requesting commit for " "handle %p\n", handle); /* This is non-blocking */ - __log_start_commit(journal, transaction->t_tid); + __jbd2_log_start_commit(journal, transaction->t_tid); spin_unlock(&journal->j_state_lock); /* - * Special case: JFS_SYNC synchronous updates require us + * Special case: JBD2_SYNC synchronous updates require us * to wait for the commit to complete. */ if (handle->h_sync && !(current->flags & PF_MEMALLOC)) - err = log_wait_commit(journal, tid); + err = jbd2_log_wait_commit(journal, tid); } else { spin_unlock(&transaction->t_handle_lock); spin_unlock(&journal->j_state_lock); } @@ -1401,24 +1401,24 @@ int journal_stop(handle_t *handle) return err; } -/**int journal_force_commit() - force any uncommitted transactions +/** int jbd2_journal_force_commit() - force any uncommitted transactions * @journal: journal to force * * For synchronous operations: force any uncommitted transactions * to disk. May seem kludgy, but it reuses all the handle batching * code in a very simple manner. */ -int journal_force_commit(journal_t *journal) +int jbd2_journal_force_commit(journal_t *journal) { handle_t *handle; int ret; - handle = journal_start(journal, 1); + handle = jbd2_journal_start(journal, 1); if (IS_ERR(handle)) { ret = PTR_ERR(handle); } else { handle->h_sync = 1; - ret = journal_stop(handle); + ret = jbd2_journal_stop(handle); } return ret; } @@ -1486,7 +1486,7 @@ __blist_del_buffer(struct journal_head **list, struct journal_head *jh) * * Called under j_list_lock. The journal may not be locked.
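jbd2_journal_force_commit() above is itself the template for synchronous updates: a caller gets the same behaviour on its own handle by setting h_sync before stopping. An illustrative fragment (the wrapper is hypothetical):

static int journaled_sync_update(journal_t *journal)
{
	handle_t *handle = jbd2_journal_start(journal, 1);

	if (IS_ERR(handle))
		return PTR_ERR(handle);

	/* ... journaled modifications would go here ... */

	handle->h_sync = 1;	/* make jbd2_journal_stop() wait for commit */
	return jbd2_journal_stop(handle);
}
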
*/ -void __journal_temp_unlink_buffer(struct journal_head *jh) +void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh) { struct journal_head **list = NULL; transaction_t *transaction; @@ -1538,23 +1538,23 @@ void __journal_temp_unlink_buffer(struct journal_head *jh) mark_buffer_dirty(bh); /* Expose it to the VM */ } -void __journal_unfile_buffer(struct journal_head *jh) +void __jbd2_journal_unfile_buffer(struct journal_head *jh) { - __journal_temp_unlink_buffer(jh); + __jbd2_journal_temp_unlink_buffer(jh); jh->b_transaction = NULL; } -void journal_unfile_buffer(journal_t *journal, struct journal_head *jh) +void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh) { jbd_lock_bh_state(jh2bh(jh)); spin_lock(&journal->j_list_lock); - __journal_unfile_buffer(jh); + __jbd2_journal_unfile_buffer(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(jh2bh(jh)); } /* - * Called from journal_try_to_free_buffers(). + * Called from jbd2_journal_try_to_free_buffers(). * * Called under jbd_lock_bh_state(bh) */ @@ -1576,16 +1576,16 @@ __journal_try_to_free_buffer(journal_t *journal, struct buffer_head *bh) if (jh->b_jlist == BJ_SyncData || jh->b_jlist == BJ_Locked) { /* A written-back ordered data buffer */ JBUFFER_TRACE(jh, "release data"); - __journal_unfile_buffer(jh); - journal_remove_journal_head(bh); + __jbd2_journal_unfile_buffer(jh); + jbd2_journal_remove_journal_head(bh); __brelse(bh); } } else if (jh->b_cp_transaction != 0 && jh->b_transaction == 0) { /* written-back checkpointed metadata buffer */ if (jh->b_jlist == BJ_None) { JBUFFER_TRACE(jh, "remove from checkpoint list"); - __journal_remove_checkpoint(jh); - journal_remove_journal_head(bh); + __jbd2_journal_remove_checkpoint(jh); + jbd2_journal_remove_journal_head(bh); __brelse(bh); } } @@ -1596,7 +1596,7 @@ out: /** - * int journal_try_to_free_buffers() - try to free page buffers. + * int jbd2_journal_try_to_free_buffers() - try to free page buffers. * @journal: journal for operation * @page: to try and free * @unused_gfp_mask: unused @@ -1613,13 +1613,13 @@ out: * * This complicates JBD locking somewhat. We aren't protected by the * BKL here. We wish to remove the buffer from its committing or - * running transaction's ->t_datalist via __journal_unfile_buffer. + * running transaction's ->t_datalist via __jbd2_journal_unfile_buffer. * * This may *change* the value of transaction_t->t_datalist, so anyone * who looks at t_datalist needs to lock against this function. * - * Even worse, someone may be doing a journal_dirty_data on this - * buffer. So we need to lock against that. journal_dirty_data() + * Even worse, someone may be doing a jbd2_journal_dirty_data on this + * buffer. So we need to lock against that. jbd2_journal_dirty_data() * will come out of the lock with the buffer dirty, which makes it * ineligible for release here. * @@ -1629,7 +1629,7 @@ out: * cannot happen because we never reallocate freed data as metadata * while the data is part of a transaction. Yes? */ -int journal_try_to_free_buffers(journal_t *journal, +int jbd2_journal_try_to_free_buffers(journal_t *journal, struct page *page, gfp_t unused_gfp_mask) { struct buffer_head *head; @@ -1646,15 +1646,15 @@ int journal_try_to_free_buffers(journal_t *journal, /* * We take our own ref against the journal_head here to avoid * having to add tons of locking around each instance of - * journal_remove_journal_head() and journal_put_journal_head(). + * jbd2_journal_remove_journal_head() and jbd2_journal_put_journal_head(). 
*/ - jh = journal_grab_journal_head(bh); + jh = jbd2_journal_grab_journal_head(bh); if (!jh) continue; jbd_lock_bh_state(bh); __journal_try_to_free_buffer(journal, bh); - journal_put_journal_head(jh); + jbd2_journal_put_journal_head(jh); jbd_unlock_bh_state(bh); if (buffer_jbd(bh)) goto busy; @@ -1681,23 +1681,23 @@ static int __dispose_buffer(struct journal_head *jh, transaction_t *transaction) int may_free = 1; struct buffer_head *bh = jh2bh(jh); - __journal_unfile_buffer(jh); + __jbd2_journal_unfile_buffer(jh); if (jh->b_cp_transaction) { JBUFFER_TRACE(jh, "on running+cp transaction"); - __journal_file_buffer(jh, transaction, BJ_Forget); + __jbd2_journal_file_buffer(jh, transaction, BJ_Forget); clear_buffer_jbddirty(bh); may_free = 0; } else { JBUFFER_TRACE(jh, "on running transaction"); - journal_remove_journal_head(bh); + jbd2_journal_remove_journal_head(bh); __brelse(bh); } return may_free; } /* - * journal_invalidatepage + * jbd2_journal_invalidatepage * * This code is tricky. It has a number of cases to deal with. * @@ -1765,7 +1765,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); - jh = journal_grab_journal_head(bh); + jh = jbd2_journal_grab_journal_head(bh); if (!jh) goto zap_buffer_no_jh; @@ -1796,7 +1796,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) JBUFFER_TRACE(jh, "checkpointed: add to BJ_Forget"); ret = __dispose_buffer(jh, journal->j_running_transaction); - journal_put_journal_head(jh); + jbd2_journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); spin_unlock(&journal->j_state_lock); @@ -1810,7 +1810,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) JBUFFER_TRACE(jh, "give to committing trans"); ret = __dispose_buffer(jh, journal->j_committing_transaction); - journal_put_journal_head(jh); + jbd2_journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); spin_unlock(&journal->j_state_lock); @@ -1844,7 +1844,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) journal->j_running_transaction); jh->b_next_transaction = NULL; } - journal_put_journal_head(jh); + jbd2_journal_put_journal_head(jh); spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); spin_unlock(&journal->j_state_lock); @@ -1861,7 +1861,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh) } zap_buffer: - journal_put_journal_head(jh); + jbd2_journal_put_journal_head(jh); zap_buffer_no_jh: spin_unlock(&journal->j_list_lock); jbd_unlock_bh_state(bh); @@ -1877,7 +1877,7 @@ zap_buffer_unlocked: } /** - * void journal_invalidatepage() + * void jbd2_journal_invalidatepage() * @journal: journal to use for flush... * @page: page to flush * @offset: length of page to invalidate. @@ -1885,7 +1885,7 @@ zap_buffer_unlocked: * Reap page buffers containing data after offset in page. * */ -void journal_invalidatepage(journal_t *journal, +void jbd2_journal_invalidatepage(journal_t *journal, struct page *page, unsigned long offset) { @@ -1927,7 +1927,7 @@ void journal_invalidatepage(journal_t *journal, /* * File a buffer on the given transaction list. 
*/ -void __journal_file_buffer(struct journal_head *jh, +void __jbd2_journal_file_buffer(struct journal_head *jh, transaction_t *transaction, int jlist) { struct journal_head **list = NULL; @@ -1956,7 +1956,7 @@ void __journal_file_buffer(struct journal_head *jh, } if (jh->b_transaction) - __journal_temp_unlink_buffer(jh); + __jbd2_journal_temp_unlink_buffer(jh); jh->b_transaction = transaction; switch (jlist) { @@ -1998,12 +1998,12 @@ void __journal_file_buffer(struct journal_head *jh, set_buffer_jbddirty(bh); } -void journal_file_buffer(struct journal_head *jh, +void jbd2_journal_file_buffer(struct journal_head *jh, transaction_t *transaction, int jlist) { jbd_lock_bh_state(jh2bh(jh)); spin_lock(&transaction->t_journal->j_list_lock); - __journal_file_buffer(jh, transaction, jlist); + __jbd2_journal_file_buffer(jh, transaction, jlist); spin_unlock(&transaction->t_journal->j_list_lock); jbd_unlock_bh_state(jh2bh(jh)); } @@ -2018,7 +2018,7 @@ void journal_file_buffer(struct journal_head *jh, * * Called under jbd_lock_bh_state(jh2bh(jh)) */ -void __journal_refile_buffer(struct journal_head *jh) +void __jbd2_journal_refile_buffer(struct journal_head *jh) { int was_dirty; struct buffer_head *bh = jh2bh(jh); @@ -2029,7 +2029,7 @@ void __journal_refile_buffer(struct journal_head *jh) /* If the buffer is now unused, just drop it. */ if (jh->b_next_transaction == NULL) { - __journal_unfile_buffer(jh); + __jbd2_journal_unfile_buffer(jh); return; } @@ -2039,10 +2039,10 @@ void __journal_refile_buffer(struct journal_head *jh) */ was_dirty = test_clear_buffer_jbddirty(bh); - __journal_temp_unlink_buffer(jh); + __jbd2_journal_temp_unlink_buffer(jh); jh->b_transaction = jh->b_next_transaction; jh->b_next_transaction = NULL; - __journal_file_buffer(jh, jh->b_transaction, + __jbd2_journal_file_buffer(jh, jh->b_transaction, was_dirty ? BJ_Metadata : BJ_Reserved); J_ASSERT_JH(jh, jh->b_transaction->t_state == T_RUNNING); @@ -2054,26 +2054,26 @@ void __journal_refile_buffer(struct journal_head *jh) * For the unlocked version of this call, also make sure that any * hanging journal_head is cleaned up if necessary. * - * __journal_refile_buffer is usually called as part of a single locked + * __jbd2_journal_refile_buffer is usually called as part of a single locked * operation on a buffer_head, in which the caller is probably going to * be hooking the journal_head onto other lists. In that case it is up * to the caller to remove the journal_head if necessary. For the - * unlocked journal_refile_buffer call, the caller isn't going to be + * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be * doing anything else to the buffer so we need to do the cleanup * ourselves to avoid a jh leak. * * *** The journal_head may be freed by this call! *** */ -void journal_refile_buffer(journal_t *journal, struct journal_head *jh) +void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh) { struct buffer_head *bh = jh2bh(jh); jbd_lock_bh_state(bh); spin_lock(&journal->j_list_lock); - __journal_refile_buffer(jh); + __jbd2_journal_refile_buffer(jh); jbd_unlock_bh_state(bh); - journal_remove_journal_head(bh); + jbd2_journal_remove_journal_head(bh); spin_unlock(&journal->j_list_lock); __brelse(bh); diff --git a/include/linux/ext4_jbd2.h b/include/linux/ext4_jbd2.h index 3dbf6c779037..99d37557cbb4 100644 --- a/include/linux/ext4_jbd2.h +++ b/include/linux/ext4_jbd2.h @@ -1,5 +1,5 @@ /* - * linux/include/linux/ext4_jbd.h + * linux/include/linux/ext4_jbd2.h * * Written by Stephen C. 
Tweedie <sct@redhat.com>, 1999 * @@ -16,7 +16,7 @@ #define _LINUX_EXT4_JBD_H #include <linux/fs.h> -#include <linux/jbd.h> +#include <linux/jbd2.h> #include <linux/ext4_fs.h> #define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal) @@ -116,7 +116,7 @@ static inline int __ext4_journal_get_undo_access(const char *where, handle_t *handle, struct buffer_head *bh) { - int err = journal_get_undo_access(handle, bh); + int err = jbd2_journal_get_undo_access(handle, bh); if (err) ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); return err; @@ -126,7 +126,7 @@ static inline int __ext4_journal_get_write_access(const char *where, handle_t *handle, struct buffer_head *bh) { - int err = journal_get_write_access(handle, bh); + int err = jbd2_journal_get_write_access(handle, bh); if (err) ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); return err; @@ -135,13 +135,13 @@ __ext4_journal_get_write_access(const char *where, handle_t *handle, static inline void ext4_journal_release_buffer(handle_t *handle, struct buffer_head *bh) { - journal_release_buffer(handle, bh); + jbd2_journal_release_buffer(handle, bh); } static inline int __ext4_journal_forget(const char *where, handle_t *handle, struct buffer_head *bh) { - int err = journal_forget(handle, bh); + int err = jbd2_journal_forget(handle, bh); if (err) ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); return err; @@ -151,7 +151,7 @@ static inline int __ext4_journal_revoke(const char *where, handle_t *handle, unsigned long blocknr, struct buffer_head *bh) { - int err = journal_revoke(handle, blocknr, bh); + int err = jbd2_journal_revoke(handle, blocknr, bh); if (err) ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); return err; @@ -161,7 +161,7 @@ static inline int __ext4_journal_get_create_access(const char *where, handle_t *handle, struct buffer_head *bh) { - int err = journal_get_create_access(handle, bh); + int err = jbd2_journal_get_create_access(handle, bh); if (err) ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); return err; @@ -171,7 +171,7 @@ static inline int __ext4_journal_dirty_metadata(const char *where, handle_t *handle, struct buffer_head *bh) { - int err = journal_dirty_metadata(handle, bh); + int err = jbd2_journal_dirty_metadata(handle, bh); if (err) ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); return err; @@ -211,22 +211,22 @@ static inline handle_t *ext4_journal_current_handle(void) static inline int ext4_journal_extend(handle_t *handle, int nblocks) { - return journal_extend(handle, nblocks); + return jbd2_journal_extend(handle, nblocks); } static inline int ext4_journal_restart(handle_t *handle, int nblocks) { - return journal_restart(handle, nblocks); + return jbd2_journal_restart(handle, nblocks); } static inline int ext4_journal_blocks_per_page(struct inode *inode) { - return journal_blocks_per_page(inode); + return jbd2_journal_blocks_per_page(inode); } static inline int ext4_journal_force_commit(journal_t *journal) { - return journal_force_commit(journal); + return jbd2_journal_force_commit(journal); } /* super.c */ diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index fe89444b1c6f..3251f7abb57d 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1,5 +1,5 @@ /* - * linux/include/linux/jbd.h + * linux/include/linux/jbd2.h * * Written by Stephen C. 
Tweedie <sct@redhat.com> * @@ -19,7 +19,7 @@ /* Allow this file to be included directly into e2fsprogs */ #ifndef __KERNEL__ #include "jfs_compat.h" -#define JFS_DEBUG +#define JBD2_DEBUG #define jfs_debug jbd_debug #else @@ -57,11 +57,11 @@ * CONFIG_JBD_DEBUG is on. */ #define JBD_EXPENSIVE_CHECKING -extern int journal_enable_debug; +extern int jbd2_journal_enable_debug; #define jbd_debug(n, f, a...) \ do { \ - if ((n) <= journal_enable_debug) { \ + if ((n) <= jbd2_journal_enable_debug) { \ printk (KERN_DEBUG "(%s, %d): %s: ", \ __FILE__, __LINE__, __FUNCTION__); \ printk (f, ## a); \ @@ -71,16 +71,16 @@ extern int journal_enable_debug; #define jbd_debug(f, a...) /**/ #endif -extern void * __jbd_kmalloc (const char *where, size_t size, gfp_t flags, int retry); -extern void * jbd_slab_alloc(size_t size, gfp_t flags); -extern void jbd_slab_free(void *ptr, size_t size); +extern void * __jbd2_kmalloc (const char *where, size_t size, gfp_t flags, int retry); +extern void * jbd2_slab_alloc(size_t size, gfp_t flags); +extern void jbd2_slab_free(void *ptr, size_t size); #define jbd_kmalloc(size, flags) \ - __jbd_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry) + __jbd2_kmalloc(__FUNCTION__, (size), (flags), journal_oom_retry) #define jbd_rep_kmalloc(size, flags) \ - __jbd_kmalloc(__FUNCTION__, (size), (flags), 1) + __jbd2_kmalloc(__FUNCTION__, (size), (flags), 1) -#define JFS_MIN_JOURNAL_BLOCKS 1024 +#define JBD2_MIN_JOURNAL_BLOCKS 1024 #ifdef __KERNEL__ @@ -122,7 +122,7 @@ typedef struct journal_s journal_t; /* Journal control structure */ * Internal structures used by the logging mechanism: */ -#define JFS_MAGIC_NUMBER 0xc03b3998U /* The first 4 bytes of /dev/random! */ +#define JBD2_MAGIC_NUMBER 0xc03b3998U /* The first 4 bytes of /dev/random! */ /* * On-disk structures @@ -132,11 +132,11 @@ typedef struct journal_s journal_t; /* Journal control structure */ * Descriptor block types: */ -#define JFS_DESCRIPTOR_BLOCK 1 -#define JFS_COMMIT_BLOCK 2 -#define JFS_SUPERBLOCK_V1 3 -#define JFS_SUPERBLOCK_V2 4 -#define JFS_REVOKE_BLOCK 5 +#define JBD2_DESCRIPTOR_BLOCK 1 +#define JBD2_COMMIT_BLOCK 2 +#define JBD2_SUPERBLOCK_V1 3 +#define JBD2_SUPERBLOCK_V2 4 +#define JBD2_REVOKE_BLOCK 5 /* * Standard header for all descriptor blocks: @@ -162,18 +162,18 @@ typedef struct journal_block_tag_s * The revoke descriptor: used on disk to describe a series of blocks to * be revoked from the log */ -typedef struct journal_revoke_header_s +typedef struct jbd2_journal_revoke_header_s { journal_header_t r_header; __be32 r_count; /* Count of bytes used in the block */ -} journal_revoke_header_t; +} jbd2_journal_revoke_header_t; /* Definitions for the journal tag flags word: */ -#define JFS_FLAG_ESCAPE 1 /* on-disk block is escaped */ -#define JFS_FLAG_SAME_UUID 2 /* block has same uuid as previous */ -#define JFS_FLAG_DELETED 4 /* block deleted by this transaction */ -#define JFS_FLAG_LAST_TAG 8 /* last tag in this descriptor block */ +#define JBD2_FLAG_ESCAPE 1 /* on-disk block is escaped */ +#define JBD2_FLAG_SAME_UUID 2 /* block has same uuid as previous */ +#define JBD2_FLAG_DELETED 4 /* block deleted by this transaction */ +#define JBD2_FLAG_LAST_TAG 8 /* last tag in this descriptor block */ /* @@ -196,7 +196,7 @@ typedef struct journal_superblock_s __be32 s_start; /* blocknr of start of log */ /* 0x0020 */ - /* Error value, as set by journal_abort(). */ + /* Error value, as set by jbd2_journal_abort(). 
*/ __be32 s_errno; /* 0x0024 */ @@ -224,22 +224,22 @@ typedef struct journal_superblock_s /* 0x0400 */ } journal_superblock_t; -#define JFS_HAS_COMPAT_FEATURE(j,mask) \ +#define JBD2_HAS_COMPAT_FEATURE(j,mask) \ ((j)->j_format_version >= 2 && \ ((j)->j_superblock->s_feature_compat & cpu_to_be32((mask)))) -#define JFS_HAS_RO_COMPAT_FEATURE(j,mask) \ +#define JBD2_HAS_RO_COMPAT_FEATURE(j,mask) \ ((j)->j_format_version >= 2 && \ ((j)->j_superblock->s_feature_ro_compat & cpu_to_be32((mask)))) -#define JFS_HAS_INCOMPAT_FEATURE(j,mask) \ +#define JBD2_HAS_INCOMPAT_FEATURE(j,mask) \ ((j)->j_format_version >= 2 && \ ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) -#define JFS_FEATURE_INCOMPAT_REVOKE 0x00000001 +#define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 /* Features known to this kernel version: */ -#define JFS_KNOWN_COMPAT_FEATURES 0 -#define JFS_KNOWN_ROCOMPAT_FEATURES 0 -#define JFS_KNOWN_INCOMPAT_FEATURES JFS_FEATURE_INCOMPAT_REVOKE +#define JBD2_KNOWN_COMPAT_FEATURES 0 +#define JBD2_KNOWN_ROCOMPAT_FEATURES 0 +#define JBD2_KNOWN_INCOMPAT_FEATURES JBD2_FEATURE_INCOMPAT_REVOKE #ifdef __KERNEL__ @@ -359,7 +359,7 @@ static inline void jbd_unlock_bh_journal_head(struct buffer_head *bh) bit_spin_unlock(BH_JournalHead, &bh->b_state); } -struct jbd_revoke_table_s; +struct jbd2_revoke_table_s; /** * struct handle_s - The handle_s type is the concrete type associated with @@ -445,7 +445,7 @@ struct transaction_s /* * Transaction's current state - * [no locking - only kjournald alters this] + * [no locking - only kjournald2 alters this] * FIXME: needs barriers * KLUDGE: [use j_state_lock] */ @@ -621,7 +621,7 @@ struct transaction_s * @j_revoke: The revoke table - maintains the list of revoked blocks in the * current transaction. * @j_revoke_table: alternate revoke tables for j_revoke - * @j_wbuf: array of buffer_heads for journal_commit_transaction + * @j_wbuf: array of buffer_heads for jbd2_journal_commit_transaction * @j_wbufsize: maximum number of buffer_heads allowed in j_wbuf, the * number that will fit in j_blocksize * @j_last_sync_writer: most recent pid which did a synchronous write @@ -805,11 +805,11 @@ struct journal_s * current transaction. [j_revoke_lock] */ spinlock_t j_revoke_lock; - struct jbd_revoke_table_s *j_revoke; - struct jbd_revoke_table_s *j_revoke_table[2]; + struct jbd2_revoke_table_s *j_revoke; + struct jbd2_revoke_table_s *j_revoke_table[2]; /* - * array of bhs for journal_commit_transaction + * array of bhs for jbd2_journal_commit_transaction */ struct buffer_head **j_wbuf; int j_wbufsize; @@ -826,12 +826,12 @@ struct journal_s /* * Journal flag definitions */ -#define JFS_UNMOUNT 0x001 /* Journal thread is being destroyed */ -#define JFS_ABORT 0x002 /* Journaling has been aborted for errors. */ -#define JFS_ACK_ERR 0x004 /* The errno in the sb has been acked */ -#define JFS_FLUSHED 0x008 /* The journal superblock has been flushed */ -#define JFS_LOADED 0x010 /* The journal superblock has been loaded */ -#define JFS_BARRIER 0x020 /* Use IDE barriers */ +#define JBD2_UNMOUNT 0x001 /* Journal thread is being destroyed */ +#define JBD2_ABORT 0x002 /* Journaling has been aborted for errors. 
*/ +#define JBD2_ACK_ERR 0x004 /* The errno in the sb has been acked */ +#define JBD2_FLUSHED 0x008 /* The journal superblock has been flushed */ +#define JBD2_LOADED 0x010 /* The journal superblock has been loaded */ +#define JBD2_BARRIER 0x020 /* Use IDE barriers */ /* * Function declarations for the journaling transaction and buffer @@ -839,31 +839,31 @@ struct journal_s */ /* Filing buffers */ -extern void __journal_temp_unlink_buffer(struct journal_head *jh); -extern void journal_unfile_buffer(journal_t *, struct journal_head *); -extern void __journal_unfile_buffer(struct journal_head *); -extern void __journal_refile_buffer(struct journal_head *); -extern void journal_refile_buffer(journal_t *, struct journal_head *); -extern void __journal_file_buffer(struct journal_head *, transaction_t *, int); +extern void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh); +extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *); +extern void __jbd2_journal_unfile_buffer(struct journal_head *); +extern void __jbd2_journal_refile_buffer(struct journal_head *); +extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *); +extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); extern void __journal_free_buffer(struct journal_head *bh); -extern void journal_file_buffer(struct journal_head *, transaction_t *, int); +extern void jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int); extern void __journal_clean_data_list(transaction_t *transaction); /* Log buffer allocation */ -extern struct journal_head * journal_get_descriptor_buffer(journal_t *); -int journal_next_log_block(journal_t *, unsigned long *); +extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *); +int jbd2_journal_next_log_block(journal_t *, unsigned long *); /* Commit management */ -extern void journal_commit_transaction(journal_t *); +extern void jbd2_journal_commit_transaction(journal_t *); /* Checkpoint list management */ -int __journal_clean_checkpoint_list(journal_t *journal); -int __journal_remove_checkpoint(struct journal_head *); -void __journal_insert_checkpoint(struct journal_head *, transaction_t *); +int __jbd2_journal_clean_checkpoint_list(journal_t *journal); +int __jbd2_journal_remove_checkpoint(struct journal_head *); +void __jbd2_journal_insert_checkpoint(struct journal_head *, transaction_t *); /* Buffer IO */ extern int -journal_write_metadata_buffer(transaction_t *transaction, +jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, struct journal_head **jh_out, unsigned long blocknr); @@ -893,91 +893,91 @@ static inline handle_t *journal_current_handle(void) * Register buffer modifications against the current transaction. 
*/ -extern handle_t *journal_start(journal_t *, int nblocks); -extern int journal_restart (handle_t *, int nblocks); -extern int journal_extend (handle_t *, int nblocks); -extern int journal_get_write_access(handle_t *, struct buffer_head *); -extern int journal_get_create_access (handle_t *, struct buffer_head *); -extern int journal_get_undo_access(handle_t *, struct buffer_head *); -extern int journal_dirty_data (handle_t *, struct buffer_head *); -extern int journal_dirty_metadata (handle_t *, struct buffer_head *); -extern void journal_release_buffer (handle_t *, struct buffer_head *); -extern int journal_forget (handle_t *, struct buffer_head *); +extern handle_t *jbd2_journal_start(journal_t *, int nblocks); +extern int jbd2_journal_restart (handle_t *, int nblocks); +extern int jbd2_journal_extend (handle_t *, int nblocks); +extern int jbd2_journal_get_write_access(handle_t *, struct buffer_head *); +extern int jbd2_journal_get_create_access (handle_t *, struct buffer_head *); +extern int jbd2_journal_get_undo_access(handle_t *, struct buffer_head *); +extern int jbd2_journal_dirty_data (handle_t *, struct buffer_head *); +extern int jbd2_journal_dirty_metadata (handle_t *, struct buffer_head *); +extern void jbd2_journal_release_buffer (handle_t *, struct buffer_head *); +extern int jbd2_journal_forget (handle_t *, struct buffer_head *); extern void journal_sync_buffer (struct buffer_head *); -extern void journal_invalidatepage(journal_t *, +extern void jbd2_journal_invalidatepage(journal_t *, struct page *, unsigned long); -extern int journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); -extern int journal_stop(handle_t *); -extern int journal_flush (journal_t *); -extern void journal_lock_updates (journal_t *); -extern void journal_unlock_updates (journal_t *); +extern int jbd2_journal_try_to_free_buffers(journal_t *, struct page *, gfp_t); +extern int jbd2_journal_stop(handle_t *); +extern int jbd2_journal_flush (journal_t *); +extern void jbd2_journal_lock_updates (journal_t *); +extern void jbd2_journal_unlock_updates (journal_t *); -extern journal_t * journal_init_dev(struct block_device *bdev, +extern journal_t * jbd2_journal_init_dev(struct block_device *bdev, struct block_device *fs_dev, int start, int len, int bsize); -extern journal_t * journal_init_inode (struct inode *); -extern int journal_update_format (journal_t *); -extern int journal_check_used_features +extern journal_t * jbd2_journal_init_inode (struct inode *); +extern int jbd2_journal_update_format (journal_t *); +extern int jbd2_journal_check_used_features (journal_t *, unsigned long, unsigned long, unsigned long); -extern int journal_check_available_features +extern int jbd2_journal_check_available_features (journal_t *, unsigned long, unsigned long, unsigned long); -extern int journal_set_features +extern int jbd2_journal_set_features (journal_t *, unsigned long, unsigned long, unsigned long); -extern int journal_create (journal_t *); -extern int journal_load (journal_t *journal); -extern void journal_destroy (journal_t *); -extern int journal_recover (journal_t *journal); -extern int journal_wipe (journal_t *, int); -extern int journal_skip_recovery (journal_t *); -extern void journal_update_superblock (journal_t *, int); -extern void __journal_abort_hard (journal_t *); -extern void journal_abort (journal_t *, int); -extern int journal_errno (journal_t *); -extern void journal_ack_err (journal_t *); -extern int journal_clear_err (journal_t *); -extern int journal_bmap(journal_t *, unsigned 
long, unsigned long *); -extern int journal_force_commit(journal_t *); +extern int jbd2_journal_create (journal_t *); +extern int jbd2_journal_load (journal_t *journal); +extern void jbd2_journal_destroy (journal_t *); +extern int jbd2_journal_recover (journal_t *journal); +extern int jbd2_journal_wipe (journal_t *, int); +extern int jbd2_journal_skip_recovery (journal_t *); +extern void jbd2_journal_update_superblock (journal_t *, int); +extern void __jbd2_journal_abort_hard (journal_t *); +extern void jbd2_journal_abort (journal_t *, int); +extern int jbd2_journal_errno (journal_t *); +extern void jbd2_journal_ack_err (journal_t *); +extern int jbd2_journal_clear_err (journal_t *); +extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long *); +extern int jbd2_journal_force_commit(journal_t *); /* * journal_head management */ -struct journal_head *journal_add_journal_head(struct buffer_head *bh); -struct journal_head *journal_grab_journal_head(struct buffer_head *bh); -void journal_remove_journal_head(struct buffer_head *bh); -void journal_put_journal_head(struct journal_head *jh); +struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh); +struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh); +void jbd2_journal_remove_journal_head(struct buffer_head *bh); +void jbd2_journal_put_journal_head(struct journal_head *jh); /* * handle management */ -extern kmem_cache_t *jbd_handle_cache; +extern kmem_cache_t *jbd2_handle_cache; static inline handle_t *jbd_alloc_handle(gfp_t gfp_flags) { - return kmem_cache_alloc(jbd_handle_cache, gfp_flags); + return kmem_cache_alloc(jbd2_handle_cache, gfp_flags); } static inline void jbd_free_handle(handle_t *handle) { - kmem_cache_free(jbd_handle_cache, handle); + kmem_cache_free(jbd2_handle_cache, handle); } /* Primary revoke support */ #define JOURNAL_REVOKE_DEFAULT_HASH 256 -extern int journal_init_revoke(journal_t *, int); -extern void journal_destroy_revoke_caches(void); -extern int journal_init_revoke_caches(void); +extern int jbd2_journal_init_revoke(journal_t *, int); +extern void jbd2_journal_destroy_revoke_caches(void); +extern int jbd2_journal_init_revoke_caches(void); -extern void journal_destroy_revoke(journal_t *); -extern int journal_revoke (handle_t *, +extern void jbd2_journal_destroy_revoke(journal_t *); +extern int jbd2_journal_revoke (handle_t *, unsigned long, struct buffer_head *); -extern int journal_cancel_revoke(handle_t *, struct journal_head *); -extern void journal_write_revoke_records(journal_t *, transaction_t *); +extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *); +extern void jbd2_journal_write_revoke_records(journal_t *, transaction_t *); /* Recovery revoke support */ -extern int journal_set_revoke(journal_t *, unsigned long, tid_t); -extern int journal_test_revoke(journal_t *, unsigned long, tid_t); -extern void journal_clear_revoke(journal_t *); -extern void journal_switch_revoke_table(journal_t *journal); +extern int jbd2_journal_set_revoke(journal_t *, unsigned long, tid_t); +extern int jbd2_journal_test_revoke(journal_t *, unsigned long, tid_t); +extern void jbd2_journal_clear_revoke(journal_t *); +extern void jbd2_journal_switch_revoke_table(journal_t *journal); /* * The log thread user interface: @@ -986,17 +986,17 @@ extern void journal_switch_revoke_table(journal_t *journal); * transitions on demand. 
*/ -int __log_space_left(journal_t *); /* Called with journal locked */ -int log_start_commit(journal_t *journal, tid_t tid); -int __log_start_commit(journal_t *journal, tid_t tid); -int journal_start_commit(journal_t *journal, tid_t *tid); -int journal_force_commit_nested(journal_t *journal); -int log_wait_commit(journal_t *journal, tid_t tid); -int log_do_checkpoint(journal_t *journal); +int __jbd2_log_space_left(journal_t *); /* Called with journal locked */ +int jbd2_log_start_commit(journal_t *journal, tid_t tid); +int __jbd2_log_start_commit(journal_t *journal, tid_t tid); +int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); +int jbd2_journal_force_commit_nested(journal_t *journal); +int jbd2_log_wait_commit(journal_t *journal, tid_t tid); +int jbd2_log_do_checkpoint(journal_t *journal); -void __log_wait_for_space(journal_t *journal); -extern void __journal_drop_transaction(journal_t *, transaction_t *); -extern int cleanup_journal_tail(journal_t *); +void __jbd2_log_wait_for_space(journal_t *journal); +extern void __jbd2_journal_drop_transaction(journal_t *, transaction_t *); +extern int jbd2_cleanup_journal_tail(journal_t *); /* Debugging code only: */ @@ -1010,7 +1010,7 @@ do { \ /* * is_journal_abort * - * Simple test wrapper function to test the JFS_ABORT state flag. This + * Simple test wrapper function to test the JBD2_ABORT state flag. This * bit, when set, indicates that we have had a fatal error somewhere, * either inside the journaling layer or indicated to us by the client * (eg. ext3), and that we and should not commit any further @@ -1019,7 +1019,7 @@ do { \ static inline int is_journal_aborted(journal_t *journal) { - return journal->j_flags & JFS_ABORT; + return journal->j_flags & JBD2_ABORT; } static inline int is_handle_aborted(handle_t *handle) @@ -1029,7 +1029,7 @@ static inline int is_handle_aborted(handle_t *handle) return is_journal_aborted(handle->h_transaction->t_journal); } -static inline void journal_abort_handle(handle_t *handle) +static inline void jbd2_journal_abort_handle(handle_t *handle) { handle->h_aborted = 1; } @@ -1051,7 +1051,7 @@ static inline int tid_geq(tid_t x, tid_t y) return (difference >= 0); } -extern int journal_blocks_per_page(struct inode *inode); +extern int jbd2_journal_blocks_per_page(struct inode *inode); /* * Return the minimum number of blocks which must be free in the journal -- cgit v1.2.3 From c3fcc8137ce4296ad6ab94f88bd60cbe03d21527 Mon Sep 17 00:00:00 2001 From: Dave Kleikamp <shaggy@austin.ibm.com> Date: Wed, 11 Oct 2006 01:21:02 -0700 Subject: [PATCH] jbd2: cleanup ext4_jbd.h To allow ext4 to build during the transition from jbd to jbd2, we have both ext4_jbd.h and ext4_jbd2.h in the tree. We no longer need the former. Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/ext4_jbd.h | 268 ----------------------------------------------- 1 file changed, 268 deletions(-) delete mode 100644 include/linux/ext4_jbd.h (limited to 'include') diff --git a/include/linux/ext4_jbd.h b/include/linux/ext4_jbd.h deleted file mode 100644 index 3dbf6c779037..000000000000 --- a/include/linux/ext4_jbd.h +++ /dev/null @@ -1,268 +0,0 @@ -/* - * linux/include/linux/ext4_jbd.h - * - * Written by Stephen C. 
Tweedie <sct@redhat.com>, 1999 - * - * Copyright 1998--1999 Red Hat corp --- All Rights Reserved - * - * This file is part of the Linux kernel and is made available under - * the terms of the GNU General Public License, version 2, or at your - * option, any later version, incorporated herein by reference. - * - * Ext4-specific journaling extensions. - */ - -#ifndef _LINUX_EXT4_JBD_H -#define _LINUX_EXT4_JBD_H - -#include <linux/fs.h> -#include <linux/jbd.h> -#include <linux/ext4_fs.h> - -#define EXT4_JOURNAL(inode) (EXT4_SB((inode)->i_sb)->s_journal) - -/* Define the number of blocks we need to account to a transaction to - * modify one block of data. - * - * We may have to touch one inode, one bitmap buffer, up to three - * indirection blocks, the group and superblock summaries, and the data - * block to complete the transaction. */ - -#define EXT4_SINGLEDATA_TRANS_BLOCKS 8U - -/* Extended attribute operations touch at most two data buffers, - * two bitmap buffers, and two group summaries, in addition to the inode - * and the superblock, which are already accounted for. */ - -#define EXT4_XATTR_TRANS_BLOCKS 6U - -/* Define the minimum size for a transaction which modifies data. This - * needs to take into account the fact that we may end up modifying two - * quota files too (one for the group, one for the user quota). The - * superblock only gets updated once, of course, so don't bother - * counting that again for the quota updates. */ - -#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS + \ - EXT4_XATTR_TRANS_BLOCKS - 2 + \ - 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) - -/* Delete operations potentially hit one directory's namespace plus an - * entire inode, plus arbitrary amounts of bitmap/indirection data. Be - * generous. We can grow the delete transaction later if necessary. */ - -#define EXT4_DELETE_TRANS_BLOCKS(sb) (2 * EXT4_DATA_TRANS_BLOCKS(sb) + 64) - -/* Define an arbitrary limit for the amount of data we will anticipate - * writing to any given transaction. For unbounded transactions such as - * write(2) and truncate(2) we can write more than this, but we always - * start off at the maximum transaction size and grow the transaction - * optimistically as we go. */ - -#define EXT4_MAX_TRANS_DATA 64U - -/* We break up a large truncate or write transaction once the handle's - * buffer credits gets this low, we need either to extend the - * transaction or to start a new one. Reserve enough space here for - * inode, bitmap, superblock, group and indirection updates for at least - * one block, plus two quota updates. Quota allocations are not - * needed. */ - -#define EXT4_RESERVE_TRANS_BLOCKS 12U - -#define EXT4_INDEX_EXTRA_TRANS_BLOCKS 8 - -#ifdef CONFIG_QUOTA -/* Amount of blocks needed for quota update - we know that the structure was - * allocated so we need to update only inode+data */ -#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0) -/* Amount of blocks needed for quota insert/delete - we do some block writes - * but inode, sb and group updates are done only once */ -#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ - (EXT4_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0) -#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? 
(DQUOT_DEL_ALLOC*\ - (EXT4_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0) -#else -#define EXT4_QUOTA_TRANS_BLOCKS(sb) 0 -#define EXT4_QUOTA_INIT_BLOCKS(sb) 0 -#define EXT4_QUOTA_DEL_BLOCKS(sb) 0 -#endif - -int -ext4_mark_iloc_dirty(handle_t *handle, - struct inode *inode, - struct ext4_iloc *iloc); - -/* - * On success, We end up with an outstanding reference count against - * iloc->bh. This _must_ be cleaned up later. - */ - -int ext4_reserve_inode_write(handle_t *handle, struct inode *inode, - struct ext4_iloc *iloc); - -int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); - -/* - * Wrapper functions with which ext4 calls into JBD. The intent here is - * to allow these to be turned into appropriate stubs so ext4 can control - * ext2 filesystems, so ext2+ext4 systems only nee one fs. This work hasn't - * been done yet. - */ - -void ext4_journal_abort_handle(const char *caller, const char *err_fn, - struct buffer_head *bh, handle_t *handle, int err); - -static inline int -__ext4_journal_get_undo_access(const char *where, handle_t *handle, - struct buffer_head *bh) -{ - int err = journal_get_undo_access(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} - -static inline int -__ext4_journal_get_write_access(const char *where, handle_t *handle, - struct buffer_head *bh) -{ - int err = journal_get_write_access(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} - -static inline void -ext4_journal_release_buffer(handle_t *handle, struct buffer_head *bh) -{ - journal_release_buffer(handle, bh); -} - -static inline int -__ext4_journal_forget(const char *where, handle_t *handle, struct buffer_head *bh) -{ - int err = journal_forget(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} - -static inline int -__ext4_journal_revoke(const char *where, handle_t *handle, - unsigned long blocknr, struct buffer_head *bh) -{ - int err = journal_revoke(handle, blocknr, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} - -static inline int -__ext4_journal_get_create_access(const char *where, - handle_t *handle, struct buffer_head *bh) -{ - int err = journal_get_create_access(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} - -static inline int -__ext4_journal_dirty_metadata(const char *where, - handle_t *handle, struct buffer_head *bh) -{ - int err = journal_dirty_metadata(handle, bh); - if (err) - ext4_journal_abort_handle(where, __FUNCTION__, bh, handle,err); - return err; -} - - -#define ext4_journal_get_undo_access(handle, bh) \ - __ext4_journal_get_undo_access(__FUNCTION__, (handle), (bh)) -#define ext4_journal_get_write_access(handle, bh) \ - __ext4_journal_get_write_access(__FUNCTION__, (handle), (bh)) -#define ext4_journal_revoke(handle, blocknr, bh) \ - __ext4_journal_revoke(__FUNCTION__, (handle), (blocknr), (bh)) -#define ext4_journal_get_create_access(handle, bh) \ - __ext4_journal_get_create_access(__FUNCTION__, (handle), (bh)) -#define ext4_journal_dirty_metadata(handle, bh) \ - __ext4_journal_dirty_metadata(__FUNCTION__, (handle), (bh)) -#define ext4_journal_forget(handle, bh) \ - __ext4_journal_forget(__FUNCTION__, (handle), (bh)) - -int ext4_journal_dirty_data(handle_t *handle, struct buffer_head *bh); - -handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks); -int __ext4_journal_stop(const 
char *where, handle_t *handle); - -static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks) -{ - return ext4_journal_start_sb(inode->i_sb, nblocks); -} - -#define ext4_journal_stop(handle) \ - __ext4_journal_stop(__FUNCTION__, (handle)) - -static inline handle_t *ext4_journal_current_handle(void) -{ - return journal_current_handle(); -} - -static inline int ext4_journal_extend(handle_t *handle, int nblocks) -{ - return journal_extend(handle, nblocks); -} - -static inline int ext4_journal_restart(handle_t *handle, int nblocks) -{ - return journal_restart(handle, nblocks); -} - -static inline int ext4_journal_blocks_per_page(struct inode *inode) -{ - return journal_blocks_per_page(inode); -} - -static inline int ext4_journal_force_commit(journal_t *journal) -{ - return journal_force_commit(journal); -} - -/* super.c */ -int ext4_force_commit(struct super_block *sb); - -static inline int ext4_should_journal_data(struct inode *inode) -{ - if (!S_ISREG(inode->i_mode)) - return 1; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) - return 1; - if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) - return 1; - return 0; -} - -static inline int ext4_should_order_data(struct inode *inode) -{ - if (!S_ISREG(inode->i_mode)) - return 0; - if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) - return 0; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA) - return 1; - return 0; -} - -static inline int ext4_should_writeback_data(struct inode *inode) -{ - if (!S_ISREG(inode->i_mode)) - return 0; - if (EXT4_I(inode)->i_flags & EXT4_JOURNAL_DATA_FL) - return 0; - if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_WRITEBACK_DATA) - return 1; - return 0; -} - -#endif /* _LINUX_EXT4_JBD_H */ -- cgit v1.2.3 From a86c61812637c7dd0c57e29880cffd477b62f2e7 Mon Sep 17 00:00:00 2001 From: Alex Tomas <alex@clusterfs.com> Date: Wed, 11 Oct 2006 01:21:03 -0700 Subject: [PATCH] ext3: add extent map support On disk extents format: /* * this is extent on-disk structure * it's used at the bottom of the tree */ struct ext3_extent { __le32 ee_block; /* first logical block extent covers */ __le16 ee_len; /* number of blocks covered by extent */ __le16 ee_start_hi; /* high 16 bits of physical block */ __le32 ee_start; /* low 32 bits of physical block */ }; Signed-off-by: Alex Tomas <alex@clusterfs.com> Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/ext4/Makefile | 2 +- fs/ext4/dir.c | 3 +- fs/ext4/extents.c | 2075 +++++++++++++++++++++++++++++++++++++++ fs/ext4/ialloc.c | 11 + fs/ext4/inode.c | 17 +- fs/ext4/ioctl.c | 1 - fs/ext4/super.c | 10 +- include/linux/ext4_fs.h | 33 +- include/linux/ext4_fs_extents.h | 196 ++++ include/linux/ext4_fs_i.h | 13 + include/linux/ext4_fs_sb.h | 10 + include/linux/ext4_jbd2.h | 15 +- 12 files changed, 2368 insertions(+), 18 deletions(-) create mode 100644 fs/ext4/extents.c create mode 100644 include/linux/ext4_fs_extents.h (limited to 'include') diff --git a/fs/ext4/Makefile b/fs/ext4/Makefile index 09c487893e4a..a6acb96ebeb9 100644 --- a/fs/ext4/Makefile +++ b/fs/ext4/Makefile @@ -5,7 +5,7 @@ obj-$(CONFIG_EXT4DEV_FS) += ext4dev.o ext4dev-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o + ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o ext4dev-$(CONFIG_EXT4DEV_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext4dev-$(CONFIG_EXT4DEV_FS_POSIX_ACL) += acl.o
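[Editor's sketch] The commit message above lays out the on-disk extent format: within a single extent, logical-to-physical translation is plain offset arithmetic, the same calculation ext4_ext_find_goal() performs later in this patch via le32_to_cpu(ex->ee_start) + (block - le32_to_cpu(ex->ee_block)). The following minimal user-space sketch illustrates that mapping; the host-endian fields and the extent_lookup() helper are illustrative assumptions, not code from this patch:

/* illustrative sketch only -- not part of the patch; fields are kept
 * host-endian here, whereas on disk they are little-endian (__le32/__le16) */
#include <stdint.h>
#include <stdio.h>

struct extent {
	uint32_t ee_block;	/* first logical block the extent covers */
	uint16_t ee_len;	/* number of blocks covered by the extent */
	uint16_t ee_start_hi;	/* high 16 bits of the physical start block */
	uint32_t ee_start;	/* low 32 bits of the physical start block */
};

/* return 1 and set *physical if 'logical' falls inside the extent */
static int extent_lookup(const struct extent *ex, uint32_t logical,
			 uint64_t *physical)
{
	if (logical < ex->ee_block || logical >= ex->ee_block + ex->ee_len)
		return 0;	/* not covered by this extent */
	*physical = (((uint64_t)ex->ee_start_hi << 32) | ex->ee_start)
		    + (logical - ex->ee_block);
	return 1;
}

int main(void)
{
	/* one extent: logical blocks 100..131 stored at physical 5000.. */
	struct extent ex = { 100, 32, 0, 5000 };
	uint64_t phys;

	if (extent_lookup(&ex, 110, &phys))	/* prints physical 5010 */
		printf("logical 110 -> physical %llu\n",
		       (unsigned long long)phys);
	return 0;
}

Note that the code added by this patch does not combine ee_start_hi into block numbers yet (see the "FIXME: 48bit support" comment and the nearex->ee_start_hi = 0 assignment in extents.c); the sketch only shows how the split field is meant to extend physical block numbers to 48 bits.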
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c index 9833d5d00c46..18ac173af575 100644 --- a/fs/ext4/dir.c +++ b/fs/ext4/dir.c @@ -134,8 +134,7 @@ static int ext4_readdir(struct file * filp, struct buffer_head *bh = NULL; map_bh.b_state = 0; - err = ext4_get_blocks_handle(NULL, inode, blk, 1, - &map_bh, 0, 0); + err = ext4_get_blocks_wrap(NULL, inode, blk, 1, &map_bh, 0, 0); if (err > 0) { page_cache_readahead(sb->s_bdev->bd_inode->i_mapping, &filp->f_ra, diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c new file mode 100644 index 000000000000..f67b2ef6a71f --- /dev/null +++ b/fs/ext4/extents.c @@ -0,0 +1,2075 @@ +/* + * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas <alex@clusterfs.com> + * + * Architecture independence: + * Copyright (c) 2005, Bull S.A. + * Written by Pierre Peiffer <pierre.peiffer@bull.net> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ + +/* + * Extents support for EXT4 + * + * TODO: + * - ext4*_error() should be used in some situations + * - analyze all BUG()/BUG_ON(), use -EIO where appropriate + * - smart tree reduction + */ + +#include <linux/module.h> +#include <linux/fs.h> +#include <linux/time.h> +#include <linux/ext4_jbd2.h> +#include <linux/jbd.h> +#include <linux/smp_lock.h> +#include <linux/highuid.h> +#include <linux/pagemap.h> +#include <linux/quotaops.h> +#include <linux/string.h> +#include <linux/slab.h> +#include <linux/ext4_fs_extents.h> +#include <asm/uaccess.h> + + +static int ext4_ext_check_header(const char *function, struct inode *inode, + struct ext4_extent_header *eh) +{ + const char *error_msg = NULL; + + if (unlikely(eh->eh_magic != EXT4_EXT_MAGIC)) { + error_msg = "invalid magic"; + goto corrupted; + } + if (unlikely(eh->eh_max == 0)) { + error_msg = "invalid eh_max"; + goto corrupted; + } + if (unlikely(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max))) { + error_msg = "invalid eh_entries"; + goto corrupted; + } + return 0; + +corrupted: + ext4_error(inode->i_sb, function, + "bad header in inode #%lu: %s - magic %x, " + "entries %u, max %u, depth %u", + inode->i_ino, error_msg, le16_to_cpu(eh->eh_magic), + le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max), + le16_to_cpu(eh->eh_depth)); + + return -EIO; +} + +static handle_t *ext4_ext_journal_restart(handle_t *handle, int needed) +{ + int err; + + if (handle->h_buffer_credits > needed) + return handle; + if (!ext4_journal_extend(handle, needed)) + return handle; + err = ext4_journal_restart(handle, needed); + + return handle; +} + +/* + * could return: + * - EROFS + * - ENOMEM + */ +static int ext4_ext_get_access(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) +{ + if (path->p_bh) { + /* path points to block */ + return ext4_journal_get_write_access(handle, path->p_bh); + } + /* path points to leaf/index in inode body */ + /* we use in-core data, no need to protect them */ + return 0; +} + +/* + * could return: + * - 
EROFS + * - ENOMEM + * - EIO + */ +static int ext4_ext_dirty(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) +{ + int err; + if (path->p_bh) { + /* path points to block */ + err = ext4_journal_dirty_metadata(handle, path->p_bh); + } else { + /* path points to leaf/index in inode body */ + err = ext4_mark_inode_dirty(handle, inode); + } + return err; +} + +static int ext4_ext_find_goal(struct inode *inode, + struct ext4_ext_path *path, + unsigned long block) +{ + struct ext4_inode_info *ei = EXT4_I(inode); + unsigned long bg_start; + unsigned long colour; + int depth; + + if (path) { + struct ext4_extent *ex; + depth = path->p_depth; + + /* try to predict block placement */ + if ((ex = path[depth].p_ext)) + return le32_to_cpu(ex->ee_start) + + (block - le32_to_cpu(ex->ee_block)); + + /* it looks index is empty + * try to find starting from index itself */ + if (path[depth].p_bh) + return path[depth].p_bh->b_blocknr; + } + + /* OK. use inode's group */ + bg_start = (ei->i_block_group * EXT4_BLOCKS_PER_GROUP(inode->i_sb)) + + le32_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_first_data_block); + colour = (current->pid % 16) * + (EXT4_BLOCKS_PER_GROUP(inode->i_sb) / 16); + return bg_start + colour + block; +} + +static int +ext4_ext_new_block(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *ex, int *err) +{ + int goal, newblock; + + goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); + newblock = ext4_new_block(handle, inode, goal, err); + return newblock; +} + +static inline int ext4_ext_space_block(struct inode *inode) +{ + int size; + + size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) + / sizeof(struct ext4_extent); +#ifdef AGRESSIVE_TEST + if (size > 6) + size = 6; +#endif + return size; +} + +static inline int ext4_ext_space_block_idx(struct inode *inode) +{ + int size; + + size = (inode->i_sb->s_blocksize - sizeof(struct ext4_extent_header)) + / sizeof(struct ext4_extent_idx); +#ifdef AGRESSIVE_TEST + if (size > 5) + size = 5; +#endif + return size; +} + +static inline int ext4_ext_space_root(struct inode *inode) +{ + int size; + + size = sizeof(EXT4_I(inode)->i_data); + size -= sizeof(struct ext4_extent_header); + size /= sizeof(struct ext4_extent); +#ifdef AGRESSIVE_TEST + if (size > 3) + size = 3; +#endif + return size; +} + +static inline int ext4_ext_space_root_idx(struct inode *inode) +{ + int size; + + size = sizeof(EXT4_I(inode)->i_data); + size -= sizeof(struct ext4_extent_header); + size /= sizeof(struct ext4_extent_idx); +#ifdef AGRESSIVE_TEST + if (size > 4) + size = 4; +#endif + return size; +} + +#ifdef EXT_DEBUG +static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) +{ + int k, l = path->p_depth; + + ext_debug("path:"); + for (k = 0; k <= l; k++, path++) { + if (path->p_idx) { + ext_debug(" %d->%d", le32_to_cpu(path->p_idx->ei_block), + le32_to_cpu(path->p_idx->ei_leaf)); + } else if (path->p_ext) { + ext_debug(" %d:%d:%d", + le32_to_cpu(path->p_ext->ee_block), + le16_to_cpu(path->p_ext->ee_len), + le32_to_cpu(path->p_ext->ee_start)); + } else + ext_debug(" []"); + } + ext_debug("\n"); +} + +static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) +{ + int depth = ext_depth(inode); + struct ext4_extent_header *eh; + struct ext4_extent *ex; + int i; + + if (!path) + return; + + eh = path[depth].p_hdr; + ex = EXT_FIRST_EXTENT(eh); + + for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { + ext_debug("%d:%d:%d ", 
le32_to_cpu(ex->ee_block), + le16_to_cpu(ex->ee_len), + le32_to_cpu(ex->ee_start)); + } + ext_debug("\n"); +} +#else +#define ext4_ext_show_path(inode,path) +#define ext4_ext_show_leaf(inode,path) +#endif + +static void ext4_ext_drop_refs(struct ext4_ext_path *path) +{ + int depth = path->p_depth; + int i; + + for (i = 0; i <= depth; i++, path++) + if (path->p_bh) { + brelse(path->p_bh); + path->p_bh = NULL; + } +} + +/* + * binary search for closest index by given block + */ +static void +ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int block) +{ + struct ext4_extent_header *eh = path->p_hdr; + struct ext4_extent_idx *r, *l, *m; + + BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC); + BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); + BUG_ON(le16_to_cpu(eh->eh_entries) <= 0); + + ext_debug("binsearch for %d(idx): ", block); + + l = EXT_FIRST_INDEX(eh) + 1; + r = EXT_FIRST_INDEX(eh) + le16_to_cpu(eh->eh_entries) - 1; + while (l <= r) { + m = l + (r - l) / 2; + if (block < le32_to_cpu(m->ei_block)) + r = m - 1; + else + l = m + 1; + ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ei_block, + m, m->ei_block, r, r->ei_block); + } + + path->p_idx = l - 1; + ext_debug(" -> %d->%d ", le32_to_cpu(path->p_idx->ei_block), + le32_to_cpu(path->p_idx->ei_leaf)); + +#ifdef CHECK_BINSEARCH + { + struct ext4_extent_idx *chix, *ix; + int k; + + chix = ix = EXT_FIRST_INDEX(eh); + for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ix++) { + if (k != 0 && + le32_to_cpu(ix->ei_block) <= le32_to_cpu(ix[-1].ei_block)) { + printk("k=%d, ix=0x%p, first=0x%p\n", k, + ix, EXT_FIRST_INDEX(eh)); + printk("%u <= %u\n", + le32_to_cpu(ix->ei_block), + le32_to_cpu(ix[-1].ei_block)); + } + BUG_ON(k && le32_to_cpu(ix->ei_block) + <= le32_to_cpu(ix[-1].ei_block)); + if (block < le32_to_cpu(ix->ei_block)) + break; + chix = ix; + } + BUG_ON(chix != path->p_idx); + } +#endif + +} + +/* + * binary search for closest extent by given block + */ +static void +ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) +{ + struct ext4_extent_header *eh = path->p_hdr; + struct ext4_extent *r, *l, *m; + + BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC); + BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); + + if (eh->eh_entries == 0) { + /* + * this leaf is empty yet: + * we get such a leaf in split/add case + */ + return; + } + + ext_debug("binsearch for %d: ", block); + + l = EXT_FIRST_EXTENT(eh) + 1; + r = EXT_FIRST_EXTENT(eh) + le16_to_cpu(eh->eh_entries) - 1; + + while (l <= r) { + m = l + (r - l) / 2; + if (block < le32_to_cpu(m->ee_block)) + r = m - 1; + else + l = m + 1; + ext_debug("%p(%u):%p(%u):%p(%u) ", l, l->ee_block, + m, m->ee_block, r, r->ee_block); + } + + path->p_ext = l - 1; + ext_debug(" -> %d:%d:%d ", + le32_to_cpu(path->p_ext->ee_block), + le32_to_cpu(path->p_ext->ee_start), + le16_to_cpu(path->p_ext->ee_len)); + +#ifdef CHECK_BINSEARCH + { + struct ext4_extent *chex, *ex; + int k; + + chex = ex = EXT_FIRST_EXTENT(eh); + for (k = 0; k < le16_to_cpu(eh->eh_entries); k++, ex++) { + BUG_ON(k && le32_to_cpu(ex->ee_block) + <= le32_to_cpu(ex[-1].ee_block)); + if (block < le32_to_cpu(ex->ee_block)) + break; + chex = ex; + } + BUG_ON(chex != path->p_ext); + } +#endif + +} + +int ext4_ext_tree_init(handle_t *handle, struct inode *inode) +{ + struct ext4_extent_header *eh; + + eh = ext_inode_hdr(inode); + eh->eh_depth = 0; + eh->eh_entries = 0; + eh->eh_magic = EXT4_EXT_MAGIC; + eh->eh_max = cpu_to_le16(ext4_ext_space_root(inode)); + ext4_mark_inode_dirty(handle, inode); + 
ext4_ext_invalidate_cache(inode); + return 0; +} + +struct ext4_ext_path * +ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path) +{ + struct ext4_extent_header *eh; + struct buffer_head *bh; + short int depth, i, ppos = 0, alloc = 0; + + eh = ext_inode_hdr(inode); + BUG_ON(eh == NULL); + if (ext4_ext_check_header(__FUNCTION__, inode, eh)) + return ERR_PTR(-EIO); + + i = depth = ext_depth(inode); + + /* account possible depth increase */ + if (!path) { + path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 2), + GFP_NOFS); + if (!path) + return ERR_PTR(-ENOMEM); + alloc = 1; + } + memset(path, 0, sizeof(struct ext4_ext_path) * (depth + 1)); + path[0].p_hdr = eh; + + /* walk through the tree */ + while (i) { + ext_debug("depth %d: num %d, max %d\n", + ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); + ext4_ext_binsearch_idx(inode, path + ppos, block); + path[ppos].p_block = le32_to_cpu(path[ppos].p_idx->ei_leaf); + path[ppos].p_depth = i; + path[ppos].p_ext = NULL; + + bh = sb_bread(inode->i_sb, path[ppos].p_block); + if (!bh) + goto err; + + eh = ext_block_hdr(bh); + ppos++; + BUG_ON(ppos > depth); + path[ppos].p_bh = bh; + path[ppos].p_hdr = eh; + i--; + + if (ext4_ext_check_header(__FUNCTION__, inode, eh)) + goto err; + } + + path[ppos].p_depth = i; + path[ppos].p_hdr = eh; + path[ppos].p_ext = NULL; + path[ppos].p_idx = NULL; + + if (ext4_ext_check_header(__FUNCTION__, inode, eh)) + goto err; + + /* find extent */ + ext4_ext_binsearch(inode, path + ppos, block); + + ext4_ext_show_path(inode, path); + + return path; + +err: + ext4_ext_drop_refs(path); + if (alloc) + kfree(path); + return ERR_PTR(-EIO); +} + +/* + * insert new index [logical;ptr] into the block at curp + * it checks where to insert: before curp or after curp + */ +static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, + struct ext4_ext_path *curp, + int logical, int ptr) +{ + struct ext4_extent_idx *ix; + int len, err; + + if ((err = ext4_ext_get_access(handle, inode, curp))) + return err; + + BUG_ON(logical == le32_to_cpu(curp->p_idx->ei_block)); + len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx; + if (logical > le32_to_cpu(curp->p_idx->ei_block)) { + /* insert after */ + if (curp->p_idx != EXT_LAST_INDEX(curp->p_hdr)) { + len = (len - 1) * sizeof(struct ext4_extent_idx); + len = len < 0 ? 0 : len; + ext_debug("insert new index %d after: %d. " + "move %d from 0x%p to 0x%p\n", + logical, ptr, len, + (curp->p_idx + 1), (curp->p_idx + 2)); + memmove(curp->p_idx + 2, curp->p_idx + 1, len); + } + ix = curp->p_idx + 1; + } else { + /* insert before */ + len = len * sizeof(struct ext4_extent_idx); + len = len < 0 ? 0 : len; + ext_debug("insert new index %d before: %d. 
" + "move %d from 0x%p to 0x%p\n", + logical, ptr, len, + curp->p_idx, (curp->p_idx + 1)); + memmove(curp->p_idx + 1, curp->p_idx, len); + ix = curp->p_idx; + } + + ix->ei_block = cpu_to_le32(logical); + ix->ei_leaf = cpu_to_le32(ptr); + curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1); + + BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) + > le16_to_cpu(curp->p_hdr->eh_max)); + BUG_ON(ix > EXT_LAST_INDEX(curp->p_hdr)); + + err = ext4_ext_dirty(handle, inode, curp); + ext4_std_error(inode->i_sb, err); + + return err; +} + +/* + * routine inserts new subtree into the path, using free index entry + * at depth 'at: + * - allocates all needed blocks (new leaf and all intermediate index blocks) + * - makes decision where to split + * - moves remaining extens and index entries (right to the split point) + * into the newly allocated blocks + * - initialize subtree + */ +static int ext4_ext_split(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *newext, int at) +{ + struct buffer_head *bh = NULL; + int depth = ext_depth(inode); + struct ext4_extent_header *neh; + struct ext4_extent_idx *fidx; + struct ext4_extent *ex; + int i = at, k, m, a; + unsigned long newblock, oldblock; + __le32 border; + int *ablocks = NULL; /* array of allocated blocks */ + int err = 0; + + /* make decision: where to split? */ + /* FIXME: now desicion is simplest: at current extent */ + + /* if current leaf will be splitted, then we should use + * border from split point */ + BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr)); + if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { + border = path[depth].p_ext[1].ee_block; + ext_debug("leaf will be splitted." + " next leaf starts at %d\n", + le32_to_cpu(border)); + } else { + border = newext->ee_block; + ext_debug("leaf will be added." + " next leaf starts at %d\n", + le32_to_cpu(border)); + } + + /* + * if error occurs, then we break processing + * and turn filesystem read-only. so, index won't + * be inserted and tree will be in consistent + * state. 
next mount will repair buffers too + */ + + /* + * get array to track all allocated blocks + * we need this to handle errors and free blocks + * upon them + */ + ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS); + if (!ablocks) + return -ENOMEM; + memset(ablocks, 0, sizeof(unsigned long) * depth); + + /* allocate all needed blocks */ + ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); + for (a = 0; a < depth - at; a++) { + newblock = ext4_ext_new_block(handle, inode, path, newext, &err); + if (newblock == 0) + goto cleanup; + ablocks[a] = newblock; + } + + /* initialize new leaf */ + newblock = ablocks[--a]; + BUG_ON(newblock == 0); + bh = sb_getblk(inode->i_sb, newblock); + if (!bh) { + err = -EIO; + goto cleanup; + } + lock_buffer(bh); + + if ((err = ext4_journal_get_create_access(handle, bh))) + goto cleanup; + + neh = ext_block_hdr(bh); + neh->eh_entries = 0; + neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); + neh->eh_magic = EXT4_EXT_MAGIC; + neh->eh_depth = 0; + ex = EXT_FIRST_EXTENT(neh); + + /* move remain of path[depth] to the new leaf */ + BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max); + /* start copy from next extent */ + /* TODO: we could do it by single memmove */ + m = 0; + path[depth].p_ext++; + while (path[depth].p_ext <= + EXT_MAX_EXTENT(path[depth].p_hdr)) { + ext_debug("move %d:%d:%d in new leaf %lu\n", + le32_to_cpu(path[depth].p_ext->ee_block), + le32_to_cpu(path[depth].p_ext->ee_start), + le16_to_cpu(path[depth].p_ext->ee_len), + newblock); + /*memmove(ex++, path[depth].p_ext++, + sizeof(struct ext4_extent)); + neh->eh_entries++;*/ + path[depth].p_ext++; + m++; + } + if (m) { + memmove(ex, path[depth].p_ext-m, sizeof(struct ext4_extent)*m); + neh->eh_entries = cpu_to_le16(le16_to_cpu(neh->eh_entries)+m); + } + + set_buffer_uptodate(bh); + unlock_buffer(bh); + + if ((err = ext4_journal_dirty_metadata(handle, bh))) + goto cleanup; + brelse(bh); + bh = NULL; + + /* correct old leaf */ + if (m) { + if ((err = ext4_ext_get_access(handle, inode, path + depth))) + goto cleanup; + path[depth].p_hdr->eh_entries = + cpu_to_le16(le16_to_cpu(path[depth].p_hdr->eh_entries)-m); + if ((err = ext4_ext_dirty(handle, inode, path + depth))) + goto cleanup; + + } + + /* create intermediate indexes */ + k = depth - at - 1; + BUG_ON(k < 0); + if (k) + ext_debug("create %d intermediate indices\n", k); + /* insert new index into current index block */ + /* current depth stored in i var */ + i = depth - 1; + while (k--) { + oldblock = newblock; + newblock = ablocks[--a]; + bh = sb_getblk(inode->i_sb, newblock); + if (!bh) { + err = -EIO; + goto cleanup; + } + lock_buffer(bh); + + if ((err = ext4_journal_get_create_access(handle, bh))) + goto cleanup; + + neh = ext_block_hdr(bh); + neh->eh_entries = cpu_to_le16(1); + neh->eh_magic = EXT4_EXT_MAGIC; + neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); + neh->eh_depth = cpu_to_le16(depth - i); + fidx = EXT_FIRST_INDEX(neh); + fidx->ei_block = border; + fidx->ei_leaf = cpu_to_le32(oldblock); + + ext_debug("int.index at %d (block %lu): %lu -> %lu\n", i, + newblock, (unsigned long) le32_to_cpu(border), + oldblock); + /* copy indexes */ + m = 0; + path[i].p_idx++; + + ext_debug("cur 0x%p, last 0x%p\n", path[i].p_idx, + EXT_MAX_INDEX(path[i].p_hdr)); + BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) != + EXT_LAST_INDEX(path[i].p_hdr)); + while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { + ext_debug("%d: move %d:%d in new index %lu\n", i, + le32_to_cpu(path[i].p_idx->ei_block), + 
le32_to_cpu(path[i].p_idx->ei_leaf), + newblock); + /*memmove(++fidx, path[i].p_idx++, + sizeof(struct ext4_extent_idx)); + neh->eh_entries++; + BUG_ON(neh->eh_entries > neh->eh_max);*/ + path[i].p_idx++; + m++; + } + if (m) { + memmove(++fidx, path[i].p_idx - m, + sizeof(struct ext4_extent_idx) * m); + neh->eh_entries = + cpu_to_le16(le16_to_cpu(neh->eh_entries) + m); + } + set_buffer_uptodate(bh); + unlock_buffer(bh); + + if ((err = ext4_journal_dirty_metadata(handle, bh))) + goto cleanup; + brelse(bh); + bh = NULL; + + /* correct old index */ + if (m) { + err = ext4_ext_get_access(handle, inode, path + i); + if (err) + goto cleanup; + path[i].p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path[i].p_hdr->eh_entries)-m); + err = ext4_ext_dirty(handle, inode, path + i); + if (err) + goto cleanup; + } + + i--; + } + + /* insert new index */ + if (err) + goto cleanup; + + err = ext4_ext_insert_index(handle, inode, path + at, + le32_to_cpu(border), newblock); + +cleanup: + if (bh) { + if (buffer_locked(bh)) + unlock_buffer(bh); + brelse(bh); + } + + if (err) { + /* free all allocated blocks in error case */ + for (i = 0; i < depth; i++) { + if (!ablocks[i]) + continue; + ext4_free_blocks(handle, inode, ablocks[i], 1); + } + } + kfree(ablocks); + + return err; +} + +/* + * routine implements tree growing procedure: + * - allocates new block + * - moves top-level data (index block or leaf) into the new block + * - initialize new top-level, creating index that points to the + * just created block + */ +static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *newext) +{ + struct ext4_ext_path *curp = path; + struct ext4_extent_header *neh; + struct ext4_extent_idx *fidx; + struct buffer_head *bh; + unsigned long newblock; + int err = 0; + + newblock = ext4_ext_new_block(handle, inode, path, newext, &err); + if (newblock == 0) + return err; + + bh = sb_getblk(inode->i_sb, newblock); + if (!bh) { + err = -EIO; + ext4_std_error(inode->i_sb, err); + return err; + } + lock_buffer(bh); + + if ((err = ext4_journal_get_create_access(handle, bh))) { + unlock_buffer(bh); + goto out; + } + + /* move top-level index/leaf into new block */ + memmove(bh->b_data, curp->p_hdr, sizeof(EXT4_I(inode)->i_data)); + + /* set size of new block */ + neh = ext_block_hdr(bh); + /* old root could have indexes or leaves + * so calculate e_max right way */ + if (ext_depth(inode)) + neh->eh_max = cpu_to_le16(ext4_ext_space_block_idx(inode)); + else + neh->eh_max = cpu_to_le16(ext4_ext_space_block(inode)); + neh->eh_magic = EXT4_EXT_MAGIC; + set_buffer_uptodate(bh); + unlock_buffer(bh); + + if ((err = ext4_journal_dirty_metadata(handle, bh))) + goto out; + + /* create index in new top-level index: num,max,pointer */ + if ((err = ext4_ext_get_access(handle, inode, curp))) + goto out; + + curp->p_hdr->eh_magic = EXT4_EXT_MAGIC; + curp->p_hdr->eh_max = cpu_to_le16(ext4_ext_space_root_idx(inode)); + curp->p_hdr->eh_entries = cpu_to_le16(1); + curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); + /* FIXME: it works, but actually path[0] can be index */ + curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; + curp->p_idx->ei_leaf = cpu_to_le32(newblock); + + neh = ext_inode_hdr(inode); + fidx = EXT_FIRST_INDEX(neh); + ext_debug("new root: num %d(%d), lblock %d, ptr %d\n", + le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), + le32_to_cpu(fidx->ei_block), le32_to_cpu(fidx->ei_leaf)); + + neh->eh_depth = cpu_to_le16(path->p_depth + 1); + err = 
ext4_ext_dirty(handle, inode, curp); +out: + brelse(bh); + + return err; +} + +/* + * routine finds empty index and adds new leaf. if no free index found + * then it requests in-depth growing + */ +static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *newext) +{ + struct ext4_ext_path *curp; + int depth, i, err = 0; + +repeat: + i = depth = ext_depth(inode); + + /* walk up the tree and look for free index entry */ + curp = path + depth; + while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) { + i--; + curp--; + } + + /* we use already allocated block for index block + * so, subsequent data blocks should be contiguous */ + if (EXT_HAS_FREE_INDEX(curp)) { + /* if we found index with free entry, then use that + * entry: create all needed subtree and add new leaf */ + err = ext4_ext_split(handle, inode, path, newext, i); + + /* refill path */ + ext4_ext_drop_refs(path); + path = ext4_ext_find_extent(inode, + le32_to_cpu(newext->ee_block), + path); + if (IS_ERR(path)) + err = PTR_ERR(path); + } else { + /* tree is full, time to grow in depth */ + err = ext4_ext_grow_indepth(handle, inode, path, newext); + if (err) + goto out; + + /* refill path */ + ext4_ext_drop_refs(path); + path = ext4_ext_find_extent(inode, + le32_to_cpu(newext->ee_block), + path); + if (IS_ERR(path)) { + err = PTR_ERR(path); + goto out; + } + + /* + * only first (depth 0 -> 1) produces free space + * in all other cases we have to split grown tree + */ + depth = ext_depth(inode); + if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { + /* now we need split */ + goto repeat; + } + } + +out: + return err; +} + +/* + * returns allocated block in subsequent extent or EXT_MAX_BLOCK + * NOTE: it considers block number from index entry as + * allocated block. thus, index entries have to be consistent + * with leaves + */ +static unsigned long +ext4_ext_next_allocated_block(struct ext4_ext_path *path) +{ + int depth; + + BUG_ON(path == NULL); + depth = path->p_depth; + + if (depth == 0 && path->p_ext == NULL) + return EXT_MAX_BLOCK; + + while (depth >= 0) { + if (depth == path->p_depth) { + /* leaf */ + if (path[depth].p_ext != + EXT_LAST_EXTENT(path[depth].p_hdr)) + return le32_to_cpu(path[depth].p_ext[1].ee_block); + } else { + /* index */ + if (path[depth].p_idx != + EXT_LAST_INDEX(path[depth].p_hdr)) + return le32_to_cpu(path[depth].p_idx[1].ei_block); + } + depth--; + } + + return EXT_MAX_BLOCK; +} + +/* + * returns first allocated block from next leaf or EXT_MAX_BLOCK + */ +static unsigned ext4_ext_next_leaf_block(struct inode *inode, + struct ext4_ext_path *path) +{ + int depth; + + BUG_ON(path == NULL); + depth = path->p_depth; + + /* zero-tree has no leaf blocks at all */ + if (depth == 0) + return EXT_MAX_BLOCK; + + /* go to index block */ + depth--; + + while (depth >= 0) { + if (path[depth].p_idx != + EXT_LAST_INDEX(path[depth].p_hdr)) + return le32_to_cpu(path[depth].p_idx[1].ei_block); + depth--; + } + + return EXT_MAX_BLOCK; +} + +/* + * if leaf gets modified and modified extent is first in the leaf + * then we have to correct all indexes above + * TODO: do we need to correct tree in all cases? 
+ */ +int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) +{ + struct ext4_extent_header *eh; + int depth = ext_depth(inode); + struct ext4_extent *ex; + __le32 border; + int k, err = 0; + + eh = path[depth].p_hdr; + ex = path[depth].p_ext; + BUG_ON(ex == NULL); + BUG_ON(eh == NULL); + + if (depth == 0) { + /* there is no tree at all */ + return 0; + } + + if (ex != EXT_FIRST_EXTENT(eh)) { + /* we correct tree if first leaf got modified only */ + return 0; + } + + /* + * TODO: we need correction if border is smaller than the current one + */ + k = depth - 1; + border = path[depth].p_ext->ee_block; + if ((err = ext4_ext_get_access(handle, inode, path + k))) + return err; + path[k].p_idx->ei_block = border; + if ((err = ext4_ext_dirty(handle, inode, path + k))) + return err; + + while (k--) { + /* change all left-side indexes */ + if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr)) + break; + if ((err = ext4_ext_get_access(handle, inode, path + k))) + break; + path[k].p_idx->ei_block = border; + if ((err = ext4_ext_dirty(handle, inode, path + k))) + break; + } + + return err; +} + +static int inline +ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, + struct ext4_extent *ex2) +{ + /* FIXME: 48bit support */ + if (le32_to_cpu(ex1->ee_block) + le16_to_cpu(ex1->ee_len) + != le32_to_cpu(ex2->ee_block)) + return 0; + +#ifdef AGRESSIVE_TEST + if (le16_to_cpu(ex1->ee_len) >= 4) + return 0; +#endif + + if (le32_to_cpu(ex1->ee_start) + le16_to_cpu(ex1->ee_len) + == le32_to_cpu(ex2->ee_start)) + return 1; + return 0; +} + +/* + * this routine tries to merge the requested extent into the existing + * extent or inserts requested extent as new one into the tree, + * creating new leaf in no-space case + */ +int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, + struct ext4_extent *newext) +{ + struct ext4_extent_header * eh; + struct ext4_extent *ex, *fex; + struct ext4_extent *nearex; /* nearest extent */ + struct ext4_ext_path *npath = NULL; + int depth, len, err, next; + + BUG_ON(newext->ee_len == 0); + depth = ext_depth(inode); + ex = path[depth].p_ext; + BUG_ON(path[depth].p_hdr == NULL); + + /* try to insert block into found extent and return */ + if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { + ext_debug("append %d block to %d:%d (from %d)\n", + le16_to_cpu(newext->ee_len), + le32_to_cpu(ex->ee_block), + le16_to_cpu(ex->ee_len), + le32_to_cpu(ex->ee_start)); + if ((err = ext4_ext_get_access(handle, inode, path + depth))) + return err; + ex->ee_len = cpu_to_le16(le16_to_cpu(ex->ee_len) + + le16_to_cpu(newext->ee_len)); + eh = path[depth].p_hdr; + nearex = ex; + goto merge; + } + +repeat: + depth = ext_depth(inode); + eh = path[depth].p_hdr; + if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) + goto has_space; + + /* probably next leaf has space for us?
*/ + fex = EXT_LAST_EXTENT(eh); + next = ext4_ext_next_leaf_block(inode, path); + if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block) + && next != EXT_MAX_BLOCK) { + ext_debug("next leaf block - %d\n", next); + BUG_ON(npath != NULL); + npath = ext4_ext_find_extent(inode, next, NULL); + if (IS_ERR(npath)) + return PTR_ERR(npath); + BUG_ON(npath->p_depth != path->p_depth); + eh = npath[depth].p_hdr; + if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) { + ext_debug("next leaf isnt full(%d)\n", + le16_to_cpu(eh->eh_entries)); + path = npath; + goto repeat; + } + ext_debug("next leaf has no free space(%d,%d)\n", + le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); + } + + /* + * there is no free space in found leaf + * we're gonna add new leaf in the tree + */ + err = ext4_ext_create_new_leaf(handle, inode, path, newext); + if (err) + goto cleanup; + depth = ext_depth(inode); + eh = path[depth].p_hdr; + +has_space: + nearex = path[depth].p_ext; + + if ((err = ext4_ext_get_access(handle, inode, path + depth))) + goto cleanup; + + if (!nearex) { + /* there is no extent in this leaf, create first one */ + ext_debug("first extent in the leaf: %d:%d:%d\n", + le32_to_cpu(newext->ee_block), + le32_to_cpu(newext->ee_start), + le16_to_cpu(newext->ee_len)); + path[depth].p_ext = EXT_FIRST_EXTENT(eh); + } else if (le32_to_cpu(newext->ee_block) + > le32_to_cpu(nearex->ee_block)) { +/* BUG_ON(newext->ee_block == nearex->ee_block); */ + if (nearex != EXT_LAST_EXTENT(eh)) { + len = EXT_MAX_EXTENT(eh) - nearex; + len = (len - 1) * sizeof(struct ext4_extent); + len = len < 0 ? 0 : len; + ext_debug("insert %d:%d:%d after: nearest 0x%p, " + "move %d from 0x%p to 0x%p\n", + le32_to_cpu(newext->ee_block), + le32_to_cpu(newext->ee_start), + le16_to_cpu(newext->ee_len), + nearex, len, nearex + 1, nearex + 2); + memmove(nearex + 2, nearex + 1, len); + } + path[depth].p_ext = nearex + 1; + } else { + BUG_ON(newext->ee_block == nearex->ee_block); + len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); + len = len < 0 ? 0 : len; + ext_debug("insert %d:%d:%d before: nearest 0x%p, " + "move %d from 0x%p to 0x%p\n", + le32_to_cpu(newext->ee_block), + le32_to_cpu(newext->ee_start), + le16_to_cpu(newext->ee_len), + nearex, len, nearex + 1, nearex + 2); + memmove(nearex + 1, nearex, len); + path[depth].p_ext = nearex; + } + + eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)+1); + nearex = path[depth].p_ext; + nearex->ee_block = newext->ee_block; + nearex->ee_start = newext->ee_start; + nearex->ee_len = newext->ee_len; + /* FIXME: support for large fs */ + nearex->ee_start_hi = 0; + +merge: + /* try to merge extents to the right */ + while (nearex < EXT_LAST_EXTENT(eh)) { + if (!ext4_can_extents_be_merged(inode, nearex, nearex + 1)) + break; + /* merge with next extent! 
*/ + nearex->ee_len = cpu_to_le16(le16_to_cpu(nearex->ee_len) + + le16_to_cpu(nearex[1].ee_len)); + if (nearex + 1 < EXT_LAST_EXTENT(eh)) { + len = (EXT_LAST_EXTENT(eh) - nearex - 1) + * sizeof(struct ext4_extent); + memmove(nearex + 1, nearex + 2, len); + } + eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); + BUG_ON(eh->eh_entries == 0); + } + + /* try to merge extents to the left */ + + /* time to correct all indexes above */ + err = ext4_ext_correct_indexes(handle, inode, path); + if (err) + goto cleanup; + + err = ext4_ext_dirty(handle, inode, path + depth); + +cleanup: + if (npath) { + ext4_ext_drop_refs(npath); + kfree(npath); + } + ext4_ext_tree_changed(inode); + ext4_ext_invalidate_cache(inode); + return err; +} + +int ext4_ext_walk_space(struct inode *inode, unsigned long block, + unsigned long num, ext_prepare_callback func, + void *cbdata) +{ + struct ext4_ext_path *path = NULL; + struct ext4_ext_cache cbex; + struct ext4_extent *ex; + unsigned long next, start = 0, end = 0; + unsigned long last = block + num; + int depth, exists, err = 0; + + BUG_ON(func == NULL); + BUG_ON(inode == NULL); + + while (block < last && block != EXT_MAX_BLOCK) { + num = last - block; + /* find extent for this block */ + path = ext4_ext_find_extent(inode, block, path); + if (IS_ERR(path)) { + err = PTR_ERR(path); + path = NULL; + break; + } + + depth = ext_depth(inode); + BUG_ON(path[depth].p_hdr == NULL); + ex = path[depth].p_ext; + next = ext4_ext_next_allocated_block(path); + + exists = 0; + if (!ex) { + /* there is no extent yet, so try to allocate + * all requested space */ + start = block; + end = block + num; + } else if (le32_to_cpu(ex->ee_block) > block) { + /* need to allocate space before found extent */ + start = block; + end = le32_to_cpu(ex->ee_block); + if (block + num < end) + end = block + num; + } else if (block >= + le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len)) { + /* need to allocate space after found extent */ + start = block; + end = block + num; + if (end >= next) + end = next; + } else if (block >= le32_to_cpu(ex->ee_block)) { + /* + * some part of requested space is covered + * by found extent + */ + start = block; + end = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len); + if (block + num < end) + end = block + num; + exists = 1; + } else { + BUG(); + } + BUG_ON(end <= start); + + if (!exists) { + cbex.ec_block = start; + cbex.ec_len = end - start; + cbex.ec_start = 0; + cbex.ec_type = EXT4_EXT_CACHE_GAP; + } else { + cbex.ec_block = le32_to_cpu(ex->ee_block); + cbex.ec_len = le16_to_cpu(ex->ee_len); + cbex.ec_start = le32_to_cpu(ex->ee_start); + cbex.ec_type = EXT4_EXT_CACHE_EXTENT; + } + + BUG_ON(cbex.ec_len == 0); + err = func(inode, path, &cbex, cbdata); + ext4_ext_drop_refs(path); + + if (err < 0) + break; + if (err == EXT_REPEAT) + continue; + else if (err == EXT_BREAK) { + err = 0; + break; + } + + if (ext_depth(inode) != depth) { + /* depth was changed. 
we have to realloc path */ + kfree(path); + path = NULL; + } + + block = cbex.ec_block + cbex.ec_len; + } + + if (path) { + ext4_ext_drop_refs(path); + kfree(path); + } + + return err; +} + +static inline void +ext4_ext_put_in_cache(struct inode *inode, __u32 block, + __u32 len, __u32 start, int type) +{ + struct ext4_ext_cache *cex; + BUG_ON(len == 0); + cex = &EXT4_I(inode)->i_cached_extent; + cex->ec_type = type; + cex->ec_block = block; + cex->ec_len = len; + cex->ec_start = start; +} + +/* + * this routine calculates the boundaries of the gap the requested block fits into + * and caches this gap + */ +static inline void +ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path, + unsigned long block) +{ + int depth = ext_depth(inode); + unsigned long lblock, len; + struct ext4_extent *ex; + + ex = path[depth].p_ext; + if (ex == NULL) { + /* there is no extent yet, so gap is [0;-] */ + lblock = 0; + len = EXT_MAX_BLOCK; + ext_debug("cache gap(whole file):"); + } else if (block < le32_to_cpu(ex->ee_block)) { + lblock = block; + len = le32_to_cpu(ex->ee_block) - block; + ext_debug("cache gap(before): %lu [%lu:%lu]", + (unsigned long) block, + (unsigned long) le32_to_cpu(ex->ee_block), + (unsigned long) le16_to_cpu(ex->ee_len)); + } else if (block >= le32_to_cpu(ex->ee_block) + + le16_to_cpu(ex->ee_len)) { + lblock = le32_to_cpu(ex->ee_block) + + le16_to_cpu(ex->ee_len); + len = ext4_ext_next_allocated_block(path); + ext_debug("cache gap(after): [%lu:%lu] %lu", + (unsigned long) le32_to_cpu(ex->ee_block), + (unsigned long) le16_to_cpu(ex->ee_len), + (unsigned long) block); + BUG_ON(len == lblock); + len = len - lblock; + } else { + lblock = len = 0; + BUG(); + } + + ext_debug(" -> %lu:%lu\n", (unsigned long) lblock, len); + ext4_ext_put_in_cache(inode, lblock, len, 0, EXT4_EXT_CACHE_GAP); +} + +static inline int +ext4_ext_in_cache(struct inode *inode, unsigned long block, + struct ext4_extent *ex) +{ + struct ext4_ext_cache *cex; + + cex = &EXT4_I(inode)->i_cached_extent; + + /* does the cache hold valid data? */ + if (cex->ec_type == EXT4_EXT_CACHE_NO) + return EXT4_EXT_CACHE_NO; + + BUG_ON(cex->ec_type != EXT4_EXT_CACHE_GAP && + cex->ec_type != EXT4_EXT_CACHE_EXTENT); + if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { + ex->ee_block = cpu_to_le32(cex->ec_block); + ex->ee_start = cpu_to_le32(cex->ec_start); + ex->ee_len = cpu_to_le16(cex->ec_len); + ext_debug("%lu cached by %lu:%lu:%lu\n", + (unsigned long) block, + (unsigned long) cex->ec_block, + (unsigned long) cex->ec_len, + (unsigned long) cex->ec_start); + return cex->ec_type; + } + + /* not in cache */ + return EXT4_EXT_CACHE_NO; +} + +/* + * routine removes index from the index block + * it's used in truncate case only. thus all requests are for + * last index in the block only + */ +int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path) +{ + struct buffer_head *bh; + int err; + unsigned long leaf; + + /* free index block */ + path--; + leaf = le32_to_cpu(path->p_idx->ei_leaf); + BUG_ON(path->p_hdr->eh_entries == 0); + if ((err = ext4_ext_get_access(handle, inode, path))) + return err; + path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); + if ((err = ext4_ext_dirty(handle, inode, path))) + return err; + ext_debug("index is empty, remove it, free block %lu\n", leaf); + bh = sb_find_get_block(inode->i_sb, leaf); + ext4_forget(handle, 1, inode, bh, leaf); + ext4_free_blocks(handle, inode, leaf, 1); + return err; +} + +/* + * This routine returns max.
credits extent tree can consume. + * It should be OK for low-performance paths like ->writepage(). + * To allow many writing processes to fit a single transaction, + * caller should calculate credits under truncate_mutex and + * pass actual path. + */ +int inline ext4_ext_calc_credits_for_insert(struct inode *inode, + struct ext4_ext_path *path) +{ + int depth, needed; + + if (path) { + /* probably there is space in leaf? */ + depth = ext_depth(inode); + if (le16_to_cpu(path[depth].p_hdr->eh_entries) + < le16_to_cpu(path[depth].p_hdr->eh_max)) + return 1; + } + + /* + * given 32bit logical block (4294967296 blocks), max. tree + * can be 4 levels in depth -- 4 * 340^4 == 53453440000. + * let's also add one more level for imbalance. + */ + depth = 5; + + /* allocation of new data block(s) */ + needed = 2; + + /* + * tree can be full, so it'd need to grow in depth: + * allocation + old root + new root + */ + needed += 2 + 1 + 1; + + /* + * Index split can happen, we'd need: + * allocate intermediate indexes (bitmap + group) + * + change two blocks at each level, but root (already included) + */ + needed += (depth * 2) + (depth * 2); + + /* any allocation modifies superblock */ + needed += 1; + + return needed; +} + +static int ext4_remove_blocks(handle_t *handle, struct inode *inode, + struct ext4_extent *ex, + unsigned long from, unsigned long to) +{ + struct buffer_head *bh; + int i; + +#ifdef EXTENTS_STATS + { + struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); + unsigned short ee_len = le16_to_cpu(ex->ee_len); + spin_lock(&sbi->s_ext_stats_lock); + sbi->s_ext_blocks += ee_len; + sbi->s_ext_extents++; + if (ee_len < sbi->s_ext_min) + sbi->s_ext_min = ee_len; + if (ee_len > sbi->s_ext_max) + sbi->s_ext_max = ee_len; + if (ext_depth(inode) > sbi->s_depth_max) + sbi->s_depth_max = ext_depth(inode); + spin_unlock(&sbi->s_ext_stats_lock); + } +#endif + if (from >= le32_to_cpu(ex->ee_block) + && to == le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { + /* tail removal */ + unsigned long num, start; + num = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - from; + start = le32_to_cpu(ex->ee_start) + le16_to_cpu(ex->ee_len) - num; + ext_debug("free last %lu blocks starting %lu\n", num, start); + for (i = 0; i < num; i++) { + bh = sb_find_get_block(inode->i_sb, start + i); + ext4_forget(handle, 0, inode, bh, start + i); + } + ext4_free_blocks(handle, inode, start, num); + } else if (from == le32_to_cpu(ex->ee_block) + && to <= le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { + printk("strange request: removal %lu-%lu from %u:%u\n", + from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len)); + } else { + printk("strange request: removal(2) %lu-%lu from %u:%u\n", + from, to, le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len)); + } + return 0; +} + +static int +ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, + struct ext4_ext_path *path, unsigned long start) +{ + int err = 0, correct_index = 0; + int depth = ext_depth(inode), credits; + struct ext4_extent_header *eh; + unsigned a, b, block, num; + unsigned long ex_ee_block; + unsigned short ex_ee_len; + struct ext4_extent *ex; + + ext_debug("truncate since %lu in leaf\n", start); + if (!path[depth].p_hdr) + path[depth].p_hdr = ext_block_hdr(path[depth].p_bh); + eh = path[depth].p_hdr; + BUG_ON(eh == NULL); + BUG_ON(le16_to_cpu(eh->eh_entries) > le16_to_cpu(eh->eh_max)); + BUG_ON(eh->eh_magic != EXT4_EXT_MAGIC); + + /* find where to start removing */ + ex = EXT_LAST_EXTENT(eh); + + ex_ee_block = le32_to_cpu(ex->ee_block);
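+ /* (ex_ee_block, ex_ee_len) track, in host byte order, the extent + * being examined by the backwards walk that starts below */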
+ ex_ee_len = le16_to_cpu(ex->ee_len); + + while (ex >= EXT_FIRST_EXTENT(eh) && + ex_ee_block + ex_ee_len > start) { + ext_debug("remove ext %lu:%u\n", ex_ee_block, ex_ee_len); + path[depth].p_ext = ex; + + a = ex_ee_block > start ? ex_ee_block : start; + b = ex_ee_block + ex_ee_len - 1 < EXT_MAX_BLOCK ? + ex_ee_block + ex_ee_len - 1 : EXT_MAX_BLOCK; + + ext_debug(" border %u:%u\n", a, b); + + if (a != ex_ee_block && b != ex_ee_block + ex_ee_len - 1) { + block = 0; + num = 0; + BUG(); + } else if (a != ex_ee_block) { + /* remove tail of the extent */ + block = ex_ee_block; + num = a - block; + } else if (b != ex_ee_block + ex_ee_len - 1) { + /* remove head of the extent */ + block = a; + num = b - a; + /* there is no "make a hole" API yet */ + BUG(); + } else { + /* remove whole extent: excellent! */ + block = ex_ee_block; + num = 0; + BUG_ON(a != ex_ee_block); + BUG_ON(b != ex_ee_block + ex_ee_len - 1); + } + + /* at present, extent can't cross block group */ + /* leaf + bitmap + group desc + sb + inode */ + credits = 5; + if (ex == EXT_FIRST_EXTENT(eh)) { + correct_index = 1; + credits += (ext_depth(inode)) + 1; + } +#ifdef CONFIG_QUOTA + credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); +#endif + + handle = ext4_ext_journal_restart(handle, credits); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + goto out; + } + + err = ext4_ext_get_access(handle, inode, path + depth); + if (err) + goto out; + + err = ext4_remove_blocks(handle, inode, ex, a, b); + if (err) + goto out; + + if (num == 0) { + /* this extent is removed entirely; mark the slot unused */ + ex->ee_start = 0; + eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); + } + + ex->ee_block = cpu_to_le32(block); + ex->ee_len = cpu_to_le16(num); + + err = ext4_ext_dirty(handle, inode, path + depth); + if (err) + goto out; + + ext_debug("new extent: %u:%u:%u\n", block, num, + le32_to_cpu(ex->ee_start)); + ex--; + ex_ee_block = le32_to_cpu(ex->ee_block); + ex_ee_len = le16_to_cpu(ex->ee_len); + } + + if (correct_index && eh->eh_entries) + err = ext4_ext_correct_indexes(handle, inode, path); + + /* if this leaf is free, then we should + * remove it from index block above */ + if (err == 0 && eh->eh_entries == 0 && path[depth].p_bh != NULL) + err = ext4_ext_rm_idx(handle, inode, path + depth); + +out: + return err; +} + +/* + * returns 1 if current index has to be freed (even partial) + */ +static int inline +ext4_ext_more_to_rm(struct ext4_ext_path *path) +{ + BUG_ON(path->p_idx == NULL); + + if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr)) + return 0; + + /* + * if truncate on a deeper level happened, it wasn't partial + * so we have to consider current index for truncation + */ + if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block) + return 0; + return 1; +} + +int ext4_ext_remove_space(struct inode *inode, unsigned long start) +{ + struct super_block *sb = inode->i_sb; + int depth = ext_depth(inode); + struct ext4_ext_path *path; + handle_t *handle; + int i = 0, err = 0; + + ext_debug("truncate since %lu\n", start); + + /* probably first extent we're gonna free will be last in block */ + handle = ext4_journal_start(inode, depth + 1); + if (IS_ERR(handle)) + return PTR_ERR(handle); + + ext4_ext_invalidate_cache(inode); + + /* + * we start scanning from right side freeing all the blocks + * after i_size and walking into the deep + */ + path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL); + if (path == NULL) { + ext4_journal_stop(handle); + return -ENOMEM; + } + memset(path, 0, sizeof(struct
ext4_ext_path) * (depth + 1)); + path[0].p_hdr = ext_inode_hdr(inode); + if (ext4_ext_check_header(__FUNCTION__, inode, path[0].p_hdr)) { + err = -EIO; + goto out; + } + path[0].p_depth = depth; + + while (i >= 0 && err == 0) { + if (i == depth) { + /* this is leaf block */ + err = ext4_ext_rm_leaf(handle, inode, path, start); + /* root level has p_bh == NULL, brelse() eats this */ + brelse(path[i].p_bh); + path[i].p_bh = NULL; + i--; + continue; + } + + /* this is index block */ + if (!path[i].p_hdr) { + ext_debug("initialize header\n"); + path[i].p_hdr = ext_block_hdr(path[i].p_bh); + if (ext4_ext_check_header(__FUNCTION__, inode, + path[i].p_hdr)) { + err = -EIO; + goto out; + } + } + + BUG_ON(le16_to_cpu(path[i].p_hdr->eh_entries) + > le16_to_cpu(path[i].p_hdr->eh_max)); + BUG_ON(path[i].p_hdr->eh_magic != EXT4_EXT_MAGIC); + + if (!path[i].p_idx) { + /* this level hasn't been touched yet */ + path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr); + path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1; + ext_debug("init index ptr: hdr 0x%p, num %d\n", + path[i].p_hdr, + le16_to_cpu(path[i].p_hdr->eh_entries)); + } else { + /* we've already been here, look at the next index */ + path[i].p_idx--; + } + + ext_debug("level %d - index, first 0x%p, cur 0x%p\n", + i, EXT_FIRST_INDEX(path[i].p_hdr), + path[i].p_idx); + if (ext4_ext_more_to_rm(path + i)) { + /* go to the next level */ + ext_debug("move to level %d (block %d)\n", + i + 1, le32_to_cpu(path[i].p_idx->ei_leaf)); + memset(path + i + 1, 0, sizeof(*path)); + path[i+1].p_bh = + sb_bread(sb, le32_to_cpu(path[i].p_idx->ei_leaf)); + if (!path[i+1].p_bh) { + /* should we reset i_size? */ + err = -EIO; + break; + } + + /* store the actual number of indexes so we know + * whether this number changed at the next iteration */ + path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries); + i++; + } else { + /* we've finished processing this index, go up */ + if (path[i].p_hdr->eh_entries == 0 && i > 0) { + /* index is empty, remove it + * handle must be already prepared by the + * truncatei_leaf() */ + err = ext4_ext_rm_idx(handle, inode, path + i); + } + /* root level has p_bh == NULL, brelse() eats this */ + brelse(path[i].p_bh); + path[i].p_bh = NULL; + i--; + ext_debug("return to level %d\n", i); + } + } + + /* TODO: flexible tree reduction should be here */ + if (path->p_hdr->eh_entries == 0) { + /* + * truncate to zero freed all the tree + * so, we need to correct eh_depth + */ + err = ext4_ext_get_access(handle, inode, path); + if (err == 0) { + ext_inode_hdr(inode)->eh_depth = 0; + ext_inode_hdr(inode)->eh_max = + cpu_to_le16(ext4_ext_space_root(inode)); + err = ext4_ext_dirty(handle, inode, path); + } + } +out: + ext4_ext_tree_changed(inode); + ext4_ext_drop_refs(path); + kfree(path); + ext4_journal_stop(handle); + + return err; +} + +/* + * called at mount time + */ +void ext4_ext_init(struct super_block *sb) +{ + /* + * possible initialization would be here + */ + + if (test_opt(sb, EXTENTS)) { + printk("EXT4-fs: file extents enabled"); +#ifdef AGRESSIVE_TEST + printk(", aggressive tests"); +#endif +#ifdef CHECK_BINSEARCH + printk(", check binsearch"); +#endif +#ifdef EXTENTS_STATS + printk(", stats"); +#endif + printk("\n"); +#ifdef EXTENTS_STATS + spin_lock_init(&EXT4_SB(sb)->s_ext_stats_lock); + EXT4_SB(sb)->s_ext_min = 1 << 30; + EXT4_SB(sb)->s_ext_max = 0; +#endif + } +} + +/* + * called at umount time + */ +void ext4_ext_release(struct super_block *sb) +{ + if (!test_opt(sb, EXTENTS)) + return; + +#ifdef EXTENTS_STATS + if (EXT4_SB(sb)->s_ext_blocks &&
EXT4_SB(sb)->s_ext_extents) { + struct ext4_sb_info *sbi = EXT4_SB(sb); + printk(KERN_ERR "EXT4-fs: %lu blocks in %lu extents (%lu ave)\n", + sbi->s_ext_blocks, sbi->s_ext_extents, + sbi->s_ext_blocks / sbi->s_ext_extents); + printk(KERN_ERR "EXT4-fs: extents: %lu min, %lu max, max depth %lu\n", + sbi->s_ext_min, sbi->s_ext_max, sbi->s_depth_max); + } +#endif +} + +int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, sector_t iblock, + unsigned long max_blocks, struct buffer_head *bh_result, + int create, int extend_disksize) +{ + struct ext4_ext_path *path = NULL; + struct ext4_extent newex, *ex; + int goal, newblock, err = 0, depth; + unsigned long allocated = 0; + + __clear_bit(BH_New, &bh_result->b_state); + ext_debug("blocks %d/%lu requested for inode %u\n", (int) iblock, + max_blocks, (unsigned) inode->i_ino); + mutex_lock(&EXT4_I(inode)->truncate_mutex); + + /* check in cache */ + if ((goal = ext4_ext_in_cache(inode, iblock, &newex))) { + if (goal == EXT4_EXT_CACHE_GAP) { + if (!create) { + /* block isn't allocated yet and + * user doesn't want to allocate it */ + goto out2; + } + /* we should allocate requested block */ + } else if (goal == EXT4_EXT_CACHE_EXTENT) { + /* block is already allocated */ + newblock = iblock + - le32_to_cpu(newex.ee_block) + + le32_to_cpu(newex.ee_start); + /* number of remaining blocks in the extent */ + allocated = le16_to_cpu(newex.ee_len) - + (iblock - le32_to_cpu(newex.ee_block)); + goto out; + } else { + BUG(); + } + } + + /* find extent for this block */ + path = ext4_ext_find_extent(inode, iblock, NULL); + if (IS_ERR(path)) { + err = PTR_ERR(path); + path = NULL; + goto out2; + } + + depth = ext_depth(inode); + + /* + * consistent leaf must not be empty + * this situation is possible, though, _during_ tree modification + * this is why assert can't be put in ext4_ext_find_extent() + */ + BUG_ON(path[depth].p_ext == NULL && depth != 0); + + if ((ex = path[depth].p_ext)) { + unsigned long ee_block = le32_to_cpu(ex->ee_block); + unsigned long ee_start = le32_to_cpu(ex->ee_start); + unsigned short ee_len = le16_to_cpu(ex->ee_len); + /* if the found extent covers the block, simply return it */ + if (iblock >= ee_block && iblock < ee_block + ee_len) { + newblock = iblock - ee_block + ee_start; + /* number of remaining blocks in the extent */ + allocated = ee_len - (iblock - ee_block); + ext_debug("%d fit into %lu:%d -> %d\n", (int) iblock, + ee_block, ee_len, newblock); + ext4_ext_put_in_cache(inode, ee_block, ee_len, + ee_start, EXT4_EXT_CACHE_EXTENT); + goto out; + } + } + + /* + * requested block isn't allocated yet + * we can't try to create a block if the create flag is zero + */ + if (!create) { + /* put just found gap into cache to speed up subsequent reqs */ + ext4_ext_put_gap_in_cache(inode, path, iblock); + goto out2; + } + /* + * Okay, we need to do block allocation.
Lazily initialize the block + * allocation info here if necessary + */ + if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) + ext4_init_block_alloc_info(inode); + + /* allocate new block */ + goal = ext4_ext_find_goal(inode, path, iblock); + allocated = max_blocks; + newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err); + if (!newblock) + goto out2; + ext_debug("allocate new block: goal %d, found %d/%lu\n", + goal, newblock, allocated); + + /* try to insert new extent into found leaf and return */ + newex.ee_block = cpu_to_le32(iblock); + newex.ee_start = cpu_to_le32(newblock); + newex.ee_len = cpu_to_le16(allocated); + err = ext4_ext_insert_extent(handle, inode, path, &newex); + if (err) + goto out2; + + if (extend_disksize && inode->i_size > EXT4_I(inode)->i_disksize) + EXT4_I(inode)->i_disksize = inode->i_size; + + /* previous routine could use block we allocated */ + newblock = le32_to_cpu(newex.ee_start); + __set_bit(BH_New, &bh_result->b_state); + + ext4_ext_put_in_cache(inode, iblock, allocated, newblock, + EXT4_EXT_CACHE_EXTENT); +out: + if (allocated > max_blocks) + allocated = max_blocks; + ext4_ext_show_leaf(inode, path); + __set_bit(BH_Mapped, &bh_result->b_state); + bh_result->b_bdev = inode->i_sb->s_bdev; + bh_result->b_blocknr = newblock; +out2: + if (path) { + ext4_ext_drop_refs(path); + kfree(path); + } + mutex_unlock(&EXT4_I(inode)->truncate_mutex); + + return err ? err : allocated; +} + +void ext4_ext_truncate(struct inode * inode, struct page *page) +{ + struct address_space *mapping = inode->i_mapping; + struct super_block *sb = inode->i_sb; + unsigned long last_block; + handle_t *handle; + int err = 0; + + /* + * probably first extent we're gonna free will be last in block + */ + err = ext4_writepage_trans_blocks(inode) + 3; + handle = ext4_journal_start(inode, err); + if (IS_ERR(handle)) { + if (page) { + clear_highpage(page); + flush_dcache_page(page); + unlock_page(page); + page_cache_release(page); + } + return; + } + + if (page) + ext4_block_truncate_page(handle, page, mapping, inode->i_size); + + mutex_lock(&EXT4_I(inode)->truncate_mutex); + ext4_ext_invalidate_cache(inode); + + /* + * TODO: optimization is possible here + * probably we do not need scanning at all, + * because page truncation is enough + */ + if (ext4_orphan_add(handle, inode)) + goto out_stop; + + /* we have to know where to truncate from in crash case */ + EXT4_I(inode)->i_disksize = inode->i_size; + ext4_mark_inode_dirty(handle, inode); + + last_block = (inode->i_size + sb->s_blocksize - 1) + >> EXT4_BLOCK_SIZE_BITS(sb); + err = ext4_ext_remove_space(inode, last_block); + + /* In a multi-transaction truncate, we only make the final + * transaction synchronous */ + if (IS_SYNC(inode)) + handle->h_sync = 1; + +out_stop: + /* + * If this was a simple ftruncate(), and the file will remain alive + * then we need to clear up the orphan record which we created above. + * However, if this was a real unlink then we were called by + * ext4_delete_inode(), and we allow that function to clean up the + * orphan info for us.
+ */ + if (inode->i_nlink) + ext4_orphan_del(handle, inode); + + mutex_unlock(&EXT4_I(inode)->truncate_mutex); + ext4_journal_stop(handle); +} + +/* + * this routine calculates the max number of blocks we could modify + * in order to allocate a new block for an inode + */ +int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) +{ + int needed; + + needed = ext4_ext_calc_credits_for_insert(inode, NULL); + + /* caller wants to allocate num blocks, but note it includes sb */ + needed = needed * num - (num - 1); + +#ifdef CONFIG_QUOTA + needed += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); +#endif + + return needed; +} + +EXPORT_SYMBOL(ext4_mark_inode_dirty); +EXPORT_SYMBOL(ext4_ext_invalidate_cache); +EXPORT_SYMBOL(ext4_ext_insert_extent); +EXPORT_SYMBOL(ext4_ext_walk_space); +EXPORT_SYMBOL(ext4_ext_find_goal); +EXPORT_SYMBOL(ext4_ext_calc_credits_for_insert); + diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 34d39ae966f7..e17a6c918d72 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -615,6 +615,17 @@ got: ext4_std_error(sb, err); goto fail_free_drop; } + if (test_opt(sb, EXTENTS)) { + EXT4_I(inode)->i_flags |= EXT4_EXTENTS_FL; + ext4_ext_tree_init(handle, inode); + if (!EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) { + err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); + if (err) goto fail; + EXT4_SET_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS); + BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "call ext4_journal_dirty_metadata"); + err = ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); + } + } ext4_debug("allocating inode %lu\n", inode->i_ino); goto really_out; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 0d96c7d3bb5b..2b81b1324a6f 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -40,8 +40,6 @@ #include "xattr.h" #include "acl.h" -static int ext4_writepage_trans_blocks(struct inode *inode); - /* * Test whether an inode is a fast symlink. */ @@ -804,6 +802,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, ext4_fsblk_t first_block = 0; + J_ASSERT(!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL)); J_ASSERT(handle != NULL || create == 0); depth = ext4_block_to_path(inode,iblock,offsets,&blocks_to_boundary); @@ -984,7 +983,7 @@ static int ext4_get_block(struct inode *inode, sector_t iblock, get_block: if (ret == 0) { - ret = ext4_get_blocks_handle(handle, inode, iblock, + ret = ext4_get_blocks_wrap(handle, inode, iblock, max_blocks, bh_result, create, 0); if (ret > 0) { bh_result->b_size = (ret << inode->i_blkbits); @@ -1008,7 +1007,7 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, dummy.b_state = 0; dummy.b_blocknr = -1000; buffer_trace_init(&dummy.b_history); - err = ext4_get_blocks_handle(handle, inode, block, 1, + err = ext4_get_blocks_wrap(handle, inode, block, 1, &dummy, create, 1); /* * ext4_get_blocks_handle() returns number of blocks @@ -1759,7 +1758,7 @@ void ext4_set_aops(struct inode *inode) * This is required during truncate. We need to physically zero the tail end * of that block so it doesn't yield old data if the file is later grown.
*/ -static int ext4_block_truncate_page(handle_t *handle, struct page *page, +int ext4_block_truncate_page(handle_t *handle, struct page *page, struct address_space *mapping, loff_t from) { ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT; @@ -2263,6 +2262,9 @@ void ext4_truncate(struct inode *inode) return; } + if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) + return ext4_ext_truncate(inode, page); + handle = start_transaction(inode); if (IS_ERR(handle)) { if (page) { @@ -3003,12 +3005,15 @@ err_out: * block and work out the exact number of indirects which are touched. Pah. */ -static int ext4_writepage_trans_blocks(struct inode *inode) +int ext4_writepage_trans_blocks(struct inode *inode) { int bpp = ext4_journal_blocks_per_page(inode); int indirects = (EXT4_NDIR_BLOCKS % bpp) ? 5 : 3; int ret; + if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) + return ext4_ext_writepage_trans_blocks(inode, bpp); + if (ext4_should_journal_data(inode)) ret = 3 * (bpp + indirects) + 2; else diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index a63dce2117b8..22a737c306c7 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c @@ -248,7 +248,6 @@ flags_err: return err; } - default: return -ENOTTY; } diff --git a/fs/ext4/super.c b/fs/ext4/super.c index f131bb69b62e..69f875250500 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -390,6 +390,7 @@ static void ext4_put_super (struct super_block * sb) struct ext4_super_block *es = sbi->s_es; int i; + ext4_ext_release(sb); ext4_xattr_put_super(sb); jbd2_journal_destroy(sbi->s_journal); if (!(sb->s_flags & MS_RDONLY)) { @@ -454,6 +455,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb) #endif ei->i_block_alloc_info = NULL; ei->vfs_inode.i_version = 1; + memset(&ei->i_cached_extent, 0, sizeof(struct ext4_ext_cache)); return &ei->vfs_inode; } @@ -677,7 +679,7 @@ enum { Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota, - Opt_grpquota + Opt_grpquota, Opt_extents, }; static match_table_t tokens = { @@ -727,6 +729,7 @@ static match_table_t tokens = { {Opt_quota, "quota"}, {Opt_usrquota, "usrquota"}, {Opt_barrier, "barrier=%u"}, + {Opt_extents, "extents"}, {Opt_err, NULL}, {Opt_resize, "resize"}, }; @@ -1059,6 +1062,9 @@ clear_qf_name: case Opt_bh: clear_opt(sbi->s_mount_opt, NOBH); break; + case Opt_extents: + set_opt (sbi->s_mount_opt, EXTENTS); + break; default: printk (KERN_ERR "EXT4-fs: Unrecognized mount option \"%s\" " @@ -1787,6 +1793,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) test_opt(sb,DATA_FLAGS) == EXT4_MOUNT_ORDERED_DATA ? 
"ordered": "writeback"); + ext4_ext_init(sb); + lock_kernel(); return 0; diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index f582cd762caf..b61181aadcbb 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -178,8 +178,9 @@ struct ext4_group_desc #define EXT4_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT4_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ #define EXT4_RESERVED_FL 0x80000000 /* reserved for ext4 lib */ +#define EXT4_EXTENTS_FL 0x00080000 /* Inode uses extents */ -#define EXT4_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ +#define EXT4_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ #define EXT4_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ /* @@ -384,6 +385,7 @@ struct ext4_inode { #define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ #define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ #define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ +#define EXT4_MOUNT_EXTENTS 0x400000 /* Extents support */ /* Compatibility, for having both ext2_fs.h and ext4_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H @@ -582,11 +584,13 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ #define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ #define EXT4_FEATURE_INCOMPAT_META_BG 0x0010 +#define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ EXT4_FEATURE_INCOMPAT_RECOVER| \ - EXT4_FEATURE_INCOMPAT_META_BG) + EXT4_FEATURE_INCOMPAT_META_BG| \ + EXT4_FEATURE_INCOMPAT_EXTENTS) #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ EXT4_FEATURE_RO_COMPAT_BTREE_DIR) @@ -825,6 +829,9 @@ extern int ext4_get_inode_loc(struct inode *, struct ext4_iloc *); extern void ext4_truncate (struct inode *); extern void ext4_set_inode_flags(struct inode *); extern void ext4_set_aops(struct inode *inode); +extern int ext4_writepage_trans_blocks(struct inode *); +extern int ext4_block_truncate_page(handle_t *handle, struct page *page, + struct address_space *mapping, loff_t from); /* ioctl.c */ extern int ext4_ioctl (struct inode *, struct file *, unsigned int, @@ -879,6 +886,28 @@ extern struct inode_operations ext4_special_inode_operations; extern struct inode_operations ext4_symlink_inode_operations; extern struct inode_operations ext4_fast_symlink_inode_operations; +/* extents.c */ +extern int ext4_ext_tree_init(handle_t *handle, struct inode *); +extern int ext4_ext_writepage_trans_blocks(struct inode *, int); +extern int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t iblock, + unsigned long max_blocks, struct buffer_head *bh_result, + int create, int extend_disksize); +extern void ext4_ext_truncate(struct inode *, struct page *); +extern void ext4_ext_init(struct super_block *); +extern void ext4_ext_release(struct super_block *); +static inline int +ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, + unsigned long max_blocks, struct buffer_head *bh, + int create, int extend_disksize) +{ + if (EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) + return ext4_ext_get_blocks(handle, inode, block, max_blocks, + bh, create, extend_disksize); + return ext4_get_blocks_handle(handle, inode, block, max_blocks, bh, + create, extend_disksize); +} + #endif /* 
__KERNEL__ */ diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h new file mode 100644 index 000000000000..8029879e29e2 --- /dev/null +++ b/include/linux/ext4_fs_extents.h @@ -0,0 +1,196 @@ +/* + * Copyright (c) 2003-2006, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas <alex@clusterfs.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ + +#ifndef _LINUX_EXT4_EXTENTS +#define _LINUX_EXT4_EXTENTS + +#include <linux/ext4_fs.h> + +/* + * with AGRESSIVE_TEST defined the capacity of index/leaf blocks + * becomes very small, so index splits, in-depth growing and + * other hard changes happen much more often + * this is for debug purposes only + */ +#define AGRESSIVE_TEST_ + +/* + * with EXTENTS_STATS defined number of blocks and extents + * are collected in truncate path. they'll be shown at + * umount time + */ +#define EXTENTS_STATS__ + +/* + * if CHECK_BINSEARCH defined, then results of binary search + * will be checked by linear search + */ +#define CHECK_BINSEARCH__ + +/* + * if EXT_DEBUG is defined you can use 'extdebug' mount option + * to get lots of info about what's going on + */ +#define EXT_DEBUG__ +#ifdef EXT_DEBUG +#define ext_debug(a...) printk(a) +#else +#define ext_debug(a...) +#endif + +/* + * if EXT_STATS is defined then stats numbers are collected + * these numbers will be displayed at umount time + */ +#define EXT_STATS_ + + +/* + * ext4_inode has i_block array (60 bytes total) + * first 12 bytes store ext4_extent_header + * the remainder stores an array of ext4_extent + */ + +/* + * this is extent on-disk structure + * it's used at the bottom of the tree + */ +struct ext4_extent { + __le32 ee_block; /* first logical block extent covers */ + __le16 ee_len; /* number of blocks covered by extent */ + __le16 ee_start_hi; /* high 16 bits of physical block */ + __le32 ee_start; /* low 32 bits of physical block */ +}; + +/* + * this is index on-disk structure + * it's used at all the levels, but the bottom + */ +struct ext4_extent_idx { + __le32 ei_block; /* index covers logical blocks from 'block' */ + __le32 ei_leaf; /* pointer to the physical block of the next * + * level. leaf or next index could be here */ + __le16 ei_leaf_hi; /* high 16 bits of physical block */ + __u16 ei_unused; +}; + +/* + * each block (leaves and indexes), even inode-stored, has a header + */ +struct ext4_extent_header { + __le16 eh_magic; /* probably will support different formats */ + __le16 eh_entries; /* number of valid entries */ + __le16 eh_max; /* capacity of store in entries */ + __le16 eh_depth; /* has tree real underlying blocks?
*/ + __le32 eh_generation; /* generation of the tree */ +}; + +#define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) + +/* + * array of ext4_ext_path contains path to some extent + * creation/lookup routines use it for traversal/splitting/etc + * truncate uses it to simulate recursive walking + */ +struct ext4_ext_path { + __u32 p_block; + __u16 p_depth; + struct ext4_extent *p_ext; + struct ext4_extent_idx *p_idx; + struct ext4_extent_header *p_hdr; + struct buffer_head *p_bh; +}; + +/* + * structure for external API + */ + +#define EXT4_EXT_CACHE_NO 0 +#define EXT4_EXT_CACHE_GAP 1 +#define EXT4_EXT_CACHE_EXTENT 2 + +/* + * to be called by ext4_ext_walk_space() + * negative retcode - error + * positive retcode - signal for ext4_ext_walk_space(), see below + * callback must return valid extent (passed or newly created) + */ +typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, + struct ext4_ext_cache *, + void *); + +#define EXT_CONTINUE 0 +#define EXT_BREAK 1 +#define EXT_REPEAT 2 + + +#define EXT_MAX_BLOCK 0xffffffff + + +#define EXT_FIRST_EXTENT(__hdr__) \ + ((struct ext4_extent *) (((char *) (__hdr__)) + \ + sizeof(struct ext4_extent_header))) +#define EXT_FIRST_INDEX(__hdr__) \ + ((struct ext4_extent_idx *) (((char *) (__hdr__)) + \ + sizeof(struct ext4_extent_header))) +#define EXT_HAS_FREE_INDEX(__path__) \ + (le16_to_cpu((__path__)->p_hdr->eh_entries) \ + < le16_to_cpu((__path__)->p_hdr->eh_max)) +#define EXT_LAST_EXTENT(__hdr__) \ + (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) +#define EXT_LAST_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_entries) - 1) +#define EXT_MAX_EXTENT(__hdr__) \ + (EXT_FIRST_EXTENT((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1) +#define EXT_MAX_INDEX(__hdr__) \ + (EXT_FIRST_INDEX((__hdr__)) + le16_to_cpu((__hdr__)->eh_max) - 1) + +static inline struct ext4_extent_header *ext_inode_hdr(struct inode *inode) +{ + return (struct ext4_extent_header *) EXT4_I(inode)->i_data; +} + +static inline struct ext4_extent_header *ext_block_hdr(struct buffer_head *bh) +{ + return (struct ext4_extent_header *) bh->b_data; +} + +static inline unsigned short ext_depth(struct inode *inode) +{ + return le16_to_cpu(ext_inode_hdr(inode)->eh_depth); +} + +static inline void ext4_ext_tree_changed(struct inode *inode) +{ + EXT4_I(inode)->i_ext_generation++; +} + +static inline void +ext4_ext_invalidate_cache(struct inode *inode) +{ + EXT4_I(inode)->i_cached_extent.ec_type = EXT4_EXT_CACHE_NO; +} + +extern int ext4_extent_tree_init(handle_t *, struct inode *); +extern int ext4_ext_calc_credits_for_insert(struct inode *, struct ext4_ext_path *); +extern int ext4_ext_insert_extent(handle_t *, struct inode *, struct ext4_ext_path *, struct ext4_extent *); +extern int ext4_ext_walk_space(struct inode *, unsigned long, unsigned long, ext_prepare_callback, void *); +extern struct ext4_ext_path * ext4_ext_find_extent(struct inode *, int, struct ext4_ext_path *); + +#endif /* _LINUX_EXT4_EXTENTS */ + diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h index 18a6ce98537f..40ce04a52b04 100644 --- a/include/linux/ext4_fs_i.h +++ b/include/linux/ext4_fs_i.h @@ -64,6 +64,16 @@ struct ext4_block_alloc_info { #define rsv_start rsv_window._rsv_start #define rsv_end rsv_window._rsv_end +/* + * storage for cached extent + */ +struct ext4_ext_cache { + __u32 ec_start; + __u32 ec_block; + __u32 ec_len; /* must be 32bit to return holes */ + __u32 ec_type; +}; + /* * third extended file system inode data in memory */ @@ 
-142,6 +152,9 @@ struct ext4_inode_info { */ struct mutex truncate_mutex; struct inode vfs_inode; + + unsigned long i_ext_generation; + struct ext4_ext_cache i_cached_extent; }; #endif /* _LINUX_EXT4_FS_I */ diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h index ce4856d70100..ce7a844d6703 100644 --- a/include/linux/ext4_fs_sb.h +++ b/include/linux/ext4_fs_sb.h @@ -78,6 +78,16 @@ struct ext4_sb_info { char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ int s_jquota_fmt; /* Format of quota to use */ #endif + +#ifdef EXTENTS_STATS + /* ext4 extents stats */ + unsigned long s_ext_min; + unsigned long s_ext_max; + unsigned long s_depth_max; + spinlock_t s_ext_stats_lock; + unsigned long s_ext_blocks; + unsigned long s_ext_extents; +#endif }; #endif /* _LINUX_EXT4_FS_SB */ diff --git a/include/linux/ext4_jbd2.h b/include/linux/ext4_jbd2.h index 99d37557cbb4..aa273f024ad0 100644 --- a/include/linux/ext4_jbd2.h +++ b/include/linux/ext4_jbd2.h @@ -26,9 +26,14 @@ * * We may have to touch one inode, one bitmap buffer, up to three * indirection blocks, the group and superblock summaries, and the data - * block to complete the transaction. */ + * block to complete the transaction. + * + * For extents-enabled fs we may have to allocate and modify up to + * 5 levels of tree + root which is stored in inode. */ -#define EXT4_SINGLEDATA_TRANS_BLOCKS 8U +#define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \ + (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \ + || test_opt(sb, EXTENTS) ? 27U : 8U) /* Extended attribute operations touch at most two data buffers, * two bitmap buffers, and two group summaries, in addition to the inode @@ -42,7 +47,7 @@ * superblock only gets updated once, of course, so don't bother * counting that again for the quota updates. */ -#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS + \ +#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ EXT4_XATTR_TRANS_BLOCKS - 2 + \ 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) @@ -78,9 +83,9 @@ /* Amount of blocks needed for quota insert/delete - we do some block writes * but inode, sb and group updates are done only once */ #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ - (EXT4_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_INIT_REWRITE) : 0) + (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) #define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ - (EXT4_SINGLEDATA_TRANS_BLOCKS-3)+3+DQUOT_DEL_REWRITE) : 0) + (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) #else #define EXT4_QUOTA_TRANS_BLOCKS(sb) 0 #define EXT4_QUOTA_INIT_BLOCKS(sb) 0 -- cgit v1.2.3 From 3a5b2ecdd1fa63a8f25bd769223bc1c2564ce45d Mon Sep 17 00:00:00 2001 From: Mingming Cao <cmm@us.ibm.com> Date: Wed, 11 Oct 2006 01:21:05 -0700 Subject: [PATCH] ext4: switch fsblk to sector_t Redefine ext3 in-kernel filesystem block type (ext3_fsblk_t) from unsigned long to sector_t, to allow kernel to handle >32 bit ext3 blocks.
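The conversion also means call sites can no longer use plain '/' and '%' on block numbers: on 32-bit kernels those operators on a 64-bit type would expand to libgcc's 64-bit division helpers. The hunks below therefore switch to sector_div(), which divides in place and returns the remainder. As a minimal sketch of the idiom (not part of the patch itself; the helper name here is made up):

	#include <linux/blkdev.h>	/* pulls in sector_div() */

	/* hypothetical example: one sector_div() call yields both the
	 * block group (quotient, left in blocknr) and the in-group
	 * offset (the returned remainder) */
	static void example_group_and_offset(sector_t blocknr,
					     unsigned long blocks_per_group)
	{
		unsigned long offset = sector_div(blocknr, blocks_per_group);
		unsigned long group = (unsigned long) blocknr;

		printk(KERN_DEBUG "group %lu, offset %lu\n", group, offset);
	}

The same pattern appears below in ext4_get_group_no_and_offset().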
Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/ext4/balloc.c | 23 +++++++++-------------- fs/ext4/ialloc.c | 11 +++++++---- fs/ext4/resize.c | 13 ++++++------- fs/ext4/super.c | 8 ++++---- include/linux/ext4_fs.h | 26 ++++++++++++++++++++++++++ include/linux/ext4_fs_i.h | 6 +++++- 6 files changed, 57 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index e9e98449137b..aa33ff271fa9 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -147,7 +147,7 @@ restart: rsv = list_entry(n, struct ext4_reserve_window_node, rsv_node); if (verbose) printk("reservation window 0x%p " - "start: %lu, end: %lu\n", + "start: "E3FSBLK", end: "E3FSBLK"\n", rsv, rsv->rsv_start, rsv->rsv_end); if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { printk("Bad reservation %p (start >= end)\n", @@ -443,10 +443,7 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, do_more: overflow = 0; - block_group = (block - le32_to_cpu(es->s_first_data_block)) / - EXT4_BLOCKS_PER_GROUP(sb); - bit = (block - le32_to_cpu(es->s_first_data_block)) % - EXT4_BLOCKS_PER_GROUP(sb); + ext4_get_group_no_and_offset(sb, block, &block_group, &bit); /* * Check to see if we are freeing blocks across a group * boundary. @@ -1404,7 +1401,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, { struct buffer_head *bitmap_bh = NULL; struct buffer_head *gdp_bh; - int group_no; + unsigned long group_no; int goal_group; ext4_grpblk_t grp_target_blk; /* blockgroup relative goal block */ ext4_grpblk_t grp_alloc_blk; /* blockgroup-relative allocated block*/ @@ -1467,8 +1464,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, if (goal < le32_to_cpu(es->s_first_data_block) || goal >= le32_to_cpu(es->s_blocks_count)) goal = le32_to_cpu(es->s_first_data_block); - group_no = (goal - le32_to_cpu(es->s_first_data_block)) / - EXT4_BLOCKS_PER_GROUP(sb); + ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk); goal_group = group_no; retry_alloc: gdp = ext4_get_group_desc(sb, group_no, &gdp_bh); @@ -1485,8 +1481,6 @@ retry_alloc: my_rsv = NULL; if (free_blocks > 0) { - grp_target_blk = ((goal - le32_to_cpu(es->s_first_data_block)) % - EXT4_BLOCKS_PER_GROUP(sb)); bitmap_bh = read_block_bitmap(sb, group_no); if (!bitmap_bh) goto io_error; @@ -1613,7 +1607,7 @@ allocated: if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) { ext4_error(sb, "ext4_new_block", "block("E3FSBLK") >= blocks count(%d) - " - "block_group = %d, es == %p ", ret_block, + "block_group = %lu, es == %p ", ret_block, le32_to_cpu(es->s_blocks_count), group_no, es); goto out; } @@ -1733,9 +1727,10 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) static inline int block_in_use(ext4_fsblk_t block, struct super_block *sb, unsigned char *map) { - return ext4_test_bit ((block - - le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block)) % - EXT4_BLOCKS_PER_GROUP(sb), map); + ext4_grpblk_t offset; + + ext4_get_group_no_and_offset(sb, block, NULL, &offset); + return ext4_test_bit (offset, map); } static inline int test_root(int a, int b) diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index e17a6c918d72..94e1bb4abe31 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -23,7 +23,7 @@ #include <linux/buffer_head.h> #include <linux/random.h> #include <linux/bitops.h> - +#include <linux/blkdev.h> #include 
<asm/byteorder.h> #include "xattr.h" @@ -274,7 +274,8 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter); avefreei = freei / ngroups; freeb = percpu_counter_read_positive(&sbi->s_freeblocks_counter); - avefreeb = freeb / ngroups; + avefreeb = freeb; + sector_div(avefreeb, ngroups); ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter); if ((parent == sb->s_root->d_inode) || @@ -303,13 +304,15 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) goto fallback; } - blocks_per_dir = (le32_to_cpu(es->s_blocks_count) - freeb) / ndirs; + blocks_per_dir = le32_to_cpu(es->s_blocks_count) - freeb; + sector_div(blocks_per_dir, ndirs); max_dirs = ndirs / ngroups + inodes_per_group / 16; min_inodes = avefreei - inodes_per_group / 4; min_blocks = avefreeb - EXT4_BLOCKS_PER_GROUP(sb) / 4; - max_debt = EXT4_BLOCKS_PER_GROUP(sb) / max(blocks_per_dir, (ext4_fsblk_t)BLOCK_COST); + max_debt = EXT4_BLOCKS_PER_GROUP(sb); + sector_div(max_debt, max(blocks_per_dir, (ext4_fsblk_t)BLOCK_COST)); if (max_debt * INODE_COST > inodes_per_group) max_debt = inodes_per_group / INODE_COST; if (max_debt > 255) diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 5b2828d21180..c60bfed5f5e7 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -36,7 +36,7 @@ static int verify_group_input(struct super_block *sb, le16_to_cpu(es->s_reserved_gdt_blocks)) : 0; ext4_fsblk_t metaend = start + overhead; struct buffer_head *bh = NULL; - ext4_grpblk_t free_blocks_count; + ext4_grpblk_t free_blocks_count, offset; int err = -EINVAL; input->free_blocks_count = free_blocks_count = @@ -49,13 +49,13 @@ static int verify_group_input(struct super_block *sb, "no-super", input->group, input->blocks_count, free_blocks_count, input->reserved_blocks); + ext4_get_group_no_and_offset(sb, start, NULL, &offset); if (group != sbi->s_groups_count) ext4_warning(sb, __FUNCTION__, "Cannot add at group %u (only %lu groups)", input->group, sbi->s_groups_count); - else if ((start - le32_to_cpu(es->s_first_data_block)) % - EXT4_BLOCKS_PER_GROUP(sb)) - ext4_warning(sb, __FUNCTION__, "Last group not full"); + else if (offset != 0) + ext4_warning(sb, __FUNCTION__, "Last group not full"); else if (input->reserved_blocks > input->blocks_count / 5) ext4_warning(sb, __FUNCTION__, "Reserved blocks too high (%u)", input->reserved_blocks); @@ -945,7 +945,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { printk(KERN_ERR "EXT4-fs: filesystem on %s:" - " too large to resize to %lu blocks safely\n", + " too large to resize to "E3FSBLK" blocks safely\n", sb->s_id, n_blocks_count); if (sizeof(sector_t) < 8) ext4_warning(sb, __FUNCTION__, @@ -960,8 +960,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, } /* Handle the remaining blocks in the last group only. */ - last = (o_blocks_count - le32_to_cpu(es->s_first_data_block)) % - EXT4_BLOCKS_PER_GROUP(sb); + ext4_get_group_no_and_offset(sb, o_blocks_count, NULL, &last); if (last == 0) { ext4_warning(sb, __FUNCTION__, diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 69f875250500..1d12e4f7d69f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1433,8 +1433,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) * block sizes. We need to calculate the offset from buffer start. 
*/ if (blocksize != EXT4_MIN_BLOCK_SIZE) { - logic_sb_block = (sb_block * EXT4_MIN_BLOCK_SIZE) / blocksize; - offset = (sb_block * EXT4_MIN_BLOCK_SIZE) % blocksize; + logic_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; + offset = sector_div(logic_sb_block, blocksize); } else { logic_sb_block = sb_block; } @@ -1539,8 +1539,8 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) brelse (bh); sb_set_blocksize(sb, blocksize); - logic_sb_block = (sb_block * EXT4_MIN_BLOCK_SIZE) / blocksize; - offset = (sb_block * EXT4_MIN_BLOCK_SIZE) % blocksize; + logic_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; + offset = sector_div(logic_sb_block, blocksize); bh = sb_bread(sb, logic_sb_block); if (!bh) { printk(KERN_ERR diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index b61181aadcbb..e952c6db9690 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -17,6 +17,7 @@ #define _LINUX_EXT4_FS_H #include <linux/types.h> +#include <linux/blkdev.h> #include <linux/magic.h> /* @@ -749,6 +750,27 @@ ext4_group_first_block_no(struct super_block *sb, unsigned long group_no) */ #define ERR_BAD_DX_DIR -75000 +/* + * This function calculates the block group number and offset, + * given a block number + */ + +static inline void ext4_get_group_no_and_offset(struct super_block * sb, + ext4_fsblk_t blocknr, unsigned long* blockgrpp, + ext4_grpblk_t *offsetp) +{ + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + ext4_grpblk_t offset; + + blocknr = blocknr - le32_to_cpu(es->s_first_data_block); + offset = sector_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)); + if (offsetp) + *offsetp = offset; + if (blockgrpp) + *blockgrpp = blocknr; + +} + /* * Function prototypes */ @@ -762,6 +784,10 @@ ext4_group_first_block_no(struct super_block *sb, unsigned long group_no) # define NORET_AND noreturn, /* balloc.c */ +extern unsigned int ext4_block_group(struct super_block *sb, + ext4_fsblk_t blocknr); +extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb, + ext4_fsblk_t blocknr); extern int ext4_bg_has_super(struct super_block *sb, int group); extern unsigned long ext4_bg_num_gdb(struct super_block *sb, int group); extern ext4_fsblk_t ext4_new_block (handle_t *handle, struct inode *inode, diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h index 40ce04a52b04..b2ccd9876bd1 100644 --- a/include/linux/ext4_fs_i.h +++ b/include/linux/ext4_fs_i.h @@ -25,9 +25,13 @@ typedef int ext4_grpblk_t; /* data type for filesystem-wide blocks number */ -typedef unsigned long ext4_fsblk_t; +typedef sector_t ext4_fsblk_t; +#if BITS_PER_LONG == 64 #define E3FSBLK "%lu" +#else +#define E3FSBLK "%llu" +#endif struct ext4_reserve_window { ext4_fsblk_t _rsv_start; /* First byte reserved */ -- cgit v1.2.3 From f65e6fba163dfd0f962efb7d8f5528b6872e2b15 Mon Sep 17 00:00:00 2001 From: Alex Tomas <alex@clusterfs.com> Date: Wed, 11 Oct 2006 01:21:05 -0700 Subject: [PATCH] ext4: 48bit physical block number support in extents Signed-off-by: Alex Tomas <alex@clusterfs.com> Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/ext4/extents.c | 187 ++++++++++++++++++++++++---------------- include/linux/ext4_fs_extents.h | 2 +- include/linux/ext4_fs_i.h | 8 +- 3 files changed, 116 insertions(+), 81 deletions(-) (limited to 'include') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index f67b2ef6a71f..4a13b56e1540 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -44,6 +44,44 @@ #include
<asm/uaccess.h> +/* this macro combines low and hi parts of phys. blocknr into ext4_fsblk_t */ +static inline ext4_fsblk_t ext_pblock(struct ext4_extent *ex) +{ + ext4_fsblk_t block; + + block = le32_to_cpu(ex->ee_start); + if (sizeof(ext4_fsblk_t) > 4) + block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1; + return block; +} + +/* this macro combines low and hi parts of phys. blocknr into ext4_fsblk_t */ +static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) +{ + ext4_fsblk_t block; + + block = le32_to_cpu(ix->ei_leaf); + if (sizeof(ext4_fsblk_t) > 4) + block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1; + return block; +} + +/* the routine stores large phys. blocknr into extent breaking it into parts */ +static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) +{ + ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff)); + if (sizeof(ext4_fsblk_t) > 4) + ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); +} + +/* the routine stores large phys. blocknr into index breaking it into parts */ +static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) +{ + ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff)); + if (sizeof(ext4_fsblk_t) > 4) + ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); +} + static int ext4_ext_check_header(const char *function, struct inode *inode, struct ext4_extent_header *eh) { @@ -124,13 +162,13 @@ static int ext4_ext_dirty(handle_t *handle, struct inode *inode, return err; } -static int ext4_ext_find_goal(struct inode *inode, +static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, struct ext4_ext_path *path, - unsigned long block) + ext4_fsblk_t block) { struct ext4_inode_info *ei = EXT4_I(inode); - unsigned long bg_start; - unsigned long colour; + ext4_fsblk_t bg_start; + ext4_grpblk_t colour; int depth; if (path) { @@ -139,8 +177,7 @@ static int ext4_ext_find_goal(struct inode *inode, /* try to predict block placement */ if ((ex = path[depth].p_ext)) - return le32_to_cpu(ex->ee_start) - + (block - le32_to_cpu(ex->ee_block)); + return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block)); /* it looks index is empty * try to find starting from index itself */ @@ -156,12 +193,12 @@ static int ext4_ext_find_goal(struct inode *inode, return bg_start + colour + block; } -static int +static ext4_fsblk_t ext4_ext_new_block(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, struct ext4_extent *ex, int *err) { - int goal, newblock; + ext4_fsblk_t goal, newblock; goal = ext4_ext_find_goal(inode, path, le32_to_cpu(ex->ee_block)); newblock = ext4_new_block(handle, inode, goal, err); @@ -230,13 +267,13 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) ext_debug("path:"); for (k = 0; k <= l; k++, path++) { if (path->p_idx) { - ext_debug(" %d->%d", le32_to_cpu(path->p_idx->ei_block), - le32_to_cpu(path->p_idx->ei_leaf)); + ext_debug(" %d->"E3FSBLK, le32_to_cpu(path->p_idx->ei_block), + idx_pblock(path->p_idx)); } else if (path->p_ext) { - ext_debug(" %d:%d:%d", + ext_debug(" %d:%d:"E3FSBLK" ", le32_to_cpu(path->p_ext->ee_block), le16_to_cpu(path->p_ext->ee_len), - le32_to_cpu(path->p_ext->ee_start)); + ext_pblock(path->p_ext)); } else ext_debug(" []"); } @@ -257,9 +294,8 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) ex = EXT_FIRST_EXTENT(eh); for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { - ext_debug("%d:%d:%d ", 
le32_to_cpu(ex->ee_block), - le16_to_cpu(ex->ee_len), - le32_to_cpu(ex->ee_start)); + ext_debug("%d:%d:"E3FSBLK" ", le32_to_cpu(ex->ee_block), + le16_to_cpu(ex->ee_len), ext_pblock(ex)); } ext_debug("\n"); } @@ -308,8 +344,8 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int bloc } path->p_idx = l - 1; - ext_debug(" -> %d->%d ", le32_to_cpu(path->p_idx->ei_block), - le32_to_cpu(path->p_idx->ei_leaf)); + ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block), + idx_block(path->p_idx)); #ifdef CHECK_BINSEARCH { @@ -374,10 +410,10 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) } path->p_ext = l - 1; - ext_debug(" -> %d:%d:%d ", + ext_debug(" -> %d:"E3FSBLK":%d ", le32_to_cpu(path->p_ext->ee_block), - le32_to_cpu(path->p_ext->ee_start), - le16_to_cpu(path->p_ext->ee_len)); + ext_pblock(path->p_ext), + le16_to_cpu(path->p_ext->ee_len)); #ifdef CHECK_BINSEARCH { @@ -442,7 +478,7 @@ ext4_ext_find_extent(struct inode *inode, int block, struct ext4_ext_path *path) ext_debug("depth %d: num %d, max %d\n", ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max)); ext4_ext_binsearch_idx(inode, path + ppos, block); - path[ppos].p_block = le32_to_cpu(path[ppos].p_idx->ei_leaf); + path[ppos].p_block = idx_pblock(path[ppos].p_idx); path[ppos].p_depth = i; path[ppos].p_ext = NULL; @@ -489,7 +525,7 @@ err: */ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, struct ext4_ext_path *curp, - int logical, int ptr) + int logical, ext4_fsblk_t ptr) { struct ext4_extent_idx *ix; int len, err; @@ -524,7 +560,7 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, } ix->ei_block = cpu_to_le32(logical); - ix->ei_leaf = cpu_to_le32(ptr); + ext4_idx_store_pblock(ix, ptr); curp->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(curp->p_hdr->eh_entries)+1); BUG_ON(le16_to_cpu(curp->p_hdr->eh_entries) @@ -556,9 +592,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, struct ext4_extent_idx *fidx; struct ext4_extent *ex; int i = at, k, m, a; - unsigned long newblock, oldblock; + ext4_fsblk_t newblock, oldblock; __le32 border; - int *ablocks = NULL; /* array of allocated blocks */ + ext4_fsblk_t *ablocks = NULL; /* array of allocated blocks */ int err = 0; /* make decision: where to split? 
*/ @@ -591,10 +627,10 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, * we need this to handle errors and free blocks * upon them */ - ablocks = kmalloc(sizeof(unsigned long) * depth, GFP_NOFS); + ablocks = kmalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS); if (!ablocks) return -ENOMEM; - memset(ablocks, 0, sizeof(unsigned long) * depth); + memset(ablocks, 0, sizeof(ext4_fsblk_t) * depth); /* allocate all needed blocks */ ext_debug("allocate %d blocks for indexes/leaf\n", depth - at); @@ -633,9 +669,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, path[depth].p_ext++; while (path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)) { - ext_debug("move %d:%d:%d in new leaf %lu\n", + ext_debug("move %d:"E3FSBLK":%d in new leaf "E3FSBLK"\n", le32_to_cpu(path[depth].p_ext->ee_block), - le32_to_cpu(path[depth].p_ext->ee_start), + ext_pblock(path[depth].p_ext), le16_to_cpu(path[depth].p_ext->ee_len), newblock); /*memmove(ex++, path[depth].p_ext++, @@ -679,7 +715,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, while (k--) { oldblock = newblock; newblock = ablocks[--a]; - bh = sb_getblk(inode->i_sb, newblock); + bh = sb_getblk(inode->i_sb, (ext4_fsblk_t)newblock); if (!bh) { err = -EIO; goto cleanup; @@ -696,9 +732,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, neh->eh_depth = cpu_to_le16(depth - i); fidx = EXT_FIRST_INDEX(neh); fidx->ei_block = border; - fidx->ei_leaf = cpu_to_le32(oldblock); + ext4_idx_store_pblock(fidx, oldblock); - ext_debug("int.index at %d (block %lu): %lu -> %lu\n", i, + ext_debug("int.index at %d (block "E3FSBLK"): %lu -> "E3FSBLK"\n", i, newblock, (unsigned long) le32_to_cpu(border), oldblock); /* copy indexes */ @@ -710,9 +746,9 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) != EXT_LAST_INDEX(path[i].p_hdr)); while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { - ext_debug("%d: move %d:%d in new index %lu\n", i, + ext_debug("%d: move %d:%d in new index "E3FSBLK"\n", i, le32_to_cpu(path[i].p_idx->ei_block), - le32_to_cpu(path[i].p_idx->ei_leaf), + idx_pblock(path[i].p_idx), newblock); /*memmove(++fidx, path[i].p_idx++, sizeof(struct ext4_extent_idx)); @@ -791,7 +827,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, struct ext4_extent_header *neh; struct ext4_extent_idx *fidx; struct buffer_head *bh; - unsigned long newblock; + ext4_fsblk_t newblock; int err = 0; newblock = ext4_ext_new_block(handle, inode, path, newext, &err); @@ -839,13 +875,13 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr); /* FIXME: it works, but actually path[0] can be index */ curp->p_idx->ei_block = EXT_FIRST_EXTENT(path[0].p_hdr)->ee_block; - curp->p_idx->ei_leaf = cpu_to_le32(newblock); + ext4_idx_store_pblock(curp->p_idx, newblock); neh = ext_inode_hdr(inode); fidx = EXT_FIRST_INDEX(neh); - ext_debug("new root: num %d(%d), lblock %d, ptr %d\n", + ext_debug("new root: num %d(%d), lblock %d, ptr "E3FSBLK"\n", le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), - le32_to_cpu(fidx->ei_block), le32_to_cpu(fidx->ei_leaf)); + le32_to_cpu(fidx->ei_block), idx_pblock(fidx)); neh->eh_depth = cpu_to_le16(path->p_depth + 1); err = ext4_ext_dirty(handle, inode, curp); @@ -1042,7 +1078,6 @@ static int inline ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, struct ext4_extent *ex2) { - /* FIXME: 48bit support */ if (le32_to_cpu(ex1->ee_block) + 
le16_to_cpu(ex1->ee_len) != le32_to_cpu(ex2->ee_block)) return 0; @@ -1052,8 +1087,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, return 0; #endif - if (le32_to_cpu(ex1->ee_start) + le16_to_cpu(ex1->ee_len) - == le32_to_cpu(ex2->ee_start)) + if (ext_pblock(ex1) + le16_to_cpu(ex1->ee_len) == ext_pblock(ex2)) return 1; return 0; } @@ -1080,11 +1114,10 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, /* try to insert block into found extent and return */ if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { - ext_debug("append %d block to %d:%d (from %d)\n", + ext_debug("append %d block to %d:%d (from "E3FSBLK")\n", le16_to_cpu(newext->ee_len), le32_to_cpu(ex->ee_block), - le16_to_cpu(ex->ee_len), - le32_to_cpu(ex->ee_start)); + le16_to_cpu(ex->ee_len), ext_pblock(ex)); if ((err = ext4_ext_get_access(handle, inode, path + depth))) return err; ex->ee_len = cpu_to_le16(le16_to_cpu(ex->ee_len) @@ -1140,9 +1173,9 @@ has_space: if (!nearex) { /* there is no extent in this leaf, create first one */ - ext_debug("first extent in the leaf: %d:%d:%d\n", + ext_debug("first extent in the leaf: %d:"E3FSBLK":%d\n", le32_to_cpu(newext->ee_block), - le32_to_cpu(newext->ee_start), + ext_pblock(newext), le16_to_cpu(newext->ee_len)); path[depth].p_ext = EXT_FIRST_EXTENT(eh); } else if (le32_to_cpu(newext->ee_block) @@ -1152,10 +1185,10 @@ has_space: len = EXT_MAX_EXTENT(eh) - nearex; len = (len - 1) * sizeof(struct ext4_extent); len = len < 0 ? 0 : len; - ext_debug("insert %d:%d:%d after: nearest 0x%p, " + ext_debug("insert %d:"E3FSBLK":%d after: nearest 0x%p, " "move %d from 0x%p to 0x%p\n", le32_to_cpu(newext->ee_block), - le32_to_cpu(newext->ee_start), + ext_pblock(newext), le16_to_cpu(newext->ee_len), nearex, len, nearex + 1, nearex + 2); memmove(nearex + 2, nearex + 1, len); @@ -1165,10 +1198,10 @@ has_space: BUG_ON(newext->ee_block == nearex->ee_block); len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); len = len < 0 ? 
0 : len; - ext_debug("insert %d:%d:%d before: nearest 0x%p, " + ext_debug("insert %d:"E3FSBLK":%d before: nearest 0x%p, " "move %d from 0x%p to 0x%p\n", le32_to_cpu(newext->ee_block), - le32_to_cpu(newext->ee_start), + ext_pblock(newext), le16_to_cpu(newext->ee_len), nearex, len, nearex + 1, nearex + 2); memmove(nearex + 1, nearex, len); @@ -1179,9 +1212,8 @@ has_space: nearex = path[depth].p_ext; nearex->ee_block = newext->ee_block; nearex->ee_start = newext->ee_start; + nearex->ee_start_hi = newext->ee_start_hi; nearex->ee_len = newext->ee_len; - /* FIXME: support for large fs */ - nearex->ee_start_hi = 0; merge: /* try to merge extents to the right */ @@ -1290,7 +1322,7 @@ int ext4_ext_walk_space(struct inode *inode, unsigned long block, } else { cbex.ec_block = le32_to_cpu(ex->ee_block); cbex.ec_len = le16_to_cpu(ex->ee_len); - cbex.ec_start = le32_to_cpu(ex->ee_start); + cbex.ec_start = ext_pblock(ex); cbex.ec_type = EXT4_EXT_CACHE_EXTENT; } @@ -1398,13 +1430,13 @@ ext4_ext_in_cache(struct inode *inode, unsigned long block, cex->ec_type != EXT4_EXT_CACHE_EXTENT); if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { ex->ee_block = cpu_to_le32(cex->ec_block); - ex->ee_start = cpu_to_le32(cex->ec_start); + ext4_ext_store_pblock(ex, cex->ec_start); ex->ee_len = cpu_to_le16(cex->ec_len); - ext_debug("%lu cached by %lu:%lu:%lu\n", + ext_debug("%lu cached by %lu:%lu:"E3FSBLK"\n", (unsigned long) block, (unsigned long) cex->ec_block, (unsigned long) cex->ec_len, - (unsigned long) cex->ec_start); + cex->ec_start); return cex->ec_type; } @@ -1422,18 +1454,18 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, { struct buffer_head *bh; int err; - unsigned long leaf; + ext4_fsblk_t leaf; /* free index block */ path--; - leaf = le32_to_cpu(path->p_idx->ei_leaf); + leaf = idx_pblock(path->p_idx); BUG_ON(path->p_hdr->eh_entries == 0); if ((err = ext4_ext_get_access(handle, inode, path))) return err; path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); if ((err = ext4_ext_dirty(handle, inode, path))) return err; - ext_debug("index is empty, remove it, free block %lu\n", leaf); + ext_debug("index is empty, remove it, free block "E3FSBLK"\n", leaf); bh = sb_find_get_block(inode->i_sb, leaf); ext4_forget(handle, 1, inode, bh, leaf); ext4_free_blocks(handle, inode, leaf, 1); @@ -1515,10 +1547,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, if (from >= le32_to_cpu(ex->ee_block) && to == le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - 1) { /* tail removal */ - unsigned long num, start; + unsigned long num; + ext4_fsblk_t start; num = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - from; - start = le32_to_cpu(ex->ee_start) + le16_to_cpu(ex->ee_len) - num; - ext_debug("free last %lu blocks starting %lu\n", num, start); + start = ext_pblock(ex) + le16_to_cpu(ex->ee_len) - num; + ext_debug("free last %lu blocks starting "E3FSBLK"\n", num, start); for (i = 0; i < num; i++) { bh = sb_find_get_block(inode->i_sb, start + i); ext4_forget(handle, 0, inode, bh, start + i); @@ -1621,7 +1654,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, if (num == 0) { /* this extent is removed entirely mark slot unused */ - ex->ee_start = 0; + ext4_ext_store_pblock(ex, 0); eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); } @@ -1632,8 +1665,8 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, if (err) goto out; - ext_debug("new extent: %u:%u:%u\n", block, num, - le32_to_cpu(ex->ee_start)); + ext_debug("new 
extent: %u:%u:"E3FSBLK"\n", block, num, + ext_pblock(ex)); ex--; ex_ee_block = le32_to_cpu(ex->ee_block); ex_ee_len = le16_to_cpu(ex->ee_len); @@ -1748,11 +1781,11 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start) path[i].p_idx); if (ext4_ext_more_to_rm(path + i)) { /* go to the next level */ - ext_debug("move to level %d (block %d)\n", - i + 1, le32_to_cpu(path[i].p_idx->ei_leaf)); + ext_debug("move to level %d (block "E3FSBLK")\n", + i + 1, idx_pblock(path[i].p_idx)); memset(path + i + 1, 0, sizeof(*path)); path[i+1].p_bh = - sb_bread(sb, le32_to_cpu(path[i].p_idx->ei_leaf)); + sb_bread(sb, idx_pblock(path[i].p_idx)); if (!path[i+1].p_bh) { /* should we reset i_size? */ err = -EIO; @@ -1851,13 +1884,15 @@ void ext4_ext_release(struct super_block *sb) #endif } -int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, sector_t iblock, +int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, + ext4_fsblk_t iblock, unsigned long max_blocks, struct buffer_head *bh_result, int create, int extend_disksize) { struct ext4_ext_path *path = NULL; struct ext4_extent newex, *ex; - int goal, newblock, err = 0, depth; + ext4_fsblk_t goal, newblock; + int err = 0, depth; unsigned long allocated = 0; __clear_bit(BH_New, &bh_result->b_state); @@ -1878,7 +1913,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, sector_t iblock, /* block is already allocated */ newblock = iblock - le32_to_cpu(newex.ee_block) - + le32_to_cpu(newex.ee_start); + + ext_pblock(&newex); /* number of remain blocks in the extent */ allocated = le16_to_cpu(newex.ee_len) - (iblock - le32_to_cpu(newex.ee_block)); @@ -1907,14 +1942,14 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, sector_t iblock, if ((ex = path[depth].p_ext)) { unsigned long ee_block = le32_to_cpu(ex->ee_block); - unsigned long ee_start = le32_to_cpu(ex->ee_start); + ext4_fsblk_t ee_start = ext_pblock(ex); unsigned short ee_len = le16_to_cpu(ex->ee_len); /* if found exent covers block, simple return it */ if (iblock >= ee_block && iblock < ee_block + ee_len) { newblock = iblock - ee_block + ee_start; /* number of remain blocks in the extent */ allocated = ee_len - (iblock - ee_block); - ext_debug("%d fit into %lu:%d -> %d\n", (int) iblock, + ext_debug("%d fit into %lu:%d -> "E3FSBLK"\n", (int) iblock, ee_block, ee_len, newblock); ext4_ext_put_in_cache(inode, ee_block, ee_len, ee_start, EXT4_EXT_CACHE_EXTENT); @@ -1944,12 +1979,12 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, sector_t iblock, newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err); if (!newblock) goto out2; - ext_debug("allocate new block: goal %d, found %d/%lu\n", + ext_debug("allocate new block: goal "E3FSBLK", found "E3FSBLK"/%lu\n", goal, newblock, allocated); /* try to insert new extent into found leaf and return */ newex.ee_block = cpu_to_le32(iblock); - newex.ee_start = cpu_to_le32(newblock); + ext4_ext_store_pblock(&newex, newblock); newex.ee_len = cpu_to_le16(allocated); err = ext4_ext_insert_extent(handle, inode, path, &newex); if (err) @@ -1959,7 +1994,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, sector_t iblock, EXT4_I(inode)->i_disksize = inode->i_size; /* previous routine could use block we allocated */ - newblock = le32_to_cpu(newex.ee_start); + newblock = ext_pblock(&newex); __set_bit(BH_New, &bh_result->b_state); ext4_ext_put_in_cache(inode, iblock, allocated, newblock, diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index 
8029879e29e2..facc76684d4f 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h @@ -108,7 +108,7 @@ struct ext4_extent_header { * truncate uses it to simulate recursive walking */ struct ext4_ext_path { - __u32 p_block; + ext4_fsblk_t p_block; __u16 p_depth; struct ext4_extent *p_ext; struct ext4_extent_idx *p_idx; diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h index b2ccd9876bd1..2bed0effdcae 100644 --- a/include/linux/ext4_fs_i.h +++ b/include/linux/ext4_fs_i.h @@ -72,10 +72,10 @@ struct ext4_block_alloc_info { * storage for cached extent */ struct ext4_ext_cache { - __u32 ec_start; - __u32 ec_block; - __u32 ec_len; /* must be 32bit to return holes */ - __u32 ec_type; + ext4_fsblk_t ec_start; + __u32 ec_block; + __u32 ec_len; /* must be 32bit to return holes */ + __u32 ec_type; }; /* -- cgit v1.2.3 From 471d4011a9862efff02094388b8fe8cd67683c38 Mon Sep 17 00:00:00 2001 From: Suparna Bhattacharya <suparna@in.ibm.com> Date: Wed, 11 Oct 2006 01:21:06 -0700 Subject: [PATCH] ext4: uninitialised extent handling Make it possible to add file preallocation support in future as an RO_COMPAT feature by recognizing uninitialized extents as holes and limiting extent length to keep the top bit of ee_len free for marking uninitialized extents. Signed-off-by: Suparna Bhattacharya <suparna@in.ibm.com> Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/ext4/extents.c | 16 ++++++++++++++++ include/linux/ext4_fs_extents.h | 2 ++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 4a13b56e1540..32526061a17d 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -1082,6 +1082,13 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, != le32_to_cpu(ex2->ee_block)) return 0; + /* + * To allow future support for preallocated extents to be added + * as an RO_COMPAT feature, refuse to merge to extents if + * can result in the top bit of ee_len being set + */ + if (le16_to_cpu(ex1->ee_len) + le16_to_cpu(ex2->ee_len) > EXT_MAX_LEN) + return 0; #ifdef AGRESSIVE_TEST if (le16_to_cpu(ex1->ee_len) >= 4) return 0; @@ -1944,6 +1951,15 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, unsigned long ee_block = le32_to_cpu(ex->ee_block); ext4_fsblk_t ee_start = ext_pblock(ex); unsigned short ee_len = le16_to_cpu(ex->ee_len); + + /* + * Allow future support for preallocated extents to be added + * as an RO_COMPAT feature: + * Uninitialized extents are treated as holes, except that + * we avoid (fail) allocating new blocks during a write. 
+ */ + if (ee_len > EXT_MAX_LEN) + goto out2; /* if found exent covers block, simple return it */ if (iblock >= ee_block && iblock < ee_block + ee_len) { newblock = iblock - ee_block + ee_start; diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index facc76684d4f..0eba0acf6ba6 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h @@ -141,6 +141,8 @@ typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *, #define EXT_MAX_BLOCK 0xffffffff +#define EXT_MAX_LEN ((1UL << 15) - 1) + #define EXT_FIRST_EXTENT(__hdr__) \ ((struct ext4_extent *) (((char *) (__hdr__)) + \ -- cgit v1.2.3 From d0d856e8bd6e697cb44b2b4dd038f3bec576a70e Mon Sep 17 00:00:00 2001 From: Randy Dunlap <rdunlap@xenotime.net> Date: Wed, 11 Oct 2006 01:21:07 -0700 Subject: [PATCH] ext4: clean up comments in ext4-extents patch Signed-off-by: Randy Dunlap <rdunlap@xenotime.net> Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/ext4/extents.c | 226 +++++++++++++++++++++++----------------- include/linux/ext4_fs_extents.h | 54 +++++----- include/linux/ext4_jbd2.h | 4 +- 3 files changed, 157 insertions(+), 127 deletions(-) (limited to 'include') diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 32526061a17d..e06e937a52b8 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -44,7 +44,10 @@ #include <asm/uaccess.h> -/* this macro combines low and hi parts of phys. blocknr into ext4_fsblk_t */ +/* + * ext_pblock: + * combine low and high parts of physical block number into ext4_fsblk_t + */ static inline ext4_fsblk_t ext_pblock(struct ext4_extent *ex) { ext4_fsblk_t block; @@ -55,7 +58,10 @@ static inline ext4_fsblk_t ext_pblock(struct ext4_extent *ex) return block; } -/* this macro combines low and hi parts of phys. blocknr into ext4_fsblk_t */ +/* + * idx_pblock: + * combine low and high parts of a leaf physical block number into ext4_fsblk_t + */ static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) { ext4_fsblk_t block; @@ -66,7 +72,11 @@ static inline ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix) return block; } -/* the routine stores large phys. blocknr into extent breaking it into parts */ +/* + * ext4_ext_store_pblock: + * stores a large physical block number into an extent struct, + * breaking it into parts + */ static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb) { ex->ee_start = cpu_to_le32((unsigned long) (pb & 0xffffffff)); @@ -74,7 +84,11 @@ static inline void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff); } -/* the routine stores large phys. 
blocknr into index breaking it into parts */ +/* + * ext4_idx_store_pblock: + * stores a large physical block number into an index struct, + * breaking it into parts + */ static inline void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb) { ix->ei_leaf = cpu_to_le32((unsigned long) (pb & 0xffffffff)); @@ -179,8 +193,8 @@ static ext4_fsblk_t ext4_ext_find_goal(struct inode *inode, if ((ex = path[depth].p_ext)) return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block)); - /* it looks index is empty - * try to find starting from index itself */ + /* it looks like index is empty; + * try to find starting block from index itself */ if (path[depth].p_bh) return path[depth].p_bh->b_blocknr; } @@ -317,7 +331,8 @@ static void ext4_ext_drop_refs(struct ext4_ext_path *path) } /* - * binary search for closest index by given block + * ext4_ext_binsearch_idx: + * binary search for the closest index of the given block */ static void ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int block) @@ -375,7 +390,8 @@ ext4_ext_binsearch_idx(struct inode *inode, struct ext4_ext_path *path, int bloc } /* - * binary search for closest extent by given block + * ext4_ext_binsearch: + * binary search for closest extent of the given block */ static void ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) @@ -388,8 +404,8 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) if (eh->eh_entries == 0) { /* - * this leaf is empty yet: - * we get such a leaf in split/add case + * this leaf is empty: + * we get such a leaf in split/add case */ return; } @@ -520,8 +536,9 @@ err: } /* - * insert new index [logical;ptr] into the block at cupr - * it check where to insert: before curp or after curp + * ext4_ext_insert_index: + * insert new index [@logical;@ptr] into the block at @curp; + * check where to insert: before @curp or after @curp */ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, struct ext4_ext_path *curp, @@ -574,13 +591,14 @@ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode, } /* - * routine inserts new subtree into the path, using free index entry - * at depth 'at: - * - allocates all needed blocks (new leaf and all intermediate index blocks) - * - makes decision where to split - * - moves remaining extens and index entries (right to the split point) - * into the newly allocated blocks - * - initialize subtree + * ext4_ext_split: + * inserts new subtree into the path, using free index entry + * at depth @at: + * - allocates all needed blocks (new leaf and all intermediate index blocks) + * - makes decision where to split + * - moves remaining extents and index entries (right to the split point) + * into the newly allocated blocks + * - initializes subtree */ static int ext4_ext_split(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, @@ -598,14 +616,14 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, int err = 0; /* make decision: where to split? */ - /* FIXME: now desicion is simplest: at current extent */ + /* FIXME: now decision is simplest: at current extent */ - /* if current leaf will be splitted, then we should use + /* if current leaf will be split, then we should use * border from split point */ BUG_ON(path[depth].p_ext > EXT_MAX_EXTENT(path[depth].p_hdr)); if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { border = path[depth].p_ext[1].ee_block; - ext_debug("leaf will be splitted." + ext_debug("leaf will be split." 
" next leaf starts at %d\n", le32_to_cpu(border)); } else { @@ -616,16 +634,16 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, } /* - * if error occurs, then we break processing - * and turn filesystem read-only. so, index won't + * If error occurs, then we break processing + * and mark filesystem read-only. index won't * be inserted and tree will be in consistent - * state. next mount will repair buffers too + * state. Next mount will repair buffers too. */ /* - * get array to track all allocated blocks - * we need this to handle errors and free blocks - * upon them + * Get array to track all allocated blocks. + * We need this to handle errors and free blocks + * upon them. */ ablocks = kmalloc(sizeof(ext4_fsblk_t) * depth, GFP_NOFS); if (!ablocks) @@ -661,7 +679,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, neh->eh_depth = 0; ex = EXT_FIRST_EXTENT(neh); - /* move remain of path[depth] to the new leaf */ + /* move remainder of path[depth] to the new leaf */ BUG_ON(path[depth].p_hdr->eh_entries != path[depth].p_hdr->eh_max); /* start copy from next extent */ /* TODO: we could do it by single memmove */ @@ -813,11 +831,12 @@ cleanup: } /* - * routine implements tree growing procedure: - * - allocates new block - * - moves top-level data (index block or leaf) into the new block - * - initialize new top-level, creating index that points to the - * just created block + * ext4_ext_grow_indepth: + * implements tree growing procedure: + * - allocates new block + * - moves top-level data (index block or leaf) into the new block + * - initializes new top-level, creating index that points to the + * just created block */ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, @@ -892,8 +911,9 @@ out: } /* - * routine finds empty index and adds new leaf. if no free index found - * then it requests in-depth growing + * ext4_ext_create_new_leaf: + * finds empty index and adds new leaf. + * if no free index is found, then it requests in-depth growing. */ static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, @@ -912,8 +932,8 @@ repeat: curp--; } - /* we use already allocated block for index block - * so, subsequent data blocks should be contigoues */ + /* we use already allocated block for index block, + * so subsequent data blocks should be contiguous */ if (EXT_HAS_FREE_INDEX(curp)) { /* if we found index with free entry, then use that * entry: create all needed subtree and add new leaf */ @@ -943,12 +963,12 @@ repeat: } /* - * only first (depth 0 -> 1) produces free space - * in all other cases we have to split growed tree + * only first (depth 0 -> 1) produces free space; + * in all other cases we have to split the grown tree */ depth = ext_depth(inode); if (path[depth].p_hdr->eh_entries == path[depth].p_hdr->eh_max) { - /* now we need split */ + /* now we need to split */ goto repeat; } } @@ -958,10 +978,11 @@ out: } /* - * returns allocated block in subsequent extent or EXT_MAX_BLOCK - * NOTE: it consider block number from index entry as - * allocated block. thus, index entries have to be consistent - * with leafs + * ext4_ext_next_allocated_block: + * returns allocated block in subsequent extent or EXT_MAX_BLOCK. + * NOTE: it considers block number from index entry as + * allocated block. Thus, index entries have to be consistent + * with leaves. 
*/ static unsigned long ext4_ext_next_allocated_block(struct ext4_ext_path *path) @@ -993,6 +1014,7 @@ ext4_ext_next_allocated_block(struct ext4_ext_path *path) } /* + * ext4_ext_next_leaf_block: * returns first allocated block from next leaf or EXT_MAX_BLOCK */ static unsigned ext4_ext_next_leaf_block(struct inode *inode, @@ -1021,8 +1043,9 @@ static unsigned ext4_ext_next_leaf_block(struct inode *inode, } /* - * if leaf gets modified and modified extent is first in the leaf - * then we have to correct all indexes above + * ext4_ext_correct_indexes: + * if leaf gets modified and modified extent is first in the leaf, + * then we have to correct all indexes above. * TODO: do we need to correct tree in all cases? */ int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, @@ -1050,7 +1073,7 @@ int ext4_ext_correct_indexes(handle_t *handle, struct inode *inode, } /* - * TODO: we need correction if border is smaller then current one + * TODO: we need correction if border is smaller than current one */ k = depth - 1; border = path[depth].p_ext->ee_block; @@ -1085,7 +1108,7 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, /* * To allow future support for preallocated extents to be added * as an RO_COMPAT feature, refuse to merge to extents if - * can result in the top bit of ee_len being set + * this can result in the top bit of ee_len being set. */ if (le16_to_cpu(ex1->ee_len) + le16_to_cpu(ex2->ee_len) > EXT_MAX_LEN) return 0; @@ -1100,9 +1123,10 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1, } /* - * this routine tries to merge requsted extent into the existing - * extent or inserts requested extent as new one into the tree, - * creating new leaf in no-space case + * ext4_ext_insert_extent: + * tries to merge requsted extent into the existing extent or + * inserts requested extent as new one into the tree, + * creating new leaf in the no-space case. */ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, struct ext4_ext_path *path, @@ -1163,8 +1187,8 @@ repeat: } /* - * there is no free space in found leaf - * we're gonna add new leaf in the tree + * There is no free space in the found leaf. + * We're gonna add a new leaf in the tree. */ err = ext4_ext_create_new_leaf(handle, inode, path, newext); if (err) @@ -1377,7 +1401,8 @@ ext4_ext_put_in_cache(struct inode *inode, __u32 block, } /* - * this routine calculate boundaries of the gap requested block fits into + * ext4_ext_put_gap_in_cache: + * calculate boundaries of the gap that the requested block fits into * and cache this gap */ static inline void @@ -1452,9 +1477,10 @@ ext4_ext_in_cache(struct inode *inode, unsigned long block, } /* - * routine removes index from the index block - * it's used in truncate case only. thus all requests are for - * last index in the block only + * ext4_ext_rm_idx: + * removes index from the index block. + * It's used in truncate case only, thus all requests are for + * last index in the block only. */ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, struct ext4_ext_path *path) @@ -1480,11 +1506,12 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, } /* - * This routine returns max. credits extent tree can consume. + * ext4_ext_calc_credits_for_insert: + * This routine returns max. credits that the extent tree can consume. 
* It should be OK for low-performance paths like ->writepage() - * To allow many writing process to fit a single transaction, - * caller should calculate credits under truncate_mutex and - * pass actual path. + * To allow many writing processes to fit into a single transaction, + * the caller should calculate credits under truncate_mutex and + * pass the actual path. */ int inline ext4_ext_calc_credits_for_insert(struct inode *inode, struct ext4_ext_path *path) @@ -1500,9 +1527,9 @@ int inline ext4_ext_calc_credits_for_insert(struct inode *inode, } /* - * given 32bit logical block (4294967296 blocks), max. tree + * given 32-bit logical block (4294967296 blocks), max. tree * can be 4 levels in depth -- 4 * 340^4 == 53453440000. - * let's also add one more level for imbalance. + * Let's also add one more level for imbalance. */ depth = 5; @@ -1510,13 +1537,13 @@ int inline ext4_ext_calc_credits_for_insert(struct inode *inode, needed = 2; /* - * tree can be full, so it'd need to grow in depth: + * tree can be full, so it would need to grow in depth: * allocation + old root + new root */ needed += 2 + 1 + 1; /* - * Index split can happen, we'd need: + * Index split can happen, we would need: * allocate intermediate indexes (bitmap + group) * + change two blocks at each level, but root (already included) */ @@ -1634,7 +1661,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, BUG_ON(b != ex_ee_block + ex_ee_len - 1); } - /* at present, extent can't cross block group */ + /* at present, extent can't cross block group: */ /* leaf + bitmap + group desc + sb + inode */ credits = 5; if (ex == EXT_FIRST_EXTENT(eh)) { @@ -1660,7 +1687,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, goto out; if (num == 0) { - /* this extent is removed entirely mark slot unused */ + /* this extent is removed; mark slot entirely unused */ ext4_ext_store_pblock(ex, 0); eh->eh_entries = cpu_to_le16(le16_to_cpu(eh->eh_entries)-1); } @@ -1692,7 +1719,8 @@ out: } /* - * returns 1 if current index have to be freed (even partial) + * ext4_ext_more_to_rm: + * returns 1 if current index has to be freed (even partial) */ static int inline ext4_ext_more_to_rm(struct ext4_ext_path *path) @@ -1703,7 +1731,7 @@ ext4_ext_more_to_rm(struct ext4_ext_path *path) return 0; /* - * if truncate on deeper level happened it it wasn't partial + * if truncate on deeper level happened, it wasn't partial, * so we have to consider current index for truncation */ if (le16_to_cpu(path->p_hdr->eh_entries) == path->p_block) @@ -1729,8 +1757,8 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start) ext4_ext_invalidate_cache(inode); /* - * we start scanning from right side freeing all the blocks - * after i_size and walking into the deep + * We start scanning from right side, freeing all the blocks + * after i_size and walking into the tree depth-wise. 
*/ path = kmalloc(sizeof(struct ext4_ext_path) * (depth + 1), GFP_KERNEL); if (path == NULL) { @@ -1749,7 +1777,7 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start) if (i == depth) { /* this is leaf block */ err = ext4_ext_rm_leaf(handle, inode, path, start); - /* root level have p_bh == NULL, brelse() eats this */ + /* root level has p_bh == NULL, brelse() eats this */ brelse(path[i].p_bh); path[i].p_bh = NULL; i--; @@ -1772,14 +1800,14 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start) BUG_ON(path[i].p_hdr->eh_magic != EXT4_EXT_MAGIC); if (!path[i].p_idx) { - /* this level hasn't touched yet */ + /* this level hasn't been touched yet */ path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr); path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries)+1; ext_debug("init index ptr: hdr 0x%p, num %d\n", path[i].p_hdr, le16_to_cpu(path[i].p_hdr->eh_entries)); } else { - /* we've already was here, see at next index */ + /* we were already here, see at next index */ path[i].p_idx--; } @@ -1799,19 +1827,19 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start) break; } - /* put actual number of indexes to know is this - * number got changed at the next iteration */ + /* save actual number of indexes since this + * number is changed at the next iteration */ path[i].p_block = le16_to_cpu(path[i].p_hdr->eh_entries); i++; } else { - /* we finish processing this index, go up */ + /* we finished processing this index, go up */ if (path[i].p_hdr->eh_entries == 0 && i > 0) { - /* index is empty, remove it + /* index is empty, remove it; * handle must be already prepared by the * truncatei_leaf() */ err = ext4_ext_rm_idx(handle, inode, path + i); } - /* root level have p_bh == NULL, brelse() eats this */ + /* root level has p_bh == NULL, brelse() eats this */ brelse(path[i].p_bh); path[i].p_bh = NULL; i--; @@ -1822,8 +1850,8 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start) /* TODO: flexible tree reduction should be here */ if (path->p_hdr->eh_entries == 0) { /* - * truncate to zero freed all the tree - * so, we need to correct eh_depth + * truncate to zero freed all the tree, + * so we need to correct eh_depth */ err = ext4_ext_get_access(handle, inode, path); if (err == 0) { @@ -1912,7 +1940,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, if (goal == EXT4_EXT_CACHE_GAP) { if (!create) { /* block isn't allocated yet and - * user don't want to allocate it */ + * user doesn't want to allocate it */ goto out2; } /* we should allocate requested block */ @@ -1921,7 +1949,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, newblock = iblock - le32_to_cpu(newex.ee_block) + ext_pblock(&newex); - /* number of remain blocks in the extent */ + /* number of remaining blocks in the extent */ allocated = le16_to_cpu(newex.ee_len) - (iblock - le32_to_cpu(newex.ee_block)); goto out; @@ -1941,8 +1969,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, depth = ext_depth(inode); /* - * consistent leaf must not be empty - * this situations is possible, though, _during_ tree modification + * consistent leaf must not be empty; + * this situation is possible, though, _during_ tree modification; * this is why assert can't be put in ext4_ext_find_extent() */ BUG_ON(path[depth].p_ext == NULL && depth != 0); @@ -1960,10 +1988,10 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, */ if (ee_len > EXT_MAX_LEN) goto out2; - /* if found exent covers block, simple return it */ + /* if found extent covers 
block, simply return it */ if (iblock >= ee_block && iblock < ee_block + ee_len) { newblock = iblock - ee_block + ee_start; - /* number of remain blocks in the extent */ + /* number of remaining blocks in the extent */ allocated = ee_len - (iblock - ee_block); ext_debug("%d fit into %lu:%d -> "E3FSBLK"\n", (int) iblock, ee_block, ee_len, newblock); @@ -1974,17 +2002,18 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, } /* - * requested block isn't allocated yet + * requested block isn't allocated yet; * we couldn't try to create block if create flag is zero */ if (!create) { - /* put just found gap into cache to speedup subsequest reqs */ + /* put just found gap into cache to speed up + * subsequent requests */ ext4_ext_put_gap_in_cache(inode, path, iblock); goto out2; } /* * Okay, we need to do block allocation. Lazily initialize the block - * allocation info here if necessary + * allocation info here if necessary. */ if (S_ISREG(inode->i_mode) && (!EXT4_I(inode)->i_block_alloc_info)) ext4_init_block_alloc_info(inode); @@ -2062,9 +2091,9 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) ext4_ext_invalidate_cache(inode); /* - * TODO: optimization is possible here - * probably we need not scaning at all, - * because page truncation is enough + * TODO: optimization is possible here. + * Probably we need not scan at all, + * because page truncation is enough. */ if (ext4_orphan_add(handle, inode)) goto out_stop; @@ -2078,13 +2107,13 @@ void ext4_ext_truncate(struct inode * inode, struct page *page) err = ext4_ext_remove_space(inode, last_block); /* In a multi-transaction truncate, we only make the final - * transaction synchronous */ + * transaction synchronous. */ if (IS_SYNC(inode)) handle->h_sync = 1; out_stop: /* - * If this was a simple ftruncate(), and the file will remain alive + * If this was a simple ftruncate() and the file will remain alive, * then we need to clear up the orphan record which we created above. * However, if this was a real unlink then we were called by * ext4_delete_inode(), and we allow that function to clean up the @@ -2098,7 +2127,8 @@ out_stop: } /* - * this routine calculate max number of blocks we could modify + * ext4_ext_writepage_trans_blocks: + * calculate max number of blocks we could modify * in order to allocate new block for an inode */ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) @@ -2107,7 +2137,7 @@ int ext4_ext_writepage_trans_blocks(struct inode *inode, int num) needed = ext4_ext_calc_credits_for_insert(inode, NULL); - /* caller want to allocate num blocks, but note it includes sb */ + /* caller wants to allocate num blocks, but note it includes sb */ needed = needed * num - (num - 1); #ifdef CONFIG_QUOTA diff --git a/include/linux/ext4_fs_extents.h b/include/linux/ext4_fs_extents.h index 0eba0acf6ba6..a41cc24568ca 100644 --- a/include/linux/ext4_fs_extents.h +++ b/include/linux/ext4_fs_extents.h @@ -22,29 +22,29 @@ #include <linux/ext4_fs.h> /* - * with AGRESSIVE_TEST defined capacity of index/leaf blocks - * become very little, so index split, in-depth growing and - * other hard changes happens much more often - * this is for debug purposes only + * With AGRESSIVE_TEST defined, the capacity of index/leaf blocks + * becomes very small, so index split, in-depth growing and + * other hard changes happen much more often. + * This is for debug purposes only. */ #define AGRESSIVE_TEST_ /* - * with EXTENTS_STATS defined number of blocks and extents - * are collected in truncate path. 
they'll be showed at - * umount time + * With EXTENTS_STATS defined, the number of blocks and extents + * are collected in the truncate path. They'll be shown at + * umount time. */ #define EXTENTS_STATS__ /* - * if CHECK_BINSEARCH defined, then results of binary search - * will be checked by linear search + * If CHECK_BINSEARCH is defined, then the results of the binary search + * will also be checked by linear search. */ #define CHECK_BINSEARCH__ /* - * if EXT_DEBUG is defined you can use 'extdebug' mount option - * to get lots of info what's going on + * If EXT_DEBUG is defined you can use the 'extdebug' mount option + * to get lots of info about what's going on. */ #define EXT_DEBUG__ #ifdef EXT_DEBUG @@ -54,58 +54,58 @@ #endif /* - * if EXT_STATS is defined then stats numbers are collected - * these number will be displayed at umount time + * If EXT_STATS is defined then stats numbers are collected. + * These number will be displayed at umount time. */ #define EXT_STATS_ /* - * ext4_inode has i_block array (60 bytes total) - * first 12 bytes store ext4_extent_header - * the remain stores array of ext4_extent + * ext4_inode has i_block array (60 bytes total). + * The first 12 bytes store ext4_extent_header; + * the remainder stores an array of ext4_extent. */ /* - * this is extent on-disk structure - * it's used at the bottom of the tree + * This is the extent on-disk structure. + * It's used at the bottom of the tree. */ struct ext4_extent { __le32 ee_block; /* first logical block extent covers */ __le16 ee_len; /* number of blocks covered by extent */ __le16 ee_start_hi; /* high 16 bits of physical block */ - __le32 ee_start; /* low 32 bigs of physical block */ + __le32 ee_start; /* low 32 bits of physical block */ }; /* - * this is index on-disk structure - * it's used at all the levels, but the bottom + * This is index on-disk structure. + * It's used at all the levels except the bottom. */ struct ext4_extent_idx { __le32 ei_block; /* index covers logical blocks from 'block' */ __le32 ei_leaf; /* pointer to the physical block of the next * - * level. leaf or next index could bet here */ + * level. leaf or next index could be there */ __le16 ei_leaf_hi; /* high 16 bits of physical block */ __u16 ei_unused; }; /* - * each block (leaves and indexes), even inode-stored has header + * Each block (leaves and indexes), even inode-stored has header. */ struct ext4_extent_header { __le16 eh_magic; /* probably will support different formats */ __le16 eh_entries; /* number of valid entries */ __le16 eh_max; /* capacity of store in entries */ - __le16 eh_depth; /* has tree real underlaying blocks? */ + __le16 eh_depth; /* has tree real underlying blocks? */ __le32 eh_generation; /* generation of the tree */ }; #define EXT4_EXT_MAGIC cpu_to_le16(0xf30a) /* - * array of ext4_ext_path contains path to some extent - * creation/lookup routines use it for traversal/splitting/etc - * truncate uses it to simulate recursive walking + * Array of ext4_ext_path contains path to some extent. + * Creation/lookup routines use it for traversal/splitting/etc. + * Truncate uses it to simulate recursive walking. */ struct ext4_ext_path { ext4_fsblk_t p_block; diff --git a/include/linux/ext4_jbd2.h b/include/linux/ext4_jbd2.h index aa273f024ad0..f69af60d7692 100644 --- a/include/linux/ext4_jbd2.h +++ b/include/linux/ext4_jbd2.h @@ -28,8 +28,8 @@ * indirection blocks, the group and superblock summaries, and the data * block to complete the transaction. 
* - * For extents-enabled fs we may have to allocate and modify upto - * 5 levels of tree + root which is stored in inode. */ + * For extents-enabled fs we may have to allocate and modify up to + * 5 levels of tree + root which are stored in the inode. */ #define EXT4_SINGLEDATA_TRANS_BLOCKS(sb) \ (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS) \ -- cgit v1.2.3 From b517bea1c74e4773482b3f41b3f493522a8c8e30 Mon Sep 17 00:00:00 2001 From: Zach Brown <zach.brown@oracle.com> Date: Wed, 11 Oct 2006 01:21:08 -0700 Subject: [PATCH] 64-bit jbd2 core Here is the patch to JBD to handle 64 bit block numbers, originally from Zach Brown. This patch is useful only after adding support for 64-bit block numbers in the filesystem. Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com> Signed-off-by: Zach Brown <zach.brown@oracle.com> Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/jbd2/commit.c | 17 +++++++++++++---- fs/jbd2/journal.c | 11 +++++++++++ fs/jbd2/recovery.c | 43 ++++++++++++++++++++++++++++++------------- fs/jbd2/revoke.c | 14 +++++++++++--- include/linux/jbd2.h | 14 ++++++++++++-- 5 files changed, 77 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index b1a4eafc1541..44d68a113c73 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -271,6 +271,14 @@ write_out_data: journal_do_submit_data(wbuf, bufs); } +static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag, + sector_t block) +{ + tag->t_blocknr = cpu_to_be32(block & (u32)~0); + if (tag_bytes > JBD_TAG_SIZE32) + tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1); +} + /* * jbd2_journal_commit_transaction * @@ -293,6 +301,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) int first_tag = 0; int tag_flag; int i; + int tag_bytes = journal_tag_bytes(journal); /* * First job: lock down the current transaction and wait for @@ -597,10 +606,10 @@ void jbd2_journal_commit_transaction(journal_t *journal) tag_flag |= JBD2_FLAG_SAME_UUID; tag = (journal_block_tag_t *) tagp; - tag->t_blocknr = cpu_to_be32(jh2bh(jh)->b_blocknr); + write_tag_block(tag_bytes, tag, jh2bh(jh)->b_blocknr); tag->t_flags = cpu_to_be32(tag_flag); - tagp += sizeof(journal_block_tag_t); - space_left -= sizeof(journal_block_tag_t); + tagp += tag_bytes; + space_left -= tag_bytes; if (first_tag) { memcpy (tagp, journal->j_uuid, 16); @@ -614,7 +623,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) if (bufs == journal->j_wbufsize || commit_transaction->t_buffers == NULL || - space_left < sizeof(journal_block_tag_t) + 16) { + space_left < tag_bytes + 16) { jbd_debug(4, "JBD: Submit %d IOs\n", bufs); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 8d0f71e562fe..926ebcbf8a7a 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1609,6 +1609,17 @@ int jbd2_journal_blocks_per_page(struct inode *inode) return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits); } +/* + * helper functions to deal with 32 or 64bit block numbers. + */ +size_t journal_tag_bytes(journal_t *journal) +{ + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) + return JBD_TAG_SIZE64; + else + return JBD_TAG_SIZE32; +} + /* * Simple support for retrying memory allocations. Introduced to help to * debug different VM deadlock avoidance strategies. 
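The commit.c hunk above introduces write_tag_block(), which splits a block number across t_blocknr (low 32 bits) and t_blocknr_high, and the recovery.c hunk below adds the matching read_tag_block() to reassemble it. A minimal, self-contained sketch of that round trip follows — it uses plain host-endian stand-ins for the on-disk tag, and omits t_flags and the cpu_to_be32()/be32_to_cpu() conversions, so it illustrates the encoding rather than reproducing the kernel code itself:

	#include <assert.h>
	#include <stdint.h>

	/* Stand-in for journal_block_tag_t with host-endian fields; the
	 * real structure stores big-endian values and also carries
	 * t_flags between the two block-number halves. */
	struct tag {
		uint32_t t_blocknr;      /* low 32 bits of the block number */
		uint32_t t_blocknr_high; /* high bits, used only with INCOMPAT_64BIT */
	};

	/* Mirrors write_tag_block(): mask off the low half, then shift
	 * the high half down in two steps. */
	static void write_tag(struct tag *tag, uint64_t block)
	{
		tag->t_blocknr = (uint32_t)(block & 0xffffffffULL);
		tag->t_blocknr_high = (uint32_t)((block >> 31) >> 1);
	}

	/* Mirrors read_tag_block(): widen the high half first, then
	 * shift it back up before OR-ing in the low half. */
	static uint64_t read_tag(const struct tag *tag)
	{
		return (uint64_t)tag->t_blocknr |
		       ((uint64_t)tag->t_blocknr_high << 32);
	}

	int main(void)
	{
		struct tag t;
		uint64_t blk = 0x123456789aULL; /* needs more than 32 bits */

		write_tag(&t, blk);
		assert(read_tag(&t) == blk);
		return 0;
	}

The two-step (block >> 31) >> 1 shift is deliberate: sector_t can be only 32 bits wide on some configurations, where a direct >> 32 on it would be undefined behaviour, while two defined shifts still yield the high half (or zero) on every build. The same idiom appears in ext4_ext_store_pblock()/ext4_idx_store_pblock() earlier in this series and in the revoke-table hash function further below.
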
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index b2012d112432..2486843adda0 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -178,19 +178,20 @@ static int jread(struct buffer_head **bhp, journal_t *journal, * Count the number of in-use tags in a journal descriptor block. */ -static int count_tags(struct buffer_head *bh, int size) +static int count_tags(journal_t *journal, struct buffer_head *bh) { char * tagp; journal_block_tag_t * tag; - int nr = 0; + int nr = 0, size = journal->j_blocksize; + int tag_bytes = journal_tag_bytes(journal); tagp = &bh->b_data[sizeof(journal_header_t)]; - while ((tagp - bh->b_data + sizeof(journal_block_tag_t)) <= size) { + while ((tagp - bh->b_data + tag_bytes) <= size) { tag = (journal_block_tag_t *) tagp; nr++; - tagp += sizeof(journal_block_tag_t); + tagp += tag_bytes; if (!(tag->t_flags & cpu_to_be32(JBD2_FLAG_SAME_UUID))) tagp += 16; @@ -307,6 +308,14 @@ int jbd2_journal_skip_recovery(journal_t *journal) return err; } +static inline sector_t read_tag_block(int tag_bytes, journal_block_tag_t *tag) +{ + sector_t block = be32_to_cpu(tag->t_blocknr); + if (tag_bytes > JBD_TAG_SIZE32) + block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; + return block; +} + static int do_one_pass(journal_t *journal, struct recovery_info *info, enum passtype pass) { @@ -318,11 +327,12 @@ static int do_one_pass(journal_t *journal, struct buffer_head * bh; unsigned int sequence; int blocktype; + int tag_bytes = journal_tag_bytes(journal); /* Precompute the maximum metadata descriptors in a descriptor block */ int MAX_BLOCKS_PER_DESC; MAX_BLOCKS_PER_DESC = ((journal->j_blocksize-sizeof(journal_header_t)) - / sizeof(journal_block_tag_t)); + / tag_bytes); /* * First thing is to establish what we expect to find in the log @@ -412,8 +422,7 @@ static int do_one_pass(journal_t *journal, * in pass REPLAY; otherwise, just skip over the * blocks it describes. */ if (pass != PASS_REPLAY) { - next_log_block += - count_tags(bh, journal->j_blocksize); + next_log_block += count_tags(journal, bh); wrap(journal, next_log_block); brelse(bh); continue; @@ -424,7 +433,7 @@ static int do_one_pass(journal_t *journal, * getting done here! 
*/ tagp = &bh->b_data[sizeof(journal_header_t)]; - while ((tagp - bh->b_data +sizeof(journal_block_tag_t)) + while ((tagp - bh->b_data + tag_bytes) <= journal->j_blocksize) { unsigned long io_block; @@ -446,7 +455,8 @@ static int do_one_pass(journal_t *journal, unsigned long blocknr; J_ASSERT(obh != NULL); - blocknr = be32_to_cpu(tag->t_blocknr); + blocknr = read_tag_block(tag_bytes, + tag); /* If the block has been * revoked, then we're all done @@ -494,7 +504,7 @@ static int do_one_pass(journal_t *journal, } skip_write: - tagp += sizeof(journal_block_tag_t); + tagp += tag_bytes; if (!(flags & JBD2_FLAG_SAME_UUID)) tagp += 16; @@ -572,17 +582,24 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, { jbd2_journal_revoke_header_t *header; int offset, max; + int record_len = 4; header = (jbd2_journal_revoke_header_t *) bh->b_data; offset = sizeof(jbd2_journal_revoke_header_t); max = be32_to_cpu(header->r_count); - while (offset < max) { + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) + record_len = 8; + + while (offset + record_len <= max) { unsigned long blocknr; int err; - blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); - offset += 4; + if (record_len == 4) + blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset))); + else + blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset))); + offset += record_len; err = jbd2_journal_set_revoke(journal, blocknr, sequence); if (err) return err; diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 5820a0c5ad26..8aac875bd301 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -584,9 +584,17 @@ static void write_one_revoke_record(journal_t *journal, *descriptorp = descriptor; } - * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = - cpu_to_be32(record->blocknr); - offset += 4; + if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_64BIT)) { + * ((__be64 *)(&jh2bh(descriptor)->b_data[offset])) = + cpu_to_be64(record->blocknr); + offset += 8; + + } else { + * ((__be32 *)(&jh2bh(descriptor)->b_data[offset])) = + cpu_to_be32(record->blocknr); + offset += 4; + } + *offsetp = offset; } diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 3251f7abb57d..5e5aa64f1261 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -150,14 +150,21 @@ typedef struct journal_header_s /* - * The block tag: used to describe a single buffer in the journal + * The block tag: used to describe a single buffer in the journal. + * t_blocknr_high is only used if INCOMPAT_64BIT is set, so this + * raw struct shouldn't be used for pointer math or sizeof() - use + * journal_tag_bytes(journal) instead to compute this. */ typedef struct journal_block_tag_s { __be32 t_blocknr; /* The on-disk block number */ __be32 t_flags; /* See below */ + __be32 t_blocknr_high; /* most-significant high 32bits. 
*/ } journal_block_tag_t; +#define JBD_TAG_SIZE32 (offsetof(journal_block_tag_t, t_blocknr_high)) +#define JBD_TAG_SIZE64 (sizeof(journal_block_tag_t)) + /* * The revoke descriptor: used on disk to describe a series of blocks to * be revoked from the log @@ -235,11 +242,13 @@ typedef struct journal_superblock_s ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) #define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 +#define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 /* Features known to this kernel version: */ #define JBD2_KNOWN_COMPAT_FEATURES 0 #define JBD2_KNOWN_ROCOMPAT_FEATURES 0 -#define JBD2_KNOWN_INCOMPAT_FEATURES JBD2_FEATURE_INCOMPAT_REVOKE +#define JBD2_KNOWN_INCOMPAT_FEATURES (JBD2_FEATURE_INCOMPAT_REVOKE | \ + JBD2_FEATURE_INCOMPAT_64BIT) #ifdef __KERNEL__ @@ -1052,6 +1061,7 @@ static inline int tid_geq(tid_t x, tid_t y) } extern int jbd2_journal_blocks_per_page(struct inode *inode); +extern size_t journal_tag_bytes(journal_t *journal); /* * Return the minimum number of blocks which must be free in the journal -- cgit v1.2.3 From 299717696d48531d70aeb4614c3939e4a28456c1 Mon Sep 17 00:00:00 2001 From: Mingming Cao <cmm@us.ibm.com> Date: Wed, 11 Oct 2006 01:21:09 -0700 Subject: [PATCH] jbd2: sector_t conversion JBD layer in-kernel block varibles type fixes to support >32 bit block number and convert to sector_t type. Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/jbd2/commit.c | 2 +- fs/jbd2/journal.c | 18 +++++++++--------- fs/jbd2/recovery.c | 8 ++++---- fs/jbd2/revoke.c | 23 ++++++++++++----------- include/linux/ext4_jbd2.h | 2 +- include/linux/jbd2.h | 17 ++++++++--------- 6 files changed, 35 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 44d68a113c73..1a9ce8885220 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -293,7 +293,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) int bufs; int flags; int err; - unsigned long blocknr; + sector_t blocknr; char *tagp = NULL; journal_header_t *header; journal_block_tag_t *tag = NULL; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 926ebcbf8a7a..259e8365ea15 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -271,7 +271,7 @@ static void journal_kill_thread(journal_t *journal) int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, struct journal_head **jh_out, - unsigned long blocknr) + sector_t blocknr) { int need_copy_out = 0; int done_copy_out = 0; @@ -555,7 +555,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) * Log buffer allocation routines: */ -int jbd2_journal_next_log_block(journal_t *journal, unsigned long *retp) +int jbd2_journal_next_log_block(journal_t *journal, sector_t *retp) { unsigned long blocknr; @@ -579,10 +579,10 @@ int jbd2_journal_next_log_block(journal_t *journal, unsigned long *retp) * ready. 
*/ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, - unsigned long *retp) + sector_t *retp) { int err = 0; - unsigned long ret; + sector_t ret; if (journal->j_inode) { ret = bmap(journal->j_inode, blocknr); @@ -618,7 +618,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) { struct buffer_head *bh; - unsigned long blocknr; + sector_t blocknr; int err; err = jbd2_journal_next_log_block(journal, &blocknr); @@ -706,7 +706,7 @@ fail: */ journal_t * jbd2_journal_init_dev(struct block_device *bdev, struct block_device *fs_dev, - int start, int len, int blocksize) + sector_t start, int len, int blocksize) { journal_t *journal = journal_init_common(); struct buffer_head *bh; @@ -753,7 +753,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) journal_t *journal = journal_init_common(); int err; int n; - unsigned long blocknr; + sector_t blocknr; if (!journal) return NULL; @@ -819,7 +819,7 @@ static void journal_fail_superblock (journal_t *journal) static int journal_reset(journal_t *journal) { journal_superblock_t *sb = journal->j_superblock; - unsigned long first, last; + sector_t first, last; first = be32_to_cpu(sb->s_first); last = be32_to_cpu(sb->s_maxlen); @@ -853,7 +853,7 @@ static int journal_reset(journal_t *journal) **/ int jbd2_journal_create(journal_t *journal) { - unsigned long blocknr; + sector_t blocknr; struct buffer_head *bh; journal_superblock_t *sb; int i, err; diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 2486843adda0..52054a83e717 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -70,7 +70,7 @@ static int do_readahead(journal_t *journal, unsigned int start) { int err; unsigned int max, nbufs, next; - unsigned long blocknr; + sector_t blocknr; struct buffer_head *bh; struct buffer_head * bufs[MAXBUF]; @@ -132,7 +132,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal, unsigned int offset) { int err; - unsigned long blocknr; + sector_t blocknr; struct buffer_head *bh; *bhp = NULL; @@ -452,7 +452,7 @@ static int do_one_pass(journal_t *journal, "block %ld in log\n", err, io_block); } else { - unsigned long blocknr; + sector_t blocknr; J_ASSERT(obh != NULL); blocknr = read_tag_block(tag_bytes, @@ -592,7 +592,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, record_len = 8; while (offset + record_len <= max) { - unsigned long blocknr; + sector_t blocknr; int err; if (record_len == 4) diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 8aac875bd301..3310a1d7ace9 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -81,7 +81,7 @@ struct jbd2_revoke_record_s { struct list_head hash; tid_t sequence; /* Used for recovery only */ - unsigned long blocknr; + sector_t blocknr; }; @@ -106,17 +106,18 @@ static void flush_descriptor(journal_t *, struct journal_head *, int); /* Utility functions to maintain the revoke table */ /* Borrowed from buffer.c: this is a tried and tested block hash function */ -static inline int hash(journal_t *journal, unsigned long block) +static inline int hash(journal_t *journal, sector_t block) { struct jbd2_revoke_table_s *table = journal->j_revoke; int hash_shift = table->hash_shift; + int hash = (int)block ^ (int)((block >> 31) >> 1); - return ((block << (hash_shift - 6)) ^ - (block >> 13) ^ - (block << (hash_shift - 12))) & (table->hash_size - 1); + return ((hash << (hash_shift - 6)) ^ + (hash >> 13) ^ + (hash << (hash_shift - 12))) & (table->hash_size - 1); } -static int 
insert_revoke_hash(journal_t *journal, unsigned long blocknr, +static int insert_revoke_hash(journal_t *journal, sector_t blocknr, tid_t seq) { struct list_head *hash_list; @@ -146,7 +147,7 @@ oom: /* Find a revoke record in the journal's hash table. */ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal, - unsigned long blocknr) + sector_t blocknr) { struct list_head *hash_list; struct jbd2_revoke_record_s *record; @@ -325,7 +326,7 @@ void jbd2_journal_destroy_revoke(journal_t *journal) * by one. */ -int jbd2_journal_revoke(handle_t *handle, unsigned long blocknr, +int jbd2_journal_revoke(handle_t *handle, sector_t blocknr, struct buffer_head *bh_in) { struct buffer_head *bh = NULL; @@ -394,7 +395,7 @@ int jbd2_journal_revoke(handle_t *handle, unsigned long blocknr, } } - jbd_debug(2, "insert revoke for block %lu, bh_in=%p\n", blocknr, bh_in); + jbd_debug(2, "insert revoke for block %llu, bh_in=%p\n",blocknr, bh_in); err = insert_revoke_hash(journal, blocknr, handle->h_transaction->t_tid); BUFFER_TRACE(bh_in, "exit"); @@ -649,7 +650,7 @@ static void flush_descriptor(journal_t *journal, */ int jbd2_journal_set_revoke(journal_t *journal, - unsigned long blocknr, + sector_t blocknr, tid_t sequence) { struct jbd2_revoke_record_s *record; @@ -673,7 +674,7 @@ int jbd2_journal_set_revoke(journal_t *journal, */ int jbd2_journal_test_revoke(journal_t *journal, - unsigned long blocknr, + sector_t blocknr, tid_t sequence) { struct jbd2_revoke_record_s *record; diff --git a/include/linux/ext4_jbd2.h b/include/linux/ext4_jbd2.h index f69af60d7692..72dd631912e4 100644 --- a/include/linux/ext4_jbd2.h +++ b/include/linux/ext4_jbd2.h @@ -154,7 +154,7 @@ __ext4_journal_forget(const char *where, handle_t *handle, struct buffer_head *b static inline int __ext4_journal_revoke(const char *where, handle_t *handle, - unsigned long blocknr, struct buffer_head *bh) + ext4_fsblk_t blocknr, struct buffer_head *bh) { int err = jbd2_journal_revoke(handle, blocknr, bh); if (err) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 5e5aa64f1261..3c939c844a7d 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -741,7 +741,7 @@ struct journal_s */ struct block_device *j_dev; int j_blocksize; - unsigned long j_blk_offset; + sector_t j_blk_offset; /* * Device which holds the client fs. 
For internal journal this will be @@ -860,7 +860,7 @@ extern void __journal_clean_data_list(transaction_t *transaction); /* Log buffer allocation */ extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *); -int jbd2_journal_next_log_block(journal_t *, unsigned long *); +int jbd2_journal_next_log_block(journal_t *, sector_t *); /* Commit management */ extern void jbd2_journal_commit_transaction(journal_t *); @@ -875,7 +875,7 @@ extern int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, struct journal_head **jh_out, - unsigned long blocknr); + sector_t blocknr); /* Transaction locking */ extern void __wait_on_journal (journal_t *); @@ -923,7 +923,7 @@ extern void jbd2_journal_unlock_updates (journal_t *); extern journal_t * jbd2_journal_init_dev(struct block_device *bdev, struct block_device *fs_dev, - int start, int len, int bsize); + sector_t start, int len, int bsize); extern journal_t * jbd2_journal_init_inode (struct inode *); extern int jbd2_journal_update_format (journal_t *); extern int jbd2_journal_check_used_features @@ -944,7 +944,7 @@ extern void jbd2_journal_abort (journal_t *, int); extern int jbd2_journal_errno (journal_t *); extern void jbd2_journal_ack_err (journal_t *); extern int jbd2_journal_clear_err (journal_t *); -extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long *); +extern int jbd2_journal_bmap(journal_t *, unsigned long, sector_t *); extern int jbd2_journal_force_commit(journal_t *); /* @@ -977,14 +977,13 @@ extern void jbd2_journal_destroy_revoke_caches(void); extern int jbd2_journal_init_revoke_caches(void); extern void jbd2_journal_destroy_revoke(journal_t *); -extern int jbd2_journal_revoke (handle_t *, - unsigned long, struct buffer_head *); +extern int jbd2_journal_revoke (handle_t *, sector_t, struct buffer_head *); extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *); extern void jbd2_journal_write_revoke_records(journal_t *, transaction_t *); /* Recovery revoke support */ -extern int jbd2_journal_set_revoke(journal_t *, unsigned long, tid_t); -extern int jbd2_journal_test_revoke(journal_t *, unsigned long, tid_t); +extern int jbd2_journal_set_revoke(journal_t *, sector_t, tid_t); +extern int jbd2_journal_test_revoke(journal_t *, sector_t, tid_t); extern void jbd2_journal_clear_revoke(journal_t *); extern void jbd2_journal_switch_revoke_table(journal_t *journal); -- cgit v1.2.3 From a1ddeb7eaecea6a924e3a79aa386797020cb436f Mon Sep 17 00:00:00 2001 From: Badari Pulavarty <pbadari@us.ibm.com> Date: Wed, 11 Oct 2006 01:21:09 -0700 Subject: [PATCH] ext4: 48bit i_file_acl As we are planning to support 48-bit block numbers for ext4, we need to support 48-bit block numbers for extended attributes. In the short term, we can do this by reusing the (on-disk) 16-bit padding (linux2.i_pad1, currently used only by "hurd") as the high-order bits for the xattr block. This patch basically does that. 
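The reclaimed pad field simply carries bits 32-47 of the xattr block number. As a rough standalone sketch of the combine/split arithmetic the inode.c hunks below apply (illustrative userspace C, not part of the patch; plain host-endian integers stand in for the little-endian on-disk fields):

/* Illustrative only: the 48-bit i_file_acl combine/split. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t disk_lo = 0x89abcdefU; /* raw_inode->i_file_acl      */
	uint16_t disk_hi = 0x1234;      /* raw_inode->i_file_acl_high */

	/* read side: widen the low word, then OR in the high bits */
	uint64_t file_acl = (uint64_t)disk_hi << 32 | disk_lo;
	printf("in-core value: 0x%012llx\n", (unsigned long long)file_acl);

	/* write side: split the 48-bit value back into the two fields */
	assert(file_acl < (1ULL << 48));
	disk_hi = (uint16_t)(file_acl >> 32);
	disk_lo = (uint32_t)file_acl;
	printf("hi 0x%04x, lo 0x%08x\n", disk_hi, disk_lo);
	return 0;
}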
Signed-off-by: Badari Pulavarty <pbadari@us.ibm.com> Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/ext4/inode.c | 10 ++++++++++ include/linux/ext4_fs.h | 6 ++++-- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2b81b1324a6f..9db8cff3baa4 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2643,6 +2643,11 @@ void ext4_read_inode(struct inode * inode) ei->i_frag_size = raw_inode->i_fsize; #endif ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl); + if ((sizeof(sector_t) > 4) && + (EXT4_SB(inode->i_sb)->s_es->s_creator_os != + cpu_to_le32(EXT4_OS_HURD))) + ei->i_file_acl |= + ((__u64)le16_to_cpu(raw_inode->i_file_acl_high)) << 32; if (!S_ISREG(inode->i_mode)) { ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl); } else { @@ -2776,6 +2781,11 @@ static int ext4_do_update_inode(handle_t *handle, raw_inode->i_frag = ei->i_frag_no; raw_inode->i_fsize = ei->i_frag_size; #endif + if ((sizeof(sector_t) > 4) && + (EXT4_SB(inode->i_sb)->s_es->s_creator_os != + cpu_to_le32(EXT4_OS_HURD))) + raw_inode->i_file_acl_high = + cpu_to_le16(ei->i_file_acl >> 32); raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl); if (!S_ISREG(inode->i_mode)) { raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl); diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index e952c6db9690..63ed89f5b27b 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -298,7 +298,7 @@ struct ext4_inode { struct { __u8 l_i_frag; /* Fragment number */ __u8 l_i_fsize; /* Fragment size */ - __u16 i_pad1; + __le16 l_i_file_acl_high; __le16 l_i_uid_high; /* these 2 fields */ __le16 l_i_gid_high; /* were reserved2[0] */ __u32 l_i_reserved2; @@ -314,7 +314,7 @@ struct ext4_inode { struct { __u8 m_i_frag; /* Fragment number */ __u8 m_i_fsize; /* Fragment size */ - __u16 m_pad1; + __le16 m_i_file_acl_high; __u32 m_i_reserved2[2]; } masix2; } osd2; /* OS dependent 2 */ @@ -328,6 +328,7 @@ struct ext4_inode { #define i_reserved1 osd1.linux1.l_i_reserved1 #define i_frag osd2.linux2.l_i_frag #define i_fsize osd2.linux2.l_i_fsize +#define i_file_acl_high osd2.linux2.l_i_file_acl_high #define i_uid_low i_uid #define i_gid_low i_gid #define i_uid_high osd2.linux2.l_i_uid_high @@ -348,6 +349,7 @@ struct ext4_inode { #define i_reserved1 osd1.masix1.m_i_reserved1 #define i_frag osd2.masix2.m_i_frag #define i_fsize osd2.masix2.m_i_fsize +#define i_file_acl_high osd2.masix2.m_i_file_acl_high #define i_reserved2 osd2.masix2.m_i_reserved2 #endif /* defined(__KERNEL__) || defined(__linux__) */ -- cgit v1.2.3 From bd81d8eec043094d3ff729a8ff6d5b3a06d3c4b1 Mon Sep 17 00:00:00 2001 From: Laurent Vivier <Laurent.Vivier@bull.net> Date: Wed, 11 Oct 2006 01:21:10 -0700 Subject: [PATCH] ext4: 64bit metadata In-kernel super block changes to support >32 bit free block numbers. 
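The on-disk counts are split into a 32-bit low half and a new high half, recombined by small helper accessors. A rough standalone sketch of that accessor pattern (illustrative C, not the kernel code; compare ext4_blocks_count()/ext4_blocks_count_set() in the super.c and ext4_fs.h hunks below, which additionally handle the __le32 byte order):

/* Illustrative only: recombining split lo/hi on-disk count fields. */
#include <stdint.h>
#include <stdio.h>

struct sb_counts {
	uint32_t s_blocks_count;    /* low 32 bits of the block count  */
	uint32_t s_blocks_count_hi; /* high 32 bits of the block count */
};

static uint64_t blocks_count(const struct sb_counts *es)
{
	return (uint64_t)es->s_blocks_count_hi << 32 | es->s_blocks_count;
}

static void blocks_count_set(struct sb_counts *es, uint64_t blk)
{
	es->s_blocks_count = (uint32_t)blk;
	es->s_blocks_count_hi = (uint32_t)(blk >> 32);
}

int main(void)
{
	struct sb_counts es;

	blocks_count_set(&es, (6ULL << 32) + 42); /* a >32 bit count */
	printf("round trip: %llu\n", (unsigned long long)blocks_count(&es));
	return 0;
}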
Signed-off-by: Laurent Vivier <Laurent.Vivier@bull.net> Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Alexandre Ratchov <alexandre.ratchov@bull.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/ext4/balloc.c | 50 +++++++++++++------------- fs/ext4/ialloc.c | 8 ++--- fs/ext4/inode.c | 6 ++-- fs/ext4/resize.c | 52 ++++++++++++++------------- fs/ext4/super.c | 96 ++++++++++++++++++++++++++++++++++--------------- include/linux/ext4_fs.h | 86 +++++++++++++++++++++++++++++++++++++------- 6 files changed, 201 insertions(+), 97 deletions(-) (limited to 'include') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index aa33ff271fa9..6887151ccc47 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -99,12 +99,13 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) desc = ext4_get_group_desc (sb, block_group, NULL); if (!desc) goto error_out; - bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); + bh = sb_bread(sb, ext4_block_bitmap(desc)); if (!bh) ext4_error (sb, "read_block_bitmap", "Cannot read block bitmap - " - "block_group = %d, block_bitmap = %u", - block_group, le32_to_cpu(desc->bg_block_bitmap)); + "block_group = %d, block_bitmap = "E3FSBLK, + block_group, + ext4_block_bitmap(desc)); error_out: return bh; } @@ -432,14 +433,14 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, es = sbi->s_es; if (block < le32_to_cpu(es->s_first_data_block) || block + count < block || - block + count > le32_to_cpu(es->s_blocks_count)) { + block + count > ext4_blocks_count(es)) { ext4_error (sb, "ext4_free_blocks", "Freeing blocks not in datazone - " "block = "E3FSBLK", count = %lu", block, count); goto error_return; } - ext4_debug ("freeing block(s) %lu-%lu\n", block, block + count - 1); + ext4_debug ("freeing block(s) %llu-%llu\n", block, block + count - 1); do_more: overflow = 0; @@ -460,12 +461,11 @@ do_more: if (!desc) goto error_return; - if (in_range (le32_to_cpu(desc->bg_block_bitmap), block, count) || - in_range (le32_to_cpu(desc->bg_inode_bitmap), block, count) || - in_range (block, le32_to_cpu(desc->bg_inode_table), - sbi->s_itb_per_group) || - in_range (block + count - 1, le32_to_cpu(desc->bg_inode_table), - sbi->s_itb_per_group)) + if (in_range(ext4_block_bitmap(desc), block, count) || + in_range(ext4_inode_bitmap(desc), block, count) || + in_range(block, ext4_inode_table(desc), sbi->s_itb_per_group) || + in_range(block + count - 1, ext4_inode_table(desc), + sbi->s_itb_per_group)) ext4_error (sb, "ext4_free_blocks", "Freeing blocks in system zones - " "Block = "E3FSBLK", count = %lu", @@ -552,8 +552,8 @@ do_more: bit + i, bitmap_bh->b_data)) { jbd_unlock_bh_state(bitmap_bh); ext4_error(sb, __FUNCTION__, - "bit already cleared for block "E3FSBLK, - block + i); + "bit already cleared for block "E3FSBLK, + (ext4_fsblk_t)(block + i)); jbd_lock_bh_state(bitmap_bh); BUFFER_TRACE(bitmap_bh, "bit already cleared"); } else { @@ -1351,7 +1351,7 @@ static int ext4_has_free_blocks(struct ext4_sb_info *sbi) ext4_fsblk_t free_blocks, root_blocks; free_blocks = percpu_counter_read_positive(&sbi->s_freeblocks_counter); - root_blocks = le32_to_cpu(sbi->s_es->s_r_blocks_count); + root_blocks = ext4_r_blocks_count(sbi->s_es); if (free_blocks < root_blocks + 1 && !capable(CAP_SYS_RESOURCE) && sbi->s_resuid != current->fsuid && (sbi->s_resgid == 0 || !in_group_p (sbi->s_resgid))) { @@ -1462,7 +1462,7 @@ ext4_fsblk_t ext4_new_blocks(handle_t *handle, struct inode *inode, * First, test whether the 
goal block is free. */ if (goal < le32_to_cpu(es->s_first_data_block) || - goal >= le32_to_cpu(es->s_blocks_count)) + goal >= ext4_blocks_count(es)) goal = le32_to_cpu(es->s_first_data_block); ext4_get_group_no_and_offset(sb, goal, &group_no, &grp_target_blk); goal_group = group_no; @@ -1561,12 +1561,12 @@ allocated: ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); - if (in_range(le32_to_cpu(gdp->bg_block_bitmap), ret_block, num) || - in_range(le32_to_cpu(gdp->bg_inode_bitmap), ret_block, num) || - in_range(ret_block, le32_to_cpu(gdp->bg_inode_table), - EXT4_SB(sb)->s_itb_per_group) || - in_range(ret_block + num - 1, le32_to_cpu(gdp->bg_inode_table), - EXT4_SB(sb)->s_itb_per_group)) + if (in_range(ext4_block_bitmap(gdp), ret_block, num) || + in_range(ext4_block_bitmap(gdp), ret_block, num) || + in_range(ret_block, ext4_inode_table(gdp), + EXT4_SB(sb)->s_itb_per_group) || + in_range(ret_block + num - 1, ext4_inode_table(gdp), + EXT4_SB(sb)->s_itb_per_group)) ext4_error(sb, "ext4_new_block", "Allocating block in system zone - " "blocks from "E3FSBLK", length %lu", @@ -1604,11 +1604,11 @@ allocated: jbd_unlock_bh_state(bitmap_bh); #endif - if (ret_block + num - 1 >= le32_to_cpu(es->s_blocks_count)) { + if (ret_block + num - 1 >= ext4_blocks_count(es)) { ext4_error(sb, "ext4_new_block", - "block("E3FSBLK") >= blocks count(%d) - " + "block("E3FSBLK") >= blocks count("E3FSBLK") - " "block_group = %lu, es == %p ", ret_block, - le32_to_cpu(es->s_blocks_count), group_no, es); + ext4_blocks_count(es), group_no, es); goto out; } @@ -1707,7 +1707,7 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) brelse(bitmap_bh); printk("ext4_count_free_blocks: stored = "E3FSBLK ", computed = "E3FSBLK", "E3FSBLK"\n", - le32_to_cpu(es->s_free_blocks_count), + EXT4_FREE_BLOCKS_COUNT(es), desc_count, bitmap_count); return bitmap_count; #else diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 94e1bb4abe31..959b7fa8f5db 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -60,12 +60,12 @@ read_inode_bitmap(struct super_block * sb, unsigned long block_group) if (!desc) goto error_out; - bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); + bh = sb_bread(sb, ext4_inode_bitmap(desc)); if (!bh) ext4_error(sb, "read_inode_bitmap", "Cannot read inode bitmap - " - "block_group = %lu, inode_bitmap = %u", - block_group, le32_to_cpu(desc->bg_inode_bitmap)); + "block_group = %lu, inode_bitmap = %llu", + block_group, ext4_inode_bitmap(desc)); error_out: return bh; } @@ -304,7 +304,7 @@ static int find_group_orlov(struct super_block *sb, struct inode *parent) goto fallback; } - blocks_per_dir = le32_to_cpu(es->s_blocks_count) - freeb; + blocks_per_dir = ext4_blocks_count(es) - freeb; sector_div(blocks_per_dir, ndirs); max_dirs = ndirs / ngroups + inodes_per_group / 16; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 9db8cff3baa4..effc38afebe3 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2438,8 +2438,8 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, */ offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) * EXT4_INODE_SIZE(sb); - block = le32_to_cpu(gdp[desc].bg_inode_table) + - (offset >> EXT4_BLOCK_SIZE_BITS(sb)); + block = ext4_inode_table(gdp + desc) + + (offset >> EXT4_BLOCK_SIZE_BITS(sb)); iloc->block_group = block_group; iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1); @@ -2506,7 +2506,7 @@ static int __ext4_get_inode_loc(struct inode *inode, goto make_io; bitmap_bh = sb_getblk(inode->i_sb, - le32_to_cpu(desc->bg_inode_bitmap)); + 
ext4_inode_bitmap(desc)); if (!bitmap_bh) goto make_io; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index c60bfed5f5e7..3dbf91b82202 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -27,7 +27,7 @@ static int verify_group_input(struct super_block *sb, { struct ext4_sb_info *sbi = EXT4_SB(sb); struct ext4_super_block *es = sbi->s_es; - ext4_fsblk_t start = le32_to_cpu(es->s_blocks_count); + ext4_fsblk_t start = ext4_blocks_count(es); ext4_fsblk_t end = start + input->blocks_count; unsigned group = input->group; ext4_fsblk_t itend = input->inode_table + sbi->s_itb_per_group; @@ -68,43 +68,43 @@ static int verify_group_input(struct super_block *sb, end - 1); else if (outside(input->block_bitmap, start, end)) ext4_warning(sb, __FUNCTION__, - "Block bitmap not in group (block %u)", + "Block bitmap not in group (block %llu)", input->block_bitmap); else if (outside(input->inode_bitmap, start, end)) ext4_warning(sb, __FUNCTION__, - "Inode bitmap not in group (block %u)", + "Inode bitmap not in group (block %llu)", input->inode_bitmap); else if (outside(input->inode_table, start, end) || outside(itend - 1, start, end)) ext4_warning(sb, __FUNCTION__, - "Inode table not in group (blocks %u-"E3FSBLK")", + "Inode table not in group (blocks %llu-%llu)", input->inode_table, itend - 1); else if (input->inode_bitmap == input->block_bitmap) ext4_warning(sb, __FUNCTION__, - "Block bitmap same as inode bitmap (%u)", + "Block bitmap same as inode bitmap (%llu)", input->block_bitmap); else if (inside(input->block_bitmap, input->inode_table, itend)) ext4_warning(sb, __FUNCTION__, - "Block bitmap (%u) in inode table (%u-"E3FSBLK")", + "Block bitmap (%llu) in inode table (%llu-%llu)", input->block_bitmap, input->inode_table, itend-1); else if (inside(input->inode_bitmap, input->inode_table, itend)) ext4_warning(sb, __FUNCTION__, - "Inode bitmap (%u) in inode table (%u-"E3FSBLK")", + "Inode bitmap (%llu) in inode table (%llu-%llu)", input->inode_bitmap, input->inode_table, itend-1); else if (inside(input->block_bitmap, start, metaend)) ext4_warning(sb, __FUNCTION__, - "Block bitmap (%u) in GDT table" + "Block bitmap (%llu) in GDT table" " ("E3FSBLK"-"E3FSBLK")", input->block_bitmap, start, metaend - 1); else if (inside(input->inode_bitmap, start, metaend)) ext4_warning(sb, __FUNCTION__, - "Inode bitmap (%u) in GDT table" + "Inode bitmap (%llu) in GDT table" " ("E3FSBLK"-"E3FSBLK")", input->inode_bitmap, start, metaend - 1); else if (inside(input->inode_table, start, metaend) || inside(itend - 1, start, metaend)) ext4_warning(sb, __FUNCTION__, - "Inode table (%u-"E3FSBLK") overlaps" + "Inode table ("E3FSBLK"-"E3FSBLK") overlaps" "GDT table ("E3FSBLK"-"E3FSBLK")", input->inode_table, itend - 1, start, metaend - 1); else @@ -286,6 +286,7 @@ exit_journal: return err; } + /* * Iterate through the groups which hold BACKUP superblock/GDT copies in an * ext4 filesystem. 
The counters should be initialized to 1, 5, and 7 before @@ -340,12 +341,15 @@ static int verify_reserved_gdb(struct super_block *sb, int gdbackups = 0; while ((grp = ext4_list_backups(sb, &three, &five, &seven)) < end) { - if (le32_to_cpu(*p++) != grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ + if (le32_to_cpu(*p++) != + grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ ext4_warning(sb, __FUNCTION__, "reserved GDT "E3FSBLK " missing grp %d ("E3FSBLK")", blk, grp, - grp * EXT4_BLOCKS_PER_GROUP(sb) + blk); + grp * + (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + + blk); return -EINVAL; } if (++gdbackups > EXT4_ADDR_PER_BLOCK(sb)) @@ -731,8 +735,8 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) return -EPERM; } - if (le32_to_cpu(es->s_blocks_count) + input->blocks_count < - le32_to_cpu(es->s_blocks_count)) { + if (ext4_blocks_count(es) + input->blocks_count < + ext4_blocks_count(es)) { ext4_warning(sb, __FUNCTION__, "blocks_count overflow\n"); return -EINVAL; } @@ -830,9 +834,9 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) /* Update group descriptor block for new group */ gdp = (struct ext4_group_desc *)primary->b_data + gdb_off; - gdp->bg_block_bitmap = cpu_to_le32(input->block_bitmap); - gdp->bg_inode_bitmap = cpu_to_le32(input->inode_bitmap); - gdp->bg_inode_table = cpu_to_le32(input->inode_table); + ext4_block_bitmap_set(gdp, input->block_bitmap); /* LV FIXME */ + ext4_inode_bitmap_set(gdp, input->inode_bitmap); /* LV FIXME */ + ext4_inode_table_set(gdp, input->inode_table); /* LV FIXME */ gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb)); @@ -846,7 +850,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) * blocks/inodes before the group is live won't actually let us * allocate the new space yet. */ - es->s_blocks_count = cpu_to_le32(le32_to_cpu(es->s_blocks_count) + + ext4_blocks_count_set(es, ext4_blocks_count(es) + input->blocks_count); es->s_inodes_count = cpu_to_le32(le32_to_cpu(es->s_inodes_count) + EXT4_INODES_PER_GROUP(sb)); @@ -882,7 +886,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) /* Update the reserved block counts only once the new group is * active. */ - es->s_r_blocks_count = cpu_to_le32(le32_to_cpu(es->s_r_blocks_count) + + ext4_r_blocks_count_set(es, ext4_r_blocks_count(es) + input->reserved_blocks); /* Update the free space counts */ @@ -933,7 +937,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, /* We don't need to worry about locking wrt other resizers just * yet: we're going to revalidate es->s_blocks_count after * taking lock_super() below. 
*/ - o_blocks_count = le32_to_cpu(es->s_blocks_count); + o_blocks_count = ext4_blocks_count(es); o_groups_count = EXT4_SB(sb)->s_groups_count; if (test_opt(sb, DEBUG)) @@ -1004,7 +1008,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, } lock_super(sb); - if (o_blocks_count != le32_to_cpu(es->s_blocks_count)) { + if (o_blocks_count != ext4_blocks_count(es)) { ext4_warning(sb, __FUNCTION__, "multiple resizers run on filesystem!"); unlock_super(sb); @@ -1020,7 +1024,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_journal_stop(handle); goto exit_put; } - es->s_blocks_count = cpu_to_le32(o_blocks_count + add); + ext4_blocks_count_set(es, o_blocks_count + add); ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); sb->s_dirt = 1; unlock_super(sb); @@ -1032,8 +1036,8 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, if ((err = ext4_journal_stop(handle))) goto exit_put; if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT4-fs: extended group to %u blocks\n", - le32_to_cpu(es->s_blocks_count)); + printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n", + ext4_blocks_count(es)); update_backups(sb, EXT4_SB(sb)->s_sbh->b_blocknr, (char *)es, sizeof(struct ext4_super_block)); exit_put: diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 1d12e4f7d69f..b91dffd7a031 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -62,6 +62,43 @@ static void ext4_unlockfs(struct super_block *sb); static void ext4_write_super (struct super_block * sb); static void ext4_write_super_lockfs(struct super_block *sb); + +ext4_fsblk_t ext4_block_bitmap(struct ext4_group_desc *bg) +{ + return le32_to_cpu(bg->bg_block_bitmap) | + ((ext4_fsblk_t)le16_to_cpu(bg->bg_block_bitmap_hi) << 32); +} + +ext4_fsblk_t ext4_inode_bitmap(struct ext4_group_desc *bg) +{ + return le32_to_cpu(bg->bg_inode_bitmap) | + ((ext4_fsblk_t)le16_to_cpu(bg->bg_inode_bitmap_hi) << 32); +} + +ext4_fsblk_t ext4_inode_table(struct ext4_group_desc *bg) +{ + return le32_to_cpu(bg->bg_inode_table) | + ((ext4_fsblk_t)le16_to_cpu(bg->bg_inode_table_hi) << 32); +} + +void ext4_block_bitmap_set(struct ext4_group_desc *bg, ext4_fsblk_t blk) +{ + bg->bg_block_bitmap = cpu_to_le32((u32)blk); + bg->bg_block_bitmap_hi = cpu_to_le16(blk >> 32); +} + +void ext4_inode_bitmap_set(struct ext4_group_desc *bg, ext4_fsblk_t blk) +{ + bg->bg_inode_bitmap = cpu_to_le32((u32)blk); + bg->bg_inode_bitmap_hi = cpu_to_le16(blk >> 32); +} + +void ext4_inode_table_set(struct ext4_group_desc *bg, ext4_fsblk_t blk) +{ + bg->bg_inode_table = cpu_to_le32((u32)blk); + bg->bg_inode_table_hi = cpu_to_le16(blk >> 32); +} + /* * Wrappers for jbd2_journal_start/end. 
* @@ -1182,6 +1219,9 @@ static int ext4_check_descriptors (struct super_block * sb) struct ext4_sb_info *sbi = EXT4_SB(sb); ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block); ext4_fsblk_t last_block; + ext4_fsblk_t block_bitmap; + ext4_fsblk_t inode_bitmap; + ext4_fsblk_t inode_table; struct ext4_group_desc * gdp = NULL; int desc_block = 0; int i; @@ -1191,7 +1231,7 @@ static int ext4_check_descriptors (struct super_block * sb) for (i = 0; i < sbi->s_groups_count; i++) { if (i == sbi->s_groups_count - 1) - last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1; + last_block = ext4_blocks_count(sbi->s_es) - 1; else last_block = first_block + (EXT4_BLOCKS_PER_GROUP(sb) - 1); @@ -1199,42 +1239,39 @@ static int ext4_check_descriptors (struct super_block * sb) if ((i % EXT4_DESC_PER_BLOCK(sb)) == 0) gdp = (struct ext4_group_desc *) sbi->s_group_desc[desc_block++]->b_data; - if (le32_to_cpu(gdp->bg_block_bitmap) < first_block || - le32_to_cpu(gdp->bg_block_bitmap) > last_block) + block_bitmap = ext4_block_bitmap(gdp); + if (block_bitmap < first_block || block_bitmap > last_block) { ext4_error (sb, "ext4_check_descriptors", "Block bitmap for group %d" - " not in group (block %lu)!", - i, (unsigned long) - le32_to_cpu(gdp->bg_block_bitmap)); + " not in group (block "E3FSBLK")!", + i, block_bitmap); return 0; } - if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block || - le32_to_cpu(gdp->bg_inode_bitmap) > last_block) + inode_bitmap = ext4_inode_bitmap(gdp); + if (inode_bitmap < first_block || inode_bitmap > last_block) { ext4_error (sb, "ext4_check_descriptors", "Inode bitmap for group %d" - " not in group (block %lu)!", - i, (unsigned long) - le32_to_cpu(gdp->bg_inode_bitmap)); + " not in group (block "E3FSBLK")!", + i, inode_bitmap); return 0; } - if (le32_to_cpu(gdp->bg_inode_table) < first_block || - le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group > - last_block) + inode_table = ext4_inode_table(gdp); + if (inode_table < first_block || + inode_table + sbi->s_itb_per_group > last_block) { ext4_error (sb, "ext4_check_descriptors", "Inode table for group %d" - " not in group (block %lu)!", - i, (unsigned long) - le32_to_cpu(gdp->bg_inode_table)); + " not in group (block "E3FSBLK")!", + i, inode_table); return 0; } first_block += EXT4_BLOCKS_PER_GROUP(sb); gdp++; } - sbi->s_es->s_free_blocks_count=cpu_to_le32(ext4_count_free_blocks(sb)); + ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); sbi->s_es->s_free_inodes_count=cpu_to_le32(ext4_count_free_inodes(sb)); return 1; } @@ -1411,6 +1448,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) int i; int needs_recovery; __le32 features; + __u64 blocks_count; sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); if (!sbi) @@ -1620,7 +1658,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount; } - if (le32_to_cpu(es->s_blocks_count) > + if (ext4_blocks_count(es) > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { printk(KERN_ERR "EXT4-fs: filesystem on %s:" " too large to mount safely\n", sb->s_id); @@ -1632,9 +1670,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) if (EXT4_BLOCKS_PER_GROUP(sb) == 0) goto cantfind_ext4; - sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) - - le32_to_cpu(es->s_first_data_block) - 1) - / EXT4_BLOCKS_PER_GROUP(sb)) + 1; + blocks_count = (ext4_blocks_count(es) - + le32_to_cpu(es->s_first_data_block) + + EXT4_BLOCKS_PER_GROUP(sb) - 1); + do_div(blocks_count, 
EXT4_BLOCKS_PER_GROUP(sb)); + sbi->s_groups_count = blocks_count; db_count = (sbi->s_groups_count + EXT4_DESC_PER_BLOCK(sb) - 1) / EXT4_DESC_PER_BLOCK(sb); sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *), @@ -1949,7 +1989,7 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb, goto out_bdev; } - len = le32_to_cpu(es->s_blocks_count); + len = ext4_blocks_count(es); start = sb_block + 1; brelse(bh); /* we're done with the superblock */ @@ -2119,7 +2159,7 @@ static void ext4_commit_super (struct super_block * sb, if (!sbh) return; es->s_wtime = cpu_to_le32(get_seconds()); - es->s_free_blocks_count = cpu_to_le32(ext4_count_free_blocks(sb)); + ext4_free_blocks_count_set(es, ext4_count_free_blocks(sb)); es->s_free_inodes_count = cpu_to_le32(ext4_count_free_inodes(sb)); BUFFER_TRACE(sbh, "marking dirty"); mark_buffer_dirty(sbh); @@ -2312,7 +2352,7 @@ static int ext4_remount (struct super_block * sb, int * flags, char * data) ext4_init_journal_params(sb, sbi->s_journal); if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) || - n_blocks_count > le32_to_cpu(es->s_blocks_count)) { + n_blocks_count > ext4_blocks_count(es)) { if (sbi->s_mount_opt & EXT4_MOUNT_ABORT) { err = -EROFS; goto restore_opts; @@ -2431,10 +2471,10 @@ static int ext4_statfs (struct dentry * dentry, struct kstatfs * buf) buf->f_type = EXT4_SUPER_MAGIC; buf->f_bsize = sb->s_blocksize; - buf->f_blocks = le32_to_cpu(es->s_blocks_count) - overhead; + buf->f_blocks = ext4_blocks_count(es) - overhead; buf->f_bfree = percpu_counter_sum(&sbi->s_freeblocks_counter); - buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count); - if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count)) + buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); + if (buf->f_bfree < ext4_r_blocks_count(es)) buf->f_bavail = 0; buf->f_files = le32_to_cpu(es->s_inodes_count); buf->f_ffree = percpu_counter_sum(&sbi->s_freeinodes_counter); diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 63ed89f5b27b..8e5009ee4ad8 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -128,10 +128,17 @@ struct ext4_group_desc __le16 bg_free_blocks_count; /* Free blocks count */ __le16 bg_free_inodes_count; /* Free inodes count */ __le16 bg_used_dirs_count; /* Directories count */ - __u16 bg_pad; - __le32 bg_reserved[3]; + __u16 bg_flags; + __le16 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ + __le16 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ + __le16 bg_inode_table_hi; /* Inodes table block MSB */ + __u16 bg_reserved[3]; }; +#ifdef __KERNEL__ +#include <linux/ext4_fs_i.h> +#include <linux/ext4_fs_sb.h> +#endif /* * Macro-instructions used to manage group descriptors */ @@ -194,9 +201,9 @@ struct ext4_group_desc /* Used to pass group descriptor data when online resize is done */ struct ext4_new_group_input { __u32 group; /* Group number for this data */ - __u32 block_bitmap; /* Absolute block number of block bitmap */ - __u32 inode_bitmap; /* Absolute block number of inode bitmap */ - __u32 inode_table; /* Absolute block number of inode table start */ + __u64 block_bitmap; /* Absolute block number of block bitmap */ + __u64 inode_bitmap; /* Absolute block number of inode bitmap */ + __u64 inode_table; /* Absolute block number of inode table start */ __u32 blocks_count; /* Total number of blocks in this group */ __u16 reserved_blocks; /* Number of reserved blocks in this group */ __u16 unused; @@ -205,9 +212,9 @@ struct ext4_new_group_input { /* The struct ext4_new_group_input in kernel space, with 
free_blocks_count */ struct ext4_new_group_data { __u32 group; - __u32 block_bitmap; - __u32 inode_bitmap; - __u32 inode_table; + __u64 block_bitmap; + __u64 inode_bitmap; + __u64 inode_table; __u32 blocks_count; __u16 reserved_blocks; __u16 unused; @@ -494,14 +501,18 @@ struct ext4_super_block { __u8 s_def_hash_version; /* Default hash version to use */ __u8 s_reserved_char_pad; __u16 s_reserved_word_pad; - __le32 s_default_mount_opts; +/*100*/ __le32 s_default_mount_opts; __le32 s_first_meta_bg; /* First metablock block group */ - __u32 s_reserved[190]; /* Padding to the end of the block */ + __le32 s_mkfs_time; /* When the filesystem was created */ + __le32 s_jnl_blocks[17]; /* Backup of the journal inode */ + /* 64bit support valid if EXT4_FEATURE_COMPAT_64BIT */ +/*150*/ __le32 s_blocks_count_hi; /* Blocks count */ + __le32 s_r_blocks_count_hi; /* Reserved blocks count */ + __le32 s_free_blocks_count_hi; /* Free blocks count */ + __u32 s_reserved[169]; /* Padding to the end of the block */ }; #ifdef __KERNEL__ -#include <linux/ext4_fs_i.h> -#include <linux/ext4_fs_sb.h> static inline struct ext4_sb_info * EXT4_SB(struct super_block *sb) { return sb->s_fs_info; @@ -588,12 +599,14 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino) #define EXT4_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ #define EXT4_FEATURE_INCOMPAT_META_BG 0x0010 #define EXT4_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ +#define EXT4_FEATURE_INCOMPAT_64BIT 0x0080 #define EXT4_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR #define EXT4_FEATURE_INCOMPAT_SUPP (EXT4_FEATURE_INCOMPAT_FILETYPE| \ EXT4_FEATURE_INCOMPAT_RECOVER| \ EXT4_FEATURE_INCOMPAT_META_BG| \ - EXT4_FEATURE_INCOMPAT_EXTENTS) + EXT4_FEATURE_INCOMPAT_EXTENTS| \ + EXT4_FEATURE_INCOMPAT_64BIT) #define EXT4_FEATURE_RO_COMPAT_SUPP (EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER| \ EXT4_FEATURE_RO_COMPAT_LARGE_FILE| \ EXT4_FEATURE_RO_COMPAT_BTREE_DIR) @@ -888,6 +901,53 @@ extern void ext4_abort (struct super_block *, const char *, const char *, ...) extern void ext4_warning (struct super_block *, const char *, const char *, ...) 
__attribute__ ((format (printf, 3, 4))); extern void ext4_update_dynamic_rev (struct super_block *sb); +extern ext4_fsblk_t ext4_block_bitmap(struct ext4_group_desc *bg); +extern ext4_fsblk_t ext4_inode_bitmap(struct ext4_group_desc *bg); +extern ext4_fsblk_t ext4_inode_table(struct ext4_group_desc *bg); +extern void ext4_block_bitmap_set(struct ext4_group_desc *bg, ext4_fsblk_t blk); +extern void ext4_inode_bitmap_set(struct ext4_group_desc *bg, ext4_fsblk_t blk); +extern void ext4_inode_table_set(struct ext4_group_desc *bg, ext4_fsblk_t blk); + +static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) +{ + return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | + le32_to_cpu(es->s_blocks_count); +} + +static inline ext4_fsblk_t ext4_r_blocks_count(struct ext4_super_block *es) +{ + return ((ext4_fsblk_t)le32_to_cpu(es->s_r_blocks_count_hi) << 32) | + le32_to_cpu(es->s_r_blocks_count); +} + +static inline ext4_fsblk_t ext4_free_blocks_count(struct ext4_super_block *es) +{ + return ((ext4_fsblk_t)le32_to_cpu(es->s_free_blocks_count_hi) << 32) | + le32_to_cpu(es->s_free_blocks_count); +} + +static inline void ext4_blocks_count_set(struct ext4_super_block *es, + ext4_fsblk_t blk) +{ + es->s_blocks_count = cpu_to_le32((u32)blk); + es->s_blocks_count_hi = cpu_to_le32(blk >> 32); +} + +static inline void ext4_free_blocks_count_set(struct ext4_super_block *es, + ext4_fsblk_t blk) +{ + es->s_free_blocks_count = cpu_to_le32((u32)blk); + es->s_free_blocks_count_hi = cpu_to_le32(blk >> 32); +} + +static inline void ext4_r_blocks_count_set(struct ext4_super_block *es, + ext4_fsblk_t blk) +{ + es->s_r_blocks_count = cpu_to_le32((u32)blk); + es->s_r_blocks_count_hi = cpu_to_le32(blk >> 32); +} + + #define ext4_std_error(sb, errno) \ do { \ -- cgit v1.2.3 From 2ae0210760aed9d626eaede5b63db95e198f7c8e Mon Sep 17 00:00:00 2001 From: Mingming Cao <cmm@us.ibm.com> Date: Wed, 11 Oct 2006 01:21:11 -0700 Subject: [PATCH] ext4: blk_type from sector_t to unsigned long long Change ext4 in-kernel block type (ext4_fsblk_t) from sector_t to unsigned long long. 
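Because the width of sector_t varies with the kernel configuration, every print of a block number needed a format macro that varied with it; that is what the E3FSBLK conditional removed from ext4_fs_i.h below provided. With a fixed unsigned long long type, "%llu" is always correct. A rough illustrative sketch of the difference (fsblk_t and FSBLK_FMT are made-up stand-ins, not kernel names):

/* Illustrative only: a config-dependent type forces a format macro. */
#include <stdio.h>

#ifdef VARIABLE_WIDTH_BLOCKS
typedef unsigned long fsblk_t;      /* may be only 32 bits wide */
#define FSBLK_FMT "%lu"             /* format must track the type */
#else
typedef unsigned long long fsblk_t; /* always at least 64 bits */
#define FSBLK_FMT "%llu"            /* one format everywhere */
#endif

int main(void)
{
	fsblk_t blk = 1ULL << 40; /* silently truncated if fsblk_t is 32-bit */

	printf("block " FSBLK_FMT "\n", blk);
	return 0;
}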
Remove the ext4 block type format string macro E3FSBLK, replacing it with "%llu". [akpm@osdl.org: build fix] Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/ext4/balloc.c | 18 +++++++++--------- fs/ext4/extents.c | 38 +++++++++++++++++++------------------- fs/ext4/inode.c | 6 +++--- fs/ext4/resize.c | 28 ++++++++++++++-------------- fs/ext4/super.c | 6 +++--- fs/ext4/xattr.c | 12 ++++++------ include/linux/ext4_fs_i.h | 8 +------- 7 files changed, 55 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 6887151ccc47..df77ea891f29 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -103,7 +103,7 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) if (!bh) ext4_error (sb, "read_block_bitmap", "Cannot read block bitmap - " - "block_group = %d, block_bitmap = "E3FSBLK, + "block_group = %d, block_bitmap = %llu", block_group, ext4_block_bitmap(desc)); error_out: @@ -148,7 +148,7 @@ restart: rsv = list_entry(n, struct ext4_reserve_window_node, rsv_node); if (verbose) printk("reservation window 0x%p " - "start: "E3FSBLK", end: "E3FSBLK"\n", + "start: %llu, end: %llu\n", rsv, rsv->rsv_start, rsv->rsv_end); if (rsv->rsv_start && rsv->rsv_start >= rsv->rsv_end) { printk("Bad reservation %p (start >= end)\n", @@ -436,7 +436,7 @@ void ext4_free_blocks_sb(handle_t *handle, struct super_block *sb, block + count > ext4_blocks_count(es)) { ext4_error (sb, "ext4_free_blocks", "Freeing blocks not in datazone - " - "block = "E3FSBLK", count = %lu", block, count); + "block = %llu, count = %lu", block, count); goto error_return; } @@ -468,7 +468,7 @@ do_more: sbi->s_itb_per_group)) ext4_error (sb, "ext4_free_blocks", "Freeing blocks in system zones - " - "Block = "E3FSBLK", count = %lu", + "Block = %llu, count = %lu", block, count); /* @@ -552,7 +552,7 @@ do_more: bit + i, bitmap_bh->b_data)) { jbd_unlock_bh_state(bitmap_bh); ext4_error(sb, __FUNCTION__, - "bit already cleared for block "E3FSBLK, + "bit already cleared for block %llu", (ext4_fsblk_t)(block + i)); jbd_lock_bh_state(bitmap_bh); BUFFER_TRACE(bitmap_bh, "bit already cleared"); @@ -1569,7 +1569,7 @@ allocated: EXT4_SB(sb)->s_itb_per_group)) ext4_error(sb, "ext4_new_block", "Allocating block in system zone - " - "blocks from "E3FSBLK", length %lu", + "blocks from %llu, length %lu", ret_block, num); performed_allocation = 1; @@ -1606,7 +1606,7 @@ allocated: if (ret_block + num - 1 >= ext4_blocks_count(es)) { ext4_error(sb, "ext4_new_block", - "block("E3FSBLK") >= blocks count("E3FSBLK") - " + "block(%llu) >= blocks count(%llu) - " "block_group = %lu, es == %p ", ret_block, ext4_blocks_count(es), group_no, es); goto out; @@ -1705,8 +1705,8 @@ ext4_fsblk_t ext4_count_free_blocks(struct super_block *sb) bitmap_count += x; } brelse(bitmap_bh); - printk("ext4_count_free_blocks: stored = "E3FSBLK - ", computed = "E3FSBLK", "E3FSBLK"\n", + printk("ext4_count_free_blocks: stored = %llu" + ", computed = %llu, %llu\n", EXT4_FREE_BLOCKS_COUNT(es), desc_count, bitmap_count); return bitmap_count; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index e06e937a52b8..f72b7567bfa2 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -281,10 +281,10 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path) ext_debug("path:"); for (k = 0; k <= l; k++, path++) { if (path->p_idx) { - ext_debug(" %d->"E3FSBLK, 
le32_to_cpu(path->p_idx->ei_block), + ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block), idx_pblock(path->p_idx)); } else if (path->p_ext) { - ext_debug(" %d:%d:"E3FSBLK" ", + ext_debug(" %d:%d:%llu ", le32_to_cpu(path->p_ext->ee_block), le16_to_cpu(path->p_ext->ee_len), ext_pblock(path->p_ext)); @@ -308,7 +308,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path) ex = EXT_FIRST_EXTENT(eh); for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) { - ext_debug("%d:%d:"E3FSBLK" ", le32_to_cpu(ex->ee_block), + ext_debug("%d:%d:%llu ", le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len), ext_pblock(ex)); } ext_debug("\n"); @@ -426,7 +426,7 @@ ext4_ext_binsearch(struct inode *inode, struct ext4_ext_path *path, int block) } path->p_ext = l - 1; - ext_debug(" -> %d:"E3FSBLK":%d ", + ext_debug(" -> %d:%llu:%d ", le32_to_cpu(path->p_ext->ee_block), ext_pblock(path->p_ext), le16_to_cpu(path->p_ext->ee_len)); @@ -687,7 +687,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, path[depth].p_ext++; while (path[depth].p_ext <= EXT_MAX_EXTENT(path[depth].p_hdr)) { - ext_debug("move %d:"E3FSBLK":%d in new leaf "E3FSBLK"\n", + ext_debug("move %d:%llu:%d in new leaf %llu\n", le32_to_cpu(path[depth].p_ext->ee_block), ext_pblock(path[depth].p_ext), le16_to_cpu(path[depth].p_ext->ee_len), @@ -752,7 +752,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, fidx->ei_block = border; ext4_idx_store_pblock(fidx, oldblock); - ext_debug("int.index at %d (block "E3FSBLK"): %lu -> "E3FSBLK"\n", i, + ext_debug("int.index at %d (block %llu): %lu -> %llu\n", i, newblock, (unsigned long) le32_to_cpu(border), oldblock); /* copy indexes */ @@ -764,7 +764,7 @@ static int ext4_ext_split(handle_t *handle, struct inode *inode, BUG_ON(EXT_MAX_INDEX(path[i].p_hdr) != EXT_LAST_INDEX(path[i].p_hdr)); while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { - ext_debug("%d: move %d:%d in new index "E3FSBLK"\n", i, + ext_debug("%d: move %d:%d in new index %llu\n", i, le32_to_cpu(path[i].p_idx->ei_block), idx_pblock(path[i].p_idx), newblock); @@ -898,7 +898,7 @@ static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, neh = ext_inode_hdr(inode); fidx = EXT_FIRST_INDEX(neh); - ext_debug("new root: num %d(%d), lblock %d, ptr "E3FSBLK"\n", + ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n", le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max), le32_to_cpu(fidx->ei_block), idx_pblock(fidx)); @@ -1145,7 +1145,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, /* try to insert block into found extent and return */ if (ex && ext4_can_extents_be_merged(inode, ex, newext)) { - ext_debug("append %d block to %d:%d (from "E3FSBLK")\n", + ext_debug("append %d block to %d:%d (from %llu)\n", le16_to_cpu(newext->ee_len), le32_to_cpu(ex->ee_block), le16_to_cpu(ex->ee_len), ext_pblock(ex)); @@ -1204,7 +1204,7 @@ has_space: if (!nearex) { /* there is no extent in this leaf, create first one */ - ext_debug("first extent in the leaf: %d:"E3FSBLK":%d\n", + ext_debug("first extent in the leaf: %d:%llu:%d\n", le32_to_cpu(newext->ee_block), ext_pblock(newext), le16_to_cpu(newext->ee_len)); @@ -1216,7 +1216,7 @@ has_space: len = EXT_MAX_EXTENT(eh) - nearex; len = (len - 1) * sizeof(struct ext4_extent); len = len < 0 ? 
0 : len; - ext_debug("insert %d:"E3FSBLK":%d after: nearest 0x%p, " + ext_debug("insert %d:%llu:%d after: nearest 0x%p, " "move %d from 0x%p to 0x%p\n", le32_to_cpu(newext->ee_block), ext_pblock(newext), @@ -1229,7 +1229,7 @@ has_space: BUG_ON(newext->ee_block == nearex->ee_block); len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext4_extent); len = len < 0 ? 0 : len; - ext_debug("insert %d:"E3FSBLK":%d before: nearest 0x%p, " + ext_debug("insert %d:%llu:%d before: nearest 0x%p, " "move %d from 0x%p to 0x%p\n", le32_to_cpu(newext->ee_block), ext_pblock(newext), @@ -1464,7 +1464,7 @@ ext4_ext_in_cache(struct inode *inode, unsigned long block, ex->ee_block = cpu_to_le32(cex->ec_block); ext4_ext_store_pblock(ex, cex->ec_start); ex->ee_len = cpu_to_le16(cex->ec_len); - ext_debug("%lu cached by %lu:%lu:"E3FSBLK"\n", + ext_debug("%lu cached by %lu:%lu:%llu\n", (unsigned long) block, (unsigned long) cex->ec_block, (unsigned long) cex->ec_len, @@ -1498,7 +1498,7 @@ int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, path->p_hdr->eh_entries = cpu_to_le16(le16_to_cpu(path->p_hdr->eh_entries)-1); if ((err = ext4_ext_dirty(handle, inode, path))) return err; - ext_debug("index is empty, remove it, free block "E3FSBLK"\n", leaf); + ext_debug("index is empty, remove it, free block %llu\n", leaf); bh = sb_find_get_block(inode->i_sb, leaf); ext4_forget(handle, 1, inode, bh, leaf); ext4_free_blocks(handle, inode, leaf, 1); @@ -1585,7 +1585,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode, ext4_fsblk_t start; num = le32_to_cpu(ex->ee_block) + le16_to_cpu(ex->ee_len) - from; start = ext_pblock(ex) + le16_to_cpu(ex->ee_len) - num; - ext_debug("free last %lu blocks starting "E3FSBLK"\n", num, start); + ext_debug("free last %lu blocks starting %llu\n", num, start); for (i = 0; i < num; i++) { bh = sb_find_get_block(inode->i_sb, start + i); ext4_forget(handle, 0, inode, bh, start + i); @@ -1699,7 +1699,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode, if (err) goto out; - ext_debug("new extent: %u:%u:"E3FSBLK"\n", block, num, + ext_debug("new extent: %u:%u:%llu\n", block, num, ext_pblock(ex)); ex--; ex_ee_block = le32_to_cpu(ex->ee_block); @@ -1816,7 +1816,7 @@ int ext4_ext_remove_space(struct inode *inode, unsigned long start) path[i].p_idx); if (ext4_ext_more_to_rm(path + i)) { /* go to the next level */ - ext_debug("move to level %d (block "E3FSBLK")\n", + ext_debug("move to level %d (block %llu)\n", i + 1, idx_pblock(path[i].p_idx)); memset(path + i + 1, 0, sizeof(*path)); path[i+1].p_bh = @@ -1993,7 +1993,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, newblock = iblock - ee_block + ee_start; /* number of remaining blocks in the extent */ allocated = ee_len - (iblock - ee_block); - ext_debug("%d fit into %lu:%d -> "E3FSBLK"\n", (int) iblock, + ext_debug("%d fit into %lu:%d -> %llu\n", (int) iblock, ee_block, ee_len, newblock); ext4_ext_put_in_cache(inode, ee_block, ee_len, ee_start, EXT4_EXT_CACHE_EXTENT); @@ -2024,7 +2024,7 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode, newblock = ext4_new_blocks(handle, inode, goal, &allocated, &err); if (!newblock) goto out2; - ext_debug("allocate new block: goal "E3FSBLK", found "E3FSBLK"/%lu\n", + ext_debug("allocate new block: goal %llu, found %llu/%lu\n", goal, newblock, allocated); /* try to insert new extent into found leaf and return */ diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index effc38afebe3..99b82b52b5f0 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2115,7 
+2115,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, */ if (!bh) { ext4_error(inode->i_sb, "ext4_free_branches", - "Read failure, inode=%lu, block="E3FSBLK, + "Read failure, inode=%lu, block=%llu", inode->i_ino, nr); continue; } @@ -2466,7 +2466,7 @@ static int __ext4_get_inode_loc(struct inode *inode, if (!bh) { ext4_error (inode->i_sb, "ext4_get_inode_loc", "unable to read inode block - " - "inode=%lu, block="E3FSBLK, + "inode=%lu, block=%llu", inode->i_ino, block); return -EIO; } @@ -2548,7 +2548,7 @@ make_io: if (!buffer_uptodate(bh)) { ext4_error(inode->i_sb, "ext4_get_inode_loc", "unable to read inode block - " - "inode=%lu, block="E3FSBLK, + "inode=%lu, block=%llu", inode->i_ino, block); brelse(bh); return -EIO; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3dbf91b82202..3e960677c2f2 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -64,7 +64,7 @@ static int verify_group_input(struct super_block *sb, input->blocks_count); else if (!(bh = sb_bread(sb, end - 1))) ext4_warning(sb, __FUNCTION__, - "Cannot read last block ("E3FSBLK")", + "Cannot read last block (%llu)", end - 1); else if (outside(input->block_bitmap, start, end)) ext4_warning(sb, __FUNCTION__, @@ -94,18 +94,18 @@ static int verify_group_input(struct super_block *sb, else if (inside(input->block_bitmap, start, metaend)) ext4_warning(sb, __FUNCTION__, "Block bitmap (%llu) in GDT table" - " ("E3FSBLK"-"E3FSBLK")", + " (%llu-%llu)", input->block_bitmap, start, metaend - 1); else if (inside(input->inode_bitmap, start, metaend)) ext4_warning(sb, __FUNCTION__, "Inode bitmap (%llu) in GDT table" - " ("E3FSBLK"-"E3FSBLK")", + " (%llu-%llu)", input->inode_bitmap, start, metaend - 1); else if (inside(input->inode_table, start, metaend) || inside(itend - 1, start, metaend)) ext4_warning(sb, __FUNCTION__, - "Inode table ("E3FSBLK"-"E3FSBLK") overlaps" - "GDT table ("E3FSBLK"-"E3FSBLK")", + "Inode table (%llu-%llu) overlaps" + "GDT table (%llu-%llu)", input->inode_table, itend - 1, start, metaend - 1); else err = 0; @@ -344,8 +344,8 @@ static int verify_reserved_gdb(struct super_block *sb, if (le32_to_cpu(*p++) != grp * EXT4_BLOCKS_PER_GROUP(sb) + blk){ ext4_warning(sb, __FUNCTION__, - "reserved GDT "E3FSBLK - " missing grp %d ("E3FSBLK")", + "reserved GDT %llu" + " missing grp %d (%llu)", blk, grp, grp * (ext4_fsblk_t)EXT4_BLOCKS_PER_GROUP(sb) + @@ -424,7 +424,7 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, data = (__le32 *)dind->b_data; if (le32_to_cpu(data[gdb_num % EXT4_ADDR_PER_BLOCK(sb)]) != gdblock) { ext4_warning(sb, __FUNCTION__, - "new group %u GDT block "E3FSBLK" not reserved", + "new group %u GDT block %llu not reserved", input->group, gdblock); err = -EINVAL; goto exit_dind; @@ -547,7 +547,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, for (res = 0; res < reserved_gdb; res++, blk++) { if (le32_to_cpu(*data) != blk) { ext4_warning(sb, __FUNCTION__, - "reserved block "E3FSBLK + "reserved block %llu" " not at offset %ld", blk, (long)(data - (__le32 *)dind->b_data)); @@ -941,7 +941,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, o_groups_count = EXT4_SB(sb)->s_groups_count; if (test_opt(sb, DEBUG)) - printk(KERN_DEBUG "EXT4-fs: extending last group from "E3FSBLK" uto "E3FSBLK" blocks\n", + printk(KERN_DEBUG "EXT4-fs: extending last group from %llu uto %llu blocks\n", o_blocks_count, n_blocks_count); if (n_blocks_count == 0 || n_blocks_count == o_blocks_count) @@ -949,7 +949,7 @@ int ext4_group_extend(struct 
super_block *sb, struct ext4_super_block *es, if (n_blocks_count > (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) { printk(KERN_ERR "EXT4-fs: filesystem on %s:" - " too large to resize to "E3FSBLK" blocks safely\n", + " too large to resize to %llu blocks safely\n", sb->s_id, n_blocks_count); if (sizeof(sector_t) < 8) ext4_warning(sb, __FUNCTION__, @@ -984,7 +984,7 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, if (o_blocks_count + add < n_blocks_count) ext4_warning(sb, __FUNCTION__, - "will only finish group ("E3FSBLK + "will only finish group (%llu" " blocks, %u new)", o_blocks_count + add, add); @@ -1028,10 +1028,10 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es, ext4_journal_dirty_metadata(handle, EXT4_SB(sb)->s_sbh); sb->s_dirt = 1; unlock_super(sb); - ext4_debug("freeing blocks %lu through "E3FSBLK"\n", o_blocks_count, + ext4_debug("freeing blocks %lu through %llu\n", o_blocks_count, o_blocks_count + add); ext4_free_blocks_sb(handle, sb, o_blocks_count, add, &freed_blocks); - ext4_debug("freed blocks "E3FSBLK" through "E3FSBLK"\n", o_blocks_count, + ext4_debug("freed blocks %llu through %llu\n", o_blocks_count, o_blocks_count + add); if ((err = ext4_journal_stop(handle))) goto exit_put; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index b91dffd7a031..d844175e60e8 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1244,7 +1244,7 @@ static int ext4_check_descriptors (struct super_block * sb) { ext4_error (sb, "ext4_check_descriptors", "Block bitmap for group %d" - " not in group (block "E3FSBLK")!", + " not in group (block %llu)!", i, block_bitmap); return 0; } @@ -1253,7 +1253,7 @@ static int ext4_check_descriptors (struct super_block * sb) { ext4_error (sb, "ext4_check_descriptors", "Inode bitmap for group %d" - " not in group (block "E3FSBLK")!", + " not in group (block %llu)!", i, inode_bitmap); return 0; } @@ -1263,7 +1263,7 @@ static int ext4_check_descriptors (struct super_block * sb) { ext4_error (sb, "ext4_check_descriptors", "Inode table for group %d" - " not in group (block "E3FSBLK")!", + " not in group (block %llu)!", i, inode_table); return 0; } diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index 90f7d5c0bae4..63233cd946a7 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -233,7 +233,7 @@ ext4_xattr_block_get(struct inode *inode, int name_index, const char *name, atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext4_xattr_check_block(bh)) { bad_block: ext4_error(inode->i_sb, __FUNCTION__, - "inode %lu: bad block "E3FSBLK, inode->i_ino, + "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; goto cleanup; @@ -375,7 +375,7 @@ ext4_xattr_block_list(struct inode *inode, char *buffer, size_t buffer_size) atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount)); if (ext4_xattr_check_block(bh)) { ext4_error(inode->i_sb, __FUNCTION__, - "inode %lu: bad block "E3FSBLK, inode->i_ino, + "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; goto cleanup; @@ -647,7 +647,7 @@ ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i, le32_to_cpu(BHDR(bs->bh)->h_refcount)); if (ext4_xattr_check_block(bs->bh)) { ext4_error(sb, __FUNCTION__, - "inode %lu: bad block "E3FSBLK, inode->i_ino, + "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); error = -EIO; goto cleanup; @@ -848,7 +848,7 @@ cleanup_dquot: bad_block: ext4_error(inode->i_sb, __FUNCTION__, - "inode %lu: bad block "E3FSBLK, inode->i_ino, + 
"inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; @@ -1077,14 +1077,14 @@ ext4_xattr_delete_inode(handle_t *handle, struct inode *inode) bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl); if (!bh) { ext4_error(inode->i_sb, __FUNCTION__, - "inode %lu: block "E3FSBLK" read error", inode->i_ino, + "inode %lu: block %llu read error", inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; } if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) || BHDR(bh)->h_blocks != cpu_to_le32(1)) { ext4_error(inode->i_sb, __FUNCTION__, - "inode %lu: bad block "E3FSBLK, inode->i_ino, + "inode %lu: bad block %llu", inode->i_ino, EXT4_I(inode)->i_file_acl); goto cleanup; } diff --git a/include/linux/ext4_fs_i.h b/include/linux/ext4_fs_i.h index 2bed0effdcae..bb42379cb7fd 100644 --- a/include/linux/ext4_fs_i.h +++ b/include/linux/ext4_fs_i.h @@ -25,13 +25,7 @@ typedef int ext4_grpblk_t; /* data type for filesystem-wide blocks number */ -typedef sector_t ext4_fsblk_t; - -#if BITS_PER_LONG == 64 -#define E3FSBLK "%lu" -#else -#define E3FSBLK "%llu" -#endif +typedef unsigned long long ext4_fsblk_t; struct ext4_reserve_window { ext4_fsblk_t _rsv_start; /* First byte reserved */ -- cgit v1.2.3 From 18eba7aae080d4a5c0d850ea810e83d11f0a8d77 Mon Sep 17 00:00:00 2001 From: Mingming Cao <cmm@us.ibm.com> Date: Wed, 11 Oct 2006 01:21:13 -0700 Subject: [PATCH] jbd2: switch blks_type from sector_t to ull Similar to ext4, change blocks in JBD2 from sector_t to unsigned long long. Signed-off-by: Mingming Cao <cmm@us.ibm.com> Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/jbd2/commit.c | 4 ++-- fs/jbd2/journal.c | 18 +++++++++--------- fs/jbd2/recovery.c | 12 ++++++------ fs/jbd2/revoke.c | 14 +++++++------- include/linux/jbd2.h | 16 ++++++++-------- 5 files changed, 32 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 1a9ce8885220..70b2ae1ef281 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -272,7 +272,7 @@ write_out_data: } static inline void write_tag_block(int tag_bytes, journal_block_tag_t *tag, - sector_t block) + unsigned long long block) { tag->t_blocknr = cpu_to_be32(block & (u32)~0); if (tag_bytes > JBD_TAG_SIZE32) @@ -293,7 +293,7 @@ void jbd2_journal_commit_transaction(journal_t *journal) int bufs; int flags; int err; - sector_t blocknr; + unsigned long long blocknr; char *tagp = NULL; journal_header_t *header; journal_block_tag_t *tag = NULL; diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 259e8365ea15..10db92ced014 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -271,7 +271,7 @@ static void journal_kill_thread(journal_t *journal) int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, struct journal_head **jh_out, - sector_t blocknr) + unsigned long long blocknr) { int need_copy_out = 0; int done_copy_out = 0; @@ -555,7 +555,7 @@ int jbd2_log_wait_commit(journal_t *journal, tid_t tid) * Log buffer allocation routines: */ -int jbd2_journal_next_log_block(journal_t *journal, sector_t *retp) +int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp) { unsigned long blocknr; @@ -579,10 +579,10 @@ int jbd2_journal_next_log_block(journal_t *journal, sector_t *retp) * ready. 
*/ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, - sector_t *retp) + unsigned long long *retp) { int err = 0; - sector_t ret; + unsigned long long ret; if (journal->j_inode) { ret = bmap(journal->j_inode, blocknr); @@ -618,7 +618,7 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr, struct journal_head *jbd2_journal_get_descriptor_buffer(journal_t *journal) { struct buffer_head *bh; - sector_t blocknr; + unsigned long long blocknr; int err; err = jbd2_journal_next_log_block(journal, &blocknr); @@ -706,7 +706,7 @@ fail: */ journal_t * jbd2_journal_init_dev(struct block_device *bdev, struct block_device *fs_dev, - sector_t start, int len, int blocksize) + unsigned long long start, int len, int blocksize) { journal_t *journal = journal_init_common(); struct buffer_head *bh; @@ -753,7 +753,7 @@ journal_t * jbd2_journal_init_inode (struct inode *inode) journal_t *journal = journal_init_common(); int err; int n; - sector_t blocknr; + unsigned long long blocknr; if (!journal) return NULL; @@ -819,7 +819,7 @@ static void journal_fail_superblock (journal_t *journal) static int journal_reset(journal_t *journal) { journal_superblock_t *sb = journal->j_superblock; - sector_t first, last; + unsigned long long first, last; first = be32_to_cpu(sb->s_first); last = be32_to_cpu(sb->s_maxlen); @@ -853,7 +853,7 @@ static int journal_reset(journal_t *journal) **/ int jbd2_journal_create(journal_t *journal) { - sector_t blocknr; + unsigned long long blocknr; struct buffer_head *bh; journal_superblock_t *sb; int i, err; diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 52054a83e717..9f10acafaf70 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -70,7 +70,7 @@ static int do_readahead(journal_t *journal, unsigned int start) { int err; unsigned int max, nbufs, next; - sector_t blocknr; + unsigned long long blocknr; struct buffer_head *bh; struct buffer_head * bufs[MAXBUF]; @@ -132,7 +132,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal, unsigned int offset) { int err; - sector_t blocknr; + unsigned long long blocknr; struct buffer_head *bh; *bhp = NULL; @@ -308,9 +308,9 @@ int jbd2_journal_skip_recovery(journal_t *journal) return err; } -static inline sector_t read_tag_block(int tag_bytes, journal_block_tag_t *tag) +static inline unsigned long long read_tag_block(int tag_bytes, journal_block_tag_t *tag) { - sector_t block = be32_to_cpu(tag->t_blocknr); + unsigned long long block = be32_to_cpu(tag->t_blocknr); if (tag_bytes > JBD_TAG_SIZE32) block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32; return block; @@ -452,7 +452,7 @@ static int do_one_pass(journal_t *journal, "block %ld in log\n", err, io_block); } else { - sector_t blocknr; + unsigned long long blocknr; J_ASSERT(obh != NULL); blocknr = read_tag_block(tag_bytes, @@ -592,7 +592,7 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh, record_len = 8; while (offset + record_len <= max) { - sector_t blocknr; + unsigned long long blocknr; int err; if (record_len == 4) diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c index 3310a1d7ace9..380d19917f37 100644 --- a/fs/jbd2/revoke.c +++ b/fs/jbd2/revoke.c @@ -81,7 +81,7 @@ struct jbd2_revoke_record_s { struct list_head hash; tid_t sequence; /* Used for recovery only */ - sector_t blocknr; + unsigned long long blocknr; }; @@ -106,7 +106,7 @@ static void flush_descriptor(journal_t *, struct journal_head *, int); /* Utility functions to maintain the revoke table */ /* Borrowed from buffer.c: this is a tried and tested block 
hash function */ -static inline int hash(journal_t *journal, sector_t block) +static inline int hash(journal_t *journal, unsigned long long block) { struct jbd2_revoke_table_s *table = journal->j_revoke; int hash_shift = table->hash_shift; @@ -117,7 +117,7 @@ static inline int hash(journal_t *journal, sector_t block) (hash << (hash_shift - 12))) & (table->hash_size - 1); } -static int insert_revoke_hash(journal_t *journal, sector_t blocknr, +static int insert_revoke_hash(journal_t *journal, unsigned long long blocknr, tid_t seq) { struct list_head *hash_list; @@ -147,7 +147,7 @@ oom: /* Find a revoke record in the journal's hash table. */ static struct jbd2_revoke_record_s *find_revoke_record(journal_t *journal, - sector_t blocknr) + unsigned long long blocknr) { struct list_head *hash_list; struct jbd2_revoke_record_s *record; @@ -326,7 +326,7 @@ void jbd2_journal_destroy_revoke(journal_t *journal) * by one. */ -int jbd2_journal_revoke(handle_t *handle, sector_t blocknr, +int jbd2_journal_revoke(handle_t *handle, unsigned long long blocknr, struct buffer_head *bh_in) { struct buffer_head *bh = NULL; @@ -650,7 +650,7 @@ static void flush_descriptor(journal_t *journal, */ int jbd2_journal_set_revoke(journal_t *journal, - sector_t blocknr, + unsigned long long blocknr, tid_t sequence) { struct jbd2_revoke_record_s *record; @@ -674,7 +674,7 @@ int jbd2_journal_set_revoke(journal_t *journal, */ int jbd2_journal_test_revoke(journal_t *journal, - sector_t blocknr, + unsigned long long blocknr, tid_t sequence) { struct jbd2_revoke_record_s *record; diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 3c939c844a7d..ddb128795781 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -741,7 +741,7 @@ struct journal_s */ struct block_device *j_dev; int j_blocksize; - sector_t j_blk_offset; + unsigned long long j_blk_offset; /* * Device which holds the client fs. 
For internal journal this will be @@ -860,7 +860,7 @@ extern void __journal_clean_data_list(transaction_t *transaction); /* Log buffer allocation */ extern struct journal_head * jbd2_journal_get_descriptor_buffer(journal_t *); -int jbd2_journal_next_log_block(journal_t *, sector_t *); +int jbd2_journal_next_log_block(journal_t *, unsigned long long *); /* Commit management */ extern void jbd2_journal_commit_transaction(journal_t *); @@ -875,7 +875,7 @@ extern int jbd2_journal_write_metadata_buffer(transaction_t *transaction, struct journal_head *jh_in, struct journal_head **jh_out, - sector_t blocknr); + unsigned long long blocknr); /* Transaction locking */ extern void __wait_on_journal (journal_t *); @@ -923,7 +923,7 @@ extern void jbd2_journal_unlock_updates (journal_t *); extern journal_t * jbd2_journal_init_dev(struct block_device *bdev, struct block_device *fs_dev, - sector_t start, int len, int bsize); + unsigned long long start, int len, int bsize); extern journal_t * jbd2_journal_init_inode (struct inode *); extern int jbd2_journal_update_format (journal_t *); extern int jbd2_journal_check_used_features @@ -944,7 +944,7 @@ extern void jbd2_journal_abort (journal_t *, int); extern int jbd2_journal_errno (journal_t *); extern void jbd2_journal_ack_err (journal_t *); extern int jbd2_journal_clear_err (journal_t *); -extern int jbd2_journal_bmap(journal_t *, unsigned long, sector_t *); +extern int jbd2_journal_bmap(journal_t *, unsigned long, unsigned long long *); extern int jbd2_journal_force_commit(journal_t *); /* @@ -977,13 +977,13 @@ extern void jbd2_journal_destroy_revoke_caches(void); extern int jbd2_journal_init_revoke_caches(void); extern void jbd2_journal_destroy_revoke(journal_t *); -extern int jbd2_journal_revoke (handle_t *, sector_t, struct buffer_head *); +extern int jbd2_journal_revoke (handle_t *, unsigned long long, struct buffer_head *); extern int jbd2_journal_cancel_revoke(handle_t *, struct journal_head *); extern void jbd2_journal_write_revoke_records(journal_t *, transaction_t *); /* Recovery revoke support */ -extern int jbd2_journal_set_revoke(journal_t *, sector_t, tid_t); -extern int jbd2_journal_test_revoke(journal_t *, sector_t, tid_t); +extern int jbd2_journal_set_revoke(journal_t *, unsigned long long, tid_t); +extern int jbd2_journal_test_revoke(journal_t *, unsigned long long, tid_t); extern void jbd2_journal_clear_revoke(journal_t *); extern void jbd2_journal_switch_revoke_table(journal_t *journal); -- cgit v1.2.3 From 0d1ee42f27d30eed1659f3e85bcbbc7b3711f61f Mon Sep 17 00:00:00 2001 From: Alexandre Ratchov <alexandre.ratchov@bull.net> Date: Wed, 11 Oct 2006 01:21:14 -0700 Subject: [PATCH] ext4: allow larger descriptor size make block group descriptor larger. 
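For illustration, the mechanical consequence, distilled: once the on-disk descriptor size becomes a mount-time value rather than sizeof(struct ext4_group_desc), descriptors can no longer be walked by C array indexing. A minimal sketch of the byte-offset lookup this patch switches to (hypothetical helper; the names and layout are illustrative, not the patch itself):

/* Sketch: find group descriptor 'group' when descriptors have a
 * run-time size.  desc_per_block and desc_size stand in for
 * EXT4_DESC_PER_BLOCK(sb) and EXT4_DESC_SIZE(sb); desc_blocks[]
 * for the sbi->s_group_desc[]->b_data buffers.  Illustrative only. */
struct ext4_group_desc;

static struct ext4_group_desc *
desc_lookup(char **desc_blocks, unsigned long group,
	    unsigned long desc_per_block, unsigned long desc_size)
{
	unsigned long blk = group / desc_per_block;
	unsigned long idx = group % desc_per_block;

	/* Step by bytes, not by the struct type: the on-disk
	 * descriptor may be larger than the in-memory struct. */
	return (struct ext4_group_desc *)
		(desc_blocks[blk] + idx * desc_size);
}
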
Signed-off-by: Alexandre Ratchov <alexandre.ratchov@bull.net> Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/ext4/balloc.c | 6 ++++-- fs/ext4/inode.c | 8 +++++--- fs/ext4/super.c | 18 +++++++++++++++--- include/linux/ext4_fs.h | 9 ++++++--- include/linux/ext4_fs_sb.h | 1 + 5 files changed, 31 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index df77ea891f29..3dacb124b8c8 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -74,10 +74,12 @@ struct ext4_group_desc * ext4_get_group_desc(struct super_block * sb, return NULL; } - desc = (struct ext4_group_desc *) sbi->s_group_desc[group_desc]->b_data; + desc = (struct ext4_group_desc *)( + (__u8 *)sbi->s_group_desc[group_desc]->b_data + + offset * EXT4_DESC_SIZE(sb)); if (bh) *bh = sbi->s_group_desc[group_desc]; - return desc + offset; + return desc; } /** diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index c05dc57148bb..d03e7d85a638 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2432,14 +2432,16 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, return 0; } - gdp = (struct ext4_group_desc *)bh->b_data; + gdp = (struct ext4_group_desc *)((__u8 *)bh->b_data + + desc * EXT4_DESC_SIZE(sb)); /* * Figure out the offset within the block group inode table */ offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) * EXT4_INODE_SIZE(sb); - block = ext4_inode_table(gdp + desc) + - (offset >> EXT4_BLOCK_SIZE_BITS(sb)); + block = ext4_inode_table(gdp) + (offset >> EXT4_BLOCK_SIZE_BITS(sb)); + + iloc->block_group = block_group; iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index d844175e60e8..bc8848bff2f1 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1268,7 +1268,8 @@ static int ext4_check_descriptors (struct super_block * sb) return 0; } first_block += EXT4_BLOCKS_PER_GROUP(sb); - gdp++; + gdp = (struct ext4_group_desc *) + ((__u8 *)gdp + EXT4_DESC_SIZE(sb)); } ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb)); @@ -1619,7 +1620,18 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) sbi->s_frag_size, blocksize); goto failed_mount; } - sbi->s_frags_per_block = 1; + sbi->s_desc_size = le16_to_cpu(es->s_desc_size); + if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { + if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE || + sbi->s_desc_size > EXT4_MAX_DESC_SIZE || + sbi->s_desc_size & (sbi->s_desc_size - 1)) { + printk(KERN_ERR + "EXT4-fs: unsupported descriptor size %ld\n", + sbi->s_desc_size); + goto failed_mount; + } + } else + sbi->s_desc_size = EXT4_MIN_DESC_SIZE; sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group); sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group); sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group); @@ -1630,7 +1642,7 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) goto cantfind_ext4; sbi->s_itb_per_group = sbi->s_inodes_per_group / sbi->s_inodes_per_block; - sbi->s_desc_per_block = blocksize / sizeof(struct ext4_group_desc); + sbi->s_desc_per_block = blocksize / EXT4_DESC_SIZE(sb); sbi->s_sbh = bh; sbi->s_mount_state = le16_to_cpu(es->s_state); sbi->s_addr_per_block_bits = log2(EXT4_ADDR_PER_BLOCK(sb)); diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 8e5009ee4ad8..a3df2afc004b 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -142,6 +142,9 @@ 
struct ext4_group_desc /* * Macro-instructions used to manage group descriptors */ +#define EXT4_MIN_DESC_SIZE 32 +#define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE +#define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size) #ifdef __KERNEL__ # define EXT4_BLOCKS_PER_GROUP(s) (EXT4_SB(s)->s_blocks_per_group) # define EXT4_DESC_PER_BLOCK(s) (EXT4_SB(s)->s_desc_per_block) @@ -149,7 +152,7 @@ struct ext4_group_desc # define EXT4_DESC_PER_BLOCK_BITS(s) (EXT4_SB(s)->s_desc_per_block_bits) #else # define EXT4_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group) -# define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / sizeof (struct ext4_group_desc)) +# define EXT4_DESC_PER_BLOCK(s) (EXT4_BLOCK_SIZE(s) / EXT4_DESC_SIZE(s)) # define EXT4_INODES_PER_GROUP(s) ((s)->s_inodes_per_group) #endif @@ -474,7 +477,7 @@ struct ext4_super_block { * things it doesn't understand... */ __le32 s_first_ino; /* First non-reserved inode */ - __le16 s_inode_size; /* size of inode structure */ + __le16 s_inode_size; /* size of inode structure */ __le16 s_block_group_nr; /* block group # of this superblock */ __le32 s_feature_compat; /* compatible feature set */ /*60*/ __le32 s_feature_incompat; /* incompatible feature set */ @@ -500,7 +503,7 @@ struct ext4_super_block { __le32 s_hash_seed[4]; /* HTREE hash seed */ __u8 s_def_hash_version; /* Default hash version to use */ __u8 s_reserved_char_pad; - __u16 s_reserved_word_pad; + __le16 s_desc_size; /* size of group descriptor */ /*100*/ __le32 s_default_mount_opts; __le32 s_first_meta_bg; /* First metablock block group */ __le32 s_mkfs_time; /* When the filesystem was created */ diff --git a/include/linux/ext4_fs_sb.h b/include/linux/ext4_fs_sb.h index ce7a844d6703..691a713139ce 100644 --- a/include/linux/ext4_fs_sb.h +++ b/include/linux/ext4_fs_sb.h @@ -29,6 +29,7 @@ */ struct ext4_sb_info { unsigned long s_frag_size; /* Size of a fragment in bytes */ + unsigned long s_desc_size; /* Size of a group descriptor in bytes */ unsigned long s_frags_per_block;/* Number of fragments per block */ unsigned long s_inodes_per_block;/* Number of inodes per block */ unsigned long s_frags_per_group;/* Number of fragments in a group */ -- cgit v1.2.3 From 8fadc14323684c547f74cf2f4d13517c6c264731 Mon Sep 17 00:00:00 2001 From: Alexandre Ratchov <alexandre.ratchov@bull.net> Date: Wed, 11 Oct 2006 01:21:15 -0700 Subject: [PATCH] ext4: move block number hi bits move '_hi' bits of block numbers in the larger part of the block group descriptor structure Signed-off-by: Alexandre Ratchov <alexandre.ratchov@bull.net> Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/ext4/balloc.c | 20 ++++++++++---------- fs/ext4/ialloc.c | 4 ++-- fs/ext4/inode.c | 7 +++---- fs/ext4/resize.c | 6 +++--- fs/ext4/super.c | 46 +++++++++++++++++++++++++++++----------------- include/linux/ext4_fs.h | 27 +++++++++++++++++---------- 6 files changed, 64 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 3dacb124b8c8..3e85886a6382 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -101,13 +101,13 @@ read_block_bitmap(struct super_block *sb, unsigned int block_group) desc = ext4_get_group_desc (sb, block_group, NULL); if (!desc) goto error_out; - bh = sb_bread(sb, ext4_block_bitmap(desc)); + bh = sb_bread(sb, ext4_block_bitmap(sb, desc)); if (!bh) ext4_error (sb, "read_block_bitmap", "Cannot read block bitmap - " "block_group = %d, block_bitmap = %llu", block_group, - 
ext4_block_bitmap(desc)); + ext4_block_bitmap(sb, desc)); error_out: return bh; } @@ -463,10 +463,10 @@ do_more: if (!desc) goto error_return; - if (in_range(ext4_block_bitmap(desc), block, count) || - in_range(ext4_inode_bitmap(desc), block, count) || - in_range(block, ext4_inode_table(desc), sbi->s_itb_per_group) || - in_range(block + count - 1, ext4_inode_table(desc), + if (in_range(ext4_block_bitmap(sb, desc), block, count) || + in_range(ext4_inode_bitmap(sb, desc), block, count) || + in_range(block, ext4_inode_table(sb, desc), sbi->s_itb_per_group) || + in_range(block + count - 1, ext4_inode_table(sb, desc), sbi->s_itb_per_group)) ext4_error (sb, "ext4_free_blocks", "Freeing blocks in system zones - " @@ -1563,11 +1563,11 @@ allocated: ret_block = grp_alloc_blk + ext4_group_first_block_no(sb, group_no); - if (in_range(ext4_block_bitmap(gdp), ret_block, num) || - in_range(ext4_block_bitmap(gdp), ret_block, num) || - in_range(ret_block, ext4_inode_table(gdp), + if (in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || + in_range(ext4_block_bitmap(sb, gdp), ret_block, num) || + in_range(ret_block, ext4_inode_table(sb, gdp), EXT4_SB(sb)->s_itb_per_group) || - in_range(ret_block + num - 1, ext4_inode_table(gdp), + in_range(ret_block + num - 1, ext4_inode_table(sb, gdp), EXT4_SB(sb)->s_itb_per_group)) ext4_error(sb, "ext4_new_block", "Allocating block in system zone - " diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index 959b7fa8f5db..75608e1e5555 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -60,12 +60,12 @@ read_inode_bitmap(struct super_block * sb, unsigned long block_group) if (!desc) goto error_out; - bh = sb_bread(sb, ext4_inode_bitmap(desc)); + bh = sb_bread(sb, ext4_inode_bitmap(sb, desc)); if (!bh) ext4_error(sb, "read_inode_bitmap", "Cannot read inode bitmap - " "block_group = %lu, inode_bitmap = %llu", - block_group, ext4_inode_bitmap(desc)); + block_group, ext4_inode_bitmap(sb, desc)); error_out: return bh; } diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index d03e7d85a638..0a60ec5a16db 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2439,9 +2439,8 @@ static ext4_fsblk_t ext4_get_inode_block(struct super_block *sb, */ offset = ((ino - 1) % EXT4_INODES_PER_GROUP(sb)) * EXT4_INODE_SIZE(sb); - block = ext4_inode_table(gdp) + (offset >> EXT4_BLOCK_SIZE_BITS(sb)); - - + block = ext4_inode_table(sb, gdp) + + (offset >> EXT4_BLOCK_SIZE_BITS(sb)); iloc->block_group = block_group; iloc->offset = offset & (EXT4_BLOCK_SIZE(sb) - 1); @@ -2508,7 +2507,7 @@ static int __ext4_get_inode_loc(struct inode *inode, goto make_io; bitmap_bh = sb_getblk(inode->i_sb, - ext4_inode_bitmap(desc)); + ext4_inode_bitmap(inode->i_sb, desc)); if (!bitmap_bh) goto make_io; diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3e960677c2f2..1e9578052cd3 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c @@ -834,9 +834,9 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input) /* Update group descriptor block for new group */ gdp = (struct ext4_group_desc *)primary->b_data + gdb_off; - ext4_block_bitmap_set(gdp, input->block_bitmap); /* LV FIXME */ - ext4_inode_bitmap_set(gdp, input->inode_bitmap); /* LV FIXME */ - ext4_inode_table_set(gdp, input->inode_table); /* LV FIXME */ + ext4_block_bitmap_set(sb, gdp, input->block_bitmap); /* LV FIXME */ + ext4_inode_bitmap_set(sb, gdp, input->inode_bitmap); /* LV FIXME */ + ext4_inode_table_set(sb, gdp, input->inode_table); /* LV FIXME */ gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); 
gdp->bg_free_inodes_count = cpu_to_le16(EXT4_INODES_PER_GROUP(sb)); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index bc8848bff2f1..811011fc5c94 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -63,40 +63,52 @@ static void ext4_write_super (struct super_block * sb); static void ext4_write_super_lockfs(struct super_block *sb); -ext4_fsblk_t ext4_block_bitmap(struct ext4_group_desc *bg) +ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, + struct ext4_group_desc *bg) { return le32_to_cpu(bg->bg_block_bitmap) | - ((ext4_fsblk_t)le16_to_cpu(bg->bg_block_bitmap_hi) << 32); + (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? + (ext4_fsblk_t)le32_to_cpu(bg->bg_block_bitmap_hi) << 32 : 0); } -ext4_fsblk_t ext4_inode_bitmap(struct ext4_group_desc *bg) +ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, + struct ext4_group_desc *bg) { return le32_to_cpu(bg->bg_inode_bitmap) | - ((ext4_fsblk_t)le16_to_cpu(bg->bg_inode_bitmap_hi) << 32); + (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? + (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_bitmap_hi) << 32 : 0); } -ext4_fsblk_t ext4_inode_table(struct ext4_group_desc *bg) +ext4_fsblk_t ext4_inode_table(struct super_block *sb, + struct ext4_group_desc *bg) { return le32_to_cpu(bg->bg_inode_table) | - ((ext4_fsblk_t)le16_to_cpu(bg->bg_inode_table_hi) << 32); + (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT ? + (ext4_fsblk_t)le32_to_cpu(bg->bg_inode_table_hi) << 32 : 0); } -void ext4_block_bitmap_set(struct ext4_group_desc *bg, ext4_fsblk_t blk) +void ext4_block_bitmap_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk) { bg->bg_block_bitmap = cpu_to_le32((u32)blk); - bg->bg_block_bitmap_hi = cpu_to_le16(blk >> 32); + if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) + bg->bg_block_bitmap_hi = cpu_to_le32(blk >> 32); } -void ext4_inode_bitmap_set(struct ext4_group_desc *bg, ext4_fsblk_t blk) +void ext4_inode_bitmap_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk) { bg->bg_inode_bitmap = cpu_to_le32((u32)blk); - bg->bg_inode_bitmap_hi = cpu_to_le16(blk >> 32); + if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) + bg->bg_inode_bitmap_hi = cpu_to_le32(blk >> 32); } -void ext4_inode_table_set(struct ext4_group_desc *bg, ext4_fsblk_t blk) +void ext4_inode_table_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk) { bg->bg_inode_table = cpu_to_le32((u32)blk); - bg->bg_inode_table_hi = cpu_to_le16(blk >> 32); + if (EXT4_DESC_SIZE(sb) >= EXT4_MIN_DESC_SIZE_64BIT) + bg->bg_inode_table_hi = cpu_to_le32(blk >> 32); } /* @@ -1239,7 +1251,7 @@ static int ext4_check_descriptors (struct super_block * sb) if ((i % EXT4_DESC_PER_BLOCK(sb)) == 0) gdp = (struct ext4_group_desc *) sbi->s_group_desc[desc_block++]->b_data; - block_bitmap = ext4_block_bitmap(gdp); + block_bitmap = ext4_block_bitmap(sb, gdp); if (block_bitmap < first_block || block_bitmap > last_block) { ext4_error (sb, "ext4_check_descriptors", @@ -1248,7 +1260,7 @@ static int ext4_check_descriptors (struct super_block * sb) i, block_bitmap); return 0; } - inode_bitmap = ext4_inode_bitmap(gdp); + inode_bitmap = ext4_inode_bitmap(sb, gdp); if (inode_bitmap < first_block || inode_bitmap > last_block) { ext4_error (sb, "ext4_check_descriptors", @@ -1257,7 +1269,7 @@ static int ext4_check_descriptors (struct super_block * sb) i, inode_bitmap); return 0; } - inode_table = ext4_inode_table(gdp); + inode_table = ext4_inode_table(sb, gdp); if (inode_table < first_block || inode_table + sbi->s_itb_per_group > last_block) { @@ -1622,11 
+1634,11 @@ static int ext4_fill_super (struct super_block *sb, void *data, int silent) } sbi->s_desc_size = le16_to_cpu(es->s_desc_size); if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_64BIT)) { - if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE || + if (sbi->s_desc_size < EXT4_MIN_DESC_SIZE_64BIT || sbi->s_desc_size > EXT4_MAX_DESC_SIZE || sbi->s_desc_size & (sbi->s_desc_size - 1)) { printk(KERN_ERR - "EXT4-fs: unsupported descriptor size %ld\n", + "EXT4-fs: unsupported descriptor size %lu\n", sbi->s_desc_size); goto failed_mount; } diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index a3df2afc004b..296609b9242d 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -129,10 +129,10 @@ struct ext4_group_desc __le16 bg_free_inodes_count; /* Free inodes count */ __le16 bg_used_dirs_count; /* Directories count */ __u16 bg_flags; - __le16 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ - __le16 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ - __le16 bg_inode_table_hi; /* Inodes table block MSB */ - __u16 bg_reserved[3]; + __u32 bg_reserved[3]; + __le32 bg_block_bitmap_hi; /* Blocks bitmap block MSB */ + __le32 bg_inode_bitmap_hi; /* Inodes bitmap block MSB */ + __le32 bg_inode_table_hi; /* Inodes table block MSB */ }; #ifdef __KERNEL__ @@ -143,6 +143,7 @@ struct ext4_group_desc * Macro-instructions used to manage group descriptors */ #define EXT4_MIN_DESC_SIZE 32 +#define EXT4_MIN_DESC_SIZE_64BIT 64 #define EXT4_MAX_DESC_SIZE EXT4_MIN_BLOCK_SIZE #define EXT4_DESC_SIZE(s) (EXT4_SB(s)->s_desc_size) #ifdef __KERNEL__ @@ -904,12 +905,18 @@ extern void ext4_abort (struct super_block *, const char *, const char *, ...) extern void ext4_warning (struct super_block *, const char *, const char *, ...) __attribute__ ((format (printf, 3, 4))); extern void ext4_update_dynamic_rev (struct super_block *sb); -extern ext4_fsblk_t ext4_block_bitmap(struct ext4_group_desc *bg); -extern ext4_fsblk_t ext4_inode_bitmap(struct ext4_group_desc *bg); -extern ext4_fsblk_t ext4_inode_table(struct ext4_group_desc *bg); -extern void ext4_block_bitmap_set(struct ext4_group_desc *bg, ext4_fsblk_t blk); -extern void ext4_inode_bitmap_set(struct ext4_group_desc *bg, ext4_fsblk_t blk); -extern void ext4_inode_table_set(struct ext4_group_desc *bg, ext4_fsblk_t blk); +extern ext4_fsblk_t ext4_block_bitmap(struct super_block *sb, + struct ext4_group_desc *bg); +extern ext4_fsblk_t ext4_inode_bitmap(struct super_block *sb, + struct ext4_group_desc *bg); +extern ext4_fsblk_t ext4_inode_table(struct super_block *sb, + struct ext4_group_desc *bg); +extern void ext4_block_bitmap_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk); +extern void ext4_inode_bitmap_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk); +extern void ext4_inode_table_set(struct super_block *sb, + struct ext4_group_desc *bg, ext4_fsblk_t blk); static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) { -- cgit v1.2.3 From 72b64b594081ef0a0717f6aad77e891c72ed4afa Mon Sep 17 00:00:00 2001 From: Andrew Morton <akpm@osdl.org> Date: Wed, 11 Oct 2006 01:21:18 -0700 Subject: [PATCH] ext4 uninline ext4_get_group_no_and_offset() Way too big to inline. 
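For reference, what the helper computes: a stand-alone userspace model of the (group, offset) split, with illustrative constants (32768 blocks per group, first data block 0) rather than kernel code:

#include <stdio.h>

int main(void)
{
	unsigned long long blocknr = 100000;	/* arbitrary example */
	unsigned long blocks_per_group = 32768;
	unsigned long long first_data_block = 0;

	unsigned long long rel = blocknr - first_data_block;
	unsigned long group = rel / blocks_per_group;
	unsigned long offset = rel % blocks_per_group;

	/* prints: block 100000 -> group 3, offset 1696 */
	printf("block %llu -> group %lu, offset %lu\n",
	       blocknr, group, offset);
	return 0;
}

In the kernel the division goes through sector_div(), which on 32-bit targets expands to a sizeable 64-by-32 division sequence, hence the move out of line.
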
Cc: <linux-ext4@vger.kernel.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/ext4/balloc.c | 18 ++++++++++++++++++ include/linux/ext4_fs.h | 22 ++-------------------- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c index 3e85886a6382..402475a6f3df 100644 --- a/fs/ext4/balloc.c +++ b/fs/ext4/balloc.c @@ -24,6 +24,24 @@ * balloc.c contains the blocks allocation and deallocation routines */ +/* + * Calculate the block group number and offset, given a block number + */ +void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, + unsigned long *blockgrpp, ext4_grpblk_t *offsetp) +{ + struct ext4_super_block *es = EXT4_SB(sb)->s_es; + ext4_grpblk_t offset; + + blocknr = blocknr - le32_to_cpu(es->s_first_data_block); + offset = sector_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)); + if (offsetp) + *offsetp = offset; + if (blockgrpp) + *blockgrpp = blocknr; + +} + /* * The free blocks are managed by bitmaps. A file system contains several * blocks groups. Each group contains 1 bitmap block for blocks, 1 bitmap diff --git a/include/linux/ext4_fs.h b/include/linux/ext4_fs.h index 296609b9242d..498503ee613d 100644 --- a/include/linux/ext4_fs.h +++ b/include/linux/ext4_fs.h @@ -769,26 +769,8 @@ ext4_group_first_block_no(struct super_block *sb, unsigned long group_no) */ #define ERR_BAD_DX_DIR -75000 -/* - * This function calculate the block group number and offset, - * given a block number - */ - -static inline void ext4_get_group_no_and_offset(struct super_block * sb, - ext4_fsblk_t blocknr, unsigned long* blockgrpp, - ext4_grpblk_t *offsetp) -{ - struct ext4_super_block *es = EXT4_SB(sb)->s_es; - ext4_grpblk_t offset; - - blocknr = blocknr - le32_to_cpu(es->s_first_data_block); - offset = sector_div(blocknr, EXT4_BLOCKS_PER_GROUP(sb)); - if (offsetp) - *offsetp = offset; - if (blockgrpp) - *blockgrpp = blocknr; - -} +void ext4_get_group_no_and_offset(struct super_block *sb, ext4_fsblk_t blocknr, + unsigned long *blockgrpp, ext4_grpblk_t *offsetp); /* * Function prototypes -- cgit v1.2.3 From 9858db504caedb2424b9a32744c23f9a81ec1731 Mon Sep 17 00:00:00 2001 From: Nick Piggin <npiggin@suse.de> Date: Wed, 11 Oct 2006 01:21:30 -0700 Subject: [PATCH] mm: locks_freed fix Move the lock debug checks below the page reserved checks. Also, having debug_check_no_locks_freed in kernel_map_pages is wrong. 
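Distilled, the ordering this patch establishes on the free path (a sketch of the shape, not the patch itself; free_pages_check() is internal to mm/page_alloc.c):

/* Sketch: validate pages first; only then run the lock-debug hook,
 * and only for lowmem pages, where page_address() yields a real
 * kernel virtual address.  Mirrors __free_pages_ok() after the patch. */
static void free_pages_sketch(struct page *page, unsigned int order)
{
	int i, reserved = 0;

	for (i = 0; i < (1 << order); i++)
		reserved += free_pages_check(page + i);
	if (reserved)
		return;		/* bad page: stop before any debug hooks */

	if (!PageHighMem(page))
		debug_check_no_locks_freed(page_address(page),
					   PAGE_SIZE << order);
	/* ... arch_free_page(), kernel_map_pages(), buddy free ... */
}
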
Signed-off-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/mm.h | 7 +------ mm/page_alloc.c | 8 ++++---- 2 files changed, 5 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 26146623be2f..5a6068ff5556 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1103,12 +1103,7 @@ static inline void vm_stat_account(struct mm_struct *mm, #ifndef CONFIG_DEBUG_PAGEALLOC static inline void -kernel_map_pages(struct page *page, int numpages, int enable) -{ - if (!PageHighMem(page) && !enable) - debug_check_no_locks_freed(page_address(page), - numpages * PAGE_SIZE); -} +kernel_map_pages(struct page *page, int numpages, int enable) {} #endif extern struct vm_area_struct *get_gate_vma(struct task_struct *tsk); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index c5caac2c3c5a..40db96a655d0 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -495,15 +495,13 @@ static void __free_pages_ok(struct page *page, unsigned int order) int i; int reserved = 0; - if (!PageHighMem(page)) - debug_check_no_locks_freed(page_address(page), - PAGE_SIZE<<order); - for (i = 0 ; i < (1 << order) ; ++i) reserved += free_pages_check(page + i); if (reserved) return; + if (!PageHighMem(page)) + debug_check_no_locks_freed(page_address(page),PAGE_SIZE<<order); arch_free_page(page, order); kernel_map_pages(page, 1 << order, 0); @@ -787,6 +785,8 @@ static void fastcall free_hot_cold_page(struct page *page, int cold) if (free_pages_check(page)) return; + if (!PageHighMem(page)) + debug_check_no_locks_freed(page_address(page), PAGE_SIZE); arch_free_page(page, 0); kernel_map_pages(page, 1, 0); -- cgit v1.2.3 From b611967de4dc5c52049676c4369dcac622a7cdfe Mon Sep 17 00:00:00 2001 From: Davide Libenzi <davidel@xmailserver.org> Date: Wed, 11 Oct 2006 01:21:44 -0700 Subject: [PATCH] epoll_pwait() Implement the epoll_pwait system call, that extend the event wait mechanism with the same logic ppoll and pselect do. The definition of epoll_pwait is: int epoll_pwait(int epfd, struct epoll_event *events, int maxevents, int timeout, const sigset_t *sigmask, size_t sigsetsize); The difference between the vanilla epoll_wait and epoll_pwait is that the latter allows the caller to specify a signal mask to be set while waiting for events. Hence epoll_pwait will wait until either one monitored event, or an unmasked signal happen. If sigmask is NULL, the epoll_pwait system call will act exactly like epoll_wait. 
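A caller-side sketch, for illustration. glibc has no wrapper at this point, so userspace would go through syscall(2); this assumes a patched kernel and headers exporting __NR_epoll_pwait (added for i386 by this patch):

#include <sys/epoll.h>
#include <sys/syscall.h>
#include <signal.h>
#include <unistd.h>

/* Sketch: wait for events with everything except SIGTERM blocked
 * for the duration of the wait only. */
static int wait_quietly(int epfd, struct epoll_event *events, int maxevents)
{
	sigset_t mask;

	sigfillset(&mask);
	sigdelset(&mask, SIGTERM);	/* let SIGTERM interrupt the wait */

	/* The kernel installs 'mask' atomically around the wait and
	 * restores the previous mask on the way back to userspace. */
	return syscall(__NR_epoll_pwait, epfd, events, maxevents,
		       -1 /* no timeout */, &mask, sizeof(sigset_t));
}
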
For the POSIX definition of pselect, information is available here: http://www.opengroup.org/onlinepubs/009695399/functions/select.html Signed-off-by: Davide Libenzi <davidel@xmailserver.org> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Andi Kleen <ak@muc.de> Cc: Michael Kerrisk <mtk-manpages@gmx.net> Cc: Ulrich Drepper <drepper@redhat.com> Cc: Roland McGrath <roland@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- arch/i386/kernel/syscall_table.S | 1 + fs/eventpoll.c | 56 +++++++++++++++++++++++++++++++++++++--- include/asm-i386/unistd.h | 3 ++- include/linux/syscalls.h | 4 +++ 4 files changed, 60 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index 7e639f78b0b9..2697e9210e92 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S @@ -318,3 +318,4 @@ ENTRY(sys_call_table) .long sys_vmsplice .long sys_move_pages .long sys_getcpu + .long sys_epoll_pwait diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 557d5b614fae..ae228ec54e94 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -105,6 +105,8 @@ /* Maximum msec timeout value storeable in a long int */ #define EP_MAX_MSTIMEO min(1000ULL * MAX_SCHEDULE_TIMEOUT / HZ, (LONG_MAX - 999ULL) / HZ) +#define EP_MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) + struct epoll_filefd { struct file *file; @@ -497,7 +499,7 @@ void eventpoll_release_file(struct file *file) */ asmlinkage long sys_epoll_create(int size) { - int error, fd; + int error, fd = -1; struct eventpoll *ep; struct inode *inode; struct file *file; @@ -640,7 +642,6 @@ eexit_1: return error; } -#define MAX_EVENTS (INT_MAX / sizeof(struct epoll_event)) /* * Implement the event wait interface for the eventpoll file. It is the kernel @@ -657,7 +658,7 @@ asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, current, epfd, events, maxevents, timeout)); /* The maximum number of event must be greater than zero */ - if (maxevents <= 0 || maxevents > MAX_EVENTS) + if (maxevents <= 0 || maxevents > EP_MAX_EVENTS) return -EINVAL; /* Verify that the area passed by the user is writeable */ @@ -699,6 +700,55 @@ eexit_1: } +#ifdef TIF_RESTORE_SIGMASK + +/* + * Implement the event wait interface for the eventpoll file. It is the kernel + * part of the user space epoll_pwait(2). + */ +asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, + int maxevents, int timeout, const sigset_t __user *sigmask, + size_t sigsetsize) +{ + int error; + sigset_t ksigmask, sigsaved; + + /* + * If the caller wants a certain signal mask to be set during the wait, + * we apply it here. + */ + if (sigmask) { + if (sigsetsize != sizeof(sigset_t)) + return -EINVAL; + if (copy_from_user(&ksigmask, sigmask, sizeof(ksigmask))) + return -EFAULT; + sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP)); + sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved); + } + + error = sys_epoll_wait(epfd, events, maxevents, timeout); + + /* + * If we changed the signal mask, we need to restore the original one. + * In case we've got a signal while waiting, we do not restore the + * signal mask yet, and we allow do_signal() to deliver the signal on + * the way back to userspace, before the signal mask is restored. 
+ */ + if (sigmask) { + if (error == -EINTR) { + memcpy(¤t->saved_sigmask, &sigsaved, + sizeof(sigsaved)); + set_thread_flag(TIF_RESTORE_SIGMASK); + } else + sigprocmask(SIG_SETMASK, &sigsaved, NULL); + } + + return error; +} + +#endif /* #ifdef TIF_RESTORE_SIGMASK */ + + /* * Creates the file descriptor to be used by the epoll interface. */ diff --git a/include/asm-i386/unistd.h b/include/asm-i386/unistd.h index 3ca7ab963d7d..beeeaf6b054a 100644 --- a/include/asm-i386/unistd.h +++ b/include/asm-i386/unistd.h @@ -324,10 +324,11 @@ #define __NR_vmsplice 316 #define __NR_move_pages 317 #define __NR_getcpu 318 +#define __NR_epoll_pwait 319 #ifdef __KERNEL__ -#define NR_syscalls 319 +#define NR_syscalls 320 #include <linux/err.h> /* diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index b0ace3fd7eb9..1912c6cbef55 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -431,6 +431,10 @@ asmlinkage long sys_epoll_ctl(int epfd, int op, int fd, struct epoll_event __user *event); asmlinkage long sys_epoll_wait(int epfd, struct epoll_event __user *events, int maxevents, int timeout); +asmlinkage long sys_epoll_pwait(int epfd, struct epoll_event __user *events, + int maxevents, int timeout, + const sigset_t __user *sigmask, + size_t sigsetsize); asmlinkage long sys_gethostname(char __user *name, int len); asmlinkage long sys_sethostname(char __user *name, int len); asmlinkage long sys_setdomainname(char __user *name, int len); -- cgit v1.2.3 From e0ab2928cc2202f13f0574d4c6f567f166d307eb Mon Sep 17 00:00:00 2001 From: Stephane Eranian <eranian@hpl.hp.com> Date: Wed, 11 Oct 2006 01:21:45 -0700 Subject: [PATCH] Add carta_random32() library routine This is a follow-up patch based on the review for perfmon2. This patch adds the carta_random32() library routine + carta_random32.h header file. This is fast, simple, and efficient pseudo number generator algorithm. We use it in perfmon2 to randomize the sampling periods. In this context, we do not need any fancy randomizer. Signed-off-by: stephane eranian <eranian@hpl.hp.com> Cc: David Mosberger <david.mosberger@acm.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/carta_random32.h | 29 +++++++++++++++++++++++++++++ lib/Makefile | 2 +- lib/carta_random32.c | 41 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+), 1 deletion(-) create mode 100644 include/linux/carta_random32.h create mode 100644 lib/carta_random32.c (limited to 'include') diff --git a/include/linux/carta_random32.h b/include/linux/carta_random32.h new file mode 100644 index 000000000000..f6f3bd9f20b5 --- /dev/null +++ b/include/linux/carta_random32.h @@ -0,0 +1,29 @@ +/* + * Fast, simple, yet decent quality random number generator based on + * a paper by David G. Carta ("Two Fast Implementations of the + * `Minimal Standard' Random Number Generator," Communications of the + * ACM, January, 1990). + * + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by Stephane Eranian <eranian@hpl.hp.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#ifndef _LINUX_CARTA_RANDOM32_H_ +#define _LINUX_CARTA_RANDOM32_H_ + +u64 carta_random32(u64 seed); + +#endif /* _LINUX_CARTA_RANDOM32_H_ */ diff --git a/lib/Makefile b/lib/Makefile index 8e6662bb9c37..59070dbfbeb4 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -5,7 +5,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ bust_spinlocks.o rbtree.o radix-tree.o dump_stack.o \ idr.o div64.o int_sqrt.o bitmap.o extable.o prio_tree.o \ - sha1.o irq_regs.o + sha1.o irq_regs.o carta_random32.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o diff --git a/lib/carta_random32.c b/lib/carta_random32.c new file mode 100644 index 000000000000..ca82df70eee4 --- /dev/null +++ b/lib/carta_random32.c @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2002-2006 Hewlett-Packard Development Company, L.P. + * Contributed by David Mosberger-Tang <davidm@hpl.hp.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + * 02111-1307 USA + */ +#include <linux/types.h> +#include <linux/module.h> + +/* + * Fast, simple, yet decent quality random number generator based on + * a paper by David G. Carta ("Two Fast Implementations of the + * `Minimal Standard' Random Number Generator," Communications of the + * ACM, January, 1990). 
+ */ +u64 carta_random32 (u64 seed) +{ +# define A 16807 +# define M ((u32) 1 << 31) + u64 s, prod = A * seed, p, q; + + p = (prod >> 31) & (M - 1); + q = (prod >> 0) & (M - 1); + s = p + q; + if (s >= M) + s -= M - 1; + return s; +} +EXPORT_SYMBOL_GPL(carta_random32); -- cgit v1.2.3 From fa3ba2e81ea23416272a22009bba95954c81969c Mon Sep 17 00:00:00 2001 From: Florin Malita <fmalita@gmail.com> Date: Wed, 11 Oct 2006 01:21:48 -0700 Subject: [PATCH] fix Module taint flags listing in Oops/panic Module taint flags listing in Oops/panic has a couple of issues: * taint_flags() doesn't null-terminate the buffer after printing the flags * per-module taints are only set if the kernel is not already tainted (with that particular flag) => only the first offending module gets its taint info correctly updated Some additional changes: * 'license_gplok' is no longer needed - equivalent to !(taints & TAINT_PROPRIETARY_MODULE) - so we can drop it from struct module * exporting module taint info via /proc/module: pwc 88576 0 - Live 0xf8c32000 evilmod 6784 1 pwc, Live 0xf8bbf000 (PF) Signed-off-by: Florin Malita <fmalita@gmail.com> Cc: "Randy.Dunlap" <rdunlap@xenotime.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/module.h | 3 -- kernel/module.c | 94 ++++++++++++++++++++++++++------------------------ 2 files changed, 49 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/linux/module.h b/include/linux/module.h index 4b2d8091a410..d1d00ce8f4ed 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -317,9 +317,6 @@ struct module /* Am I unsafe to unload? */ int unsafe; - /* Am I GPL-compatible */ - int license_gplok; - unsigned int taints; /* same bits as kernel:tainted */ #ifdef CONFIG_MODULE_UNLOAD diff --git a/kernel/module.c b/kernel/module.c index 7f60e782de1e..67009bd56c52 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -87,6 +87,12 @@ static inline int strong_try_module_get(struct module *mod) return try_module_get(mod); } +static inline void add_taint_module(struct module *mod, unsigned flag) +{ + add_taint(flag); + mod->taints |= flag; +} + /* A thread that wants to hold a reference to a module only while it * is running can call ths to safely exit. * nfsd and lockd use this. @@ -847,12 +853,10 @@ static int check_version(Elf_Shdr *sechdrs, return 0; } /* Not in module's version table. OK, but that taints the kernel. 
*/ - if (!(tainted & TAINT_FORCED_MODULE)) { + if (!(tainted & TAINT_FORCED_MODULE)) printk("%s: no version for \"%s\" found: kernel tainted.\n", mod->name, symname); - add_taint(TAINT_FORCED_MODULE); - mod->taints |= TAINT_FORCED_MODULE; - } + add_taint_module(mod, TAINT_FORCED_MODULE); return 1; } @@ -910,7 +914,8 @@ static unsigned long resolve_symbol(Elf_Shdr *sechdrs, unsigned long ret; const unsigned long *crc; - ret = __find_symbol(name, &owner, &crc, mod->license_gplok); + ret = __find_symbol(name, &owner, &crc, + !(mod->taints & TAINT_PROPRIETARY_MODULE)); if (ret) { /* use_module can fail due to OOM, or module unloading */ if (!check_version(sechdrs, versindex, name, mod, crc) || @@ -1335,12 +1340,11 @@ static void set_license(struct module *mod, const char *license) if (!license) license = "unspecified"; - mod->license_gplok = license_is_gpl_compatible(license); - if (!mod->license_gplok && !(tainted & TAINT_PROPRIETARY_MODULE)) { - printk(KERN_WARNING "%s: module license '%s' taints kernel.\n", - mod->name, license); - add_taint(TAINT_PROPRIETARY_MODULE); - mod->taints |= TAINT_PROPRIETARY_MODULE; + if (!license_is_gpl_compatible(license)) { + if (!(tainted & TAINT_PROPRIETARY_MODULE)) + printk(KERN_WARNING "%s: module license '%s' taints" + "kernel.\n", mod->name, license); + add_taint_module(mod, TAINT_PROPRIETARY_MODULE); } } @@ -1619,8 +1623,7 @@ static struct module *load_module(void __user *umod, modmagic = get_modinfo(sechdrs, infoindex, "vermagic"); /* This is allowed: modprobe --force will invalidate it. */ if (!modmagic) { - add_taint(TAINT_FORCED_MODULE); - mod->taints |= TAINT_FORCED_MODULE; + add_taint_module(mod, TAINT_FORCED_MODULE); printk(KERN_WARNING "%s: no version magic, tainting kernel.\n", mod->name); } else if (!same_magic(modmagic, vermagic)) { @@ -1714,14 +1717,10 @@ static struct module *load_module(void __user *umod, /* Set up license info based on the info section */ set_license(mod, get_modinfo(sechdrs, infoindex, "license")); - if (strcmp(mod->name, "ndiswrapper") == 0) { - add_taint(TAINT_PROPRIETARY_MODULE); - mod->taints |= TAINT_PROPRIETARY_MODULE; - } - if (strcmp(mod->name, "driverloader") == 0) { - add_taint(TAINT_PROPRIETARY_MODULE); - mod->taints |= TAINT_PROPRIETARY_MODULE; - } + if (strcmp(mod->name, "ndiswrapper") == 0) + add_taint_module(mod, TAINT_PROPRIETARY_MODULE); + if (strcmp(mod->name, "driverloader") == 0) + add_taint_module(mod, TAINT_PROPRIETARY_MODULE); /* Set up MODINFO_ATTR fields */ setup_modinfo(mod, sechdrs, infoindex); @@ -1766,8 +1765,7 @@ static struct module *load_module(void __user *umod, (mod->num_unused_gpl_syms && !unusedgplcrcindex)) { printk(KERN_WARNING "%s: No versions for exported symbols." " Tainting kernel.\n", mod->name); - add_taint(TAINT_FORCED_MODULE); - mod->taints |= TAINT_FORCED_MODULE; + add_taint_module(mod, TAINT_FORCED_MODULE); } #endif @@ -2132,9 +2130,33 @@ static void m_stop(struct seq_file *m, void *p) mutex_unlock(&module_mutex); } +static char *taint_flags(unsigned int taints, char *buf) +{ + int bx = 0; + + if (taints) { + buf[bx++] = '('; + if (taints & TAINT_PROPRIETARY_MODULE) + buf[bx++] = 'P'; + if (taints & TAINT_FORCED_MODULE) + buf[bx++] = 'F'; + /* + * TAINT_FORCED_RMMOD: could be added. + * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't + * apply to modules. 
+ */ + buf[bx++] = ')'; + } + buf[bx] = '\0'; + + return buf; +} + static int m_show(struct seq_file *m, void *p) { struct module *mod = list_entry(p, struct module, list); + char buf[8]; + seq_printf(m, "%s %lu", mod->name, mod->init_size + mod->core_size); print_unload_info(m, mod); @@ -2147,6 +2169,10 @@ static int m_show(struct seq_file *m, void *p) /* Used by oprofile and other similar tools. */ seq_printf(m, " 0x%p", mod->module_core); + /* Taints info */ + if (mod->taints) + seq_printf(m, " %s", taint_flags(mod->taints, buf)); + seq_printf(m, "\n"); return 0; } @@ -2235,28 +2261,6 @@ struct module *module_text_address(unsigned long addr) return mod; } -static char *taint_flags(unsigned int taints, char *buf) -{ - *buf = '\0'; - if (taints) { - int bx; - - buf[0] = '('; - bx = 1; - if (taints & TAINT_PROPRIETARY_MODULE) - buf[bx++] = 'P'; - if (taints & TAINT_FORCED_MODULE) - buf[bx++] = 'F'; - /* - * TAINT_FORCED_RMMOD: could be added. - * TAINT_UNSAFE_SMP, TAINT_MACHINE_CHECK, TAINT_BAD_PAGE don't - * apply to modules. - */ - buf[bx] = ')'; - } - return buf; -} - /* Don't grab lock, we're oopsing. */ void print_modules(void) { -- cgit v1.2.3 From beed33a816204cb402c69266475b6a60a2433ceb Mon Sep 17 00:00:00 2001 From: Nick Piggin <npiggin@suse.de> Date: Wed, 11 Oct 2006 01:21:52 -0700 Subject: [PATCH] sched: likely profiling This likely profiling is pretty fun. I found a few possible problems in sched.c. This patch may be not measurable, but when I did measure long ago, nooping (un)likely cost a couple of % on scheduler heavy benchmarks, so it all adds up. Tweak some branch hints: - the 2nd 64 bits in the bitmask is likely to be populated, because it contains the first 28 bits (nearly 3/4) of the normal priorities. (ratio of 669669:691 ~= 1000:1). - it isn't unlikely that context switching switches to another process. it might be very rapidly switching to and from the idle process (ratio of 475815:419004 and 471330:423544). Let the branch predictor decide. 
- preempt_enable seems to be very often called in a nested preempt_disable or with interrupts disabled (ratio of 3567760:87965 ~= 40:1) Signed-off-by: Nick Piggin <npiggin@suse.de> Acked-by: Ingo Molnar <mingo@elte.hu> Cc: Daniel Walker <dwalker@mvista.com> Cc: Hua Zhong <hzhong@gmail.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/asm-generic/bitops/sched.h | 2 +- kernel/sched.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-generic/bitops/sched.h b/include/asm-generic/bitops/sched.h index 5ef93a4d009f..815bb0148060 100644 --- a/include/asm-generic/bitops/sched.h +++ b/include/asm-generic/bitops/sched.h @@ -15,7 +15,7 @@ static inline int sched_find_first_bit(const unsigned long *b) #if BITS_PER_LONG == 64 if (unlikely(b[0])) return __ffs(b[0]); - if (unlikely(b[1])) + if (likely(b[1])) return __ffs(b[1]) + 64; return __ffs(b[2]) + 128; #elif BITS_PER_LONG == 32 diff --git a/kernel/sched.c b/kernel/sched.c index 53608a59d6e3..094b5687eef6 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -1822,14 +1822,14 @@ context_switch(struct rq *rq, struct task_struct *prev, struct mm_struct *mm = next->mm; struct mm_struct *oldmm = prev->active_mm; - if (unlikely(!mm)) { + if (!mm) { next->active_mm = oldmm; atomic_inc(&oldmm->mm_count); enter_lazy_tlb(oldmm, next); } else switch_mm(oldmm, mm, next); - if (unlikely(!prev->mm)) { + if (!prev->mm) { prev->active_mm = NULL; WARN_ON(rq->prev_mm); rq->prev_mm = oldmm; @@ -3491,7 +3491,7 @@ asmlinkage void __sched preempt_schedule(void) * If there is a non-zero preempt_count or interrupts are disabled, * we do not want to preempt the current task. Just return.. */ - if (unlikely(ti->preempt_count || irqs_disabled())) + if (likely(ti->preempt_count || irqs_disabled())) return; need_resched: -- cgit v1.2.3 From 39484e53bb00f55b6303a908070db133608ef2a5 Mon Sep 17 00:00:00 2001 From: "Maciej W. Rozycki" <macro@linux-mips.org> Date: Wed, 11 Oct 2006 01:21:54 -0700 Subject: [PATCH] 32-bit compatibility HDIO IOCTLs A couple of HDIO IOCTLs are not yet handled and a few others are marked as using a pointer rather than an unsigned long. The formers include: HDIO_GET_WCACHE, HDIO_GET_ACOUSTIC, HDIO_GET_ADDRESS and HDIO_GET_BUSSTATE. The latters are: HDIO_SET_MULTCOUNT, HDIO_SET_UNMASKINTR, HDIO_SET_KEEPSETTINGS, HDIO_SET_32BIT, HDIO_SET_NOWERR, HDIO_SET_DMA, HDIO_SET_PIO_MODE and HDIO_SET_NICE. Additionally 0x330 used to be HDIO_GETGEO_BIG and may be issued by 32-bit `hdparm' run on a 64-bit kernel making Linux complain loudly. This is a fix for these issues. Signed-off-by: Maciej W. 
Rozycki <macro@linux-mips.org> Cc: Alan Cox <alan@lxorguk.ukuu.org.uk> Acked-by: Arnd Bergmann <arnd@arndb.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/compat_ioctl.c | 10 +++++++--- include/linux/compat_ioctl.h | 24 +++++++++++++++--------- 2 files changed, 22 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c index 27ca1aa30562..a91f2628c981 100644 --- a/fs/compat_ioctl.c +++ b/fs/compat_ioctl.c @@ -2438,13 +2438,17 @@ HANDLE_IOCTL(0x1260, broken_blkgetsize) HANDLE_IOCTL(BLKFRAGET, w_long) HANDLE_IOCTL(BLKSECTGET, w_long) HANDLE_IOCTL(BLKPG, blkpg_ioctl_trans) -HANDLE_IOCTL(HDIO_GET_KEEPSETTINGS, hdio_ioctl_trans) HANDLE_IOCTL(HDIO_GET_UNMASKINTR, hdio_ioctl_trans) -HANDLE_IOCTL(HDIO_GET_DMA, hdio_ioctl_trans) -HANDLE_IOCTL(HDIO_GET_32BIT, hdio_ioctl_trans) HANDLE_IOCTL(HDIO_GET_MULTCOUNT, hdio_ioctl_trans) +HANDLE_IOCTL(HDIO_GET_KEEPSETTINGS, hdio_ioctl_trans) +HANDLE_IOCTL(HDIO_GET_32BIT, hdio_ioctl_trans) HANDLE_IOCTL(HDIO_GET_NOWERR, hdio_ioctl_trans) +HANDLE_IOCTL(HDIO_GET_DMA, hdio_ioctl_trans) HANDLE_IOCTL(HDIO_GET_NICE, hdio_ioctl_trans) +HANDLE_IOCTL(HDIO_GET_WCACHE, hdio_ioctl_trans) +HANDLE_IOCTL(HDIO_GET_ACOUSTIC, hdio_ioctl_trans) +HANDLE_IOCTL(HDIO_GET_ADDRESS, hdio_ioctl_trans) +HANDLE_IOCTL(HDIO_GET_BUSSTATE, hdio_ioctl_trans) HANDLE_IOCTL(FDSETPRM32, fd_ioctl_trans) HANDLE_IOCTL(FDDEFPRM32, fd_ioctl_trans) HANDLE_IOCTL(FDGETPRM32, fd_ioctl_trans) diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index 4e1663d7691e..cfdb4f6a89d4 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -61,17 +61,23 @@ COMPATIBLE_IOCTL(FIGETBSZ) * Some need translations, these do not. */ COMPATIBLE_IOCTL(HDIO_GET_IDENTITY) -COMPATIBLE_IOCTL(HDIO_SET_DMA) -COMPATIBLE_IOCTL(HDIO_SET_UNMASKINTR) -COMPATIBLE_IOCTL(HDIO_SET_NOWERR) -COMPATIBLE_IOCTL(HDIO_SET_32BIT) -COMPATIBLE_IOCTL(HDIO_SET_MULTCOUNT) -COMPATIBLE_IOCTL(HDIO_DRIVE_CMD) COMPATIBLE_IOCTL(HDIO_DRIVE_TASK) -COMPATIBLE_IOCTL(HDIO_SET_PIO_MODE) -COMPATIBLE_IOCTL(HDIO_SET_NICE) -COMPATIBLE_IOCTL(HDIO_SET_KEEPSETTINGS) +COMPATIBLE_IOCTL(HDIO_DRIVE_CMD) +ULONG_IOCTL(HDIO_SET_MULTCOUNT) +ULONG_IOCTL(HDIO_SET_UNMASKINTR) +ULONG_IOCTL(HDIO_SET_KEEPSETTINGS) +ULONG_IOCTL(HDIO_SET_32BIT) +ULONG_IOCTL(HDIO_SET_NOWERR) +ULONG_IOCTL(HDIO_SET_DMA) +ULONG_IOCTL(HDIO_SET_PIO_MODE) +ULONG_IOCTL(HDIO_SET_NICE) +ULONG_IOCTL(HDIO_SET_WCACHE) +ULONG_IOCTL(HDIO_SET_ACOUSTIC) +ULONG_IOCTL(HDIO_SET_BUSSTATE) +ULONG_IOCTL(HDIO_SET_ADDRESS) COMPATIBLE_IOCTL(HDIO_SCAN_HWIF) +/* 0x330 is reserved -- it used to be HDIO_GETGEO_BIG */ +COMPATIBLE_IOCTL(0x330) /* 0x02 -- Floppy ioctls */ COMPATIBLE_IOCTL(FDMSGON) COMPATIBLE_IOCTL(FDMSGOFF) -- cgit v1.2.3 From 01a3ee2b203e511e20f98b85a9172fd32c53e87c Mon Sep 17 00:00:00 2001 From: Reinette Chatre <reinette.chatre@linux.intel.com> Date: Wed, 11 Oct 2006 01:21:55 -0700 Subject: [PATCH] bitmap: parse input from kernel and user buffers lib/bitmap.c:bitmap_parse() is a library function that received as input a user buffer. This seemed to have originated from the way the write_proc function of the /proc filesystem operates. This has been reworked to not use kmalloc and eliminates a lot of get_user() overhead by performing one access_ok before using __get_user(). We need to test if we are in kernel or user space (is_user) and access the buffer differently. 
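Caller-side, the split looks like this: a minimal sketch, assuming a /proc-style write handler where 'ubuf' is user memory (hypothetical helper, error handling abbreviated):

#include <linux/bitmap.h>

/* Sketch: same parse, two sources.  Kernel-resident strings take
 * bitmap_parse(); __user pointers take bitmap_parse_user(), which
 * does one access_ok() and then cheap __get_user() fetches. */
static int parse_two_ways(const char __user *ubuf, unsigned int ulen,
			  unsigned long *dst, int nbits)
{
	int err;

	/* A string already in kernel memory, e.g. a boot parameter: */
	err = bitmap_parse("f0f0", 4, dst, nbits);
	if (err)
		return err;

	/* A buffer handed in by a /proc or sysfs write: */
	return bitmap_parse_user(ubuf, ulen, dst, nbits);
}
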
We cannot use __get_user() to access kernel addresses in all cases, for example in architectures with separate address space for kernel and user. This function will be useful for other uses as well; for example, taking input for /sysfs instead of /proc, so it was changed to accept kernel buffers. We have this use for the Linux UWB project, as part as the upcoming bandwidth allocator code. Only a few routines used this function and they were changed too. Signed-off-by: Reinette Chatre <reinette.chatre@linux.intel.com> Signed-off-by: Inaky Perez-Gonzalez <inaky@linux.intel.com> Cc: Paul Jackson <pj@sgi.com> Cc: Joe Korty <joe.korty@ccur.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/bitmap.h | 13 ++++++++++-- include/linux/cpumask.h | 14 ++++++------- include/linux/nodemask.h | 14 ++++++------- kernel/irq/proc.c | 2 +- kernel/profile.c | 2 +- lib/bitmap.c | 54 ++++++++++++++++++++++++++++++++++++++---------- 6 files changed, 70 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index dcc5de7cc487..64b4641904fe 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -46,7 +46,8 @@ * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) * bitmap_scnprintf(buf, len, src, nbits) Print bitmap src to buf - * bitmap_parse(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf + * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf + * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf * bitmap_scnlistprintf(buf, len, src, nbits) Print bitmap src as list to buf * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from list * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region @@ -106,7 +107,9 @@ extern int __bitmap_weight(const unsigned long *bitmap, int bits); extern int bitmap_scnprintf(char *buf, unsigned int len, const unsigned long *src, int nbits); -extern int bitmap_parse(const char __user *ubuf, unsigned int ulen, +extern int __bitmap_parse(const char *buf, unsigned int buflen, int is_user, + unsigned long *dst, int nbits); +extern int bitmap_parse_user(const char __user *ubuf, unsigned int ulen, unsigned long *dst, int nbits); extern int bitmap_scnlistprintf(char *buf, unsigned int len, const unsigned long *src, int nbits); @@ -270,6 +273,12 @@ static inline void bitmap_shift_left(unsigned long *dst, __bitmap_shift_left(dst, src, n, nbits); } +static inline int bitmap_parse(const char *buf, unsigned int buflen, + unsigned long *maskp, int nmaskbits) +{ + return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits); +} + #endif /* __ASSEMBLY__ */ #endif /* __LINUX_BITMAP_H */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index b268a3c0c376..d0e8c8b0e34d 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -8,8 +8,8 @@ * See detailed comments in the file linux/bitmap.h describing the * data type on which these cpumasks are based. * - * For details of cpumask_scnprintf() and cpumask_parse(), - * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c. + * For details of cpumask_scnprintf() and cpumask_parse_user(), + * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c. * For details of cpulist_scnprintf() and cpulist_parse(), see * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. 
* For details of cpu_remap(), see bitmap_bitremap in lib/bitmap.c @@ -49,7 +49,7 @@ * unsigned long *cpus_addr(mask) Array of unsigned long's in mask * * int cpumask_scnprintf(buf, len, mask) Format cpumask for printing - * int cpumask_parse(ubuf, ulen, mask) Parse ascii string as cpumask + * int cpumask_parse_user(ubuf, ulen, mask) Parse ascii string as cpumask * int cpulist_scnprintf(buf, len, mask) Format cpumask as list for printing * int cpulist_parse(buf, map) Parse ascii string as cpulist * int cpu_remap(oldbit, old, new) newbit = map(old, new)(oldbit) @@ -273,12 +273,12 @@ static inline int __cpumask_scnprintf(char *buf, int len, return bitmap_scnprintf(buf, len, srcp->bits, nbits); } -#define cpumask_parse(ubuf, ulen, dst) \ - __cpumask_parse((ubuf), (ulen), &(dst), NR_CPUS) -static inline int __cpumask_parse(const char __user *buf, int len, +#define cpumask_parse_user(ubuf, ulen, dst) \ + __cpumask_parse_user((ubuf), (ulen), &(dst), NR_CPUS) +static inline int __cpumask_parse_user(const char __user *buf, int len, cpumask_t *dstp, int nbits) { - return bitmap_parse(buf, len, dstp->bits, nbits); + return bitmap_parse_user(buf, len, dstp->bits, nbits); } #define cpulist_scnprintf(buf, len, src) \ diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 5dce5c21822c..b1063e9cdb1b 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -8,8 +8,8 @@ * See detailed comments in the file linux/bitmap.h describing the * data type on which these nodemasks are based. * - * For details of nodemask_scnprintf() and nodemask_parse(), - * see bitmap_scnprintf() and bitmap_parse() in lib/bitmap.c. + * For details of nodemask_scnprintf() and nodemask_parse_user(), + * see bitmap_scnprintf() and bitmap_parse_user() in lib/bitmap.c. * For details of nodelist_scnprintf() and nodelist_parse(), see * bitmap_scnlistprintf() and bitmap_parselist(), also in bitmap.c. * For details of node_remap(), see bitmap_bitremap in lib/bitmap.c. 
@@ -51,7 +51,7 @@ * unsigned long *nodes_addr(mask) Array of unsigned long's in mask * * int nodemask_scnprintf(buf, len, mask) Format nodemask for printing - * int nodemask_parse(ubuf, ulen, mask) Parse ascii string as nodemask + * int nodemask_parse_user(ubuf, ulen, mask) Parse ascii string as nodemask * int nodelist_scnprintf(buf, len, mask) Format nodemask as list for printing * int nodelist_parse(buf, map) Parse ascii string as nodelist * int node_remap(oldbit, old, new) newbit = map(old, new)(oldbit) @@ -288,12 +288,12 @@ static inline int __nodemask_scnprintf(char *buf, int len, return bitmap_scnprintf(buf, len, srcp->bits, nbits); } -#define nodemask_parse(ubuf, ulen, dst) \ - __nodemask_parse((ubuf), (ulen), &(dst), MAX_NUMNODES) -static inline int __nodemask_parse(const char __user *buf, int len, +#define nodemask_parse_user(ubuf, ulen, dst) \ + __nodemask_parse_user((ubuf), (ulen), &(dst), MAX_NUMNODES) +static inline int __nodemask_parse_user(const char __user *buf, int len, nodemask_t *dstp, int nbits) { - return bitmap_parse(buf, len, dstp->bits, nbits); + return bitmap_parse_user(buf, len, dstp->bits, nbits); } #define nodelist_scnprintf(buf, len, src) \ diff --git a/kernel/irq/proc.c b/kernel/irq/proc.c index 607c7809ad01..9a352667007c 100644 --- a/kernel/irq/proc.c +++ b/kernel/irq/proc.c @@ -57,7 +57,7 @@ static int irq_affinity_write_proc(struct file *file, const char __user *buffer, if (!irq_desc[irq].chip->set_affinity || no_irq_affinity) return -EIO; - err = cpumask_parse(buffer, count, new_value); + err = cpumask_parse_user(buffer, count, new_value); if (err) return err; diff --git a/kernel/profile.c b/kernel/profile.c index 857300a2afec..f940b462eec9 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -399,7 +399,7 @@ static int prof_cpu_mask_write_proc (struct file *file, const char __user *buffe unsigned long full_count = count, err; cpumask_t new_value; - err = cpumask_parse(buffer, count, new_value); + err = cpumask_parse_user(buffer, count, new_value); if (err) return err; diff --git a/lib/bitmap.c b/lib/bitmap.c index d71e38c54ea5..037fa9aa2ed7 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -316,10 +316,11 @@ int bitmap_scnprintf(char *buf, unsigned int buflen, EXPORT_SYMBOL(bitmap_scnprintf); /** - * bitmap_parse - convert an ASCII hex string into a bitmap. - * @ubuf: pointer to buffer in user space containing string. - * @ubuflen: buffer size in bytes. If string is smaller than this + * __bitmap_parse - convert an ASCII hex string into a bitmap. + * @buf: pointer to buffer containing string. + * @buflen: buffer size in bytes. If string is smaller than this * then it must be terminated with a \0. + * @is_user: location of buffer, 0 indicates kernel space * @maskp: pointer to bitmap array that will contain result. * @nmaskbits: size of bitmap, in bits. * @@ -330,11 +331,13 @@ EXPORT_SYMBOL(bitmap_scnprintf); * characters and for grouping errors such as "1,,5", ",44", "," and "". * Leading and trailing whitespace accepted, but not embedded whitespace. 
*/ -int bitmap_parse(const char __user *ubuf, unsigned int ubuflen, - unsigned long *maskp, int nmaskbits) +int __bitmap_parse(const char *buf, unsigned int buflen, + int is_user, unsigned long *maskp, + int nmaskbits) { int c, old_c, totaldigits, ndigits, nchunks, nbits; u32 chunk; + const char __user *ubuf = buf; bitmap_zero(maskp, nmaskbits); @@ -343,11 +346,15 @@ int bitmap_parse(const char __user *ubuf, unsigned int ubuflen, chunk = ndigits = 0; /* Get the next chunk of the bitmap */ - while (ubuflen) { + while (buflen) { old_c = c; - if (get_user(c, ubuf++)) - return -EFAULT; - ubuflen--; + if (is_user) { + if (__get_user(c, ubuf++)) + return -EFAULT; + } + else + c = *buf++; + buflen--; if (isspace(c)) continue; @@ -388,11 +395,36 @@ int bitmap_parse(const char __user *ubuf, unsigned int ubuflen, nbits += (nchunks == 1) ? nbits_to_hold_value(chunk) : CHUNKSZ; if (nbits > nmaskbits) return -EOVERFLOW; - } while (ubuflen && c == ','); + } while (buflen && c == ','); return 0; } -EXPORT_SYMBOL(bitmap_parse); +EXPORT_SYMBOL(__bitmap_parse); + +/** + * bitmap_parse_user() + * + * @ubuf: pointer to user buffer containing string. + * @ulen: buffer size in bytes. If string is smaller than this + * then it must be terminated with a \0. + * @maskp: pointer to bitmap array that will contain result. + * @nmaskbits: size of bitmap, in bits. + * + * Wrapper for __bitmap_parse(), providing it with user buffer. + * + * We cannot have this as an inline function in bitmap.h because it needs + * linux/uaccess.h to get the access_ok() declaration and this causes + * cyclic dependencies. + */ +int bitmap_parse_user(const char __user *ubuf, + unsigned int ulen, unsigned long *maskp, + int nmaskbits) +{ + if (!access_ok(VERIFY_READ, ubuf, ulen)) + return -EFAULT; + return __bitmap_parse((const char *)ubuf, ulen, 1, maskp, nmaskbits); +} +EXPORT_SYMBOL(bitmap_parse_user); /* * bscnl_emit(buf, buflen, rbot, rtop, bp) -- cgit v1.2.3 From e50190a8341485b413f599033cb74649f849d939 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox <matthew@wil.cx> Date: Wed, 11 Oct 2006 01:22:02 -0700 Subject: [PATCH] Consolidate check_signature There's nothing arch-specific about check_signature(), so move it to <linux/io.h>. Use a cross between the Alpha and i386 implementations as the generic one. 
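As a usage sketch (the mapped address and the signature string here are made up for illustration, not taken from any real driver):

	static const unsigned char sig[] = "XYZ1";	/* hypothetical ROM signature */
	void __iomem *p = ioremap(0xc8000, 0x100);	/* hypothetical ROM window */

	if (p) {
		if (check_signature(p, sig, sizeof(sig) - 1))
			printk(KERN_INFO "option ROM found\n");
		iounmap(p);
	}
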
Signed-off-by: Matthew Wilcox <willy@parisc-linux.org> Acked-by: Alan Cox <alan@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/asm-alpha/io.h | 13 ------------- include/asm-arm/io.h | 17 ----------------- include/asm-frv/io.h | 21 --------------------- include/asm-i386/io.h | 27 --------------------------- include/asm-m32r/io.h | 32 -------------------------------- include/asm-mips/io.h | 26 -------------------------- include/asm-powerpc/io.h | 26 -------------------------- include/asm-ppc/io.h | 16 ---------------- include/asm-sh/io.h | 16 ---------------- include/asm-sh64/io.h | 16 ---------------- include/asm-sparc64/io.h | 15 --------------- include/asm-x86_64/io.h | 27 --------------------------- include/linux/io.h | 27 +++++++++++++++++++++++++++ 13 files changed, 27 insertions(+), 252 deletions(-) (limited to 'include') diff --git a/include/asm-alpha/io.h b/include/asm-alpha/io.h index f5ae98c25d1f..5d15af24573b 100644 --- a/include/asm-alpha/io.h +++ b/include/asm-alpha/io.h @@ -533,19 +533,6 @@ extern void outsl (unsigned long port, const void *src, unsigned long count); #define eth_io_copy_and_sum(skb,src,len,unused) \ memcpy_fromio((skb)->data,src,len) -static inline int -check_signature(const volatile void __iomem *io_addr, - const unsigned char *signature, int length) -{ - do { - if (readb(io_addr) != *signature) - return 0; - io_addr++; - signature++; - } while (--length); - return 1; -} - /* * The Alpha Jensen hardware for some rather strange reason puts * the RTC clock at 0x170 instead of 0x70. Probably due to some diff --git a/include/asm-arm/io.h b/include/asm-arm/io.h index 34aaaac4f617..ae999fd5dc67 100644 --- a/include/asm-arm/io.h +++ b/include/asm-arm/io.h @@ -193,23 +193,6 @@ extern void _memset_io(volatile void __iomem *, int, size_t); #define eth_io_copy_and_sum(s,c,l,b) \ eth_copy_and_sum((s),__mem_pci(c),(l),(b)) -static inline int -check_signature(void __iomem *io_addr, const unsigned char *signature, - int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - #elif !defined(readb) #define readb(c) (__readwrite_bug("readb"),0) diff --git a/include/asm-frv/io.h b/include/asm-frv/io.h index 7765f5528894..20e44fe00abf 100644 --- a/include/asm-frv/io.h +++ b/include/asm-frv/io.h @@ -385,27 +385,6 @@ static inline void pci_iounmap(struct pci_dev *dev, void __iomem *p) */ #define xlate_dev_kmem_ptr(p) p -/* - * Check BIOS signature - */ -static inline int check_signature(volatile void __iomem *io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - - retval = 1; -out: - return retval; -} - #endif /* __KERNEL__ */ #endif /* _ASM_IO_H */ diff --git a/include/asm-i386/io.h b/include/asm-i386/io.h index b3724fe93ff1..68df0dc3ab8f 100644 --- a/include/asm-i386/io.h +++ b/include/asm-i386/io.h @@ -224,33 +224,6 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void __force *)(b),(c),(d)) -/** - * check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the mmio address io_addr. 
This - * address should have been obtained by ioremap. - * Returns 1 on a match. - */ - -static inline int check_signature(volatile void __iomem * io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - /* * Cache management * diff --git a/include/asm-m32r/io.h b/include/asm-m32r/io.h index 70ad1c949c2b..d06933bd6318 100644 --- a/include/asm-m32r/io.h +++ b/include/asm-m32r/io.h @@ -166,38 +166,6 @@ static inline void _writel(unsigned long l, unsigned long addr) #define flush_write_buffers() do { } while (0) /* M32R_FIXME */ -/** - * check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the ISA mmio address io_addr. - * Returns 1 on a match. - * - * This function is deprecated. New drivers should use ioremap and - * check_signature. - */ - -static inline int check_signature(void __iomem *io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; -#if 0 -printk("check_signature\n"); - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: -#endif - return retval; -} - static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count) { diff --git a/include/asm-mips/io.h b/include/asm-mips/io.h index df624e1ee6e2..c2d124badbe5 100644 --- a/include/asm-mips/io.h +++ b/include/asm-mips/io.h @@ -561,32 +561,6 @@ extern void pci_iounmap(struct pci_dev *dev, void __iomem *); */ #define eth_io_copy_and_sum(skb,src,len,unused) memcpy_fromio((skb)->data,(src),(len)) -/* - * check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the mmio address io_addr. This - * address should have been obtained by ioremap. - * Returns 1 on a match. - */ -static inline int check_signature(char __iomem *io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - /* * The caches on some architectures aren't dma-coherent and have need to * handle this in software. There are three types of operations that diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h index cbbd8c648df1..3baff8b0fd5a 100644 --- a/include/asm-powerpc/io.h +++ b/include/asm-powerpc/io.h @@ -404,32 +404,6 @@ static inline void __out_be64(volatile unsigned long __iomem *addr, unsigned lon #include <asm/eeh.h> -/** - * check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the mmio address io_addr. This - * address should have been obtained by ioremap. - * Returns 1 on a match. 
- */ -static inline int check_signature(const volatile void __iomem * io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - /* Nothing to do */ #define dma_cache_inv(_start,_size) do { } while (0) diff --git a/include/asm-ppc/io.h b/include/asm-ppc/io.h index 3d9a9e6f3321..a4c411b753ef 100644 --- a/include/asm-ppc/io.h +++ b/include/asm-ppc/io.h @@ -439,22 +439,6 @@ extern inline void * phys_to_virt(unsigned long address) #define iobarrier_r() eieio() #define iobarrier_w() eieio() -static inline int check_signature(volatile void __iomem * io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - /* * Here comes the ppc implementation of the IOMAP * interfaces. diff --git a/include/asm-sh/io.h b/include/asm-sh/io.h index ed12d38e8c00..a0e55b09e4fd 100644 --- a/include/asm-sh/io.h +++ b/include/asm-sh/io.h @@ -304,22 +304,6 @@ __ioremap_mode(unsigned long offset, unsigned long size, unsigned long flags) #define iounmap(addr) \ __iounmap((addr)) -static inline int check_signature(char __iomem *io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - /* * The caches on some architectures aren't dma-coherent and have need to * handle this in software. There are three types of operations that diff --git a/include/asm-sh64/io.h b/include/asm-sh64/io.h index 252fedbb6621..14d8e7b4bf4b 100644 --- a/include/asm-sh64/io.h +++ b/include/asm-sh64/io.h @@ -178,22 +178,6 @@ extern void iounmap(void *addr); unsigned long onchip_remap(unsigned long addr, unsigned long size, const char* name); extern void onchip_unmap(unsigned long vaddr); -static __inline__ int check_signature(volatile void __iomem *io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - /* * The caches on some architectures aren't dma-coherent and have need to * handle this in software. 
There are three types of operations that diff --git a/include/asm-sparc64/io.h b/include/asm-sparc64/io.h index 0056770e83ad..30b912d8e8bc 100644 --- a/include/asm-sparc64/io.h +++ b/include/asm-sparc64/io.h @@ -440,21 +440,6 @@ _memcpy_toio(volatile void __iomem *dst, const void *src, __kernel_size_t n) #define memcpy_toio(d,s,sz) _memcpy_toio(d,s,sz) -static inline int check_signature(void __iomem *io_addr, - const unsigned char *signature, - int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature++) - goto out; - io_addr++; - } while (--length); - retval = 1; -out: - return retval; -} - #define mmiowb() #ifdef __KERNEL__ diff --git a/include/asm-x86_64/io.h b/include/asm-x86_64/io.h index 70e91fe76344..6ee9fadaaacb 100644 --- a/include/asm-x86_64/io.h +++ b/include/asm-x86_64/io.h @@ -254,33 +254,6 @@ void memset_io(volatile void __iomem *a, int b, size_t c); #define eth_io_copy_and_sum(a,b,c,d) eth_copy_and_sum((a),(void *)(b),(c),(d)) -/** - * check_signature - find BIOS signatures - * @io_addr: mmio address to check - * @signature: signature block - * @length: length of signature - * - * Perform a signature comparison with the mmio address io_addr. This - * address should have been obtained by ioremap. - * Returns 1 on a match. - */ - -static inline int check_signature(void __iomem *io_addr, - const unsigned char *signature, int length) -{ - int retval = 0; - do { - if (readb(io_addr) != *signature) - goto out; - io_addr++; - signature++; - length--; - } while (length); - retval = 1; -out: - return retval; -} - /* Nothing to do */ #define dma_cache_inv(_start,_size) do { } while (0) diff --git a/include/linux/io.h b/include/linux/io.h index 2ad96c3f0e4e..81877ea39309 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -28,4 +28,31 @@ void __iowrite64_copy(void __iomem *to, const void *from, size_t count); int ioremap_page_range(unsigned long addr, unsigned long end, unsigned long phys_addr, pgprot_t prot); +/** + * check_signature - find BIOS signatures + * @io_addr: mmio address to check + * @signature: signature block + * @length: length of signature + * + * Perform a signature comparison with the mmio address io_addr. This + * address should have been obtained by ioremap. + * Returns 1 on a match. + */ + +static inline int check_signature(const volatile void __iomem *io_addr, + const unsigned char *signature, int length) +{ + int retval = 0; + do { + if (readb(io_addr) != *signature) + goto out; + io_addr++; + signature++; + length--; + } while (length); + retval = 1; +out: + return retval; +} + #endif /* _LINUX_IO_H */ -- cgit v1.2.3 From 9c7fff6ef36526fb54694ee8201870f98b6a1747 Mon Sep 17 00:00:00 2001 From: Randy Dunlap <rdunlap@xenotime.net> Date: Wed, 11 Oct 2006 01:22:10 -0700 Subject: [PATCH] uaccess.h: match kernel-doc and function names Place kernel-doc function comment header immediately before the function that is being documented. Signed-off-by: Randy Dunlap <rdunlap@xenotime.net> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/asm-i386/uaccess.h | 67 +++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/asm-i386/uaccess.h b/include/asm-i386/uaccess.h index 54d905ebc63d..eef5133b9ce2 100644 --- a/include/asm-i386/uaccess.h +++ b/include/asm-i386/uaccess.h @@ -404,20 +404,6 @@ unsigned long __must_check __copy_from_user_ll_nocache_nozero(void *to, * anything, so this is accurate. 
*/ -/** - * __copy_to_user: - Copy a block of data into user space, with less checking. - * @to: Destination address, in user space. - * @from: Source address, in kernel space. - * @n: Number of bytes to copy. - * - * Context: User context only. This function may sleep. - * - * Copy data from kernel space to user space. Caller must check - * the specified block with access_ok() before calling this function. - * - * Returns number of bytes that could not be copied. - * On success, this will be zero. - */ static __always_inline unsigned long __must_check __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { @@ -439,35 +425,27 @@ __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) return __copy_to_user_ll(to, from, n); } -static __always_inline unsigned long __must_check -__copy_to_user(void __user *to, const void *from, unsigned long n) -{ - might_sleep(); - return __copy_to_user_inatomic(to, from, n); -} - /** - * __copy_from_user: - Copy a block of data from user space, with less checking. - * @to: Destination address, in kernel space. - * @from: Source address, in user space. + * __copy_to_user: - Copy a block of data into user space, with less checking. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. * @n: Number of bytes to copy. * * Context: User context only. This function may sleep. * - * Copy data from user space to kernel space. Caller must check + * Copy data from kernel space to user space. Caller must check * the specified block with access_ok() before calling this function. * * Returns number of bytes that could not be copied. * On success, this will be zero. - * - * If some data could not be copied, this function will pad the copied - * data to the requested size using zero bytes. - * - * An alternate version - __copy_from_user_inatomic() - may be called from - * atomic context and will fail rather than sleep. In this case the - * uncopied bytes will *NOT* be padded with zeros. See fs/filemap.h - * for explanation of why this is needed. */ +static __always_inline unsigned long __must_check +__copy_to_user(void __user *to, const void *from, unsigned long n) +{ + might_sleep(); + return __copy_to_user_inatomic(to, from, n); +} + static __always_inline unsigned long __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) { @@ -493,6 +471,29 @@ __copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) } return __copy_from_user_ll_nozero(to, from, n); } + +/** + * __copy_from_user: - Copy a block of data from user space, with less checking. + * @to: Destination address, in kernel space. + * @from: Source address, in user space. + * @n: Number of bytes to copy. + * + * Context: User context only. This function may sleep. + * + * Copy data from user space to kernel space. Caller must check + * the specified block with access_ok() before calling this function. + * + * Returns number of bytes that could not be copied. + * On success, this will be zero. + * + * If some data could not be copied, this function will pad the copied + * data to the requested size using zero bytes. + * + * An alternate version - __copy_from_user_inatomic() - may be called from + * atomic context and will fail rather than sleep. In this case the + * uncopied bytes will *NOT* be padded with zeros. See fs/filemap.h + * for explanation of why this is needed. 
+ */ static __always_inline unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { -- cgit v1.2.3 From c751c1dbb1289d220a8a175ba0df47706ce95a7e Mon Sep 17 00:00:00 2001 From: Mike Frysinger <vapier@gentoo.org> Date: Wed, 11 Oct 2006 01:22:11 -0700 Subject: [PATCH] include linux/types.h in linux/nbd.h The nbd header uses __be32 and such types but doesn't actually include the header that defines these things (linux/types.h); so let's include it. Signed-off-by: Mike Frysinger <vapier@gentoo.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/nbd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nbd.h b/include/linux/nbd.h index e712e7d47cc2..d6b6dc09ad97 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -15,6 +15,8 @@ #ifndef LINUX_NBD_H #define LINUX_NBD_H +#include <linux/types.h> + #define NBD_SET_SOCK _IO( 0xab, 0 ) #define NBD_SET_BLKSIZE _IO( 0xab, 1 ) #define NBD_SET_SIZE _IO( 0xab, 2 ) -- cgit v1.2.3 From c636ebdb186bf37f98d3839f69293597723edb36 Mon Sep 17 00:00:00 2001 From: David Howells <dhowells@redhat.com> Date: Wed, 11 Oct 2006 01:22:19 -0700 Subject: [PATCH] VFS: Destroy the dentries contributed by a superblock on unmounting The attached patch destroys all the dentries attached to a superblock in one go by: (1) Destroying the tree rooted at s_root. (2) Destroying every entry in the anon list, one at a time. (3) Each entry in the anon list has its subtree consumed from the leaves inwards. This reduces the amount of work generic_shutdown_super() does, and avoids iterating through the dentry_unused list. Note that locking is almost entirely absent in the shrink_dcache_for_umount*() functions added by this patch. This is because: (1) at the point the filesystem calls generic_shutdown_super(), it is not permitted to further touch the superblock's set of dentries, and nor may it remove aliases from inodes; (2) the dcache memory shrinker now skips dentries that are being unmounted; and (3) the superblock no longer has any external references through which the VFS can reach it. Given these points, the only locking we need to do is when we remove dentries from the unused list and the name hashes, which we do a directory's worth at a time. We also don't need to guard against reference counts going to zero unexpectedly and removing bits of the tree we're working on as nothing else can call dput(). A cut down version of dentry_iput() has been folded into shrink_dcache_for_umount_subtree() function. Apart from not needing to unlock things, it also doesn't need to check for inotify watches. In this version of the patch, the complaint about a dentry still being in use has been expanded from a single BUG_ON() and now gives much more information. 
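In outline, the new entry point strings the three steps together like so (condensed from the fs/dcache.c hunk below; the s_umount sanity check is omitted):

	void shrink_dcache_for_umount(struct super_block *sb)
	{
		struct dentry *dentry = sb->s_root;

		sb->s_root = NULL;
		atomic_dec(&dentry->d_count);	/* drop the superblock's reference */
		shrink_dcache_for_umount_subtree(dentry);	/* (1) the tree at s_root */

		while (!hlist_empty(&sb->s_anon)) {	/* (2) every anon-list entry... */
			dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash);
			shrink_dcache_for_umount_subtree(dentry);	/* (3) ...consumed leaves inward */
		}
	}
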
Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: NeilBrown <neilb@suse.de> Acked-by: Ian Kent <raven@themaw.net> Cc: Trond Myklebust <trond.myklebust@fys.uio.no> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/dcache.c | 130 +++++++++++++++++++++++++++++++++++++++++++++++++ fs/super.c | 12 ++--- include/linux/dcache.h | 1 + 3 files changed, 137 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/fs/dcache.c b/fs/dcache.c index 2355bddad8de..2bac4ba1d1d3 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -548,6 +548,136 @@ repeat: spin_unlock(&dcache_lock); } +/* + * destroy a single subtree of dentries for unmount + * - see the comments on shrink_dcache_for_umount() for a description of the + * locking + */ +static void shrink_dcache_for_umount_subtree(struct dentry *dentry) +{ + struct dentry *parent; + + BUG_ON(!IS_ROOT(dentry)); + + /* detach this root from the system */ + spin_lock(&dcache_lock); + if (!list_empty(&dentry->d_lru)) { + dentry_stat.nr_unused--; + list_del_init(&dentry->d_lru); + } + __d_drop(dentry); + spin_unlock(&dcache_lock); + + for (;;) { + /* descend to the first leaf in the current subtree */ + while (!list_empty(&dentry->d_subdirs)) { + struct dentry *loop; + + /* this is a branch with children - detach all of them + * from the system in one go */ + spin_lock(&dcache_lock); + list_for_each_entry(loop, &dentry->d_subdirs, + d_u.d_child) { + if (!list_empty(&loop->d_lru)) { + dentry_stat.nr_unused--; + list_del_init(&loop->d_lru); + } + + __d_drop(loop); + cond_resched_lock(&dcache_lock); + } + spin_unlock(&dcache_lock); + + /* move to the first child */ + dentry = list_entry(dentry->d_subdirs.next, + struct dentry, d_u.d_child); + } + + /* consume the dentries from this leaf up through its parents + * until we find one with children or run out altogether */ + do { + struct inode *inode; + + if (atomic_read(&dentry->d_count) != 0) { + printk(KERN_ERR + "BUG: Dentry %p{i=%lx,n=%s}" + " still in use (%d)" + " [unmount of %s %s]\n", + dentry, + dentry->d_inode ? 
+ dentry->d_inode->i_ino : 0UL, + dentry->d_name.name, + atomic_read(&dentry->d_count), + dentry->d_sb->s_type->name, + dentry->d_sb->s_id); + BUG(); + } + + parent = dentry->d_parent; + if (parent == dentry) + parent = NULL; + else + atomic_dec(&parent->d_count); + + list_del(&dentry->d_u.d_child); + dentry_stat.nr_dentry--; /* For d_free, below */ + + inode = dentry->d_inode; + if (inode) { + dentry->d_inode = NULL; + list_del_init(&dentry->d_alias); + if (dentry->d_op && dentry->d_op->d_iput) + dentry->d_op->d_iput(dentry, inode); + else + iput(inode); + } + + d_free(dentry); + + /* finished when we fall off the top of the tree, + * otherwise we ascend to the parent and move to the + * next sibling if there is one */ + if (!parent) + return; + + dentry = parent; + + } while (list_empty(&dentry->d_subdirs)); + + dentry = list_entry(dentry->d_subdirs.next, + struct dentry, d_u.d_child); + } +} + +/* + * destroy the dentries attached to a superblock on unmounting + * - we don't need to use dentry->d_lock, and only need dcache_lock when + * removing the dentry from the system lists and hashes because: + * - the superblock is detached from all mountings and open files, so the + * dentry trees will not be rearranged by the VFS + * - s_umount is write-locked, so the memory pressure shrinker will ignore + * any dentries belonging to this superblock that it comes across + * - the filesystem itself is no longer permitted to rearrange the dentries + * in this superblock + */ +void shrink_dcache_for_umount(struct super_block *sb) +{ + struct dentry *dentry; + + if (down_read_trylock(&sb->s_umount)) + BUG(); + + dentry = sb->s_root; + sb->s_root = NULL; + atomic_dec(&dentry->d_count); + shrink_dcache_for_umount_subtree(dentry); + + while (!hlist_empty(&sb->s_anon)) { + dentry = hlist_entry(sb->s_anon.first, struct dentry, d_hash); + shrink_dcache_for_umount_subtree(dentry); + } +} + /* * Search for at least 1 mount point in the dentry's subdirs. * We descend to the next level whenever the d_subdirs diff --git a/fs/super.c b/fs/super.c index aec99ddbe53f..47e554c12e76 100644 --- a/fs/super.c +++ b/fs/super.c @@ -260,17 +260,17 @@ int fsync_super(struct super_block *sb) * that need destruction out of superblock, call generic_shutdown_super() * and release aforementioned objects. Note: dentries and inodes _are_ * taken care of and do not need specific handling. + * + * Upon calling this function, the filesystem may no longer alter or + * rearrange the set of dentries belonging to this super_block, nor may it + * change the attachments of dentries to inodes. 
*/ void generic_shutdown_super(struct super_block *sb) { - struct dentry *root = sb->s_root; struct super_operations *sop = sb->s_op; - if (root) { - sb->s_root = NULL; - shrink_dcache_parent(root); - shrink_dcache_sb(sb); - dput(root); + if (sb->s_root) { + shrink_dcache_for_umount(sb); fsync_super(sb); lock_super(sb); sb->s_flags &= ~MS_ACTIVE; diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 44605be59409..63f64a9a5bf7 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -230,6 +230,7 @@ extern struct dentry * d_alloc_anon(struct inode *); extern struct dentry * d_splice_alias(struct inode *, struct dentry *); extern void shrink_dcache_sb(struct super_block *); extern void shrink_dcache_parent(struct dentry *); +extern void shrink_dcache_for_umount(struct super_block *); extern int d_invalidate(struct dentry *); /* only used at mount-time */ -- cgit v1.2.3 From fc048b5b0f2554bc953a8ada5b2e3b82bde2fcb0 Mon Sep 17 00:00:00 2001 From: Al Viro <viro@ftp.linux.org.uk> Date: Wed, 11 Oct 2006 17:22:54 +0100 Subject: [PATCH] arm: use unsigned long instead of unsigned int in get_user() Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/asm-arm/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-arm/uaccess.h b/include/asm-arm/uaccess.h index 87aba57a66c4..09ad0cab9014 100644 --- a/include/asm-arm/uaccess.h +++ b/include/asm-arm/uaccess.h @@ -110,7 +110,7 @@ extern int __get_user_4(void *); #define get_user(x,p) \ ({ \ const register typeof(*(p)) __user *__p asm("r0") = (p);\ - register unsigned int __r2 asm("r2"); \ + register unsigned long __r2 asm("r2"); \ register int __e asm("r0"); \ switch (sizeof(*(__p))) { \ case 1: \ -- cgit v1.2.3 From 399ad77b9098ed2eb27cbfbeb6449c1caab3c18e Mon Sep 17 00:00:00 2001 From: Al Viro <viro@ftp.linux.org.uk> Date: Wed, 11 Oct 2006 17:22:34 +0100 Subject: [PATCH] arm-versatile iomem annotations Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- arch/arm/mach-versatile/core.c | 4 ++-- arch/arm/mach-versatile/pci.c | 32 +++++++++++++++---------------- include/asm-arm/arch-versatile/hardware.h | 4 ++-- 3 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/arch/arm/mach-versatile/core.c b/arch/arm/mach-versatile/core.c index 2aa150b57ba1..3b8576111c16 100644 --- a/arch/arm/mach-versatile/core.c +++ b/arch/arm/mach-versatile/core.c @@ -188,12 +188,12 @@ static struct map_desc versatile_io_desc[] __initdata = { .length = SZ_4K, .type = MT_DEVICE }, { - .virtual = VERSATILE_PCI_VIRT_BASE, + .virtual = (unsigned long)VERSATILE_PCI_VIRT_BASE, .pfn = __phys_to_pfn(VERSATILE_PCI_BASE), .length = VERSATILE_PCI_BASE_SIZE, .type = MT_DEVICE }, { - .virtual = VERSATILE_PCI_CFG_VIRT_BASE, + .virtual = (unsigned long)VERSATILE_PCI_CFG_VIRT_BASE, .pfn = __phys_to_pfn(VERSATILE_PCI_CFG_BASE), .length = VERSATILE_PCI_CFG_BASE_SIZE, .type = MT_DEVICE diff --git a/arch/arm/mach-versatile/pci.c b/arch/arm/mach-versatile/pci.c index 13bbd08ff841..5cd0b5d9e7eb 100644 --- a/arch/arm/mach-versatile/pci.c +++ b/arch/arm/mach-versatile/pci.c @@ -40,14 +40,15 @@ * Cfg 42000000 - 42FFFFFF PCI config * */ -#define SYS_PCICTL IO_ADDRESS(VERSATILE_SYS_PCICTL) -#define PCI_IMAP0 IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x0) -#define PCI_IMAP1 IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x4) -#define PCI_IMAP2 IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x8) -#define PCI_SMAP0 
IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x10) -#define PCI_SMAP1 IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14) -#define PCI_SMAP2 IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18) -#define PCI_SELFID IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0xc) +#define __IO_ADDRESS(n) ((void __iomem *)(unsigned long)IO_ADDRESS(n)) +#define SYS_PCICTL __IO_ADDRESS(VERSATILE_SYS_PCICTL) +#define PCI_IMAP0 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x0) +#define PCI_IMAP1 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x4) +#define PCI_IMAP2 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x8) +#define PCI_SMAP0 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x10) +#define PCI_SMAP1 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x14) +#define PCI_SMAP2 __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0x18) +#define PCI_SELFID __IO_ADDRESS(VERSATILE_PCI_CORE_BASE+0xc) #define DEVICE_ID_OFFSET 0x00 #define CSR_OFFSET 0x04 @@ -76,7 +77,7 @@ static int __init versatile_pci_slot_ignore(char *str) __setup("pci_slot_ignore=", versatile_pci_slot_ignore); -static unsigned long __pci_addr(struct pci_bus *bus, +static void __iomem *__pci_addr(struct pci_bus *bus, unsigned int devfn, int offset) { unsigned int busnr = bus->number; @@ -91,14 +92,14 @@ static unsigned long __pci_addr(struct pci_bus *bus, if (devfn > 255) BUG(); - return (VERSATILE_PCI_CFG_VIRT_BASE | (busnr << 16) | + return VERSATILE_PCI_CFG_VIRT_BASE + ((busnr << 16) | (PCI_SLOT(devfn) << 11) | (PCI_FUNC(devfn) << 8) | offset); } static int versatile_read_config(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 *val) { - unsigned long addr = __pci_addr(bus, devfn, where); + void __iomem *addr = __pci_addr(bus, devfn, where & ~3); u32 v; int slot = PCI_SLOT(devfn); @@ -121,13 +122,12 @@ static int versatile_read_config(struct pci_bus *bus, unsigned int devfn, int wh break; case 2: - v = __raw_readl(addr & ~3); - if (addr & 2) v >>= 16; + v = __raw_readl(addr); + if (where & 2) v >>= 16; v &= 0xffff; break; default: - addr &= ~3; v = __raw_readl(addr); break; } @@ -140,7 +140,7 @@ static int versatile_read_config(struct pci_bus *bus, unsigned int devfn, int wh static int versatile_write_config(struct pci_bus *bus, unsigned int devfn, int where, int size, u32 val) { - unsigned long addr = __pci_addr(bus, devfn, where); + void __iomem *addr = __pci_addr(bus, devfn, where); int slot = PCI_SLOT(devfn); if (pci_slot_ignore & (1 << slot)) { @@ -279,7 +279,7 @@ int __init pci_versatile_setup(int nr, struct pci_sys_data *sys) printk("PCI core found (slot %d)\n",myslot); __raw_writel(myslot, PCI_SELFID); - local_pci_cfg_base = (void *) VERSATILE_PCI_CFG_VIRT_BASE + (myslot << 11); + local_pci_cfg_base = VERSATILE_PCI_CFG_VIRT_BASE + (myslot << 11); val = __raw_readl(local_pci_cfg_base + CSR_OFFSET); val |= PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER | PCI_COMMAND_INVALIDATE; diff --git a/include/asm-arm/arch-versatile/hardware.h b/include/asm-arm/arch-versatile/hardware.h index 41c1bee342ad..edc06598d187 100644 --- a/include/asm-arm/arch-versatile/hardware.h +++ b/include/asm-arm/arch-versatile/hardware.h @@ -28,8 +28,8 @@ /* * PCI space virtual addresses */ -#define VERSATILE_PCI_VIRT_BASE 0xe8000000 -#define VERSATILE_PCI_CFG_VIRT_BASE 0xe9000000 +#define VERSATILE_PCI_VIRT_BASE (void __iomem *)0xe8000000ul +#define VERSATILE_PCI_CFG_VIRT_BASE (void __iomem *)0xe9000000ul #if 0 #define VERSATILE_PCI_VIRT_MEM_BASE0 0xf4000000 -- cgit v1.2.3 From b971018bae94bb43ae2402f884684ad69e85f931 Mon Sep 17 00:00:00 2001 From: Al Viro <viro@ftp.linux.org.uk> Date: Wed, 11 Oct 2006 17:27:57 +0100 Subject: [PATCH] m68k uaccess __user 
annotations Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/asm-m68k/uaccess.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/asm-m68k/uaccess.h b/include/asm-m68k/uaccess.h index 88b1f47400e1..e4c9f080ff20 100644 --- a/include/asm-m68k/uaccess.h +++ b/include/asm-m68k/uaccess.h @@ -76,7 +76,7 @@ asm volatile ("\n" \ break; \ case 8: \ { \ - const void *__pu_ptr = (ptr); \ + const void __user *__pu_ptr = (ptr); \ asm volatile ("\n" \ "1: moves.l %2,(%1)+\n" \ "2: moves.l %R2,(%1)\n" \ @@ -125,7 +125,7 @@ asm volatile ("\n" \ " .previous" \ : "+d" (res), "=&" #reg (__gu_val) \ : "m" (*(ptr)), "i" (err)); \ - (x) = (typeof(*(ptr)))(long)__gu_val; \ + (x) = (typeof(*(ptr)))(unsigned long)__gu_val; \ }) #define __get_user(x, ptr) \ @@ -221,16 +221,16 @@ __constant_copy_from_user(void *to, const void __user *from, unsigned long n) switch (n) { case 1: - __get_user_asm(res, *(u8 *)to, (u8 *)from, u8, b, d, 1); + __get_user_asm(res, *(u8 *)to, (u8 __user *)from, u8, b, d, 1); break; case 2: - __get_user_asm(res, *(u16 *)to, (u16 *)from, u16, w, d, 2); + __get_user_asm(res, *(u16 *)to, (u16 __user *)from, u16, w, d, 2); break; case 3: __constant_copy_from_user_asm(res, to, from, tmp, 3, w, b,); break; case 4: - __get_user_asm(res, *(u32 *)to, (u32 *)from, u32, l, r, 4); + __get_user_asm(res, *(u32 *)to, (u32 __user *)from, u32, l, r, 4); break; case 5: __constant_copy_from_user_asm(res, to, from, tmp, 5, l, b,); @@ -302,16 +302,16 @@ __constant_copy_to_user(void __user *to, const void *from, unsigned long n) switch (n) { case 1: - __put_user_asm(res, *(u8 *)from, (u8 *)to, b, d, 1); + __put_user_asm(res, *(u8 *)from, (u8 __user *)to, b, d, 1); break; case 2: - __put_user_asm(res, *(u16 *)from, (u16 *)to, w, d, 2); + __put_user_asm(res, *(u16 *)from, (u16 __user *)to, w, d, 2); break; case 3: __constant_copy_to_user_asm(res, to, from, tmp, 3, w, b,); break; case 4: - __put_user_asm(res, *(u32 *)from, (u32 *)to, l, r, 4); + __put_user_asm(res, *(u32 *)from, (u32 __user *)to, l, r, 4); break; case 5: __constant_copy_to_user_asm(res, to, from, tmp, 5, l, b,); -- cgit v1.2.3 From dff9262ed1491a1e531dc56e687605b5e4cd488d Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto <anemo@mba.ocn.ne.jp> Date: Wed, 11 Oct 2006 01:07:01 +0900 Subject: [MIPS] <asm/irq.h> does not need pt_regs anymore. Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp> Signed-off-by: Ralf Baechle <ralf@linux-mips.org> --- include/asm-mips/irq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/asm-mips/irq.h b/include/asm-mips/irq.h index 1a9804c65369..0ce2a80b689e 100644 --- a/include/asm-mips/irq.h +++ b/include/asm-mips/irq.h @@ -24,8 +24,6 @@ static inline int irq_canonicalize(int irq) #define irq_canonicalize(irq) (irq) /* Sane hardware, sane code ... */ #endif -struct pt_regs; - extern asmlinkage unsigned int do_IRQ(unsigned int irq); #ifdef CONFIG_MIPS_MT_SMTC -- cgit v1.2.3 From 9b95e629eab59ee140fe2b17bbd7fea6821c6085 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto <anemo@mba.ocn.ne.jp> Date: Tue, 10 Oct 2006 22:46:52 +0900 Subject: [MIPS] Optimize and cleanup get_saved_sp, set_saved_sp If CONFIG_BUILD_ELF64 was not selected and gcc had -msym32 option (i.e. 4.0 or newer), there is no point to use %highest, %higher for kernel symbols. This patch also fixes 64-bit SMTC version of get_saved_sp() which is broken but harmless since there is no such CPUs for now. 
A bonus is set_saved_sp() and SMP version of get_saved_sp() are more readable now. Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp> Signed-off-by: Ralf Baechle <ralf@linux-mips.org> --- include/asm-mips/stackframe.h | 64 +++++++++++++------------------------------ 1 file changed, 19 insertions(+), 45 deletions(-) (limited to 'include') diff --git a/include/asm-mips/stackframe.h b/include/asm-mips/stackframe.h index 158a4cd12e46..1fae5dc58138 100644 --- a/include/asm-mips/stackframe.h +++ b/include/asm-mips/stackframe.h @@ -59,69 +59,43 @@ .endm #ifdef CONFIG_SMP - .macro get_saved_sp /* SMP variation */ -#ifdef CONFIG_32BIT #ifdef CONFIG_MIPS_MT_SMTC - .set mips32 - mfc0 k0, CP0_TCBIND; - .set mips0 - lui k1, %hi(kernelsp) - srl k0, k0, 19 - /* No need to shift down and up to clear bits 0-1 */ +#define PTEBASE_SHIFT 19 /* TCBIND */ #else - mfc0 k0, CP0_CONTEXT - lui k1, %hi(kernelsp) - srl k0, k0, 23 -#endif - addu k1, k0 - LONG_L k1, %lo(kernelsp)(k1) +#define PTEBASE_SHIFT 23 /* CONTEXT */ #endif -#ifdef CONFIG_64BIT + .macro get_saved_sp /* SMP variation */ #ifdef CONFIG_MIPS_MT_SMTC - .set mips64 - mfc0 k0, CP0_TCBIND; - .set mips0 - lui k0, %highest(kernelsp) - dsrl k1, 19 - /* No need to shift down and up to clear bits 0-2 */ + mfc0 k0, CP0_TCBIND #else - MFC0 k1, CP0_CONTEXT - lui k0, %highest(kernelsp) - dsrl k1, 23 - daddiu k0, %higher(kernelsp) - dsll k0, k0, 16 - daddiu k0, %hi(kernelsp) - dsll k0, k0, 16 -#endif /* CONFIG_MIPS_MT_SMTC */ - daddu k1, k1, k0 + MFC0 k0, CP0_CONTEXT +#endif +#if defined(CONFIG_BUILD_ELF64) || (defined(CONFIG_64BIT) && __GNUC__ < 4) + lui k1, %highest(kernelsp) + daddiu k1, %higher(kernelsp) + dsll k1, 16 + daddiu k1, %hi(kernelsp) + dsll k1, 16 +#else + lui k1, %hi(kernelsp) +#endif + LONG_SRL k0, PTEBASE_SHIFT + LONG_ADDU k1, k0 LONG_L k1, %lo(kernelsp)(k1) -#endif /* CONFIG_64BIT */ .endm .macro set_saved_sp stackp temp temp2 -#ifdef CONFIG_32BIT -#ifdef CONFIG_MIPS_MT_SMTC - mfc0 \temp, CP0_TCBIND - srl \temp, 19 -#else - mfc0 \temp, CP0_CONTEXT - srl \temp, 23 -#endif -#endif -#ifdef CONFIG_64BIT #ifdef CONFIG_MIPS_MT_SMTC mfc0 \temp, CP0_TCBIND - dsrl \temp, 19 #else MFC0 \temp, CP0_CONTEXT - dsrl \temp, 23 -#endif #endif + LONG_SRL \temp, PTEBASE_SHIFT LONG_S \stackp, kernelsp(\temp) .endm #else .macro get_saved_sp /* Uniprocessor variation */ -#ifdef CONFIG_64BIT +#if defined(CONFIG_BUILD_ELF64) || (defined(CONFIG_64BIT) && __GNUC__ < 4) lui k1, %highest(kernelsp) daddiu k1, %higher(kernelsp) dsll k1, k1, 16 -- cgit v1.2.3 From 04d4d7d5e36957b2d941310fc8243aa7bf036f3b Mon Sep 17 00:00:00 2001 From: Ralf Baechle <ralf@linux-mips.org> Date: Wed, 11 Oct 2006 01:22:12 +0100 Subject: [MIPS] Cleanup definitions of speed_t and tcflag_t. Signed-off-by: Ralf Baechle <ralf@linux-mips.org> --- include/asm-mips/termbits.h | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/asm-mips/termbits.h b/include/asm-mips/termbits.h index fa6d04dac56b..b62ec7c521cc 100644 --- a/include/asm-mips/termbits.h +++ b/include/asm-mips/termbits.h @@ -3,7 +3,7 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. * - * Copyright (C) 1995, 1996, 1999, 2001 Ralf Baechle + * Copyright (C) 1995, 96, 99, 2001, 06 Ralf Baechle * Copyright (C) 1999 Silicon Graphics, Inc. * Copyright (C) 2001 MIPS Technologies, Inc. 
*/ @@ -13,14 +13,8 @@ #include <linux/posix_types.h> typedef unsigned char cc_t; -#if (_MIPS_SZLONG == 32) -typedef unsigned long speed_t; -typedef unsigned long tcflag_t; -#endif -#if (_MIPS_SZLONG == 64) -typedef __u32 speed_t; -typedef __u32 tcflag_t; -#endif +typedef unsigned int speed_t; +typedef unsigned int tcflag_t; /* * The ABI says nothing about NCC but seems to use NCCS as -- cgit v1.2.3 From 8ae91b9ad88a130cd50fc0b78b16e7b9510b8067 Mon Sep 17 00:00:00 2001 From: Ryusuke Sakato <sakato@hsdv.com> Date: Thu, 12 Oct 2006 12:16:13 +0900 Subject: sh: SH-4A UBC support A simple patch to enable the UBC on SH-4A. Signed-off-by: Ryusuke Sakato <sakato@hsdv.com> Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- arch/sh/kernel/process.c | 30 ++++++++++++++++++++++++++++++ include/asm-sh/cpu-sh4/ubc.h | 37 +++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) (limited to 'include') diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 0b1d5dd7a93b..91516dca4a85 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -5,6 +5,7 @@ * Copyright (C) 1995 Linus Torvalds * * SuperH version: Copyright (C) 1999, 2000 Niibe Yutaka & Kaz Kojima + * Copyright (C) 2006 Lineo Solutions Inc. support SH4A UBC */ /* @@ -290,6 +291,24 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long usp, static void ubc_set_tracing(int asid, unsigned long pc) { +#if defined(CONFIG_CPU_SH4A) + unsigned long val; + + val = (UBC_CBR_ID_INST | UBC_CBR_RW_READ | UBC_CBR_CE); + val |= (UBC_CBR_AIE | UBC_CBR_AIV_SET(asid)); + + ctrl_outl(val, UBC_CBR0); + ctrl_outl(pc, UBC_CAR0); + ctrl_outl(0x0, UBC_CAMR0); + ctrl_outl(0x0, UBC_CBCR); + + val = (UBC_CRR_RES | UBC_CRR_PCB | UBC_CRR_BIE); + ctrl_outl(val, UBC_CRR0); + + /* Read UBC register that we writed last. For chekking UBC Register changed */ + val = ctrl_inl(UBC_CRR0); + +#else /* CONFIG_CPU_SH4A */ ctrl_outl(pc, UBC_BARA); #ifdef CONFIG_MMU @@ -307,6 +326,7 @@ ubc_set_tracing(int asid, unsigned long pc) ctrl_outw(BBR_INST | BBR_READ, UBC_BBRA); ctrl_outw(BRCR_PCBA, UBC_BRCR); } +#endif /* CONFIG_CPU_SH4A */ } /* @@ -359,8 +379,13 @@ struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *ne #endif ubc_set_tracing(asid, next->thread.ubc_pc); } else { +#if defined(CONFIG_CPU_SH4A) + ctrl_outl(UBC_CBR_INIT, UBC_CBR0); + ctrl_outl(UBC_CRR_INIT, UBC_CRR0); +#else ctrl_outw(0, UBC_BBRA); ctrl_outw(0, UBC_BBRB); +#endif } return prev; @@ -460,8 +485,13 @@ asmlinkage void break_point_trap(unsigned long r4, unsigned long r5, struct pt_regs regs) { /* Clear tracing. */ +#if defined(CONFIG_CPU_SH4A) + ctrl_outl(UBC_CBR_INIT, UBC_CBR0); + ctrl_outl(UBC_CRR_INIT, UBC_CRR0); +#else ctrl_outw(0, UBC_BBRA); ctrl_outw(0, UBC_BBRB); +#endif current->thread.ubc_pc = 0; ubc_usercnt -= 1; diff --git a/include/asm-sh/cpu-sh4/ubc.h b/include/asm-sh/cpu-sh4/ubc.h index 3d0943167659..c86e17050935 100644 --- a/include/asm-sh/cpu-sh4/ubc.h +++ b/include/asm-sh/cpu-sh4/ubc.h @@ -3,6 +3,7 @@ * * Copyright (C) 1999 Niibe Yutaka * Copyright (C) 2003 Paul Mundt + * Copyright (C) 2006 Lineo Solutions Inc. support SH4A UBC * * This file is subject to the terms and conditions of the GNU General Public * License. 
See the file "COPYING" in the main directory of this archive @@ -11,6 +12,41 @@ #ifndef __ASM_CPU_SH4_UBC_H #define __ASM_CPU_SH4_UBC_H +#if defined(CONFIG_CPU_SH4A) +#define UBC_CBR0 0xff200000 +#define UBC_CRR0 0xff200004 +#define UBC_CAR0 0xff200008 +#define UBC_CAMR0 0xff20000c +#define UBC_CBR1 0xff200020 +#define UBC_CRR1 0xff200024 +#define UBC_CAR1 0xff200028 +#define UBC_CAMR1 0xff20002c +#define UBC_CDR1 0xff200030 +#define UBC_CDMR1 0xff200034 +#define UBC_CETR1 0xff200038 +#define UBC_CCMFR 0xff200600 +#define UBC_CBCR 0xff200620 + +/* CBR */ +#define UBC_CBR_AIE (0x01<<30) +#define UBC_CBR_ID_INST (0x01<<4) +#define UBC_CBR_RW_READ (0x01<<1) +#define UBC_CBR_CE (0x01) + +#define UBC_CBR_AIV_MASK (0x00FF0000) +#define UBC_CBR_AIV_SHIFT (16) +#define UBC_CBR_AIV_SET(asid) (((asid)<<UBC_CBR_AIV_SHIFT) & UBC_CBR_AIV_MASK) + +#define UBC_CBR_INIT 0x20000000 + +/* CRR */ +#define UBC_CRR_RES (0x01<<13) +#define UBC_CRR_PCB (0x01<<1) +#define UBC_CRR_BIE (0x01) + +#define UBC_CRR_INIT 0x00002000 + +#else /* CONFIG_CPU_SH4 */ #define UBC_BARA 0xff200000 #define UBC_BAMRA 0xff200004 #define UBC_BBRA 0xff200008 @@ -22,6 +58,7 @@ #define UBC_BDRB 0xff200018 #define UBC_BDMRB 0xff20001c #define UBC_BRCR 0xff200020 +#endif /* CONFIG_CPU_SH4 */ #endif /* __ASM_CPU_SH4_UBC_H */ -- cgit v1.2.3 From 07646e217f473a3e6213f8228336a9046833d6aa Mon Sep 17 00:00:00 2001 From: Andrew Morton <akpm@osdl.org> Date: Wed, 11 Oct 2006 23:45:23 -0400 Subject: Lockdep: fix compile error in drivers/input/serio/serio.c lockdep_set_subclass() was missing in !LOCKDEP case Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Dmitry Torokhov <dtor@mail.ru> --- include/linux/lockdep.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 14fec2a23b2e..819f08f1310d 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -268,6 +268,8 @@ static inline int lockdep_internal(void) do { (void)(key); } while (0) #define lockdep_set_class_and_subclass(lock, key, sub) \ do { (void)(key); } while (0) +#define lockdep_set_subclass(lock, sub) do { } while (0) + # define INIT_LOCKDEP # define lockdep_reset() do { debug_locks = 1; } while (0) # define lockdep_free_key_range(start, size) do { } while (0) -- cgit v1.2.3 From ffb733c65000ee701294f7b80c4eca2a5f335637 Mon Sep 17 00:00:00 2001 From: "paul.moore@hp.com" <paul.moore@hp.com> Date: Wed, 4 Oct 2006 11:46:31 -0400 Subject: NetLabel: fix a cache race condition Testing revealed a problem with the NetLabel cache where a cached entry could be freed while in use by the LSM layer causing an oops and other problems. This patch fixes that problem by introducing a reference counter to the cache entry so that it is only freed when it is no longer in use. 
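The lifetime rule that results is the usual get/put pattern (sketched from the hunks below; NULL checks omitted):

	/* allocation sets the reference count to 1 for the creator */
	struct netlbl_lsm_cache *cache = netlbl_secattr_cache_alloc(GFP_ATOMIC);

	/* each extra holder, e.g. a CIPSO cache entry, takes its own reference */
	atomic_inc(&cache->refcount);
	entry->lsm_data = cache;

	/* every put goes through netlbl_secattr_cache_free(), which only
	 * calls cache->free() and kfree() once the last reference drops */
	netlbl_secattr_cache_free(cache);
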
Signed-off-by: Paul Moore <paul.moore@hp.com> Signed-off-by: James Morris <jmorris@namei.org> --- include/net/netlabel.h | 62 ++++++++++++++++++++++++++++++++---------- net/ipv4/cipso_ipv4.c | 18 ++++++------ net/netlabel/netlabel_kapi.c | 2 +- security/selinux/ss/services.c | 37 ++++++++++++++----------- 4 files changed, 79 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/net/netlabel.h b/include/net/netlabel.h index c63a58058e21..113337c27955 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -34,6 +34,7 @@ #include <linux/net.h> #include <linux/skbuff.h> #include <net/netlink.h> +#include <asm/atomic.h> /* * NetLabel - A management interface for maintaining network packet label @@ -106,6 +107,7 @@ int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); /* LSM security attributes */ struct netlbl_lsm_cache { + atomic_t refcount; void (*free) (const void *data); void *data; }; @@ -117,7 +119,7 @@ struct netlbl_lsm_secattr { unsigned char *mls_cat; size_t mls_cat_len; - struct netlbl_lsm_cache cache; + struct netlbl_lsm_cache *cache; }; /* @@ -125,6 +127,43 @@ struct netlbl_lsm_secattr { */ +/** + * netlbl_secattr_cache_alloc - Allocate and initialize a secattr cache + * @flags: the memory allocation flags + * + * Description: + * Allocate and initialize a netlbl_lsm_cache structure. Returns a pointer + * on success, NULL on failure. + * + */ +static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(int flags) +{ + struct netlbl_lsm_cache *cache; + + cache = kzalloc(sizeof(*cache), flags); + if (cache) + atomic_set(&cache->refcount, 1); + return cache; +} + +/** + * netlbl_secattr_cache_free - Frees a netlbl_lsm_cache struct + * @cache: the struct to free + * + * Description: + * Frees @secattr including all of the internal buffers. + * + */ +static inline void netlbl_secattr_cache_free(struct netlbl_lsm_cache *cache) +{ + if (!atomic_dec_and_test(&cache->refcount)) + return; + + if (cache->free) + cache->free(cache->data); + kfree(cache); +} + /** * netlbl_secattr_init - Initialize a netlbl_lsm_secattr struct * @secattr: the struct to initialize @@ -143,20 +182,16 @@ static inline int netlbl_secattr_init(struct netlbl_lsm_secattr *secattr) /** * netlbl_secattr_destroy - Clears a netlbl_lsm_secattr struct * @secattr: the struct to clear - * @clear_cache: cache clear flag * * Description: * Destroys the @secattr struct, including freeing all of the internal buffers. - * If @clear_cache is true then free the cache fields, otherwise leave them - * intact. The struct must be reset with a call to netlbl_secattr_init() - * before reuse. + * The struct must be reset with a call to netlbl_secattr_init() before reuse. * */ -static inline void netlbl_secattr_destroy(struct netlbl_lsm_secattr *secattr, - u32 clear_cache) +static inline void netlbl_secattr_destroy(struct netlbl_lsm_secattr *secattr) { - if (clear_cache && secattr->cache.data != NULL && secattr->cache.free) - secattr->cache.free(secattr->cache.data); + if (secattr->cache) + netlbl_secattr_cache_free(secattr->cache); kfree(secattr->domain); kfree(secattr->mls_cat); } @@ -178,17 +213,14 @@ static inline struct netlbl_lsm_secattr *netlbl_secattr_alloc(int flags) /** * netlbl_secattr_free - Frees a netlbl_lsm_secattr struct * @secattr: the struct to free - * @clear_cache: cache clear flag * * Description: - * Frees @secattr including all of the internal buffers. If @clear_cache is - * true then free the cache fields, otherwise leave them intact. 
+ * Frees @secattr including all of the internal buffers. * */ -static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr, - u32 clear_cache) +static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr) { - netlbl_secattr_destroy(secattr, clear_cache); + netlbl_secattr_destroy(secattr); kfree(secattr); } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index a8e2e879a647..bde8ccaa1531 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -43,6 +43,7 @@ #include <net/tcp.h> #include <net/netlabel.h> #include <net/cipso_ipv4.h> +#include <asm/atomic.h> #include <asm/bug.h> struct cipso_v4_domhsh_entry { @@ -79,7 +80,7 @@ struct cipso_v4_map_cache_entry { unsigned char *key; size_t key_len; - struct netlbl_lsm_cache lsm_data; + struct netlbl_lsm_cache *lsm_data; u32 activity; struct list_head list; @@ -188,13 +189,14 @@ static void cipso_v4_doi_domhsh_free(struct rcu_head *entry) * @entry: the entry to free * * Description: - * This function frees the memory associated with a cache entry. + * This function frees the memory associated with a cache entry including the + * LSM cache data if there are no longer any users, i.e. reference count == 0. * */ static void cipso_v4_cache_entry_free(struct cipso_v4_map_cache_entry *entry) { - if (entry->lsm_data.free) - entry->lsm_data.free(entry->lsm_data.data); + if (entry->lsm_data) + netlbl_secattr_cache_free(entry->lsm_data); kfree(entry->key); kfree(entry); } @@ -315,8 +317,8 @@ static int cipso_v4_cache_check(const unsigned char *key, entry->key_len == key_len && memcmp(entry->key, key, key_len) == 0) { entry->activity += 1; - secattr->cache.free = entry->lsm_data.free; - secattr->cache.data = entry->lsm_data.data; + atomic_inc(&entry->lsm_data->refcount); + secattr->cache = entry->lsm_data; if (prev_entry == NULL) { spin_unlock_bh(&cipso_v4_cache[bkt].lock); return 0; @@ -383,8 +385,8 @@ int cipso_v4_cache_add(const struct sk_buff *skb, memcpy(entry->key, cipso_ptr, cipso_ptr_len); entry->key_len = cipso_ptr_len; entry->hash = cipso_v4_map_cache_hash(cipso_ptr, cipso_ptr_len); - entry->lsm_data.free = secattr->cache.free; - entry->lsm_data.data = secattr->cache.data; + atomic_inc(&secattr->cache->refcount); + entry->lsm_data = secattr->cache; bkt = entry->hash & (CIPSO_V4_CACHE_BUCKETBITS - 1); spin_lock_bh(&cipso_v4_cache[bkt].lock); diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 54fb7de3c2b1..ff971103fd0c 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -200,7 +200,7 @@ void netlbl_cache_invalidate(void) int netlbl_cache_add(const struct sk_buff *skb, const struct netlbl_lsm_secattr *secattr) { - if (secattr->cache.data == NULL) + if (secattr->cache == NULL) return -ENOMSG; if (CIPSO_V4_OPTEXIST(skb)) diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 0c219a1b3243..bb2d2bc869ba 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2172,7 +2172,12 @@ struct netlbl_cache { */ static void selinux_netlbl_cache_free(const void *data) { - struct netlbl_cache *cache = NETLBL_CACHE(data); + struct netlbl_cache *cache; + + if (data == NULL) + return; + + cache = NETLBL_CACHE(data); switch (cache->type) { case NETLBL_CACHE_T_MLS: ebitmap_destroy(&cache->data.mls_label.level[0].cat); @@ -2197,17 +2202,20 @@ static void selinux_netlbl_cache_add(struct sk_buff *skb, struct context *ctx) struct netlbl_lsm_secattr secattr; netlbl_secattr_init(&secattr); + secattr.cache = 
netlbl_secattr_cache_alloc(GFP_ATOMIC); + if (secattr.cache == NULL) + goto netlbl_cache_add_return; cache = kzalloc(sizeof(*cache), GFP_ATOMIC); if (cache == NULL) - goto netlbl_cache_add_failure; - secattr.cache.free = selinux_netlbl_cache_free; - secattr.cache.data = (void *)cache; + goto netlbl_cache_add_return; + secattr.cache->free = selinux_netlbl_cache_free; + secattr.cache->data = (void *)cache; cache->type = NETLBL_CACHE_T_MLS; if (ebitmap_cpy(&cache->data.mls_label.level[0].cat, &ctx->range.level[0].cat) != 0) - goto netlbl_cache_add_failure; + goto netlbl_cache_add_return; cache->data.mls_label.level[1].cat.highbit = cache->data.mls_label.level[0].cat.highbit; cache->data.mls_label.level[1].cat.node = @@ -2215,13 +2223,10 @@ static void selinux_netlbl_cache_add(struct sk_buff *skb, struct context *ctx) cache->data.mls_label.level[0].sens = ctx->range.level[0].sens; cache->data.mls_label.level[1].sens = ctx->range.level[0].sens; - if (netlbl_cache_add(skb, &secattr) != 0) - goto netlbl_cache_add_failure; - - return; + netlbl_cache_add(skb, &secattr); -netlbl_cache_add_failure: - netlbl_secattr_destroy(&secattr, 1); +netlbl_cache_add_return: + netlbl_secattr_destroy(&secattr); } /** @@ -2263,8 +2268,8 @@ static int selinux_netlbl_secattr_to_sid(struct sk_buff *skb, POLICY_RDLOCK; - if (secattr->cache.data) { - cache = NETLBL_CACHE(secattr->cache.data); + if (secattr->cache) { + cache = NETLBL_CACHE(secattr->cache->data); switch (cache->type) { case NETLBL_CACHE_T_SID: *sid = cache->data.sid; @@ -2369,7 +2374,7 @@ static int selinux_netlbl_skbuff_getsid(struct sk_buff *skb, &secattr, base_sid, sid); - netlbl_secattr_destroy(&secattr, 0); + netlbl_secattr_destroy(&secattr); return rc; } @@ -2415,7 +2420,7 @@ static int selinux_netlbl_socket_setsid(struct socket *sock, u32 sid) if (rc == 0) sksec->nlbl_state = NLBL_LABELED; - netlbl_secattr_destroy(&secattr, 0); + netlbl_secattr_destroy(&secattr); netlbl_socket_setsid_return: POLICY_RDUNLOCK; @@ -2517,7 +2522,7 @@ void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock) sksec->sid, &nlbl_peer_sid) == 0) sksec->peer_sid = nlbl_peer_sid; - netlbl_secattr_destroy(&secattr, 0); + netlbl_secattr_destroy(&secattr); sksec->nlbl_state = NLBL_REQUIRE; -- cgit v1.2.3 From 134b0fc544ba062498451611cb6f3e4454221b3d Mon Sep 17 00:00:00 2001 From: James Morris <jmorris@namei.org> Date: Thu, 5 Oct 2006 15:42:27 -0500 Subject: IPsec: propagate security module errors up from flow_cache_lookup When a security module is loaded (in this case, SELinux), the security_xfrm_policy_lookup() hook can return an access denied permission (or other error). We were not handling that correctly, and in fact inverting the return logic and propagating a false "ok" back up to xfrm_lookup(), which then allowed packets to pass as if they were not associated with an xfrm policy. The way I was seeing the problem was when connecting via IPsec to a confined service on an SELinux box (vsftpd), which did not have the appropriate SELinux policy permissions to send packets via IPsec. The first SYNACK would be blocked, because of an uncached lookup via flow_cache_lookup(), which would fail to resolve an xfrm policy because the SELinux policy is checked at that point via the resolver. 
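To make the caching hazard concrete before describing the retransmission case, here is a minimal userspace model of the pre-fix contract. old_resolver() and struct flow_entry are simplified stand-ins for the kernel's flow_resolve_t callback and struct flow_cache_entry; this is an illustrative sketch, not the kernel code:

	#include <stdio.h>
	#include <stddef.h>

	/* Pre-fix contract: the resolver has no way to report failure, so a
	 * security-module denial and "no policy" both collapse to a NULL
	 * object, and the NULL gets cached like any other result.
	 */
	struct flow_entry { void *object; int valid; };

	static void old_resolver(int allowed, void **objp)
	{
		static int policy = 42;
		*objp = allowed ? (void *)&policy : NULL;
	}

	static void *old_lookup(struct flow_entry *fle, int allowed)
	{
		if (fle->valid)
			return fle->object;	/* cache hit: resolver never re-run */
		old_resolver(allowed, &fle->object);
		fle->valid = 1;			/* the NULL is cached too */
		return fle->object;
	}

	int main(void)
	{
		struct flow_entry fle = { NULL, 0 };
		printf("first lookup (denied): %p\n", old_lookup(&fle, 0));
		printf("later lookup, cached:  %p\n", old_lookup(&fle, 1));
		return 0;
	}

Both calls return NULL: once the denial has been cached as a null object, later lookups never consult the resolver (and hence the security module) again, and a null policy reads as "no policy applies, pass freely".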
However, retransmitted SYNACKs would then find a cached flow entry when calling into flow_cache_lookup() with a null xfrm policy, which is interpreted by xfrm_lookup() as the packet not having any associated policy and similarly to the first case, allowing it to pass without transformation. The solution presented here is to first ensure that errno values are correctly propagated all the way back up through the various call chains from security_xfrm_policy_lookup(), and handled correctly. Then, flow_cache_lookup() is modified, so that if the policy resolver fails (typically a permission denied via the security module), the flow cache entry is killed rather than having a null policy assigned (which indicates that the packet can pass freely). This also forces any future lookups for the same flow to consult the security module (e.g. SELinux) for current security policy (rather than, say, caching the error on the flow cache entry). Signed-off-by: James Morris <jmorris@namei.org> --- include/net/flow.h | 2 +- net/core/flow.c | 42 ++++++++++++++++++++----------- net/xfrm/xfrm_policy.c | 68 +++++++++++++++++++++++++++++++++++++++----------- 3 files changed, 82 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/include/net/flow.h b/include/net/flow.h index ddf5f3ca1720..3b44d72b27d3 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -97,7 +97,7 @@ struct flowi { #define FLOW_DIR_FWD 2 struct sock; -typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, +typedef int (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, void **objp, atomic_t **obj_refp); extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, diff --git a/net/core/flow.c b/net/core/flow.c index f23e7e386543..b16d31ae5e54 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -85,6 +85,14 @@ static void flow_cache_new_hashrnd(unsigned long arg) add_timer(&flow_hash_rnd_timer); } +static void flow_entry_kill(int cpu, struct flow_cache_entry *fle) +{ + if (fle->object) + atomic_dec(fle->object_ref); + kmem_cache_free(flow_cachep, fle); + flow_count(cpu)--; +} + static void __flow_cache_shrink(int cpu, int shrink_to) { struct flow_cache_entry *fle, **flp; @@ -100,10 +108,7 @@ static void __flow_cache_shrink(int cpu, int shrink_to) } while ((fle = *flp) != NULL) { *flp = fle->next; - if (fle->object) - atomic_dec(fle->object_ref); - kmem_cache_free(flow_cachep, fle); - flow_count(cpu)--; + flow_entry_kill(cpu, fle); } } } @@ -220,24 +225,33 @@ void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, nocache: { + int err; void *obj; atomic_t *obj_ref; - resolver(key, family, dir, &obj, &obj_ref); + err = resolver(key, family, dir, &obj, &obj_ref); if (fle) { - fle->genid = atomic_read(&flow_cache_genid); - - if (fle->object) - atomic_dec(fle->object_ref); - - fle->object = obj; - fle->object_ref = obj_ref; - if (obj) - atomic_inc(fle->object_ref); + if (err) { + /* Force security policy check on next lookup */ + *head = fle->next; + flow_entry_kill(cpu, fle); + } else { + fle->genid = atomic_read(&flow_cache_genid); + + if (fle->object) + atomic_dec(fle->object_ref); + + fle->object = obj; + fle->object_ref = obj_ref; + if (obj) + atomic_inc(fle->object_ref); + } } local_bh_enable(); + if (err) + obj = ERR_PTR(err); return obj; } } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2a7861661f14..fffdd34f3baf 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -883,30 +883,32 @@ out: } EXPORT_SYMBOL(xfrm_policy_walk); -/* Find policy to apply to 
this flow. */ - +/* + * Find policy to apply to this flow. + * + * Returns 0 if policy found, else an -errno. + */ static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, u8 type, u16 family, int dir) { struct xfrm_selector *sel = &pol->selector; - int match; + int match, ret = -ESRCH; if (pol->family != family || pol->type != type) - return 0; + return ret; match = xfrm_selector_match(sel, fl, family); - if (match) { - if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) - return 1; - } + if (match) + ret = security_xfrm_policy_lookup(pol, fl->secid, dir); - return 0; + return ret; } static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, u16 family, u8 dir) { + int err; struct xfrm_policy *pol, *ret; xfrm_address_t *daddr, *saddr; struct hlist_node *entry; @@ -922,7 +924,15 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, chain = policy_hash_direct(daddr, saddr, family, dir); ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { - if (xfrm_policy_match(pol, fl, type, family, dir)) { + err = xfrm_policy_match(pol, fl, type, family, dir); + if (err) { + if (err == -ESRCH) + continue; + else { + ret = ERR_PTR(err); + goto fail; + } + } else { ret = pol; priority = ret->priority; break; @@ -930,36 +940,53 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, } chain = &xfrm_policy_inexact[dir]; hlist_for_each_entry(pol, entry, chain, bydst) { - if (xfrm_policy_match(pol, fl, type, family, dir) && - pol->priority < priority) { + err = xfrm_policy_match(pol, fl, type, family, dir); + if (err) { + if (err == -ESRCH) + continue; + else { + ret = ERR_PTR(err); + goto fail; + } + } else if (pol->priority < priority) { ret = pol; break; } } if (ret) xfrm_pol_hold(ret); +fail: read_unlock_bh(&xfrm_policy_lock); return ret; } -static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, +static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, void **objp, atomic_t **obj_refp) { struct xfrm_policy *pol; + int err = 0; #ifdef CONFIG_XFRM_SUB_POLICY pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); - if (pol) + if (IS_ERR(pol)) { + err = PTR_ERR(pol); + pol = NULL; + } + if (pol || err) goto end; #endif pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); - + if (IS_ERR(pol)) { + err = PTR_ERR(pol); + pol = NULL; + } #ifdef CONFIG_XFRM_SUB_POLICY end: #endif if ((*objp = (void *) pol) != NULL) *obj_refp = &pol->refcnt; + return err; } static inline int policy_to_flow_dir(int dir) @@ -1297,6 +1324,8 @@ restart: policy = flow_cache_lookup(fl, dst_orig->ops->family, dir, xfrm_policy_lookup); + if (IS_ERR(policy)) + return PTR_ERR(policy); } if (!policy) @@ -1343,6 +1372,10 @@ restart: fl, family, XFRM_POLICY_OUT); if (pols[1]) { + if (IS_ERR(pols[1])) { + err = PTR_ERR(pols[1]); + goto error; + } if (pols[1]->action == XFRM_POLICY_BLOCK) { err = -EPERM; goto error; @@ -1581,6 +1614,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = flow_cache_lookup(&fl, family, fl_dir, xfrm_policy_lookup); + if (IS_ERR(pol)) + return 0; + if (!pol) { if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) { xfrm_secpath_reject(xerr_idx, skb, &fl); @@ -1599,6 +1635,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, &fl, family, XFRM_POLICY_IN); if (pols[1]) { + if (IS_ERR(pols[1])) + return 0; pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec; npols ++; } -- cgit v1.2.3 From 
5b368e61c2bcb2666bb66e2acf1d6d85ba6f474d Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala <vyekkirala@trustedcs.com> Date: Thu, 5 Oct 2006 15:42:18 -0500 Subject: IPsec: correct semantics for SELinux policy matching Currently when an IPSec policy rule doesn't specify a security context, it is assumed to be "unlabeled" by SELinux, and so the IPSec policy rule fails to match to a flow that it would otherwise match to, unless one has explicitly added an SELinux policy rule allowing the flow to "polmatch" to the "unlabeled" IPSec policy rules. In the absence of such an explicitly added SELinux policy rule, the IPSec policy rule fails to match and so the packet(s) flow in clear text without the otherwise applicable xfrm(s) applied. The above SELinux behavior violates the SELinux security notion of "deny by default" which should actually translate to "encrypt by default" in the above case. This was first reported by Evgeniy Polyakov and the way James Morris was seeing the problem was when connecting via IPsec to a confined service on an SELinux box (vsftpd), which did not have the appropriate SELinux policy permissions to send packets via IPsec. With this patch applied, SELinux "polmatching" of flows Vs. IPSec policy rules will only come into play when there's a explicit context specified for the IPSec policy rule (which also means there's corresponding SELinux policy allowing appropriate domains/flows to polmatch to this context). Secondly, when a security module is loaded (in this case, SELinux), the security_xfrm_policy_lookup() hook can return errors other than access denied, such as -EINVAL. We were not handling that correctly, and in fact inverting the return logic and propagating a false "ok" back up to xfrm_lookup(), which then allowed packets to pass as if they were not associated with an xfrm policy. The solution for this is to first ensure that errno values are correctly propagated all the way back up through the various call chains from security_xfrm_policy_lookup(), and handled correctly. Then, flow_cache_lookup() is modified, so that if the policy resolver fails (typically a permission denied via the security module), the flow cache entry is killed rather than having a null policy assigned (which indicates that the packet can pass freely). This also forces any future lookups for the same flow to consult the security module (e.g. SELinux) for current security policy (rather than, say, caching the error on the flow cache entry). This patch: Fix the selinux side of things. This makes sure SELinux polmatching of flow contexts to IPSec policy rules comes into play only when an explicit context is associated with the IPSec policy rule. Also, this no longer defaults the context of a socket policy to the context of the socket since the "no explicit context" case is now handled properly. 
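As a rough sketch of the intended semantics, the labeled/unlabeled combinations can be modeled in userspace as below. state_pol_match() and avc_polmatch_ok() are hypothetical stand-ins for selinux_xfrm_state_pol_flow_match() and its avc_has_perm() polmatch check; this is an illustration of the rules, not the SELinux code itself:

	#include <stdio.h>
	#include <stdbool.h>

	/* Post-patch matching rules:
	 *  - labeled policy,   unlabeled SA -> no match
	 *  - unlabeled policy, labeled SA   -> no match
	 *  - unlabeled policy, unlabeled SA -> match, no avc check needed
	 *  - labeled policy,   labeled SA   -> ask the security server
	 */
	static bool avc_polmatch_ok(void)
	{
		return true;	/* stand-in for a granted avc_has_perm() */
	}

	static bool state_pol_match(bool pol_labeled, bool sa_labeled)
	{
		if (pol_labeled != sa_labeled)
			return false;
		if (!pol_labeled)
			return true;
		return avc_polmatch_ok();
	}

	int main(void)
	{
		for (int p = 0; p <= 1; p++)
			for (int s = 0; s <= 1; s++)
				printf("policy %slabeled, SA %slabeled -> %s\n",
				       p ? "" : "un", s ? "" : "un",
				       state_pol_match(p, s) ? "match" : "no match");
		return 0;
	}

Only the labeled-policy/labeled-SA case reaches the security server; mismatched labeling fails outright, and the fully unlabeled case matches without a polmatch check, which is what removes the need for an explicit SELinux rule allowing flows to polmatch "unlabeled" IPSec policy rules.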
Signed-off-by: Venkat Yekkirala <vyekkirala@TrustedCS.com> Signed-off-by: James Morris <jmorris@namei.org> --- include/linux/security.h | 24 +++++++------------ include/net/xfrm.h | 3 ++- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/xfrm6_policy.c | 2 +- net/key/af_key.c | 5 ---- net/xfrm/xfrm_policy.c | 7 +++--- net/xfrm/xfrm_user.c | 9 ------- security/dummy.c | 3 ++- security/selinux/include/xfrm.h | 3 ++- security/selinux/xfrm.c | 53 +++++++++++++++++++++++++++++++---------- 10 files changed, 62 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/include/linux/security.h b/include/linux/security.h index 9b5fea81f55e..b200b9856f32 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -882,7 +882,8 @@ struct request_sock; * Check permission when a flow selects a xfrm_policy for processing * XFRMs on a packet. The hook is called when selecting either a * per-socket policy or a generic xfrm policy. - * Return 0 if permission is granted. + * Return 0 if permission is granted, -ESRCH otherwise, or -errno + * on other errors. * @xfrm_state_pol_flow_match: * @x contains the state to match. * @xp contains the policy to check for a match. @@ -891,6 +892,7 @@ struct request_sock; * @xfrm_flow_state_match: * @fl contains the flow key to match. * @xfrm points to the xfrm_state to match. + * @xp points to the xfrm_policy to match. * Return 1 if there is a match. * @xfrm_decode_session: * @skb points to skb to decode. @@ -1388,7 +1390,8 @@ struct security_operations { int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 fl_secid, u8 dir); int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); - int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm); + int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm, + struct xfrm_policy *xp); int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ @@ -3120,11 +3123,6 @@ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm return security_ops->xfrm_policy_alloc_security(xp, sec_ctx, NULL); } -static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) -{ - return security_ops->xfrm_policy_alloc_security(xp, NULL, sk); -} - static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) { return security_ops->xfrm_policy_clone_security(old, new); @@ -3175,9 +3173,10 @@ static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, return security_ops->xfrm_state_pol_flow_match(x, xp, fl); } -static inline int security_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) +static inline int security_xfrm_flow_state_match(struct flowi *fl, + struct xfrm_state *xfrm, struct xfrm_policy *xp) { - return security_ops->xfrm_flow_state_match(fl, xfrm); + return security_ops->xfrm_flow_state_match(fl, xfrm, xp); } static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) @@ -3197,11 +3196,6 @@ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm return 0; } -static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) -{ - return 0; -} - static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) { return 0; @@ -3249,7 +3243,7 @@ static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, } static inline int security_xfrm_flow_state_match(struct flowi *fl, - struct 
xfrm_state *xfrm) + struct xfrm_state *xfrm, struct xfrm_policy *xp) { return 1; } diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 1e2a4ddec96e..737fdb2ee8a4 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -995,7 +995,8 @@ struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, int create, unsigned short family); extern void xfrm_policy_flush(u8 type); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); -extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); +extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, + struct flowi *fl, int family, int strict); extern void xfrm_init_pmtu(struct dst_entry *dst); extern wait_queue_head_t km_waitq; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 7a7a00147e55..1bed0cdf53e3 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -52,7 +52,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) xdst->u.rt.fl.fl4_dst == fl->fl4_dst && xdst->u.rt.fl.fl4_src == fl->fl4_src && xdst->u.rt.fl.fl4_tos == fl->fl4_tos && - xfrm_bundle_ok(xdst, fl, AF_INET, 0)) { + xfrm_bundle_ok(policy, xdst, fl, AF_INET, 0)) { dst_clone(dst); break; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 6a252e2134d1..73cee2ec07e8 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -73,7 +73,7 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) xdst->u.rt6.rt6i_src.plen); if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) && ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) && - xfrm_bundle_ok(xdst, fl, AF_INET6, + xfrm_bundle_ok(policy, xdst, fl, AF_INET6, (xdst->u.rt6.rt6i_dst.plen != 128 || xdst->u.rt6.rt6i_src.plen != 128))) { dst_clone(dst); diff --git a/net/key/af_key.c b/net/key/af_key.c index ff98e70b0931..20ff7cca1d07 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2928,11 +2928,6 @@ static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, if (*dir) goto out; } - else { - *dir = security_xfrm_sock_policy_alloc(xp, sk); - if (*dir) - goto out; - } *dir = pol->sadb_x_policy_dir-1; return xp; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index fffdd34f3baf..695761ff1321 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1744,7 +1744,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); + return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) @@ -1866,7 +1866,8 @@ EXPORT_SYMBOL(xfrm_init_pmtu); * still valid. 
*/ -int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int strict) +int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, + struct flowi *fl, int family, int strict) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; @@ -1883,7 +1884,7 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int str if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) return 0; - if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm)) + if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm, pol)) return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index d54b3a70d5df..2b2e59d8ffbc 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1992,15 +1992,6 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, xp->type = XFRM_POLICY_TYPE_MAIN; copy_templates(xp, ut, nr); - if (!xp->security) { - int err = security_xfrm_sock_policy_alloc(xp, sk); - if (err) { - kfree(xp); - *dir = err; - return NULL; - } - } - *dir = p->dir; return xp; diff --git a/security/dummy.c b/security/dummy.c index aeee70565509..43874c1e6e23 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -881,7 +881,8 @@ static int dummy_xfrm_state_pol_flow_match(struct xfrm_state *x, return 1; } -static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) +static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm, + struct xfrm_policy *xp) { return 1; } diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 81eb59890162..526b28019aca 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -19,7 +19,8 @@ int selinux_xfrm_state_delete(struct xfrm_state *x); int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir); int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); -int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm); +int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm, + struct xfrm_policy *xp); /* diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 3e742b850af6..675b995a67c3 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -77,8 +77,8 @@ static inline int selinux_authorizable_xfrm(struct xfrm_state *x) */ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) { - int rc = 0; - u32 sel_sid = SECINITSID_UNLABELED; + int rc; + u32 sel_sid; struct xfrm_sec_ctx *ctx; /* Context sid is either set to label or ANY_ASSOC */ @@ -88,11 +88,21 @@ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) sel_sid = ctx->ctx_sid; } + else + /* + * All flows should be treated as polmatch'ing an + * otherwise applicable "non-labeled" policy. This + * would prevent inadvertent "leaks". 
+ */ + return 0; rc = avc_has_perm(fl_secid, sel_sid, SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, NULL); + if (rc == -EACCES) + rc = -ESRCH; + return rc; } @@ -108,15 +118,20 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy * u32 pol_sid; int err; - if (x->security) - state_sid = x->security->ctx_sid; - else - state_sid = SECINITSID_UNLABELED; - - if (xp->security) + if (xp->security) { + if (!x->security) + /* unlabeled SA and labeled policy can't match */ + return 0; + else + state_sid = x->security->ctx_sid; pol_sid = xp->security->ctx_sid; - else - pol_sid = SECINITSID_UNLABELED; + } else + if (x->security) + /* unlabeled policy and labeled SA can't match */ + return 0; + else + /* unlabeled policy and unlabeled SA match all flows */ + return 1; err = avc_has_perm(state_sid, pol_sid, SECCLASS_ASSOCIATION, ASSOCIATION__POLMATCH, @@ -125,7 +140,11 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy * if (err) return 0; - return selinux_xfrm_flow_state_match(fl, x); + err = avc_has_perm(fl->secid, state_sid, SECCLASS_ASSOCIATION, + ASSOCIATION__SENDTO, + NULL)? 0:1; + + return err; } /* @@ -133,12 +152,22 @@ int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy * * can use a given security association. */ -int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) +int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm, + struct xfrm_policy *xp) { int rc = 0; u32 sel_sid = SECINITSID_UNLABELED; struct xfrm_sec_ctx *ctx; + if (!xp->security) + if (!xfrm->security) + return 1; + else + return 0; + else + if (!xfrm->security) + return 0; + /* Context sid is either set to label or ANY_ASSOC */ if ((ctx = xfrm->security)) { if (!selinux_authorizable_ctx(ctx)) -- cgit v1.2.3 From 331c4ee7faa4ee1e1404c872a139784753100498 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich <vladislav.yasevich@hp.com> Date: Mon, 9 Oct 2006 21:34:04 -0700 Subject: [SCTP]: Fix receive buffer accounting. When doing receiver buffer accounting, we always used skb->truesize. This is problematic when processing bundled DATA chunks because for every DATA chunk that could be small part of one large skb, we would charge the size of the entire skb. The new approach is to store the size of the DATA chunk we are accounting for in the sctp_ulpevent structure and use that stored value for accounting. Signed-off-by: Vlad Yasevich <vladislav.yasevich@hp.com> Signed-off-by: Sridhar Samudrala <sri@us.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/sctp/sctp.h | 14 ++++++++++++++ include/net/sctp/ulpevent.h | 1 + net/sctp/socket.c | 22 ++++++++++++++++++---- net/sctp/ulpevent.c | 25 +++++++++++++++---------- net/sctp/ulpqueue.c | 2 +- 5 files changed, 49 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index ee68a3124076..764e3af5be93 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -139,6 +139,7 @@ int sctp_inet_listen(struct socket *sock, int backlog); void sctp_write_space(struct sock *sk); unsigned int sctp_poll(struct file *file, struct socket *sock, poll_table *wait); +void sctp_sock_rfree(struct sk_buff *skb); /* * sctp/primitive.c @@ -444,6 +445,19 @@ static inline struct list_head *sctp_list_dequeue(struct list_head *list) return result; } +/* SCTP version of skb_set_owner_r. We need this one because + * of the way we have to do receive buffer accounting on bundled + * chunks. 
+ */ +static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) +{ + struct sctp_ulpevent *event = sctp_skb2event(skb); + + skb->sk = sk; + skb->destructor = sctp_sock_rfree; + atomic_add(event->rmem_len, &sk->sk_rmem_alloc); +} + /* Tests if the list has one and only one entry. */ static inline int sctp_list_single_entry(struct list_head *head) { diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h index 6c40cfc4832d..1a4ddc1ec7d2 100644 --- a/include/net/sctp/ulpevent.h +++ b/include/net/sctp/ulpevent.h @@ -63,6 +63,7 @@ struct sctp_ulpevent { __u32 cumtsn; int msg_flags; int iif; + unsigned int rmem_len; }; /* Retrieve the skb this event sits inside of. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3fe906d65069..9deec4391187 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -5362,6 +5362,20 @@ static void sctp_wfree(struct sk_buff *skb) sctp_association_put(asoc); } +/* Do accounting for the receive space on the socket. + * Accounting for the association is done in ulpevent.c + * We set this as a destructor for the cloned data skbs so that + * accounting is done at the correct time. + */ +void sctp_sock_rfree(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + struct sctp_ulpevent *event = sctp_skb2event(skb); + + atomic_sub(event->rmem_len, &sk->sk_rmem_alloc); +} + + /* Helper function to wait for space in the sndbuf. */ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, size_t msg_len) @@ -5634,10 +5648,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_skb_for_each(skb, &oldsk->sk_receive_queue, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { - sock_rfree(skb); + sctp_sock_rfree(skb); __skb_unlink(skb, &oldsk->sk_receive_queue); __skb_queue_tail(&newsk->sk_receive_queue, skb); - skb_set_owner_r(skb, newsk); + sctp_skb_set_owner_r(skb, newsk); } } @@ -5665,10 +5679,10 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, sctp_skb_for_each(skb, &oldsp->pd_lobby, tmp) { event = sctp_skb2event(skb); if (event->asoc == assoc) { - sock_rfree(skb); + sctp_sock_rfree(skb); __skb_unlink(skb, &oldsp->pd_lobby); __skb_queue_tail(queue, skb); - skb_set_owner_r(skb, newsk); + sctp_skb_set_owner_r(skb, newsk); } } diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index ee236784a6bb..a015283a9087 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -55,10 +55,13 @@ static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event); /* Initialize an ULP event from an given skb. */ -SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, int msg_flags) +SCTP_STATIC void sctp_ulpevent_init(struct sctp_ulpevent *event, + int msg_flags, + unsigned int len) { memset(event, 0, sizeof(struct sctp_ulpevent)); event->msg_flags = msg_flags; + event->rmem_len = len; } /* Create a new sctp_ulpevent. 
*/ @@ -73,7 +76,7 @@ SCTP_STATIC struct sctp_ulpevent *sctp_ulpevent_new(int size, int msg_flags, goto fail; event = sctp_skb2event(skb); - sctp_ulpevent_init(event, msg_flags); + sctp_ulpevent_init(event, msg_flags, skb->truesize); return event; @@ -101,17 +104,16 @@ static inline void sctp_ulpevent_set_owner(struct sctp_ulpevent *event, sctp_association_hold((struct sctp_association *)asoc); skb = sctp_event2skb(event); event->asoc = (struct sctp_association *)asoc; - atomic_add(skb->truesize, &event->asoc->rmem_alloc); - skb_set_owner_r(skb, asoc->base.sk); + atomic_add(event->rmem_len, &event->asoc->rmem_alloc); + sctp_skb_set_owner_r(skb, asoc->base.sk); } /* A simple destructor to give up the reference to the association. */ static inline void sctp_ulpevent_release_owner(struct sctp_ulpevent *event) { struct sctp_association *asoc = event->asoc; - struct sk_buff *skb = sctp_event2skb(event); - atomic_sub(skb->truesize, &asoc->rmem_alloc); + atomic_sub(event->rmem_len, &asoc->rmem_alloc); sctp_association_put(asoc); } @@ -372,7 +374,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_remote_error( /* Embed the event fields inside the cloned skb. */ event = sctp_skb2event(skb); - sctp_ulpevent_init(event, MSG_NOTIFICATION); + sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize); sre = (struct sctp_remote_error *) skb_push(skb, sizeof(struct sctp_remote_error)); @@ -464,7 +466,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_send_failed( /* Embed the event fields inside the cloned skb. */ event = sctp_skb2event(skb); - sctp_ulpevent_init(event, MSG_NOTIFICATION); + sctp_ulpevent_init(event, MSG_NOTIFICATION, skb->truesize); ssf = (struct sctp_send_failed *) skb_push(skb, sizeof(struct sctp_send_failed)); @@ -682,8 +684,11 @@ struct sctp_ulpevent *sctp_ulpevent_make_rcvmsg(struct sctp_association *asoc, /* Embed the event fields inside the cloned skb. */ event = sctp_skb2event(skb); - /* Initialize event with flags 0. */ - sctp_ulpevent_init(event, 0); + /* Initialize event with flags 0 and correct length + * Since this is a clone of the original skb, only account for + * the data of this chunk as other chunks will be accounted separately. + */ + sctp_ulpevent_init(event, 0, skb->len + sizeof(struct sk_buff)); sctp_ulpevent_receive_data(event, asoc); diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 575e556aeb3e..e1d144275f97 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -309,7 +309,7 @@ static struct sctp_ulpevent *sctp_make_reassembled_event(struct sk_buff_head *qu if (!new) return NULL; /* try again later */ - new->sk = f_frag->sk; + sctp_skb_set_owner_r(new, f_frag->sk); skb_shinfo(new)->frag_list = pos; } else -- cgit v1.2.3 From 42b6785eeb40fe3e9dab9981b6e3231a77c7c2f6 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> Date: Tue, 10 Oct 2006 19:42:09 -0700 Subject: [NET]: Introduce protocol-specific destructor for time-wait sockets. Signed-off-by: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> Signed-off-by: David S. 
Miller <davem@davemloft.net> --- include/net/inet_timewait_sock.h | 1 + include/net/timewait_sock.h | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 6d14c22a00c5..5f48748fe017 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -196,6 +196,7 @@ static inline void inet_twsk_put(struct inet_timewait_sock *tw) { if (atomic_dec_and_test(&tw->tw_refcnt)) { struct module *owner = tw->tw_prot->owner; + twsk_destructor((struct sock *)tw); #ifdef SOCK_REFCNT_DEBUG printk(KERN_DEBUG "%s timewait_sock %p released\n", tw->tw_prot->name, tw); diff --git a/include/net/timewait_sock.h b/include/net/timewait_sock.h index 2544281e1d5e..be293d795e38 100644 --- a/include/net/timewait_sock.h +++ b/include/net/timewait_sock.h @@ -19,6 +19,7 @@ struct timewait_sock_ops { unsigned int twsk_obj_size; int (*twsk_unique)(struct sock *sk, struct sock *sktw, void *twp); + void (*twsk_destructor)(struct sock *sk); }; static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp) @@ -28,4 +29,10 @@ static inline int twsk_unique(struct sock *sk, struct sock *sktw, void *twp) return 0; } +static inline void twsk_destructor(struct sock *sk) +{ + if (sk->sk_prot->twsk_prot->twsk_destructor != NULL) + sk->sk_prot->twsk_prot->twsk_destructor(sk); +} + #endif /* _TIMEWAIT_SOCK_H */ -- cgit v1.2.3 From 2b1191af683d16a899c2b81b87b605841ceffdec Mon Sep 17 00:00:00 2001 From: Jens Axboe <jens.axboe@oracle.com> Date: Mon, 9 Oct 2006 13:04:35 +0200 Subject: [PATCH] elevator: elevator_type member not used elevator_type field in elevator_type structure is useless: it isn't used anywhere in kernel sources. Signed-off-by: Vasily Tarasov <vtaras@openvz.org> Signed-off-by: Jens Axboe <jens.axboe@oracle.com> --- include/linux/elevator.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index b3370ef5164d..2fa9f1144228 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -70,7 +70,6 @@ struct elevator_type { struct list_head list; struct elevator_ops ops; - struct elevator_type *elevator_type; struct elv_fs_entry *elevator_attrs; char elevator_name[ELV_NAME_MAX]; struct module *elevator_owner; -- cgit v1.2.3 From cea2885a2e989d1dc19af1fc991717b33b7d1456 Mon Sep 17 00:00:00 2001 From: Jens Axboe <jens.axboe@oracle.com> Date: Thu, 12 Oct 2006 15:08:45 +0200 Subject: [PATCH] ide-cd: fix breakage with internally queued commands We still need to maintain a private PC style command, since it isn't completely unified with REQ_TYPE_BLOCK_PC yet. 
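A compressed model of the resulting dispatch in ide_do_rw_cdrom() is sketched below. This is a userspace illustration only: the enum lists just the members relevant here, and dispatch() merely names the handler the real driver would call:

	#include <stdio.h>

	/* Only the request types involved in this change. */
	enum rq_cmd_type {
		REQ_TYPE_FS,
		REQ_TYPE_SENSE,
		REQ_TYPE_BLOCK_PC,
		REQ_TYPE_ATA_PC,
	};

	static const char *dispatch(enum rq_cmd_type t)
	{
		switch (t) {
		case REQ_TYPE_SENSE:
		case REQ_TYPE_ATA_PC:	/* internally queued packet command */
			return "cdrom_do_packet_command";
		case REQ_TYPE_BLOCK_PC:	/* external block-layer PC request */
			return "cdrom_do_block_pc";
		default:
			return "read/write path";
		}
	}

	int main(void)
	{
		printf("ATA_PC   -> %s\n", dispatch(REQ_TYPE_ATA_PC));
		printf("BLOCK_PC -> %s\n", dispatch(REQ_TYPE_BLOCK_PC));
		return 0;
	}

Internally queued packet commands are tagged REQ_TYPE_ATA_PC and take the same path as sense requests, while genuine block-layer PC requests keep the REQ_TYPE_BLOCK_PC path.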
Signed-off-by: Jens Axboe <jens.axboe@oracle.com> --- drivers/ide/ide-cd.c | 5 +++-- include/linux/blkdev.h | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 69bbb6206a00..e7513e55ace8 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -597,7 +597,7 @@ static void cdrom_prepare_request(ide_drive_t *drive, struct request *rq) struct cdrom_info *cd = drive->driver_data; ide_init_drive_cmd(rq); - rq->cmd_type = REQ_TYPE_BLOCK_PC; + rq->cmd_type = REQ_TYPE_ATA_PC; rq->rq_disk = cd->disk; } @@ -2023,7 +2023,8 @@ ide_do_rw_cdrom (ide_drive_t *drive, struct request *rq, sector_t block) } info->last_block = block; return action; - } else if (rq->cmd_type == REQ_TYPE_SENSE) { + } else if (rq->cmd_type == REQ_TYPE_SENSE || + rq->cmd_type == REQ_TYPE_ATA_PC) { return cdrom_do_packet_command(drive); } else if (blk_pc_request(rq)) { return cdrom_do_block_pc(drive, rq); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 26f7856ff812..d370d2cfe138 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -157,6 +157,7 @@ enum rq_cmd_type_bits { REQ_TYPE_ATA_CMD, REQ_TYPE_ATA_TASK, REQ_TYPE_ATA_TASKFILE, + REQ_TYPE_ATA_PC, }; /* -- cgit v1.2.3 From 58f07943b0ef1e59cbf9a45cdc727048d224637f Mon Sep 17 00:00:00 2001 From: James Bottomley <jejb@hobholes.localdomain> Date: Thu, 12 Oct 2006 22:23:18 -0500 Subject: [VOYAGER] fix up attribute packed specifiers in voyager.h The old style (attribute on each structure entry) never really worked. Move it to an attribute per structure Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com> --- include/asm-i386/voyager.h | 160 ++++++++++++++++++++++----------------------- 1 file changed, 80 insertions(+), 80 deletions(-) (limited to 'include') diff --git a/include/asm-i386/voyager.h b/include/asm-i386/voyager.h index e74c54aa757f..5b27838905b2 100644 --- a/include/asm-i386/voyager.h +++ b/include/asm-i386/voyager.h @@ -118,33 +118,33 @@ typedef struct voyager_module { } voyager_module_t; typedef struct voyager_eeprom_hdr { - __u8 module_id[4] __attribute__((packed)); - __u8 version_id __attribute__((packed)); - __u8 config_id __attribute__((packed)); - __u16 boundry_id __attribute__((packed)); /* boundary scan id */ - __u16 ee_size __attribute__((packed)); /* size of EEPROM */ - __u8 assembly[11] __attribute__((packed)); /* assembly # */ - __u8 assembly_rev __attribute__((packed)); /* assembly rev */ - __u8 tracer[4] __attribute__((packed)); /* tracer number */ - __u16 assembly_cksum __attribute__((packed)); /* asm checksum */ - __u16 power_consump __attribute__((packed)); /* pwr requirements */ - __u16 num_asics __attribute__((packed)); /* number of asics */ - __u16 bist_time __attribute__((packed)); /* min. 
bist time */ - __u16 err_log_offset __attribute__((packed)); /* error log offset */ - __u16 scan_path_offset __attribute__((packed));/* scan path offset */ - __u16 cct_offset __attribute__((packed)); - __u16 log_length __attribute__((packed)); /* length of err log */ - __u16 xsum_end __attribute__((packed)); /* offset to end of + __u8 module_id[4]; + __u8 version_id; + __u8 config_id; + __u16 boundry_id; /* boundary scan id */ + __u16 ee_size; /* size of EEPROM */ + __u8 assembly[11]; /* assembly # */ + __u8 assembly_rev; /* assembly rev */ + __u8 tracer[4]; /* tracer number */ + __u16 assembly_cksum; /* asm checksum */ + __u16 power_consump; /* pwr requirements */ + __u16 num_asics; /* number of asics */ + __u16 bist_time; /* min. bist time */ + __u16 err_log_offset; /* error log offset */ + __u16 scan_path_offset;/* scan path offset */ + __u16 cct_offset; + __u16 log_length; /* length of err log */ + __u16 xsum_end; /* offset to end of checksum */ - __u8 reserved[4] __attribute__((packed)); - __u8 sflag __attribute__((packed)); /* starting sentinal */ - __u8 part_number[13] __attribute__((packed)); /* prom part number */ - __u8 version[10] __attribute__((packed)); /* version number */ - __u8 signature[8] __attribute__((packed)); - __u16 eeprom_chksum __attribute__((packed)); - __u32 data_stamp_offset __attribute__((packed)); - __u8 eflag __attribute__((packed)); /* ending sentinal */ -} voyager_eprom_hdr_t; + __u8 reserved[4]; + __u8 sflag; /* starting sentinal */ + __u8 part_number[13]; /* prom part number */ + __u8 version[10]; /* version number */ + __u8 signature[8]; + __u16 eeprom_chksum; + __u32 data_stamp_offset; + __u8 eflag ; /* ending sentinal */ +} __attribute__((packed)) voyager_eprom_hdr_t; @@ -155,30 +155,30 @@ typedef struct voyager_eeprom_hdr { * in the module EPROMs. 
We really only care about the IDs and * offsets */ typedef struct voyager_sp_table { - __u8 asic_id __attribute__((packed)); - __u8 bypass_flag __attribute__((packed)); - __u16 asic_data_offset __attribute__((packed)); - __u16 config_data_offset __attribute__((packed)); -} voyager_sp_table_t; + __u8 asic_id; + __u8 bypass_flag; + __u16 asic_data_offset; + __u16 config_data_offset; +} __attribute__((packed)) voyager_sp_table_t; typedef struct voyager_jtag_table { - __u8 icode[4] __attribute__((packed)); - __u8 runbist[4] __attribute__((packed)); - __u8 intest[4] __attribute__((packed)); - __u8 samp_preld[4] __attribute__((packed)); - __u8 ireg_len __attribute__((packed)); -} voyager_jtt_t; + __u8 icode[4]; + __u8 runbist[4]; + __u8 intest[4]; + __u8 samp_preld[4]; + __u8 ireg_len; +} __attribute__((packed)) voyager_jtt_t; typedef struct voyager_asic_data_table { - __u8 jtag_id[4] __attribute__((packed)); - __u16 length_bsr __attribute__((packed)); - __u16 length_bist_reg __attribute__((packed)); - __u32 bist_clk __attribute__((packed)); - __u16 subaddr_bits __attribute__((packed)); - __u16 seed_bits __attribute__((packed)); - __u16 sig_bits __attribute__((packed)); - __u16 jtag_offset __attribute__((packed)); -} voyager_at_t; + __u8 jtag_id[4]; + __u16 length_bsr; + __u16 length_bist_reg; + __u32 bist_clk; + __u16 subaddr_bits; + __u16 seed_bits; + __u16 sig_bits; + __u16 jtag_offset; +} __attribute__((packed)) voyager_at_t; /* Voyager Interrupt Controller (VIC) registers */ @@ -328,52 +328,52 @@ struct voyager_bios_info { #define NUMBER_OF_POS_REGS 8 typedef struct { - __u8 MC_Slot __attribute__((packed)); - __u8 POS_Values[NUMBER_OF_POS_REGS] __attribute__((packed)); -} MC_SlotInformation_t; + __u8 MC_Slot; + __u8 POS_Values[NUMBER_OF_POS_REGS]; +} __attribute__((packed)) MC_SlotInformation_t; struct QuadDescription { - __u8 Type __attribute__((packed)); /* for type 0 (DYADIC or MONADIC) all fields + __u8 Type; /* for type 0 (DYADIC or MONADIC) all fields * will be zero except for slot */ - __u8 StructureVersion __attribute__((packed)); - __u32 CPI_BaseAddress __attribute__((packed)); - __u32 LARC_BankSize __attribute__((packed)); - __u32 LocalMemoryStateBits __attribute__((packed)); - __u8 Slot __attribute__((packed)); /* Processor slots 1 - 4 */ -}; + __u8 StructureVersion; + __u32 CPI_BaseAddress; + __u32 LARC_BankSize; + __u32 LocalMemoryStateBits; + __u8 Slot; /* Processor slots 1 - 4 */ +} __attribute__((packed)); struct ProcBoardInfo { - __u8 Type __attribute__((packed)); - __u8 StructureVersion __attribute__((packed)); - __u8 NumberOfBoards __attribute__((packed)); - struct QuadDescription QuadData[MAX_PROCESSOR_BOARDS] __attribute__((packed)); -}; + __u8 Type; + __u8 StructureVersion; + __u8 NumberOfBoards; + struct QuadDescription QuadData[MAX_PROCESSOR_BOARDS]; +} __attribute__((packed)); struct CacheDescription { - __u8 Level __attribute__((packed)); - __u32 TotalSize __attribute__((packed)); - __u16 LineSize __attribute__((packed)); - __u8 Associativity __attribute__((packed)); - __u8 CacheType __attribute__((packed)); - __u8 WriteType __attribute__((packed)); - __u8 Number_CPUs_SharedBy __attribute__((packed)); - __u8 Shared_CPUs_Hardware_IDs[MAX_SHARED_CPUS] __attribute__((packed)); + __u8 Level; + __u32 TotalSize; + __u16 LineSize; + __u8 Associativity; + __u8 CacheType; + __u8 WriteType; + __u8 Number_CPUs_SharedBy; + __u8 Shared_CPUs_Hardware_IDs[MAX_SHARED_CPUS]; -}; +} __attribute__((packed)); struct CPU_Description { - __u8 CPU_HardwareId __attribute__((packed)); - 
char *FRU_String __attribute__((packed)); - __u8 NumberOfCacheLevels __attribute__((packed)); - struct CacheDescription CacheLevelData[MAX_CACHE_LEVELS] __attribute__((packed)); -}; + __u8 CPU_HardwareId; + char *FRU_String; + __u8 NumberOfCacheLevels; + struct CacheDescription CacheLevelData[MAX_CACHE_LEVELS]; +} __attribute__((packed)); struct CPU_Info { - __u8 Type __attribute__((packed)); - __u8 StructureVersion __attribute__((packed)); - __u8 NumberOf_CPUs __attribute__((packed)); - struct CPU_Description CPU_Data[MAX_CPUS] __attribute__((packed)); -}; + __u8 Type; + __u8 StructureVersion; + __u8 NumberOf_CPUs; + struct CPU_Description CPU_Data[MAX_CPUS]; +} __attribute__((packed)); /* -- cgit v1.2.3 From 81c06b10bcd4c7e8c88b4b425c55402b1d65fd0e Mon Sep 17 00:00:00 2001 From: James Bottomley <jejb@hobholes.localdomain> Date: Thu, 12 Oct 2006 22:25:03 -0500 Subject: [VOYAGER] fix up ptregs removal mess Apparently whoever converted voyager never actually checked that the patch would compile ... Remove as much of the pt_regs references as possible and move the remaining ones into line with what's in x86 generic. Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com> --- arch/i386/mach-voyager/voyager_basic.c | 6 +++--- arch/i386/mach-voyager/voyager_smp.c | 10 ++++------ include/asm-i386/vic.h | 2 +- 3 files changed, 8 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/i386/mach-voyager/voyager_basic.c b/arch/i386/mach-voyager/voyager_basic.c index c639d30d8bdc..8fe7e4593d5f 100644 --- a/arch/i386/mach-voyager/voyager_basic.c +++ b/arch/i386/mach-voyager/voyager_basic.c @@ -44,7 +44,7 @@ struct voyager_SUS *voyager_SUS = NULL; #ifdef CONFIG_SMP static void -voyager_dump(int dummy1, struct pt_regs *dummy2, struct tty_struct *dummy3) +voyager_dump(int dummy1, struct tty_struct *dummy3) { /* get here via a sysrq */ voyager_smp_dump(); @@ -166,7 +166,7 @@ voyager_memory_detect(int region, __u32 *start, __u32 *length) * off the timer tick to the SMP code, since the VIC doesn't have an * internal timer (The QIC does, but that's another story). */ void -voyager_timer_interrupt(struct pt_regs *regs) +voyager_timer_interrupt(void) { if((jiffies & 0x3ff) == 0) { @@ -202,7 +202,7 @@ voyager_timer_interrupt(struct pt_regs *regs) } } #ifdef CONFIG_SMP - smp_vic_timer_interrupt(regs); + smp_vic_timer_interrupt(); #endif } diff --git a/arch/i386/mach-voyager/voyager_smp.c b/arch/i386/mach-voyager/voyager_smp.c index 2e73f353f165..f3fea2ad50fe 100644 --- a/arch/i386/mach-voyager/voyager_smp.c +++ b/arch/i386/mach-voyager/voyager_smp.c @@ -1141,9 +1141,9 @@ smp_apic_timer_interrupt(struct pt_regs *regs) fastcall void smp_qic_timer_interrupt(struct pt_regs *regs) { - ack_QIC_CPI(QIC_TIMER_CPI); struct pt_regs *old_regs = set_irq_regs(regs); - wrapper_smp_local_timer_interrupt(void); + ack_QIC_CPI(QIC_TIMER_CPI); + wrapper_smp_local_timer_interrupt(); set_irq_regs(old_regs); } @@ -1267,12 +1267,10 @@ smp_send_stop(void) /* this function is triggered in time.c when a clock tick fires * we need to re-broadcast the tick to all CPUs */ void -smp_vic_timer_interrupt(struct pt_regs *regs) +smp_vic_timer_interrupt(void) { - struct pt_regs *old_regs = set_irq_regs(regs); send_CPI_allbutself(VIC_TIMER_CPI); smp_local_timer_interrupt(); - set_irq_regs(old_regs); } /* local (per CPU) timer interrupt. 
It does both profiling and @@ -1307,7 +1305,7 @@ smp_local_timer_interrupt(void) per_cpu(prof_counter, cpu); } - update_process_times(user_mode_vm(irq_regs)); + update_process_times(user_mode_vm(get_irq_regs())); } if( ((1<<cpu) & voyager_extended_vic_processors) == 0) diff --git a/include/asm-i386/vic.h b/include/asm-i386/vic.h index 4abfcfb91eb8..53100f353612 100644 --- a/include/asm-i386/vic.h +++ b/include/asm-i386/vic.h @@ -58,4 +58,4 @@ static const int VIC_CPI_Registers[] = #define VIC_BOOT_INTERRUPT_MASK 0xfe -extern void smp_vic_timer_interrupt(struct pt_regs *regs); +extern void smp_vic_timer_interrupt(void); -- cgit v1.2.3 From e0fafda36a2fc5ecf8d11771f6c01c523b2a1fd8 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven <geert@linux-m68k.org> Date: Thu, 12 Oct 2006 23:18:53 +0200 Subject: [PATCH] m68knommu: sync syscalls with m68k m68knommu: sync syscalls with m68k Signed-Off-By: Geert Uytterhoeven <geert@linux-m68k.org> Signed-Off-By: Greg Ungerer <gerg@uclinux.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- arch/m68knommu/kernel/syscalltable.S | 37 ++++++++++++++++++++++++++++++++---- include/asm-m68knommu/unistd.h | 35 +++++++++++++++++++++++++++++++--- 2 files changed, 65 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/m68knommu/kernel/syscalltable.S b/arch/m68knommu/kernel/syscalltable.S index 617e43ec95ae..4603f4f3c935 100644 --- a/arch/m68knommu/kernel/syscalltable.S +++ b/arch/m68knommu/kernel/syscalltable.S @@ -296,10 +296,39 @@ ENTRY(sys_call_table) .long sys_mq_notify /* 275 */ .long sys_mq_getsetattr .long sys_waitid - .long sys_ni_syscall /* sys_setaltroot */ - .long sys_ni_syscall /* sys_add_key */ - .long sys_ni_syscall /* 280 */ /* sys_request_key */ - .long sys_ni_syscall /* sys_keyctl */ + .long sys_ni_syscall /* for sys_vserver */ + .long sys_add_key + .long sys_request_key /* 280 */ + .long sys_keyctl + .long sys_ioprio_set + .long sys_ioprio_get + .long sys_inotify_init + .long sys_inotify_add_watch /* 285 */ + .long sys_inotify_rm_watch + .long sys_migrate_pages + .long sys_openat + .long sys_mkdirat + .long sys_mknodat /* 290 */ + .long sys_fchownat + .long sys_futimesat + .long sys_fstatat64 + .long sys_unlinkat + .long sys_renameat /* 295 */ + .long sys_linkat + .long sys_symlinkat + .long sys_readlinkat + .long sys_fchmodat + .long sys_faccessat /* 300 */ + .long sys_ni_syscall /* Reserved for pselect6 */ + .long sys_ni_syscall /* Reserved for ppoll */ + .long sys_unshare + .long sys_set_robust_list + .long sys_get_robust_list /* 305 */ + .long sys_splice + .long sys_sync_file_range + .long sys_tee + .long sys_vmsplice + .long sys_move_pages /* 310 */ .rept NR_syscalls-(.-sys_call_table)/4 .long sys_ni_syscall diff --git a/include/asm-m68knommu/unistd.h b/include/asm-m68knommu/unistd.h index daafb5d43ef1..ebaf03197114 100644 --- a/include/asm-m68knommu/unistd.h +++ b/include/asm-m68knommu/unistd.h @@ -281,14 +281,43 @@ #define __NR_mq_notify 275 #define __NR_mq_getsetattr 276 #define __NR_waitid 277 -#define __NR_sys_setaltroot 278 +#define __NR_vserver 278 #define __NR_add_key 279 #define __NR_request_key 280 #define __NR_keyctl 281 - +#define __NR_ioprio_set 282 +#define __NR_ioprio_get 283 +#define __NR_inotify_init 284 +#define __NR_inotify_add_watch 285 +#define __NR_inotify_rm_watch 286 +#define __NR_migrate_pages 287 +#define __NR_openat 288 +#define __NR_mkdirat 289 +#define __NR_mknodat 290 +#define __NR_fchownat 291 +#define __NR_futimesat 292 +#define __NR_fstatat64 293 +#define __NR_unlinkat 294 +#define 
__NR_renameat 295 +#define __NR_linkat 296 +#define __NR_symlinkat 297 +#define __NR_readlinkat 298 +#define __NR_fchmodat 299 +#define __NR_faccessat 300 +#define __NR_pselect6 301 +#define __NR_ppoll 302 +#define __NR_unshare 303 +#define __NR_set_robust_list 304 +#define __NR_get_robust_list 305 +#define __NR_splice 306 +#define __NR_sync_file_range 307 +#define __NR_tee 308 +#define __NR_vmsplice 309 +#define __NR_move_pages 310 + #ifdef __KERNEL__ -#define NR_syscalls 282 +#define NR_syscalls 311 #include <linux/err.h> /* user-visible error numbers are in the range -1 - -MAX_ERRNO: see -- cgit v1.2.3 From 5011915cbb139a331c083e65a61c82e9174f9813 Mon Sep 17 00:00:00 2001 From: Hans Verkuil <hverkuil@xs4all.nl> Date: Fri, 13 Oct 2006 05:12:42 -0300 Subject: V4L/DVB (4746): HM12 is YUV 4:2:0, not YUV 4:1:1 Fix comment in videodev2.h Signed-off-by: Hans Verkuil <hverkuil@xs4all.nl> Signed-off-by: Mauro Carvalho Chehab <mchehab@infradead.org> --- include/linux/videodev2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/videodev2.h b/include/linux/videodev2.h index c5fdf6259548..df5c4654360d 100644 --- a/include/linux/videodev2.h +++ b/include/linux/videodev2.h @@ -243,7 +243,7 @@ struct v4l2_pix_format #define V4L2_PIX_FMT_YUV420 v4l2_fourcc('Y','U','1','2') /* 12 YUV 4:2:0 */ #define V4L2_PIX_FMT_YYUV v4l2_fourcc('Y','Y','U','V') /* 16 YUV 4:2:2 */ #define V4L2_PIX_FMT_HI240 v4l2_fourcc('H','I','2','4') /* 8 8-bit color */ -#define V4L2_PIX_FMT_HM12 v4l2_fourcc('H','M','1','2') /* 8 YUV 4:1:1 16x16 macroblocks */ +#define V4L2_PIX_FMT_HM12 v4l2_fourcc('H','M','1','2') /* 8 YUV 4:2:0 16x16 macroblocks */ /* see http://www.siliconimaging.com/RGB%20Bayer.htm */ #define V4L2_PIX_FMT_SBGGR8 v4l2_fourcc('B','A','8','1') /* 8 BGBG.. GRGR.. */ -- cgit v1.2.3 From 991528d7348667924176f3e29addea0675298944 Mon Sep 17 00:00:00 2001 From: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> Date: Mon, 25 Sep 2006 16:28:13 -0700 Subject: ACPI: Processor native C-states using MWAIT Intel processors starting with the Core Duo support support processor native C-state using the MWAIT instruction. Refer: Intel Architecture Software Developer's Manual http://www.intel.com/design/Pentium4/manuals/253668.htm Platform firmware exports the support for Native C-state to OS using ACPI _PDC and _CST methods. Refer: Intel Processor Vendor-Specific ACPI: Interface Specification http://www.intel.com/technology/iapc/acpi/downloads/302223.htm With Processor Native C-state, we use 'MWAIT' instruction on the processor to enter different C-states (C1, C2, C3). We won't use the special IO ports to enter C-state and no SMM mode etc required to enter C-state. Overall this will mean better C-state support. One major advantage of using MWAIT for all C-states is, with this and "treat interrupt as break event" feature of MWAIT, we can now get accurate timing for the time spent in C1, C2, .. states. 
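The idle-loop ordering this relies on can be sketched in userspace as follows. monitor() and mwait() are printf stubs for the privileged MONITOR/MWAIT instructions, the eax hint value in main() is invented for illustration, and the real kernel code additionally issues smp_mb() between arming the monitor and re-checking the flag:

	#include <stdio.h>
	#include <stdbool.h>

	static bool need_resched_flag;

	static bool need_resched(void)
	{
		return need_resched_flag;
	}

	static void monitor(const void *addr)
	{
		(void)addr;	/* arm the monitor on this cache line */
	}

	static void mwait(unsigned long eax, unsigned long ecx)
	{
		printf("mwait: C-state hint eax=%#lx, ecx=%#lx\n", eax, ecx);
	}

	/* The ordering is the point: arm the monitor on the need_resched
	 * flag, re-check the flag to close the wakeup race, only then wait.
	 */
	static void mwait_idle_with_hints(unsigned long eax, unsigned long ecx)
	{
		if (!need_resched()) {
			monitor(&need_resched_flag);
			if (!need_resched())
				mwait(eax, ecx);
		}
	}

	int main(void)
	{
		/* eax is an invented hint; ecx=0x1 models
		 * MWAIT_ECX_INTERRUPT_BREAK from this patch. */
		mwait_idle_with_hints(0x10, 0x1);
		return 0;
	}

With the interrupt-break ecx extension set, the CPU returns from MWAIT on an interrupt even while interrupts are masked, which is what makes the per-C-state residency timing accurate.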
Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Len Brown <len.brown@intel.com> --- arch/i386/kernel/acpi/cstate.c | 122 ++++++++++++++++++++++++++++++++++++++++- arch/i386/kernel/process.c | 22 +++++--- arch/x86_64/kernel/process.c | 22 +++++--- drivers/acpi/processor_idle.c | 101 +++++++++++++++++++++------------- include/acpi/pdc_intel.h | 9 ++- include/acpi/processor.h | 18 ++++++ include/asm-i386/processor.h | 2 + include/asm-x86_64/processor.h | 2 + 8 files changed, 242 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/acpi/cstate.c b/arch/i386/kernel/acpi/cstate.c index 25db49ef1770..20563e52c622 100644 --- a/arch/i386/kernel/acpi/cstate.c +++ b/arch/i386/kernel/acpi/cstate.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/init.h> #include <linux/acpi.h> +#include <linux/cpu.h> #include <acpi/processor.h> #include <asm/acpi.h> @@ -41,5 +42,124 @@ void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, flags->bm_check = 1; } } - EXPORT_SYMBOL(acpi_processor_power_init_bm_check); + +/* The code below handles cstate entry with monitor-mwait pair on Intel*/ + +struct cstate_entry_s { + struct { + unsigned int eax; + unsigned int ecx; + } states[ACPI_PROCESSOR_MAX_POWER]; +}; +static struct cstate_entry_s *cpu_cstate_entry; /* per CPU ptr */ + +static short mwait_supported[ACPI_PROCESSOR_MAX_POWER]; + +#define MWAIT_SUBSTATE_MASK (0xf) +#define MWAIT_SUBSTATE_SIZE (4) + +#define CPUID_MWAIT_LEAF (5) +#define CPUID5_ECX_EXTENSIONS_SUPPORTED (0x1) +#define CPUID5_ECX_INTERRUPT_BREAK (0x2) + +#define MWAIT_ECX_INTERRUPT_BREAK (0x1) + +#define NATIVE_CSTATE_BEYOND_HALT (2) + +int acpi_processor_ffh_cstate_probe(unsigned int cpu, + struct acpi_processor_cx *cx, struct acpi_power_register *reg) +{ + struct cstate_entry_s *percpu_entry; + struct cpuinfo_x86 *c = cpu_data + cpu; + + cpumask_t saved_mask; + int retval; + unsigned int eax, ebx, ecx, edx; + unsigned int edx_part; + unsigned int cstate_type; /* C-state type and not ACPI C-state type */ + unsigned int num_cstate_subtype; + + if (!cpu_cstate_entry || c->cpuid_level < CPUID_MWAIT_LEAF ) + return -1; + + if (reg->bit_offset != NATIVE_CSTATE_BEYOND_HALT) + return -1; + + percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); + percpu_entry->states[cx->index].eax = 0; + percpu_entry->states[cx->index].ecx = 0; + + /* Make sure we are running on right CPU */ + saved_mask = current->cpus_allowed; + retval = set_cpus_allowed(current, cpumask_of_cpu(cpu)); + if (retval) + return -1; + + cpuid(CPUID_MWAIT_LEAF, &eax, &ebx, &ecx, &edx); + + /* Check whether this particular cx_type (in CST) is supported or not */ + cstate_type = (cx->address >> MWAIT_SUBSTATE_SIZE) + 1; + edx_part = edx >> (cstate_type * MWAIT_SUBSTATE_SIZE); + num_cstate_subtype = edx_part & MWAIT_SUBSTATE_MASK; + + retval = 0; + if (num_cstate_subtype < (cx->address & MWAIT_SUBSTATE_MASK)) { + retval = -1; + goto out; + } + + /* mwait ecx extensions INTERRUPT_BREAK should be supported for C2/C3 */ + if (!(ecx & CPUID5_ECX_EXTENSIONS_SUPPORTED) || + !(ecx & CPUID5_ECX_INTERRUPT_BREAK)) { + retval = -1; + goto out; + } + percpu_entry->states[cx->index].ecx = MWAIT_ECX_INTERRUPT_BREAK; + + /* Use the hint in CST */ + percpu_entry->states[cx->index].eax = cx->address; + + if (!mwait_supported[cstate_type]) { + mwait_supported[cstate_type] = 1; + printk(KERN_DEBUG "Monitor-Mwait will be used to enter C-%d " + "state\n", cx->type); + } + +out: 
+ set_cpus_allowed(current, saved_mask); + return retval; +} +EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_probe); + +void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cx) +{ + unsigned int cpu = smp_processor_id(); + struct cstate_entry_s *percpu_entry; + + percpu_entry = per_cpu_ptr(cpu_cstate_entry, cpu); + mwait_idle_with_hints(percpu_entry->states[cx->index].eax, + percpu_entry->states[cx->index].ecx); +} +EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_enter); + +static int __init ffh_cstate_init(void) +{ + struct cpuinfo_x86 *c = &boot_cpu_data; + if (c->x86_vendor != X86_VENDOR_INTEL) + return -1; + + cpu_cstate_entry = alloc_percpu(struct cstate_entry_s); + return 0; +} + +static void __exit ffh_cstate_exit(void) +{ + if (cpu_cstate_entry) { + free_percpu(cpu_cstate_entry); + cpu_cstate_entry = NULL; + } +} + +arch_initcall(ffh_cstate_init); +__exitcall(ffh_cstate_exit); diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index b0a07801d9df..57d375900afb 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c @@ -236,20 +236,28 @@ EXPORT_SYMBOL_GPL(cpu_idle_wait); * We execute MONITOR against need_resched and enter optimized wait state * through MWAIT. Whenever someone changes need_resched, we would be woken * up from MWAIT (without an IPI). + * + * New with Core Duo processors, MWAIT can take some hints based on CPU + * capability. */ -static void mwait_idle(void) +void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) { - local_irq_enable(); - - while (!need_resched()) { + if (!need_resched()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); - if (need_resched()) - break; - __mwait(0, 0); + if (!need_resched()) + __mwait(eax, ecx); } } +/* Default MONITOR/MWAIT with no hints, used for default C1 state */ +static void mwait_idle(void) +{ + local_irq_enable(); + while (!need_resched()) + mwait_idle_with_hints(0, 0); +} + void __devinit select_idle_routine(const struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_MWAIT)) { diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 5e95b257ee26..49f7fac6229e 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -238,20 +238,28 @@ void cpu_idle (void) * We execute MONITOR against need_resched and enter optimized wait state * through MWAIT. Whenever someone changes need_resched, we would be woken * up from MWAIT (without an IPI). + * + * New with Core Duo processors, MWAIT can take some hints based on CPU + * capability. */ -static void mwait_idle(void) +void mwait_idle_with_hints(unsigned long eax, unsigned long ecx) { - local_irq_enable(); - - while (!need_resched()) { + if (!need_resched()) { __monitor((void *)¤t_thread_info()->flags, 0, 0); smp_mb(); - if (need_resched()) - break; - __mwait(0, 0); + if (!need_resched()) + __mwait(eax, ecx); } } +/* Default MONITOR/MWAIT with no hints, used for default C1 state */ +static void mwait_idle(void) +{ + local_irq_enable(); + while (!need_resched()) + mwait_idle_with_hints(0,0); +} + void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) { static int printed; diff --git a/drivers/acpi/processor_idle.c b/drivers/acpi/processor_idle.c index 0a395fca843b..429a39dbd75d 100644 --- a/drivers/acpi/processor_idle.c +++ b/drivers/acpi/processor_idle.c @@ -219,6 +219,23 @@ static void acpi_safe_halt(void) static atomic_t c3_cpu_count; +/* Common C-state entry for C2, C3, .. 
*/ +static void acpi_cstate_enter(struct acpi_processor_cx *cstate) +{ + if (cstate->space_id == ACPI_CSTATE_FFH) { + /* Call into architectural FFH based C-state */ + acpi_processor_ffh_cstate_enter(cstate); + } else { + int unused; + /* IO port based C-state */ + inb(cstate->address); + /* Dummy wait op - must do something useless after P_LVL2 read + because chipsets cannot guarantee that STPCLK# signal + gets asserted in time to freeze execution properly. */ + unused = inl(acpi_fadt.xpm_tmr_blk.address); + } +} + static void acpi_processor_idle(void) { struct acpi_processor *pr = NULL; @@ -361,11 +378,7 @@ static void acpi_processor_idle(void) /* Get start time (ticks) */ t1 = inl(acpi_fadt.xpm_tmr_blk.address); /* Invoke C2 */ - inb(cx->address); - /* Dummy wait op - must do something useless after P_LVL2 read - because chipsets cannot guarantee that STPCLK# signal - gets asserted in time to freeze execution properly. */ - t2 = inl(acpi_fadt.xpm_tmr_blk.address); + acpi_cstate_enter(cx); /* Get end time (ticks) */ t2 = inl(acpi_fadt.xpm_tmr_blk.address); @@ -401,9 +414,7 @@ static void acpi_processor_idle(void) /* Get start time (ticks) */ t1 = inl(acpi_fadt.xpm_tmr_blk.address); /* Invoke C3 */ - inb(cx->address); - /* Dummy wait op (see above) */ - t2 = inl(acpi_fadt.xpm_tmr_blk.address); + acpi_cstate_enter(cx); /* Get end time (ticks) */ t2 = inl(acpi_fadt.xpm_tmr_blk.address); if (pr->flags.bm_check) { @@ -628,20 +639,16 @@ static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr) return 0; } -static int acpi_processor_get_power_info_default_c1(struct acpi_processor *pr) +static int acpi_processor_get_power_info_default(struct acpi_processor *pr) { - - /* Zero initialize all the C-states info. */ - memset(pr->power.states, 0, sizeof(pr->power.states)); - - /* set the first C-State to C1 */ - pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1; - - /* the C0 state only exists as a filler in our array, - * and all processors need to support C1 */ + if (!pr->power.states[ACPI_STATE_C1].valid) { + /* set the first C-State to C1 */ + /* all processors need to support C1 */ + pr->power.states[ACPI_STATE_C1].type = ACPI_STATE_C1; + pr->power.states[ACPI_STATE_C1].valid = 1; + } + /* the C0 state only exists as a filler in our array */ pr->power.states[ACPI_STATE_C0].valid = 1; - pr->power.states[ACPI_STATE_C1].valid = 1; - return 0; } @@ -658,12 +665,7 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) if (nocst) return -ENODEV; - current_count = 1; - - /* Zero initialize C2 onwards and prepare for fresh CST lookup */ - for (i = 2; i < ACPI_PROCESSOR_MAX_POWER; i++) - memset(&(pr->power.states[i]), 0, - sizeof(struct acpi_processor_cx)); + current_count = 0; status = acpi_evaluate_object(pr->handle, "_CST", NULL, &buffer); if (ACPI_FAILURE(status)) { @@ -718,22 +720,39 @@ static int acpi_processor_get_power_info_cst(struct acpi_processor *pr) (reg->space_id != ACPI_ADR_SPACE_FIXED_HARDWARE)) continue; - cx.address = (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) ? - 0 : reg->address; - /* There should be an easy way to extract an integer... 
*/ obj = (union acpi_object *)&(element->package.elements[1]); if (obj->type != ACPI_TYPE_INTEGER) continue; cx.type = obj->integer.value; - - if ((cx.type != ACPI_STATE_C1) && - (reg->space_id != ACPI_ADR_SPACE_SYSTEM_IO)) - continue; - - if ((cx.type < ACPI_STATE_C2) || (cx.type > ACPI_STATE_C3)) - continue; + /* + * Some buggy BIOSes won't list C1 in _CST - + * Let acpi_processor_get_power_info_default() handle them later + */ + if (i == 1 && cx.type != ACPI_STATE_C1) + current_count++; + + cx.address = reg->address; + cx.index = current_count + 1; + + cx.space_id = ACPI_CSTATE_SYSTEMIO; + if (reg->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) { + if (acpi_processor_ffh_cstate_probe + (pr->id, &cx, reg) == 0) { + cx.space_id = ACPI_CSTATE_FFH; + } else if (cx.type != ACPI_STATE_C1) { + /* + * C1 is a special case where FIXED_HARDWARE + * can be handled in non-MWAIT way as well. + * In that case, save this _CST entry info. + * That is, we retain space_id of SYSTEM_IO for + * halt based C1. + * Otherwise, ignore this info and continue. + */ + continue; + } + } obj = (union acpi_object *)&(element->package.elements[2]); if (obj->type != ACPI_TYPE_INTEGER) @@ -938,12 +957,18 @@ static int acpi_processor_get_power_info(struct acpi_processor *pr) /* NOTE: the idle thread may not be running while calling * this function */ - /* Adding C1 state */ - acpi_processor_get_power_info_default_c1(pr); + /* Zero initialize all the C-states info. */ + memset(pr->power.states, 0, sizeof(pr->power.states)); + result = acpi_processor_get_power_info_cst(pr); if (result == -ENODEV) acpi_processor_get_power_info_fadt(pr); + if (result) + return result; + + acpi_processor_get_power_info_default(pr); + pr->power.count = acpi_processor_power_verify(pr); /* diff --git a/include/acpi/pdc_intel.h b/include/acpi/pdc_intel.h index c5472be6f3a2..e72bfdd887f9 100644 --- a/include/acpi/pdc_intel.h +++ b/include/acpi/pdc_intel.h @@ -13,6 +13,7 @@ #define ACPI_PDC_SMP_C_SWCOORD (0x0040) #define ACPI_PDC_SMP_T_SWCOORD (0x0080) #define ACPI_PDC_C_C1_FFH (0x0100) +#define ACPI_PDC_C_C2C3_FFH (0x0200) #define ACPI_PDC_EST_CAPABILITY_SMP (ACPI_PDC_SMP_C1PT | \ ACPI_PDC_C_C1_HALT | \ @@ -23,8 +24,10 @@ ACPI_PDC_SMP_P_SWCOORD | \ ACPI_PDC_P_FFH) -#define ACPI_PDC_C_CAPABILITY_SMP (ACPI_PDC_SMP_C2C3 | \ - ACPI_PDC_SMP_C1PT | \ - ACPI_PDC_C_C1_HALT) +#define ACPI_PDC_C_CAPABILITY_SMP (ACPI_PDC_SMP_C2C3 | \ + ACPI_PDC_SMP_C1PT | \ + ACPI_PDC_C_C1_HALT | \ + ACPI_PDC_C_C1_FFH | \ + ACPI_PDC_C_C2C3_FFH) #endif /* __PDC_INTEL_H__ */ diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 9dd5b75961f8..7798d2a9f793 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -29,6 +29,9 @@ #define DOMAIN_COORD_TYPE_SW_ANY 0xfd #define DOMAIN_COORD_TYPE_HW_ALL 0xfe +#define ACPI_CSTATE_SYSTEMIO (0) +#define ACPI_CSTATE_FFH (1) + /* Power Management */ struct acpi_processor_cx; @@ -58,6 +61,8 @@ struct acpi_processor_cx { u8 valid; u8 type; u32 address; + u8 space_id; + u8 index; u32 latency; u32 latency_ticks; u32 power; @@ -206,6 +211,9 @@ void arch_acpi_processor_init_pdc(struct acpi_processor *pr); #ifdef ARCH_HAS_POWER_INIT void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, unsigned int cpu); +int acpi_processor_ffh_cstate_probe(unsigned int cpu, + struct acpi_processor_cx *cx, struct acpi_power_register *reg); +void acpi_processor_ffh_cstate_enter(struct acpi_processor_cx *cstate); #else static inline void acpi_processor_power_init_bm_check(struct acpi_processor_flags @@ -214,6 
+222,16 @@ static inline void acpi_processor_power_init_bm_check(struct flags->bm_check = 1; return; } +static inline int acpi_processor_ffh_cstate_probe(unsigned int cpu, + struct acpi_processor_cx *cx, struct acpi_power_register *reg) +{ + return -1; +} +static inline void acpi_processor_ffh_cstate_enter( + struct acpi_processor_cx *cstate) +{ + return; +} #endif /* in processor_perflib.c */ diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 2277127696d2..e0ddca94d50c 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -306,6 +306,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) : :"a" (eax), "c" (ecx)); } +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); + /* from system description table in BIOS. Mostly for MCA use, but others may find it useful. */ extern unsigned int machine_id; diff --git a/include/asm-x86_64/processor.h b/include/asm-x86_64/processor.h index de9c3147ee4c..cef17e0f828c 100644 --- a/include/asm-x86_64/processor.h +++ b/include/asm-x86_64/processor.h @@ -475,6 +475,8 @@ static inline void __mwait(unsigned long eax, unsigned long ecx) : :"a" (eax), "c" (ecx)); } +extern void mwait_idle_with_hints(unsigned long eax, unsigned long ecx); + #define stack_current() \ ({ \ struct thread_info *ti; \ -- cgit v1.2.3 From d7a76e4cb3b4469b1eccb6204c053e3ebcd4c196 Mon Sep 17 00:00:00 2001 From: Lennart Poettering <mzxreary@0pointer.de> Date: Tue, 5 Sep 2006 12:12:24 -0400 Subject: ACPI: consolidate functions in acpi ec driver Unify the following functions: acpi_ec_poll_read() acpi_ec_poll_write() acpi_ec_poll_query() acpi_ec_intr_read() acpi_ec_intr_write() acpi_ec_intr_query() into: acpi_ec_poll_transaction() acpi_ec_intr_transaction() These new functions take as arguments an ACPI EC command, a few bytes to write to the EC data register and a buffer for a few bytes to read from the EC data register. The old _read(), _write(), _query() are just special cases of these functions. I then unified the code in acpi_ec_poll_transaction() and acpi_ec_intr_transaction() a little more. Both functions are now just wrappers around the new acpi_ec_transaction_unlocked() function. The latter contains the EC access logic; the two original functions now just do their special way of locking and call the new function for the actual work. This saves a lot of very similar code. The primary reason for doing this, however, is that my driver for MSI 270 laptops needs to issue some non-standard EC commands in a safe way. Due to this I added a new exported function similar to ec_read()/ec_write(), which is called ec_transaction() and is essentially just a wrapper around acpi_ec_{poll,intr}_transaction().
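To make the shape of the new primitive concrete, here is a minimal stand-alone sketch (plain C with hypothetical register-access stubs; the real driver additionally waits on the EC's IBF/OBF status bits between bytes and takes the semaphore and global lock, as the diff below shows):

  #include <stdint.h>
  #include <stddef.h>

  /* Hypothetical stand-ins for the EC command/data register accessors. */
  static void ec_out_cmd(uint8_t cmd)  { (void)cmd; /* write command register */ }
  static void ec_out_data(uint8_t b)   { (void)b;   /* write data register */ }
  static uint8_t ec_in_data(void)      { return 0;  /* read data register */ }

  /* One primitive: send a command, write wdata_len bytes, read rdata_len bytes. */
  static int ec_transaction(uint8_t command, const uint8_t *wdata, size_t wdata_len,
                            uint8_t *rdata, size_t rdata_len)
  {
      ec_out_cmd(command);
      for (; wdata_len > 0; wdata_len--)
          ec_out_data(*wdata++);
      for (; rdata_len > 0; rdata_len--)
          *rdata++ = ec_in_data();
      return 0;
  }

  /* The old entry points collapse into one-line wrappers, as in the patch. */
  static int ec_read_byte(uint8_t address, uint8_t *val)
  {
      return ec_transaction(0x80 /* RD_EC, the ACPI EC read command */, &address, 1, val, 1);
  }

  static int ec_write_byte(uint8_t address, uint8_t val)
  {
      uint8_t wdata[2] = { address, val };
      return ec_transaction(0x81 /* WR_EC, the ACPI EC write command */, wdata, 2, NULL, 0);
  }

  int main(void)
  {
      uint8_t val;
      ec_read_byte(0x2f, &val);   /* 0x2f: an arbitrary, made-up EC register */
      return ec_write_byte(0x2f, val);
  }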
Signed-off-by: Lennart Poettering <mzxreary@0pointer.de> Acked-by: Luming Yu <luming.yu@intel.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Len Brown <len.brown@intel.com> --- drivers/acpi/ec.c | 325 +++++++++++++++------------------------------------ include/linux/acpi.h | 3 + 2 files changed, 98 insertions(+), 230 deletions(-) (limited to 'include') diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index e5d796362854..a0dcbad97c45 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -122,12 +122,12 @@ union acpi_ec { static int acpi_ec_poll_wait(union acpi_ec *ec, u8 event); static int acpi_ec_intr_wait(union acpi_ec *ec, unsigned int event); -static int acpi_ec_poll_read(union acpi_ec *ec, u8 address, u32 * data); -static int acpi_ec_intr_read(union acpi_ec *ec, u8 address, u32 * data); -static int acpi_ec_poll_write(union acpi_ec *ec, u8 address, u8 data); -static int acpi_ec_intr_write(union acpi_ec *ec, u8 address, u8 data); -static int acpi_ec_poll_query(union acpi_ec *ec, u32 * data); -static int acpi_ec_intr_query(union acpi_ec *ec, u32 * data); +static int acpi_ec_poll_transaction(union acpi_ec *ec, u8 command, + const u8 *wdata, unsigned wdata_len, + u8 *rdata, unsigned rdata_len); +static int acpi_ec_intr_transaction(union acpi_ec *ec, u8 command, + const u8 *wdata, unsigned wdata_len, + u8 *rdata, unsigned rdata_len); static void acpi_ec_gpe_poll_query(void *ec_cxt); static void acpi_ec_gpe_intr_query(void *ec_cxt); static u32 acpi_ec_gpe_poll_handler(void *data); @@ -302,110 +302,95 @@ end: } #endif /* ACPI_FUTURE_USAGE */ -static int acpi_ec_read(union acpi_ec *ec, u8 address, u32 * data) +static int acpi_ec_transaction(union acpi_ec *ec, u8 command, + const u8 *wdata, unsigned wdata_len, + u8 *rdata, unsigned rdata_len) { if (acpi_ec_poll_mode) - return acpi_ec_poll_read(ec, address, data); + return acpi_ec_poll_transaction(ec, command, wdata, wdata_len, rdata, rdata_len); else - return acpi_ec_intr_read(ec, address, data); + return acpi_ec_intr_transaction(ec, command, wdata, wdata_len, rdata, rdata_len); +} +static int acpi_ec_read(union acpi_ec *ec, u8 address, u32 * data) +{ + int result; + u8 d; + result = acpi_ec_transaction(ec, ACPI_EC_COMMAND_READ, &address, 1, &d, 1); + *data = d; + return result; } static int acpi_ec_write(union acpi_ec *ec, u8 address, u8 data) { - if (acpi_ec_poll_mode) - return acpi_ec_poll_write(ec, address, data); - else - return acpi_ec_intr_write(ec, address, data); + u8 wdata[2] = { address, data }; + return acpi_ec_transaction(ec, ACPI_EC_COMMAND_WRITE, wdata, 2, NULL, 0); } -static int acpi_ec_poll_read(union acpi_ec *ec, u8 address, u32 * data) + +static int acpi_ec_transaction_unlocked(union acpi_ec *ec, u8 command, + const u8 *wdata, unsigned wdata_len, + u8 *rdata, unsigned rdata_len) { - acpi_status status = AE_OK; - int result = 0; - u32 glk = 0; + int result; + acpi_hw_low_level_write(8, command, &ec->common.command_addr); - if (!ec || !data) - return -EINVAL; + result = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); + if (result) + return result; - *data = 0; + for (; wdata_len > 0; wdata_len --) { - if (ec->common.global_lock) { - status = acpi_acquire_global_lock(ACPI_EC_UDELAY_GLK, &glk); - if (ACPI_FAILURE(status)) - return -ENODEV; - } + acpi_hw_low_level_write(8, *(wdata++), &ec->common.data_addr); - if (down_interruptible(&ec->poll.sem)) { - result = -ERESTARTSYS; - goto end_nosem; - } - - acpi_hw_low_level_write(8, ACPI_EC_COMMAND_READ, - &ec->common.command_addr); - result = acpi_ec_wait(ec, 
ACPI_EC_EVENT_IBE); - if (result) - goto end; + result = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); + if (result) + return result; + } - acpi_hw_low_level_write(8, address, &ec->common.data_addr); - result = acpi_ec_wait(ec, ACPI_EC_EVENT_OBF); - if (result) - goto end; - acpi_hw_low_level_read(8, data, &ec->common.data_addr); + for (; rdata_len > 0; rdata_len --) { + u32 d; - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Read [%02x] from address [%02x]\n", - *data, address)); + result = acpi_ec_wait(ec, ACPI_EC_EVENT_OBF); + if (result) + return result; - end: - up(&ec->poll.sem); -end_nosem: - if (ec->common.global_lock) - acpi_release_global_lock(glk); + acpi_hw_low_level_read(8, &d, &ec->common.data_addr); + *(rdata++) = (u8) d; + } - return result; + return 0; } -static int acpi_ec_poll_write(union acpi_ec *ec, u8 address, u8 data) +static int acpi_ec_poll_transaction(union acpi_ec *ec, u8 command, + const u8 *wdata, unsigned wdata_len, + u8 *rdata, unsigned rdata_len) { - int result = 0; acpi_status status = AE_OK; + int result; u32 glk = 0; - - if (!ec) + if (!ec || (wdata_len && !wdata) || (rdata_len && !rdata)) return -EINVAL; + if (rdata) + memset(rdata, 0, rdata_len); + if (ec->common.global_lock) { status = acpi_acquire_global_lock(ACPI_EC_UDELAY_GLK, &glk); if (ACPI_FAILURE(status)) return -ENODEV; - } + } if (down_interruptible(&ec->poll.sem)) { result = -ERESTARTSYS; goto end_nosem; } - - acpi_hw_low_level_write(8, ACPI_EC_COMMAND_WRITE, - &ec->common.command_addr); - result = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); - if (result) - goto end; - acpi_hw_low_level_write(8, address, &ec->common.data_addr); - result = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); - if (result) - goto end; - - acpi_hw_low_level_write(8, data, &ec->common.data_addr); - result = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); - if (result) - goto end; - - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Wrote [%02x] to address [%02x]\n", - data, address)); - - end: + result = acpi_ec_transaction_unlocked(ec, command, + wdata, wdata_len, + rdata, rdata_len); up(&ec->poll.sem); + end_nosem: if (ec->common.global_lock) acpi_release_global_lock(glk); @@ -413,16 +398,18 @@ end_nosem: return result; } -static int acpi_ec_intr_read(union acpi_ec *ec, u8 address, u32 * data) +static int acpi_ec_intr_transaction(union acpi_ec *ec, u8 command, + const u8 *wdata, unsigned wdata_len, + u8 *rdata, unsigned rdata_len) { - int status = 0; + int status; u32 glk; - - if (!ec || !data) + if (!ec || (wdata_len && !wdata) || (rdata_len && !rdata)) return -EINVAL; - *data = 0; + if (rdata) + memset(rdata, 0, rdata_len); if (ec->common.global_lock) { status = acpi_acquire_global_lock(ACPI_EC_UDELAY_GLK, &glk); @@ -438,72 +425,12 @@ static int acpi_ec_intr_read(union acpi_ec *ec, u8 address, u32 * data) printk(KERN_DEBUG PREFIX "read EC, IB not empty\n"); goto end; } - acpi_hw_low_level_write(8, ACPI_EC_COMMAND_READ, - &ec->common.command_addr); - status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); - if (status) { - printk(KERN_DEBUG PREFIX "read EC, IB not empty\n"); - } - - acpi_hw_low_level_write(8, address, &ec->common.data_addr); - status = acpi_ec_wait(ec, ACPI_EC_EVENT_OBF); - if (status) { - printk(KERN_DEBUG PREFIX "read EC, OB not full\n"); - goto end; - } - acpi_hw_low_level_read(8, data, &ec->common.data_addr); - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Read [%02x] from address [%02x]\n", - *data, address)); - - end: - up(&ec->intr.sem); - - if (ec->common.global_lock) - acpi_release_global_lock(glk); - - return status; -} - -static int acpi_ec_intr_write(union acpi_ec *ec, u8 
address, u8 data) -{ - int status = 0; - u32 glk; - - - if (!ec) - return -EINVAL; - - if (ec->common.global_lock) { - status = acpi_acquire_global_lock(ACPI_EC_UDELAY_GLK, &glk); - if (ACPI_FAILURE(status)) - return -ENODEV; - } - - WARN_ON(in_interrupt()); - down(&ec->intr.sem); - - status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); - if (status) { - printk(KERN_DEBUG PREFIX "write EC, IB not empty\n"); - } - acpi_hw_low_level_write(8, ACPI_EC_COMMAND_WRITE, - &ec->common.command_addr); - status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); - if (status) { - printk(KERN_DEBUG PREFIX "write EC, IB not empty\n"); - } - - acpi_hw_low_level_write(8, address, &ec->common.data_addr); - status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); - if (status) { - printk(KERN_DEBUG PREFIX "write EC, IB not empty\n"); - } - acpi_hw_low_level_write(8, data, &ec->common.data_addr); - - ACPI_DEBUG_PRINT((ACPI_DB_INFO, "Wrote [%02x] to address [%02x]\n", - data, address)); + status = acpi_ec_transaction_unlocked(ec, command, + wdata, wdata_len, + rdata, rdata_len); +end: up(&ec->intr.sem); if (ec->common.global_lock) @@ -554,106 +481,44 @@ int ec_write(u8 addr, u8 val) EXPORT_SYMBOL(ec_write); -static int acpi_ec_query(union acpi_ec *ec, u32 * data) -{ - if (acpi_ec_poll_mode) - return acpi_ec_poll_query(ec, data); - else - return acpi_ec_intr_query(ec, data); -} -static int acpi_ec_poll_query(union acpi_ec *ec, u32 * data) +extern int ec_transaction(u8 command, + const u8 *wdata, unsigned wdata_len, + u8 *rdata, unsigned rdata_len) { - int result = 0; - acpi_status status = AE_OK; - u32 glk = 0; - - - if (!ec || !data) - return -EINVAL; - - *data = 0; - - if (ec->common.global_lock) { - status = acpi_acquire_global_lock(ACPI_EC_UDELAY_GLK, &glk); - if (ACPI_FAILURE(status)) - return -ENODEV; - } - - /* - * Query the EC to find out which _Qxx method we need to evaluate. - * Note that successful completion of the query causes the ACPI_EC_SCI - * bit to be cleared (and thus clearing the interrupt source). - */ - if (down_interruptible(&ec->poll.sem)) { - result = -ERESTARTSYS; - goto end_nosem; - } - - acpi_hw_low_level_write(8, ACPI_EC_COMMAND_QUERY, - &ec->common.command_addr); - result = acpi_ec_wait(ec, ACPI_EC_EVENT_OBF); - if (result) - goto end; + union acpi_ec *ec; - acpi_hw_low_level_read(8, data, &ec->common.data_addr); - if (!*data) - result = -ENODATA; + if (!first_ec) + return -ENODEV; - end: - up(&ec->poll.sem); -end_nosem: - if (ec->common.global_lock) - acpi_release_global_lock(glk); + ec = acpi_driver_data(first_ec); - return result; + return acpi_ec_transaction(ec, command, wdata, wdata_len, rdata, rdata_len); } -static int acpi_ec_intr_query(union acpi_ec *ec, u32 * data) -{ - int status = 0; - u32 glk; +EXPORT_SYMBOL(ec_transaction); - if (!ec || !data) - return -EINVAL; - *data = 0; +static int acpi_ec_query(union acpi_ec *ec, u32 * data) { + int result; + u8 d; - if (ec->common.global_lock) { - status = acpi_acquire_global_lock(ACPI_EC_UDELAY_GLK, &glk); - if (ACPI_FAILURE(status)) - return -ENODEV; - } + if (!ec || !data) + return -EINVAL; - down(&ec->intr.sem); + /* + * Query the EC to find out which _Qxx method we need to evaluate. + * Note that successful completion of the query causes the ACPI_EC_SCI + * bit to be cleared (and thus clearing the interrupt source). + */ - status = acpi_ec_wait(ec, ACPI_EC_EVENT_IBE); - if (status) { - printk(KERN_DEBUG PREFIX "query EC, IB not empty\n"); - goto end; - } - /* - * Query the EC to find out which _Qxx method we need to evaluate. 
- * Note that successful completion of the query causes the ACPI_EC_SCI - * bit to be cleared (and thus clearing the interrupt source). - */ - acpi_hw_low_level_write(8, ACPI_EC_COMMAND_QUERY, - &ec->common.command_addr); - status = acpi_ec_wait(ec, ACPI_EC_EVENT_OBF); - if (status) { - printk(KERN_DEBUG PREFIX "query EC, OB not full\n"); - goto end; - } - - acpi_hw_low_level_read(8, data, &ec->common.data_addr); - if (!*data) - status = -ENODATA; + result = acpi_ec_transaction(ec, ACPI_EC_COMMAND_QUERY, NULL, 0, &d, 1); + if (result) + return result; - end: - up(&ec->intr.sem); + if (!d) + return -ENODATA; - if (ec->common.global_lock) - acpi_release_global_lock(glk); - - return status; + *data = d; + return 0; } /* -------------------------------------------------------------------------- diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 88b5dfd8ee12..2b0c955590fe 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -494,6 +494,9 @@ void acpi_pci_unregister_driver(struct acpi_pci_driver *driver); extern int ec_read(u8 addr, u8 *val); extern int ec_write(u8 addr, u8 val); +extern int ec_transaction(u8 command, + const u8 *wdata, unsigned wdata_len, + u8 *rdata, unsigned rdata_len); #endif /*CONFIG_ACPI_EC*/ -- cgit v1.2.3 From 69f0304e174c765c624d75b79c35e49b7ba67ed4 Mon Sep 17 00:00:00 2001 From: Liam Girdwood <Liam.Girdwood@com.rmk.(none)> Date: Thu, 5 Oct 2006 13:22:42 +0100 Subject: [ARM] 3888/1: add pxa27x SSP FSRT register bit definition This patch adds a register bit definition for the pxa27x SSP port Frame Sync Relative Timing (FSRT) bit. Signed-off-by: Liam Girdwood <liam.girdwood@wolfsonmicro.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> --- include/asm-arm/arch-pxa/pxa-regs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-arm/arch-pxa/pxa-regs.h b/include/asm-arm/arch-pxa/pxa-regs.h index f5cc65dd7d0d..68731e0923a4 100644 --- a/include/asm-arm/arch-pxa/pxa-regs.h +++ b/include/asm-arm/arch-pxa/pxa-regs.h @@ -1681,6 +1681,7 @@ #define SSSR_TINT (1 << 19) /* Receiver Time-out Interrupt */ #define SSSR_PINT (1 << 18) /* Peripheral Trailing Byte Interrupt */ +#define SSPSP_FSRT (1 << 25) /* Frame Sync Relative Timing */ #define SSPSP_DMYSTOP(x) (x << 23) /* Dummy Stop */ #define SSPSP_SFRMWDTH(x) (x << 16) /* Serial Frame Width */ #define SSPSP_SFRMDLY(x) (x << 9) /* Serial Frame Delay */ -- cgit v1.2.3 From 8e25b84e76a0b2c117218405818cadd591512ff8 Mon Sep 17 00:00:00 2001 From: Kristoffer Ericson <Kristoffer_e1@com.rmk.(none)> Date: Sun, 8 Oct 2006 20:00:53 +0100 Subject: [ARM] 3890/1: [Jornada7xx] Addition of MCU commands into jornada720.h This adds relevant MCU commands for the j7xx chipset. 
Signed-off-by: Kristoffer Ericson <Kristoffer_e1@hotmail.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> --- include/asm-arm/arch-sa1100/jornada720.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include') diff --git a/include/asm-arm/arch-sa1100/jornada720.h b/include/asm-arm/arch-sa1100/jornada720.h index 1b8e8a304800..3f37ca07806d 100644 --- a/include/asm-arm/arch-sa1100/jornada720.h +++ b/include/asm-arm/arch-sa1100/jornada720.h @@ -19,6 +19,20 @@ #define GPIO_JORNADA720_KEYBOARD_IRQ IRQ_GPIO0 #define GPIO_JORNADA720_MOUSE_IRQ IRQ_GPIO9 +/* MCU COMMANDS */ +#define MCU_GetBatteryData 0xc0 +#define MCU_GetScanKeyCode 0x90 +#define MCU_GetTouchSamples 0xa0 +#define MCU_GetContrast 0xD0 +#define MCU_SetContrast 0xD1 +#define MCU_GetBrightness 0xD2 +#define MCU_SetBrightness 0xD3 +#define MCU_ContrastOff 0xD8 +#define MCU_BrightnessOff 0xD9 +#define MCU_PWMOFF 0xDF +#define MCU_TxDummy 0x11 +#define MCU_ErrorCode 0x00 + #ifndef __ASSEMBLY__ void jornada720_mcu_init(void); -- cgit v1.2.3 From 645408d1ffe9f27b176a88302c22420f301607db Mon Sep 17 00:00:00 2001 From: Al Viro <viro@ftp.linux.org.uk> Date: Sat, 14 Oct 2006 16:50:38 +0100 Subject: [PATCH] gfp_t in netlabel Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/net/netlabel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 113337c27955..12c214b9eadf 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -136,7 +136,7 @@ struct netlbl_lsm_secattr { * on success, NULL on failure. * */ -static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(int flags) +static inline struct netlbl_lsm_cache *netlbl_secattr_cache_alloc(gfp_t flags) { struct netlbl_lsm_cache *cache; -- cgit v1.2.3 From cbff67668d597da48f8bc48549a9630cbf968f34 Mon Sep 17 00:00:00 2001 From: Al Viro <viro@ftp.linux.org.uk> Date: Sat, 14 Oct 2006 16:53:38 +0100 Subject: [PATCH] sun3_ioremap() prototype Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/asm-m68k/sun3mmu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-m68k/sun3mmu.h b/include/asm-m68k/sun3mmu.h index 6c8c17d047a1..d8f17a0d8c9f 100644 --- a/include/asm-m68k/sun3mmu.h +++ b/include/asm-m68k/sun3mmu.h @@ -4,6 +4,7 @@ #ifndef __SUN3_MMU_H__ #define __SUN3_MMU_H__ +#include <linux/types.h> #include <asm/movs.h> #include <asm/sun3-head.h> @@ -160,7 +161,7 @@ static inline void sun3_put_context(unsigned char c) return; } -extern void *sun3_ioremap(unsigned long phys, unsigned long size, +extern void __iomem *sun3_ioremap(unsigned long phys, unsigned long size, unsigned long type); extern int sun3_map_test(unsigned long addr, char *val); -- cgit v1.2.3 From 4663afe2c848e2abc8791202beecf40684f13eb4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet <dada1@cosmosbay.com> Date: Thu, 12 Oct 2006 21:21:06 -0700 Subject: [NET]: reduce sizeof(struct inet_peer), cleanup, change in peer_check_expire() 1) shrink struct inet_peer on 64 bits platforms. 
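The dtime change below leans on unsigned wraparound arithmetic: storing only the low 32 bits of jiffies still gives the true age under unsigned subtraction, even across a wrap, as long as the real age fits in 32 bits. A small stand-alone illustration (values invented):

  #include <stdint.h>
  #include <stdio.h>

  int main(void)
  {
      uint32_t dtime = 0xfffffff0u;  /* entry stamped just before the wrap */
      uint32_t now   = 0x00000010u;  /* jiffies after wrapping past zero */
      uint32_t delta = now - dtime;  /* modulo-2^32 subtraction: 0x20 */

      /* Same shape as the new test in cleanup_once(): prune when delta >= ttl. */
      printf("age = %u ticks\n", delta);
      return 0;
  }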
--- include/net/inetpeer.h | 17 ++--------------- net/ipv4/inetpeer.c | 29 ++++++++++++++++++++++------- 2 files changed, 24 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 925573fd2aed..f13cc0c2b163 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -19,7 +19,7 @@ struct inet_peer { struct inet_peer *avl_left, *avl_right; struct inet_peer *unused_next, **unused_prevp; - unsigned long dtime; /* the time of last use of not + __u32 dtime; /* the time of last use of not * referenced entries */ atomic_t refcnt; __be32 v4daddr; /* peer's address */ @@ -35,21 +35,8 @@ void inet_initpeers(void) __init; /* can be called with or without local BH being disabled */ struct inet_peer *inet_getpeer(__be32 daddr, int create); -extern spinlock_t inet_peer_unused_lock; -extern struct inet_peer **inet_peer_unused_tailp; /* can be called from BH context or outside */ -static inline void inet_putpeer(struct inet_peer *p) -{ - spin_lock_bh(&inet_peer_unused_lock); - if (atomic_dec_and_test(&p->refcnt)) { - p->unused_prevp = inet_peer_unused_tailp; - p->unused_next = NULL; - *inet_peer_unused_tailp = p; - inet_peer_unused_tailp = &p->unused_next; - p->dtime = jiffies; - } - spin_unlock_bh(&inet_peer_unused_lock); -} +extern void inet_putpeer(struct inet_peer *p); extern spinlock_t inet_peer_idlock; /* can be called with or without local BH being disabled */ diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index 2b1a54b59c48..f072f3875af8 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -94,10 +94,8 @@ int inet_peer_minttl = 120 * HZ; /* TTL under high load: 120 sec */ int inet_peer_maxttl = 10 * 60 * HZ; /* usual time to live: 10 min */ static struct inet_peer *inet_peer_unused_head; -/* Exported for inet_putpeer inline function. */ -struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head; -DEFINE_SPINLOCK(inet_peer_unused_lock); -#define PEER_MAX_CLEANUP_WORK 30 +static struct inet_peer **inet_peer_unused_tailp = &inet_peer_unused_head; +static DEFINE_SPINLOCK(inet_peer_unused_lock); static void peer_check_expire(unsigned long dummy); static DEFINE_TIMER(peer_periodic_timer, peer_check_expire, 0, 0); @@ -340,7 +338,8 @@ static int cleanup_once(unsigned long ttl) spin_lock_bh(&inet_peer_unused_lock); p = inet_peer_unused_head; if (p != NULL) { - if (time_after(p->dtime + ttl, jiffies)) { + __u32 delta = (__u32)jiffies - p->dtime; + if (delta < ttl) { /* Do not prune fresh entries. */ spin_unlock_bh(&inet_peer_unused_lock); return -1; @@ -432,7 +431,7 @@ out_free: /* Called with local BH disabled. */ static void peer_check_expire(unsigned long dummy) { - int i; + unsigned long now = jiffies; int ttl; if (peer_total >= inet_peer_threshold) @@ -441,7 +440,10 @@ static void peer_check_expire(unsigned long dummy) ttl = inet_peer_maxttl - (inet_peer_maxttl - inet_peer_minttl) / HZ * peer_total / inet_peer_threshold * HZ; - for (i = 0; i < PEER_MAX_CLEANUP_WORK && !cleanup_once(ttl); i++); + while (!cleanup_once(ttl)) { + if (jiffies != now) + break; + } /* Trigger the timer after inet_peer_gc_mintime .. 
inet_peer_gc_maxtime * interval depending on the total number of entries (more entries, @@ -455,3 +457,16 @@ peer_total / inet_peer_threshold * HZ; add_timer(&peer_periodic_timer); } + +void inet_putpeer(struct inet_peer *p) +{ + spin_lock_bh(&inet_peer_unused_lock); + if (atomic_dec_and_test(&p->refcnt)) { + p->unused_prevp = inet_peer_unused_tailp; + p->unused_next = NULL; + *inet_peer_unused_tailp = p; + inet_peer_unused_tailp = &p->unused_next; + p->dtime = (__u32)jiffies; + } + spin_unlock_bh(&inet_peer_unused_lock); +} -- cgit v1.2.3 From 4c67bc74f016b0d360b8573e18969c0ff7926974 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann <marcel@holtmann.org> Date: Sun, 15 Oct 2006 17:30:56 +0200 Subject: [Bluetooth] Support concurrent connect requests Most Bluetooth chips don't support concurrent connect requests, because this would involve multiple baseband pages with only one radio. In the case where an upper layer like L2CAP requests a concurrent connect, these chips return the error "Command Disallowed" for the second request. If this happens, it is the responsibility of the Bluetooth core to queue the request and try again after the previous connect attempt has been completed. Signed-off-by: Marcel Holtmann <marcel@holtmann.org> --- include/net/bluetooth/hci_core.h | 17 +++++++++++++++++ net/bluetooth/af_bluetooth.c | 2 +- net/bluetooth/hci_conn.c | 6 ++++-- net/bluetooth/hci_event.c | 15 +++++++++++---- 4 files changed, 33 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index df22efcfcc0b..c0fc39620f36 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -153,6 +153,7 @@ struct hci_conn { __u8 mode; __u8 type; __u8 out; + __u8 attempt; __u8 dev_class[3]; __u8 features[8]; __u16 interval; @@ -289,6 +290,22 @@ static inline struct hci_conn *hci_conn_hash_lookup_ba(struct hci_dev *hdev, return NULL; } +static inline struct hci_conn *hci_conn_hash_lookup_state(struct hci_dev *hdev, + __u8 type, __u16 state) +{ + struct hci_conn_hash *h = &hdev->conn_hash; + struct list_head *p; + struct hci_conn *c; + + list_for_each(p, &h->list) { + c = list_entry(p, struct hci_conn, list); + if (c->type == type && c->state == state) + return c; + } + return NULL; +} + +void hci_acl_connect(struct hci_conn *conn); void hci_acl_disconn(struct hci_conn *conn, __u8 reason); void hci_add_sco(struct hci_conn *conn, __u16 handle); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index 305a099b7477..a91fee4f2705 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -48,7 +48,7 @@ #define BT_DBG(D...) #endif -#define VERSION "2.10" +#define VERSION "2.11" /* Bluetooth sockets */ #define BT_MAX_PROTO 8 diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 90e3a285a17e..6cd5711fa28a 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -51,7 +51,7 @@ #define BT_DBG(D...) 
#endif -static void hci_acl_connect(struct hci_conn *conn) +void hci_acl_connect(struct hci_conn *conn) { struct hci_dev *hdev = conn->hdev; struct inquiry_entry *ie; @@ -63,6 +63,8 @@ static void hci_acl_connect(struct hci_conn *conn) conn->out = 1; conn->link_mode = HCI_LM_MASTER; + conn->attempt++; + memset(&cp, 0, sizeof(cp)); bacpy(&cp.bdaddr, &conn->dst); cp.pscan_rep_mode = 0x02; @@ -80,7 +82,7 @@ static void hci_acl_connect(struct hci_conn *conn) cp.role_switch = 0x01; else cp.role_switch = 0x00; - + hci_send_cmd(hdev, OGF_LINK_CTL, OCF_CREATE_CONN, sizeof(cp), &cp); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index d43d0c890975..65f094845719 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -414,9 +414,12 @@ static inline void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) if (status) { if (conn && conn->state == BT_CONNECT) { - conn->state = BT_CLOSED; - hci_proto_connect_cfm(conn, status); - hci_conn_del(conn); + if (status != 0x0c || conn->attempt > 2) { + conn->state = BT_CLOSED; + hci_proto_connect_cfm(conn, status); + hci_conn_del(conn); + } else + conn->state = BT_CONNECT2; } } else { if (!conn) { @@ -728,7 +731,7 @@ static inline void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *sk static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_conn_complete *ev = (struct hci_ev_conn_complete *) skb->data; - struct hci_conn *conn; + struct hci_conn *conn, *pend; BT_DBG("%s", hdev->name); @@ -801,6 +804,10 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (ev->status) hci_conn_del(conn); + pend = hci_conn_hash_lookup_state(hdev, ACL_LINK, BT_CONNECT2); + if (pend) + hci_acl_connect(pend); + hci_dev_unlock(hdev); } -- cgit v1.2.3 From 29da7eb0ec69245c6e9b4eb5bdaa04af685f5c4f Mon Sep 17 00:00:00 2001 From: David Howells <dhowells@redhat.com> Date: Mon, 16 Oct 2006 14:10:49 +0100 Subject: [PATCH] FRV: Use the correct preemption primitives in kmap_atomic() and co Use inc/dec_preempt_count() rather than preempt_enable/disable() and manually add in the compiler barriers that were provided by the latter. This makes FRV consistent with other archs. Furthermore, the compiler barrier effects are now there unconditionally - at least as far as preemption is concerned - because we don't want the compiler moving memory accesses out of the section of code in which the mapping is in force - in effect the kmap_atomic() must imply a LOCK-class barrier and the kunmap_atomic() must imply an UNLOCK-class barrier to the compiler. 
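For readers who want the ordering effect in isolation: the kernel's generic barrier() is an empty asm carrying only a "memory" clobber, which is exactly the property this patch adds to the FRV mapping asm. A minimal sketch:

  /* A pure compiler barrier: emits no instruction, but the "memory" clobber
   * stops GCC from caching memory values across it or moving loads and
   * stores past it; that is the compiler half of the LOCK/UNLOCK-class
   * behaviour described above. */
  #define barrier() __asm__ __volatile__("" : : : "memory")

  static int shared;

  static int demo(void)
  {
      shared = 1;     /* must be emitted before the barrier */
      barrier();
      return shared;  /* must be re-read from memory afterwards */
  }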
Signed-off-by: David Howells <dhowells@redhat.com> Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/asm-frv/highmem.h | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/asm-frv/highmem.h b/include/asm-frv/highmem.h index e2247c22a638..0f390f41f816 100644 --- a/include/asm-frv/highmem.h +++ b/include/asm-frv/highmem.h @@ -82,11 +82,11 @@ extern struct page *kmap_atomic_to_page(void *ptr); dampr = paddr | xAMPRx_L | xAMPRx_M | xAMPRx_S | xAMPRx_SS_16Kb | xAMPRx_V; \ \ if (type != __KM_CACHE) \ - asm volatile("movgs %0,dampr"#ampr :: "r"(dampr)); \ + asm volatile("movgs %0,dampr"#ampr :: "r"(dampr) : "memory"); \ else \ asm volatile("movgs %0,iampr"#ampr"\n" \ "movgs %0,dampr"#ampr"\n" \ - :: "r"(dampr) \ + :: "r"(dampr) : "memory" \ ); \ \ asm("movsg damlr"#ampr",%0" : "=r"(damlr)); \ @@ -104,7 +104,7 @@ extern struct page *kmap_atomic_to_page(void *ptr); asm volatile("movgs %0,tplr \n" \ "movgs %1,tppr \n" \ "tlbpr %0,gr0,#2,#1" \ - : : "r"(damlr), "r"(dampr)); \ + : : "r"(damlr), "r"(dampr) : "memory"); \ \ /*printk("TLB: SECN sl=%d L=%08lx P=%08lx\n", slot, damlr, dampr);*/ \ \ @@ -115,7 +115,7 @@ static inline void *kmap_atomic(struct page *page, enum km_type type) { unsigned long paddr; - preempt_disable(); + inc_preempt_count(); paddr = page_to_phys(page); switch (type) { @@ -138,16 +138,16 @@ static inline void *kmap_atomic(struct page *page, enum km_type type) } } -#define __kunmap_atomic_primary(type, ampr) \ -do { \ - asm volatile("movgs gr0,dampr"#ampr"\n"); \ - if (type == __KM_CACHE) \ - asm volatile("movgs gr0,iampr"#ampr"\n"); \ +#define __kunmap_atomic_primary(type, ampr) \ +do { \ + asm volatile("movgs gr0,dampr"#ampr"\n" ::: "memory"); \ + if (type == __KM_CACHE) \ + asm volatile("movgs gr0,iampr"#ampr"\n" ::: "memory"); \ } while(0) -#define __kunmap_atomic_secondary(slot, vaddr) \ -do { \ - asm volatile("tlbpr %0,gr0,#4,#1" : : "r"(vaddr)); \ +#define __kunmap_atomic_secondary(slot, vaddr) \ +do { \ + asm volatile("tlbpr %0,gr0,#4,#1" : : "r"(vaddr) : "memory"); \ } while(0) static inline void kunmap_atomic(void *kvaddr, enum km_type type) @@ -170,7 +170,8 @@ static inline void kunmap_atomic(void *kvaddr, enum km_type type) default: BUG(); } - preempt_enable(); + dec_preempt_count(); + preempt_check_resched(); } #endif /* !__ASSEMBLY__ */ -- cgit v1.2.3 From 7c28ad2d83ecc637237fe684659a6afbce0bb2a8 Mon Sep 17 00:00:00 2001 From: Michael Buesch <mb@bu3sch.de> Date: Wed, 27 Sep 2006 15:26:33 +0300 Subject: [PATCH] softmac: Fix WX and association related races This fixes some race conditions in the WirelessExtension handling and association handling code. Signed-off-by: Michael Buesch <mb@bu3sch.de> Signed-off-by: John W. 
Linville <linville@tuxdriver.com> --- drivers/net/wireless/bcm43xx/bcm43xx_leds.c | 2 +- drivers/net/wireless/bcm43xx/bcm43xx_wx.c | 2 +- include/net/ieee80211softmac.h | 35 ++++++------ net/ieee80211/softmac/ieee80211softmac_assoc.c | 56 ++++++++++--------- net/ieee80211/softmac/ieee80211softmac_io.c | 9 +++- net/ieee80211/softmac/ieee80211softmac_module.c | 1 + net/ieee80211/softmac/ieee80211softmac_wx.c | 71 +++++++++++++++---------- 7 files changed, 97 insertions(+), 79 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_leds.c b/drivers/net/wireless/bcm43xx/bcm43xx_leds.c index c3f90c8563d9..2ddbec6bf15b 100644 --- a/drivers/net/wireless/bcm43xx/bcm43xx_leds.c +++ b/drivers/net/wireless/bcm43xx/bcm43xx_leds.c @@ -242,7 +242,7 @@ void bcm43xx_leds_update(struct bcm43xx_private *bcm, int activity) //TODO break; case BCM43xx_LED_ASSOC: - if (bcm->softmac->associated) + if (bcm->softmac->associnfo.associated) turn_on = 1; break; #ifdef CONFIG_BCM43XX_DEBUG diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_wx.c b/drivers/net/wireless/bcm43xx/bcm43xx_wx.c index 9b7b15cf6561..d27016f8c736 100644 --- a/drivers/net/wireless/bcm43xx/bcm43xx_wx.c +++ b/drivers/net/wireless/bcm43xx/bcm43xx_wx.c @@ -847,7 +847,7 @@ static struct iw_statistics *bcm43xx_get_wireless_stats(struct net_device *net_d unsigned long flags; wstats = &bcm->stats.wstats; - if (!mac->associated) { + if (!mac->associnfo.associated) { wstats->miss.beacon = 0; // bcm->ieee->ieee_stats.tx_retry_limit_exceeded = 0; // FIXME: should this be cleared here? wstats->discard.retries = 0; diff --git a/include/net/ieee80211softmac.h b/include/net/ieee80211softmac.h index 425b3a57ac74..617b672b1132 100644 --- a/include/net/ieee80211softmac.h +++ b/include/net/ieee80211softmac.h @@ -63,13 +63,11 @@ struct ieee80211softmac_wpa { /* * Information about association - * - * Do we need a lock for this? - * We only ever use this structure inlined - * into our global struct. I've used its lock, - * but maybe we need a local one here? */ struct ieee80211softmac_assoc_info { + + struct mutex mutex; + /* * This is the requested ESSID. It is written * only by the WX handlers. @@ -99,12 +97,13 @@ struct ieee80211softmac_assoc_info { * * bssfixed is used for SIOCSIWAP. */ - u8 static_essid:1, - short_preamble_available:1, - associating:1, - assoc_wait:1, - bssvalid:1, - bssfixed:1; + u8 static_essid; + u8 short_preamble_available; + u8 associating; + u8 associated; + u8 assoc_wait; + u8 bssvalid; + u8 bssfixed; /* Scan retries remaining */ int scan_retry; @@ -229,12 +228,10 @@ struct ieee80211softmac_device { /* private stuff follows */ /* this lock protects this structure */ spinlock_t lock; - - /* couple of flags */ - u8 scanning:1, /* protects scanning from being done multiple times at once */ - associated:1, - running:1; - + + u8 running; /* SoftMAC started? 
*/ + u8 scanning; + struct ieee80211softmac_scaninfo *scaninfo; struct ieee80211softmac_assoc_info associnfo; struct ieee80211softmac_bss_info bssinfo; @@ -250,7 +247,7 @@ struct ieee80211softmac_device { /* we need to keep a list of network structs we copied */ struct list_head network_list; - + /* This must be the last item so that it points to the data * allocated beyond this structure by alloc_ieee80211 */ u8 priv[0]; @@ -295,7 +292,7 @@ static inline u8 ieee80211softmac_suggest_txrate(struct ieee80211softmac_device { struct ieee80211softmac_txrates *txrates = &mac->txrates; - if (!mac->associated) + if (!mac->associnfo.associated) return txrates->mgt_mcast_rate; /* We are associated, sending unicast frame */ diff --git a/net/ieee80211/softmac/ieee80211softmac_assoc.c b/net/ieee80211/softmac/ieee80211softmac_assoc.c index 589f6d2c548a..cf51c87a971d 100644 --- a/net/ieee80211/softmac/ieee80211softmac_assoc.c +++ b/net/ieee80211/softmac/ieee80211softmac_assoc.c @@ -48,7 +48,7 @@ ieee80211softmac_assoc(struct ieee80211softmac_device *mac, struct ieee80211soft dprintk(KERN_INFO PFX "sent association request!\n"); spin_lock_irqsave(&mac->lock, flags); - mac->associated = 0; /* just to make sure */ + mac->associnfo.associated = 0; /* just to make sure */ /* Set a timer for timeout */ /* FIXME: make timeout configurable */ @@ -62,24 +62,22 @@ ieee80211softmac_assoc_timeout(void *d) { struct ieee80211softmac_device *mac = (struct ieee80211softmac_device *)d; struct ieee80211softmac_network *n; - unsigned long flags; - spin_lock_irqsave(&mac->lock, flags); + mutex_lock(&mac->associnfo.mutex); /* we might race against ieee80211softmac_handle_assoc_response, * so make sure only one of us does something */ - if (!mac->associnfo.associating) { - spin_unlock_irqrestore(&mac->lock, flags); - return; - } + if (!mac->associnfo.associating) + goto out; mac->associnfo.associating = 0; mac->associnfo.bssvalid = 0; - mac->associated = 0; + mac->associnfo.associated = 0; n = ieee80211softmac_get_network_by_bssid_locked(mac, mac->associnfo.bssid); - spin_unlock_irqrestore(&mac->lock, flags); dprintk(KERN_INFO PFX "assoc request timed out!\n"); ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_TIMEOUT, n); +out: + mutex_unlock(&mac->associnfo.mutex); } void @@ -93,7 +91,7 @@ ieee80211softmac_disassoc(struct ieee80211softmac_device *mac) netif_carrier_off(mac->dev); - mac->associated = 0; + mac->associnfo.associated = 0; mac->associnfo.bssvalid = 0; mac->associnfo.associating = 0; ieee80211softmac_init_bss(mac); @@ -107,7 +105,7 @@ ieee80211softmac_send_disassoc_req(struct ieee80211softmac_device *mac, u16 reas { struct ieee80211softmac_network *found; - if (mac->associnfo.bssvalid && mac->associated) { + if (mac->associnfo.bssvalid && mac->associnfo.associated) { found = ieee80211softmac_get_network_by_bssid(mac, mac->associnfo.bssid); if (found) ieee80211softmac_send_mgt_frame(mac, found, IEEE80211_STYPE_DISASSOC, reason); @@ -196,17 +194,18 @@ ieee80211softmac_assoc_work(void *d) int bssvalid; unsigned long flags; + mutex_lock(&mac->associnfo.mutex); + + if (!mac->associnfo.associating) + goto out; + /* ieee80211_disassoc might clear this */ bssvalid = mac->associnfo.bssvalid; /* meh */ - if (mac->associated) + if (mac->associnfo.associated) ieee80211softmac_send_disassoc_req(mac, WLAN_REASON_DISASSOC_STA_HAS_LEFT); - spin_lock_irqsave(&mac->lock, flags); - mac->associnfo.associating = 1; - spin_unlock_irqrestore(&mac->lock, flags); - /* try to find the requested network in our list, if we 
found one already */ if (bssvalid || mac->associnfo.bssfixed) found = ieee80211softmac_get_network_by_bssid(mac, mac->associnfo.bssid); @@ -260,10 +259,8 @@ ieee80211softmac_assoc_work(void *d) if (!found) { if (mac->associnfo.scan_retry > 0) { - spin_lock_irqsave(&mac->lock, flags); mac->associnfo.scan_retry--; - spin_unlock_irqrestore(&mac->lock, flags); - + /* We know of no such network. Let's scan. * NB: this also happens if we had no memory to copy the network info... * Maybe we can hope to have more memory after scanning finishes ;) @@ -272,19 +269,17 @@ ieee80211softmac_assoc_work(void *d) ieee80211softmac_notify(mac->dev, IEEE80211SOFTMAC_EVENT_SCAN_FINISHED, ieee80211softmac_assoc_notify_scan, NULL); if (ieee80211softmac_start_scan(mac)) dprintk(KERN_INFO PFX "Associate: failed to initiate scan. Is device up?\n"); - return; + goto out; } else { - spin_lock_irqsave(&mac->lock, flags); mac->associnfo.associating = 0; - mac->associated = 0; - spin_unlock_irqrestore(&mac->lock, flags); + mac->associnfo.associated = 0; dprintk(KERN_INFO PFX "Unable to find matching network after scan!\n"); /* reset the retry counter for the next user request since we * break out and don't reschedule ourselves after this point. */ mac->associnfo.scan_retry = IEEE80211SOFTMAC_ASSOC_SCAN_RETRY_LIMIT; ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_NET_NOT_FOUND, NULL); - return; + goto out; } } @@ -297,7 +292,7 @@ ieee80211softmac_assoc_work(void *d) /* copy the ESSID for displaying it */ mac->associnfo.associate_essid.len = found->essid.len; memcpy(mac->associnfo.associate_essid.data, found->essid.data, IW_ESSID_MAX_SIZE + 1); - + /* we found a network! authenticate (if necessary) and associate to it. */ if (found->authenticating) { dprintk(KERN_INFO PFX "Already requested authentication, waiting...\n"); @@ -305,7 +300,7 @@ ieee80211softmac_assoc_work(void *d) mac->associnfo.assoc_wait = 1; ieee80211softmac_notify_internal(mac, IEEE80211SOFTMAC_EVENT_ANY, found, ieee80211softmac_assoc_notify_auth, NULL, GFP_KERNEL); } - return; + goto out; } if (!found->authenticated && !found->authenticating) { /* This relies on the fact that _auth_req only queues the work, @@ -321,11 +316,14 @@ ieee80211softmac_assoc_work(void *d) mac->associnfo.assoc_wait = 0; ieee80211softmac_call_events(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_FAILED, found); } - return; + goto out; } /* finally! 
now we can start associating */ mac->associnfo.assoc_wait = 0; ieee80211softmac_assoc(mac, found); + +out: + mutex_unlock(&mac->associnfo.mutex); } /* call this to do whatever is necessary when we're associated */ @@ -341,7 +339,7 @@ ieee80211softmac_associated(struct ieee80211softmac_device *mac, mac->bssinfo.supported_rates = net->supported_rates; ieee80211softmac_recalc_txrates(mac); - mac->associated = 1; + mac->associnfo.associated = 1; mac->associnfo.short_preamble_available = (cap & WLAN_CAPABILITY_SHORT_PREAMBLE) != 0; @@ -421,7 +419,7 @@ ieee80211softmac_handle_assoc_response(struct net_device * dev, dprintk(KERN_INFO PFX "associating failed (reason: 0x%x)!\n", status); mac->associnfo.associating = 0; mac->associnfo.bssvalid = 0; - mac->associated = 0; + mac->associnfo.associated = 0; ieee80211softmac_call_events_locked(mac, IEEE80211SOFTMAC_EVENT_ASSOCIATE_FAILED, network); } diff --git a/net/ieee80211/softmac/ieee80211softmac_io.c b/net/ieee80211/softmac/ieee80211softmac_io.c index 82bfddbf33a2..e8419cfb058f 100644 --- a/net/ieee80211/softmac/ieee80211softmac_io.c +++ b/net/ieee80211/softmac/ieee80211softmac_io.c @@ -475,8 +475,13 @@ int ieee80211softmac_handle_beacon(struct net_device *dev, { struct ieee80211softmac_device *mac = ieee80211_priv(dev); - if (mac->associated && memcmp(network->bssid, mac->associnfo.bssid, ETH_ALEN) == 0) - ieee80211softmac_process_erp(mac, network->erp_value); + /* This might race, but we don't really care and it's not worth + * adding heavyweight locking in this fastpath. + */ + if (mac->associnfo.associated) { + if (memcmp(network->bssid, mac->associnfo.bssid, ETH_ALEN) == 0) + ieee80211softmac_process_erp(mac, network->erp_value); + } return 0; } diff --git a/net/ieee80211/softmac/ieee80211softmac_module.c b/net/ieee80211/softmac/ieee80211softmac_module.c index addea1cf73ae..33aff4f4a471 100644 --- a/net/ieee80211/softmac/ieee80211softmac_module.c +++ b/net/ieee80211/softmac/ieee80211softmac_module.c @@ -57,6 +57,7 @@ struct net_device *alloc_ieee80211softmac(int sizeof_priv) INIT_LIST_HEAD(&softmac->network_list); INIT_LIST_HEAD(&softmac->events); + mutex_init(&softmac->associnfo.mutex); INIT_WORK(&softmac->associnfo.work, ieee80211softmac_assoc_work, softmac); INIT_WORK(&softmac->associnfo.timeout, ieee80211softmac_assoc_timeout, softmac); softmac->start_scan = ieee80211softmac_start_scan_implementation; diff --git a/net/ieee80211/softmac/ieee80211softmac_wx.c b/net/ieee80211/softmac/ieee80211softmac_wx.c index 2aa779d18f38..23068a830f7d 100644 --- a/net/ieee80211/softmac/ieee80211softmac_wx.c +++ b/net/ieee80211/softmac/ieee80211softmac_wx.c @@ -73,13 +73,14 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev, struct ieee80211softmac_network *n; struct ieee80211softmac_auth_queue_item *authptr; int length = 0; - unsigned long flags; + + mutex_lock(&sm->associnfo.mutex); /* Check if we're already associating to this or another network * If it's another network, cancel and start over with our new network * If it's our network, ignore the change, we're already doing it! 
*/ - if((sm->associnfo.associating || sm->associated) && + if((sm->associnfo.associating || sm->associnfo.associated) && (data->essid.flags && data->essid.length)) { /* Get the associating network */ n = ieee80211softmac_get_network_by_bssid(sm, sm->associnfo.bssid); @@ -87,10 +88,9 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev, !memcmp(n->essid.data, extra, n->essid.len)) { dprintk(KERN_INFO PFX "Already associating or associated to "MAC_FMT"\n", MAC_ARG(sm->associnfo.bssid)); - return 0; + goto out; } else { dprintk(KERN_INFO PFX "Canceling existing associate request!\n"); - spin_lock_irqsave(&sm->lock,flags); /* Cancel assoc work */ cancel_delayed_work(&sm->associnfo.work); /* We don't have to do this, but it's a little cleaner */ @@ -98,14 +98,13 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev, cancel_delayed_work(&authptr->work); sm->associnfo.bssvalid = 0; sm->associnfo.bssfixed = 0; - spin_unlock_irqrestore(&sm->lock,flags); flush_scheduled_work(); + sm->associnfo.associating = 0; + sm->associnfo.associated = 0; } } - spin_lock_irqsave(&sm->lock, flags); - sm->associnfo.static_essid = 0; sm->associnfo.assoc_wait = 0; @@ -121,10 +120,12 @@ ieee80211softmac_wx_set_essid(struct net_device *net_dev, * If applicable, we have already copied the data in */ sm->associnfo.req_essid.len = length; + sm->associnfo.associating = 1; /* queue lower level code to do work (if necessary) */ schedule_work(&sm->associnfo.work); +out: + mutex_unlock(&sm->associnfo.mutex); - spin_unlock_irqrestore(&sm->lock, flags); return 0; } EXPORT_SYMBOL_GPL(ieee80211softmac_wx_set_essid); @@ -136,10 +137,8 @@ ieee80211softmac_wx_get_essid(struct net_device *net_dev, char *extra) { struct ieee80211softmac_device *sm = ieee80211_priv(net_dev); - unsigned long flags; - /* avoid getting inconsistent information */ - spin_lock_irqsave(&sm->lock, flags); + mutex_lock(&sm->associnfo.mutex); /* If all fails, return ANY (empty) */ data->essid.length = 0; data->essid.flags = 0; /* active */ @@ -152,12 +151,13 @@ ieee80211softmac_wx_get_essid(struct net_device *net_dev, } /* If we're associating/associated, return that */ - if (sm->associated || sm->associnfo.associating) { + if (sm->associnfo.associated || sm->associnfo.associating) { data->essid.length = sm->associnfo.associate_essid.len; data->essid.flags = 1; /* active */ memcpy(extra, sm->associnfo.associate_essid.data, sm->associnfo.associate_essid.len); } - spin_unlock_irqrestore(&sm->lock, flags); + mutex_unlock(&sm->associnfo.mutex); + return 0; } EXPORT_SYMBOL_GPL(ieee80211softmac_wx_get_essid); @@ -322,15 +322,15 @@ ieee80211softmac_wx_get_wap(struct net_device *net_dev, { struct ieee80211softmac_device *mac = ieee80211_priv(net_dev); int err = 0; - unsigned long flags; - spin_lock_irqsave(&mac->lock, flags); + mutex_lock(&mac->associnfo.mutex); if (mac->associnfo.bssvalid) memcpy(data->ap_addr.sa_data, mac->associnfo.bssid, ETH_ALEN); else memset(data->ap_addr.sa_data, 0xff, ETH_ALEN); data->ap_addr.sa_family = ARPHRD_ETHER; - spin_unlock_irqrestore(&mac->lock, flags); + mutex_unlock(&mac->associnfo.mutex); + return err; } EXPORT_SYMBOL_GPL(ieee80211softmac_wx_get_wap); @@ -342,28 +342,27 @@ ieee80211softmac_wx_set_wap(struct net_device *net_dev, char *extra) { struct ieee80211softmac_device *mac = ieee80211_priv(net_dev); - unsigned long flags; /* sanity check */ if (data->ap_addr.sa_family != ARPHRD_ETHER) { return -EINVAL; } - spin_lock_irqsave(&mac->lock, flags); + mutex_lock(&mac->associnfo.mutex); if 
(is_broadcast_ether_addr(data->ap_addr.sa_data)) { /* the bssid we have is not to be fixed any longer, * and we should reassociate to the best AP. */ mac->associnfo.bssfixed = 0; /* force reassociation */ mac->associnfo.bssvalid = 0; - if (mac->associated) + if (mac->associnfo.associated) schedule_work(&mac->associnfo.work); } else if (is_zero_ether_addr(data->ap_addr.sa_data)) { /* the bssid we have is no longer fixed */ mac->associnfo.bssfixed = 0; } else { if (!memcmp(mac->associnfo.bssid, data->ap_addr.sa_data, ETH_ALEN)) { - if (mac->associnfo.associating || mac->associated) { + if (mac->associnfo.associating || mac->associnfo.associated) { /* bssid unchanged and associated or associating - just return */ goto out; } @@ -378,7 +377,8 @@ ieee80211softmac_wx_set_wap(struct net_device *net_dev, } out: - spin_unlock_irqrestore(&mac->lock, flags); + mutex_unlock(&mac->associnfo.mutex); + return 0; } EXPORT_SYMBOL_GPL(ieee80211softmac_wx_set_wap); @@ -394,7 +394,8 @@ ieee80211softmac_wx_set_genie(struct net_device *dev, int err = 0; char *buf; int i; - + + mutex_lock(&mac->associnfo.mutex); spin_lock_irqsave(&mac->lock, flags); /* bleh. shouldn't be locked for that kmalloc... */ @@ -432,6 +433,8 @@ ieee80211softmac_wx_set_genie(struct net_device *dev, out: spin_unlock_irqrestore(&mac->lock, flags); + mutex_unlock(&mac->associnfo.mutex); + return err; } EXPORT_SYMBOL_GPL(ieee80211softmac_wx_set_genie); @@ -446,7 +449,8 @@ ieee80211softmac_wx_get_genie(struct net_device *dev, unsigned long flags; int err = 0; int space = wrqu->data.length; - + + mutex_lock(&mac->associnfo.mutex); spin_lock_irqsave(&mac->lock, flags); wrqu->data.length = 0; @@ -459,6 +463,8 @@ ieee80211softmac_wx_get_genie(struct net_device *dev, err = -E2BIG; } spin_unlock_irqrestore(&mac->lock, flags); + mutex_lock(&mac->associnfo.mutex); + return err; } EXPORT_SYMBOL_GPL(ieee80211softmac_wx_get_genie); @@ -473,10 +479,13 @@ ieee80211softmac_wx_set_mlme(struct net_device *dev, struct iw_mlme *mlme = (struct iw_mlme *)extra; u16 reason = cpu_to_le16(mlme->reason_code); struct ieee80211softmac_network *net; + int err = -EINVAL; + + mutex_lock(&mac->associnfo.mutex); if (memcmp(mac->associnfo.bssid, mlme->addr.sa_data, ETH_ALEN)) { printk(KERN_DEBUG PFX "wx_set_mlme: requested operation on net we don't use\n"); - return -EINVAL; + goto out; } switch (mlme->cmd) { @@ -484,14 +493,22 @@ ieee80211softmac_wx_set_mlme(struct net_device *dev, net = ieee80211softmac_get_network_by_bssid_locked(mac, mlme->addr.sa_data); if (!net) { printk(KERN_DEBUG PFX "wx_set_mlme: we should know the net here...\n"); - return -EINVAL; + goto out; } return ieee80211softmac_deauth_req(mac, net, reason); case IW_MLME_DISASSOC: ieee80211softmac_send_disassoc_req(mac, reason); - return 0; + mac->associnfo.associated = 0; + mac->associnfo.associating = 0; + err = 0; + goto out; default: - return -EOPNOTSUPP; + err = -EOPNOTSUPP; } + +out: + mutex_unlock(&mac->associnfo.mutex); + + return err; } EXPORT_SYMBOL_GPL(ieee80211softmac_wx_set_mlme); -- cgit v1.2.3 From aaa248f6c9c81b2683db7dbb0689cd5ed1c86d88 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger <shemminger@osdl.org> Date: Tue, 17 Oct 2006 00:09:42 -0700 Subject: [PATCH] rename net_random to random32 Make net_random() more widely available by calling it random32 akpm: hopefully this will permit the removal of carta_random32. That needs confirmation from Stephane - this code looks somewhat more computationally expensive, and has a different (ie: callee-stateful) interface. 
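As background for the generator being moved into lib/random32.c below, here is a self-contained sketch of the same combined Tausworthe step (the seeds must satisfy s1 > 1, s2 > 7, s3 > 15, per the comment in the file; the minimal values here are chosen only for illustration):

  #include <stdint.h>
  #include <stdio.h>

  static uint32_t s1 = 2, s2 = 8, s3 = 16;  /* minimal valid seeds */

  /* One step of the combined generator, mirroring the TAUSWORTHE macro. */
  static uint32_t taus88(void)
  {
      s1 = ((s1 & 4294967294u) << 12) ^ (((s1 << 13) ^ s1) >> 19);
      s2 = ((s2 & 4294967288u) <<  4) ^ (((s2 <<  2) ^ s2) >> 25);
      s3 = ((s3 & 4294967280u) << 17) ^ (((s3 <<  3) ^ s3) >> 11);
      return s1 ^ s2 ^ s3;
  }

  int main(void)
  {
      for (int i = 0; i < 5; i++)
          printf("%08x\n", taus88());
      return 0;
  }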
[akpm@osdl.org: lots of build fixes, cleanups] Signed-off-by: Stephen Hemminger <shemminger@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net> Cc: Stephane Eranian <eranian@hpl.hp.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/net.h | 7 +-- include/linux/random.h | 3 ++ lib/Makefile | 2 +- lib/random32.c | 142 +++++++++++++++++++++++++++++++++++++++++++++++++ net/core/dev.c | 2 - net/core/utils.c | 116 ---------------------------------------- 6 files changed, 150 insertions(+), 122 deletions(-) create mode 100644 lib/random32.c (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index c257f716e00f..15c733b816f0 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -19,6 +19,7 @@ #define _LINUX_NET_H #include <linux/wait.h> +#include <linux/random.h> #include <asm/socket.h> struct poll_table_struct; @@ -193,9 +194,9 @@ extern int sock_map_fd(struct socket *sock); extern struct socket *sockfd_lookup(int fd, int *err); #define sockfd_put(sock) fput(sock->file) extern int net_ratelimit(void); -extern unsigned long net_random(void); -extern void net_srandom(unsigned long); -extern void net_random_init(void); + +#define net_random() random32() +#define net_srandom(seed) srandom32(seed) extern int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); diff --git a/include/linux/random.h b/include/linux/random.h index 5d6456bcdeba..0248b30e306d 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -69,6 +69,9 @@ extern struct file_operations random_fops, urandom_fops; unsigned int get_random_int(void); unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len); +u32 random32(void); +void srandom32(u32 seed); + #endif /* __KERNEL___ */ #endif /* _LINUX_RANDOM_H */ diff --git a/lib/Makefile b/lib/Makefile index 59070dbfbeb4..4b8052f6a7b0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -12,7 +12,7 @@ lib-$(CONFIG_SMP) += cpumask.o lib-y += kobject.o kref.o kobject_uevent.o klist.o -obj-y += sort.o parser.o halfmd4.o iomap_copy.o debug_locks.o +obj-y += sort.o parser.o halfmd4.o iomap_copy.o debug_locks.o random32.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/random32.c b/lib/random32.c new file mode 100644 index 000000000000..4a15ce51cea7 --- /dev/null +++ b/lib/random32.c @@ -0,0 +1,142 @@ +/* + This is a maximally equidistributed combined Tausworthe generator + based on code from GNU Scientific Library 1.5 (30 Jun 2004) + + x_n = (s1_n ^ s2_n ^ s3_n) + + s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19)) + s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25)) + s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11)) + + The period of this generator is about 2^88. + + From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe + Generators", Mathematics of Computation, 65, 213 (1996), 203--213. + + This is available on the net from L'Ecuyer's home page, + + http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps + ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps + + There is an erratum in the paper "Tables of Maximally + Equidistributed Combined LFSR Generators", Mathematics of + Computation, 68, 225 (1999), 261--269: + http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps + + ... the k_j most significant bits of z_j must be non- + zero, for each j. 
(Note: this restriction also applies to the + computer code given in [4], but was mistakenly not mentioned in + that paper.) + + This affects the seeding procedure by imposing the requirement + s1 > 1, s2 > 7, s3 > 15. + +*/ + +#include <linux/types.h> +#include <linux/percpu.h> +#include <linux/module.h> +#include <linux/random.h> + +struct rnd_state { + u32 s1, s2, s3; +}; + +static DEFINE_PER_CPU(struct rnd_state, net_rand_state); + +static u32 __random32(struct rnd_state *state) +{ +#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) + + state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12); + state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4); + state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17); + + return (state->s1 ^ state->s2 ^ state->s3); +} + +static void __set_random32(struct rnd_state *state, unsigned long s) +{ + if (s == 0) + s = 1; /* default seed is 1 */ + +#define LCG(n) (69069 * n) + state->s1 = LCG(s); + state->s2 = LCG(state->s1); + state->s3 = LCG(state->s2); + + /* "warm it up" */ + __random32(state); + __random32(state); + __random32(state); + __random32(state); + __random32(state); + __random32(state); +} + +/** + * random32 - pseudo random number generator + * + * A 32 bit pseudo-random number is generated using a fast + * algorithm suitable for simulation. This algorithm is NOT + * considered safe for cryptographic use. + */ +u32 random32(void) +{ + unsigned long r; + struct rnd_state *state = &get_cpu_var(net_rand_state); + r = __random32(state); + put_cpu_var(state); + return r; +} +EXPORT_SYMBOL(random32); + +/** + * srandom32 - add entropy to pseudo random number generator + * @seed: seed value + * + * Add some additional seeding to the random32() pool. + * Note: this pool is per cpu so it only affects current CPU. + */ +void srandom32(u32 entropy) +{ + struct rnd_state *state = &get_cpu_var(net_rand_state); + __set_random32(state, state->s1 ^ entropy); + put_cpu_var(state); +} +EXPORT_SYMBOL(srandom32); + +/* + * Generate some initially weak seeding values to allow + * to start the random32() engine. + */ +static int __init random32_init(void) +{ + int i; + + for_each_possible_cpu(i) { + struct rnd_state *state = &per_cpu(net_rand_state,i); + __set_random32(state, i + jiffies); + } + return 0; +} +core_initcall(random32_init); + +/* + * Generate better values after random number generator + * is fully initalized. 
+ */ +static int __init random32_reseed(void) +{ + int i; + unsigned long seed; + + for_each_possible_cpu(i) { + struct rnd_state *state = &per_cpu(net_rand_state,i); + + get_random_bytes(&seed, sizeof(seed)); + __set_random32(state, seed); + } + return 0; +} +late_initcall(random32_reseed); diff --git a/net/core/dev.c b/net/core/dev.c index 4d891beab138..81c426adcd1e 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3502,8 +3502,6 @@ static int __init net_dev_init(void) BUG_ON(!dev_boot_phase); - net_random_init(); - if (dev_proc_init()) goto out; diff --git a/net/core/utils.c b/net/core/utils.c index 94c5d761c830..d93fe64f6693 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -30,119 +30,6 @@ #include <asm/system.h> #include <asm/uaccess.h> -/* - This is a maximally equidistributed combined Tausworthe generator - based on code from GNU Scientific Library 1.5 (30 Jun 2004) - - x_n = (s1_n ^ s2_n ^ s3_n) - - s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19)) - s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25)) - s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11)) - - The period of this generator is about 2^88. - - From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe - Generators", Mathematics of Computation, 65, 213 (1996), 203--213. - - This is available on the net from L'Ecuyer's home page, - - http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps - ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps - - There is an erratum in the paper "Tables of Maximally - Equidistributed Combined LFSR Generators", Mathematics of - Computation, 68, 225 (1999), 261--269: - http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps - - ... the k_j most significant bits of z_j must be non- - zero, for each j. (Note: this restriction also applies to the - computer code given in [4], but was mistakenly not mentioned in - that paper.) - - This affects the seeding procedure by imposing the requirement - s1 > 1, s2 > 7, s3 > 15. 
- -*/ -struct nrnd_state { - u32 s1, s2, s3; -}; - -static DEFINE_PER_CPU(struct nrnd_state, net_rand_state); - -static u32 __net_random(struct nrnd_state *state) -{ -#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) - - state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12); - state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4); - state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17); - - return (state->s1 ^ state->s2 ^ state->s3); -} - -static void __net_srandom(struct nrnd_state *state, unsigned long s) -{ - if (s == 0) - s = 1; /* default seed is 1 */ - -#define LCG(n) (69069 * n) - state->s1 = LCG(s); - state->s2 = LCG(state->s1); - state->s3 = LCG(state->s2); - - /* "warm it up" */ - __net_random(state); - __net_random(state); - __net_random(state); - __net_random(state); - __net_random(state); - __net_random(state); -} - - -unsigned long net_random(void) -{ - unsigned long r; - struct nrnd_state *state = &get_cpu_var(net_rand_state); - r = __net_random(state); - put_cpu_var(state); - return r; -} - - -void net_srandom(unsigned long entropy) -{ - struct nrnd_state *state = &get_cpu_var(net_rand_state); - __net_srandom(state, state->s1^entropy); - put_cpu_var(state); -} - -void __init net_random_init(void) -{ - int i; - - for_each_possible_cpu(i) { - struct nrnd_state *state = &per_cpu(net_rand_state,i); - __net_srandom(state, i+jiffies); - } -} - -static int net_random_reseed(void) -{ - int i; - unsigned long seed; - - for_each_possible_cpu(i) { - struct nrnd_state *state = &per_cpu(net_rand_state,i); - - get_random_bytes(&seed, sizeof(seed)); - __net_srandom(state, seed); - } - return 0; -} -late_initcall(net_random_reseed); - int net_msg_cost = 5*HZ; int net_msg_burst = 10; @@ -153,10 +40,7 @@ int net_ratelimit(void) { return __printk_ratelimit(net_msg_cost, net_msg_burst); } - -EXPORT_SYMBOL(net_random); EXPORT_SYMBOL(net_ratelimit); -EXPORT_SYMBOL(net_srandom); /* * Convert an ASCII string to binary IP. -- cgit v1.2.3 From a460e745e8f9c75a0525ff94154a0629f9d3e05d Mon Sep 17 00:00:00 2001 From: Ingo Molnar <mingo@elte.hu> Date: Tue, 17 Oct 2006 00:10:03 -0700 Subject: [PATCH] genirq: clean up irq-flow-type naming Introduce desc->name and eliminate the handle_irq_name() hack. Add set_irq_chip_and_handler_name() to set the flow type and name at once. Signed-off-by: Ingo Molnar <mingo@elte.hu> Acked-by: Thomas Gleixner <tglx@linutronix.de> Cc: "Eric W. 
Biederman" <ebiederm@xmission.com> Cc: Matthew Wilcox <willy@debian.org> Cc: Kyle McMartin <kyle@mcmartin.ca> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- arch/i386/kernel/i8259.c | 7 ++++--- arch/i386/kernel/io_apic.c | 17 ++++++++++------- arch/i386/kernel/irq.c | 2 +- arch/x86_64/kernel/i8259.c | 7 ++++--- arch/x86_64/kernel/io_apic.c | 15 ++++++++------- arch/x86_64/kernel/irq.c | 2 +- include/linux/irq.h | 22 ++++++++++------------ kernel/irq/chip.c | 33 +++++++++------------------------ 8 files changed, 47 insertions(+), 58 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index d53eafb6daa7..62996cd17084 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c @@ -113,7 +113,8 @@ void make_8259A_irq(unsigned int irq) { disable_irq_nosync(irq); io_apic_irqs &= ~(1<<irq); - set_irq_chip_and_handler(irq, &i8259A_chip, handle_level_irq); + set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, + "XT"); enable_irq(irq); } @@ -369,8 +370,8 @@ void __init init_ISA_irqs (void) /* * 16 old-style INTA-cycle interrupts: */ - set_irq_chip_and_handler(i, &i8259A_chip, - handle_level_irq); + set_irq_chip_and_handler_name(i, &i8259A_chip, + handle_level_irq, "XT"); } else { /* * 'high' PCI IRQs filled in on demand diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 27bceaf5ce40..350192d6ab98 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -1225,11 +1225,11 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger) { if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) - set_irq_chip_and_handler(irq, &ioapic_chip, - handle_fasteoi_irq); + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_fasteoi_irq, "fasteoi"); else - set_irq_chip_and_handler(irq, &ioapic_chip, - handle_edge_irq); + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_edge_irq, "edge"); set_intr_gate(vector, interrupt[irq]); } @@ -2235,7 +2235,8 @@ static inline void check_timer(void) printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); disable_8259A_irq(0); - set_irq_chip_and_handler(0, &lapic_chip, handle_fasteoi_irq); + set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, + "fasteio"); apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ enable_8259A_irq(0); @@ -2541,7 +2542,8 @@ int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev) write_msi_msg(irq, &msg); - set_irq_chip_and_handler(irq, &msi_chip, handle_edge_irq); + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, + "edge"); return 0; } @@ -2636,7 +2638,8 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) write_ht_irq_low(irq, low); write_ht_irq_high(irq, high); - set_irq_chip_and_handler(irq, &ht_irq_chip, handle_edge_irq); + set_irq_chip_and_handler_name(irq, &ht_irq_chip, + handle_edge_irq, "edge"); } return vector; } diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 8cfc7dbec7b9..3201d421090a 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -258,7 +258,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif seq_printf(p, " %8s", irq_desc[i].chip->name); - seq_printf(p, "-%s", handle_irq_name(irq_desc[i].handle_irq)); + seq_printf(p, "-%-8s", irq_desc[i].name); seq_printf(p, " %s", action->name); for (action=action->next; action; action = action->next) diff --git 
a/arch/x86_64/kernel/i8259.c b/arch/x86_64/kernel/i8259.c index 0612a33bb896..c4ef801b765b 100644 --- a/arch/x86_64/kernel/i8259.c +++ b/arch/x86_64/kernel/i8259.c @@ -178,7 +178,8 @@ void make_8259A_irq(unsigned int irq) { disable_irq_nosync(irq); io_apic_irqs &= ~(1<<irq); - set_irq_chip_and_handler(irq, &i8259A_chip, handle_level_irq); + set_irq_chip_and_handler_name(irq, &i8259A_chip, handle_level_irq, + "XT"); enable_irq(irq); } @@ -431,8 +432,8 @@ void __init init_ISA_irqs (void) /* * 16 old-style INTA-cycle interrupts: */ - set_irq_chip_and_handler(i, &i8259A_chip, - handle_level_irq); + set_irq_chip_and_handler_name(i, &i8259A_chip, + handle_level_irq, "XT"); } else { /* * 'high' PCI IRQs filled in on demand diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 44b55f833875..49e94f7994c5 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -696,11 +696,11 @@ static void ioapic_register_intr(int irq, int vector, unsigned long trigger) { if ((trigger == IOAPIC_AUTO && IO_APIC_irq_trigger(irq)) || trigger == IOAPIC_LEVEL) - set_irq_chip_and_handler(irq, &ioapic_chip, - handle_fasteoi_irq); + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_fasteoi_irq, "fasteoi"); else - set_irq_chip_and_handler(irq, &ioapic_chip, - handle_edge_irq); + set_irq_chip_and_handler_name(irq, &ioapic_chip, + handle_edge_irq, "edge"); } static void __init setup_IO_APIC_irqs(void) @@ -806,7 +806,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in * The timer IRQ doesn't have to know that behind the * scene we have a 8259A-master in AEOI mode ... */ - set_irq_chip_and_handler(0, &ioapic_chip, handle_edge_irq); + set_irq_chip_and_handler_name(0, &ioapic_chip, handle_edge_irq, "edge"); /* * Add it to the IO-APIC irq-routing table: @@ -1839,7 +1839,7 @@ int arch_setup_msi_irq(unsigned int irq, struct pci_dev *dev) write_msi_msg(irq, &msg); - set_irq_chip_and_handler(irq, &msi_chip, handle_edge_irq); + set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); return 0; } @@ -1936,7 +1936,8 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) write_ht_irq_low(irq, low); write_ht_irq_high(irq, high); - set_irq_chip_and_handler(irq, &ht_irq_chip, handle_edge_irq); + set_irq_chip_and_handler_name(irq, &ht_irq_chip, + handle_edge_irq, "edge"); } return vector; } diff --git a/arch/x86_64/kernel/irq.c b/arch/x86_64/kernel/irq.c index dff68eb2b787..e46c55856d40 100644 --- a/arch/x86_64/kernel/irq.c +++ b/arch/x86_64/kernel/irq.c @@ -75,7 +75,7 @@ int show_interrupts(struct seq_file *p, void *v) seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]); #endif seq_printf(p, " %8s", irq_desc[i].chip->name); - seq_printf(p, "-%s", handle_irq_name(irq_desc[i].handle_irq)); + seq_printf(p, "-%-8s", irq_desc[i].name); seq_printf(p, " %s", action->name); for (action=action->next; action; action = action->next) diff --git a/include/linux/irq.h b/include/linux/irq.h index c64f3cc7e870..775f5a7da493 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -141,6 +141,7 @@ struct irq_chip { * @pending_mask: pending rebalanced interrupts * @dir: /proc/irq/ procfs entry * @affinity_entry: /proc/irq/smp_affinity procfs entry on SMP + * @name: flow handler name for /proc/interrupts output * * Pad this out to 32 bytes for cache and indexing reasons. 
*/ @@ -165,8 +166,9 @@ struct irq_desc { cpumask_t pending_mask; #endif #ifdef CONFIG_PROC_FS - struct proc_dir_entry *dir; + struct proc_dir_entry *dir; #endif + const char *name; } ____cacheline_aligned; extern struct irq_desc irq_desc[NR_IRQS]; @@ -271,12 +273,6 @@ extern void fastcall handle_simple_irq(unsigned int irq, struct irq_desc *desc); extern void fastcall handle_percpu_irq(unsigned int irq, struct irq_desc *desc); extern void fastcall handle_bad_irq(unsigned int irq, struct irq_desc *desc); -/* - * Get a descriptive string for the highlevel handler, for - * /proc/interrupts output: - */ -extern const char *handle_irq_name(irq_flow_handler_t handle); - /* * Monolithic do_IRQ implementation. * (is an explicit fastcall, because i386 4KSTACKS calls it from assembly) @@ -326,10 +322,12 @@ extern struct irq_chip no_irq_chip; extern struct irq_chip dummy_irq_chip; extern void -set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip, - irq_flow_handler_t handle); +set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, + irq_flow_handler_t handle, const char *name); + extern void -__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained); +__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, + const char *name); /* * Set a highlevel flow handler for a given IRQ: @@ -337,7 +335,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained); static inline void set_irq_handler(unsigned int irq, irq_flow_handler_t handle) { - __set_irq_handler(irq, handle, 0); + __set_irq_handler(irq, handle, 0, NULL); } /* @@ -349,7 +347,7 @@ static inline void set_irq_chained_handler(unsigned int irq, irq_flow_handler_t handle) { - __set_irq_handler(irq, handle, 1); + __set_irq_handler(irq, handle, 1, NULL); } /* Handle dynamic irq creation and destruction */ diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 11c99697acfe..2d0dc3efe813 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -499,7 +499,8 @@ handle_percpu_irq(unsigned int irq, struct irq_desc *desc) #endif /* CONFIG_SMP */ void -__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained) +__set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained, + const char *name) { struct irq_desc *desc; unsigned long flags; @@ -540,6 +541,7 @@ __set_irq_handler(unsigned int irq, irq_flow_handler_t handle, int is_chained) desc->depth = 1; } desc->handle_irq = handle; + desc->name = name; if (handle != handle_bad_irq && is_chained) { desc->status &= ~IRQ_DISABLED; @@ -555,30 +557,13 @@ set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip, irq_flow_handler_t handle) { set_irq_chip(irq, chip); - __set_irq_handler(irq, handle, 0); + __set_irq_handler(irq, handle, 0, NULL); } -/* - * Get a descriptive string for the highlevel handler, for - * /proc/interrupts output: - */ -const char * -handle_irq_name(irq_flow_handler_t handle) +void +set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, + irq_flow_handler_t handle, const char *name) { - if (handle == handle_level_irq) - return "level "; - if (handle == handle_fasteoi_irq) - return "fasteoi"; - if (handle == handle_edge_irq) - return "edge "; - if (handle == handle_simple_irq) - return "simple "; -#ifdef CONFIG_SMP - if (handle == handle_percpu_irq) - return "percpu "; -#endif - if (handle == handle_bad_irq) - return "bad "; - - return NULL; + set_irq_chip(irq, chip); + __set_irq_handler(irq, handle, 0, name); } -- cgit v1.2.3 From 
7762f5a0b709b415fda132258ad37b9f2a1db994 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi <miklos@szeredi.hu> Date: Tue, 17 Oct 2006 00:10:07 -0700 Subject: [PATCH] document i_size_write locking rules Unless someone reads the documentation for write_seqcount_{begin,end} it is not obvious, that i_size_write() needs locking. Especially, that lack of such locking can result in a system hang. Signed-off-by: Miklos Szeredi <miklos@szeredi.hu> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/fs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 34406ed467c3..661c7c572149 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -656,7 +656,11 @@ static inline loff_t i_size_read(struct inode *inode) #endif } - +/* + * NOTE: unlike i_size_read(), i_size_write() does need locking around it + * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount + * can be lost, resulting in subsequent i_size_read() calls spinning forever. + */ static inline void i_size_write(struct inode *inode, loff_t i_size) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) -- cgit v1.2.3 From d343fce148a4eee24a907a05c4101d3268045aae Mon Sep 17 00:00:00 2001 From: NeilBrown <neilb@suse.de> Date: Tue, 17 Oct 2006 00:10:18 -0700 Subject: [PATCH] knfsd: Allow lockd to drop replies as appropriate It is possible for the ->fopen callback from lockd into nfsd to find that an answer cannot be given straight away (an upcall is needed) and so the request has to be 'dropped', to be retried later. That error status is not currently propagated back. So: Change nlm_fopen to return nlm error codes (rather than a private protocol) and define a new nlm_drop_reply code. Cause nlm_drop_reply to cause the rpc request to get rpc_drop_reply when this error comes back. Cause svc_process to drop a request which returns a status of rpc_drop_reply. [akpm@osdl.org: fix warning storm] Cc: Marc Eshel <eshel@almaden.ibm.com> Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/lockd/svc4proc.c | 12 ++++++------ fs/lockd/svcproc.c | 16 +++++++++------- fs/lockd/svcsubs.c | 6 ------ fs/nfsd/lockd.c | 14 ++++++++------ include/linux/lockd/bind.h | 5 +++++ include/linux/lockd/xdr.h | 4 ++++ include/linux/sunrpc/msg_prot.h | 4 +++- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/svc.c | 5 +++++ 9 files changed, 41 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index fa370f6eb07b..399ad11b97be 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -96,7 +96,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain client and file */ if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) - return rpc_success; + return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; /* Now check for conflicting locks */ resp->status = nlmsvc_testlock(file, &argp->lock, &resp->lock); @@ -126,7 +126,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain client and file */ if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) - return rpc_success; + return resp->status == nlm_drop_reply ? 
rpc_drop_reply :rpc_success; #if 0 /* If supplied state doesn't match current state, we assume it's @@ -169,7 +169,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain client and file */ if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) - return rpc_success; + return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; /* Try to cancel request. */ resp->status = nlmsvc_cancel_blocked(file, &argp->lock); @@ -202,7 +202,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain client and file */ if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) - return rpc_success; + return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; /* Now try to remove the lock */ resp->status = nlmsvc_unlock(file, &argp->lock); @@ -339,7 +339,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain client and file */ if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) - return rpc_success; + return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; /* Now try to create the share */ resp->status = nlmsvc_share_file(host, file, argp); @@ -372,7 +372,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain client and file */ if ((resp->status = nlm4svc_retrieve_args(rqstp, argp, &host, &file))) - return rpc_success; + return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; /* Now try to lock the file */ resp->status = nlmsvc_unshare_file(host, file, argp); diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 75b2c81bcb93..6a931f4ab75c 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -59,7 +59,7 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_host *host = NULL; struct nlm_file *file = NULL; struct nlm_lock *lock = &argp->lock; - u32 error; + u32 error = 0; /* nfsd callbacks must have been installed for this procedure */ if (!nlmsvc_ops) @@ -88,6 +88,8 @@ nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, no_locks: if (host) nlm_release_host(host); + if (error) + return error; return nlm_lck_denied_nolocks; } @@ -122,7 +124,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain client and file */ if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) - return rpc_success; + return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; /* Now check for conflicting locks */ resp->status = cast_status(nlmsvc_testlock(file, &argp->lock, &resp->lock)); @@ -153,7 +155,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain client and file */ if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) - return rpc_success; + return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; #if 0 /* If supplied state doesn't match current state, we assume it's @@ -196,7 +198,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain client and file */ if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) - return rpc_success; + return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; /* Try to cancel request. */ resp->status = cast_status(nlmsvc_cancel_blocked(file, &argp->lock)); @@ -229,7 +231,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain client and file */ if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) - return rpc_success; + return resp->status == nlm_drop_reply ? 
rpc_drop_reply :rpc_success; /* Now try to remove the lock */ resp->status = cast_status(nlmsvc_unlock(file, &argp->lock)); @@ -368,7 +370,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain client and file */ if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) - return rpc_success; + return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; /* Now try to create the share */ resp->status = cast_status(nlmsvc_share_file(host, file, argp)); @@ -401,7 +403,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, /* Obtain client and file */ if ((resp->status = nlmsvc_retrieve_args(rqstp, argp, &host, &file))) - return rpc_success; + return resp->status == nlm_drop_reply ? rpc_drop_reply :rpc_success; /* Now try to unshare the file */ resp->status = cast_status(nlmsvc_unshare_file(host, file, argp)); diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index c5f9113cdc70..7dac96e6c82c 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -135,12 +135,6 @@ out_unlock: out_free: kfree(file); -#ifdef CONFIG_LOCKD_V4 - if (nfserr == 1) - nfserr = nlm4_stale_fh; - else -#endif - nfserr = nlm_lck_denied; goto out_unlock; } diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 7b889ff15ae6..9b9e7e127c03 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -39,18 +39,20 @@ nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp) fh_put(&fh); rqstp->rq_client = NULL; exp_readunlock(); - /* nlm and nfsd don't share error codes. - * we invent: 0 = no error - * 1 = stale file handle - * 2 = other error + /* We return nlm error codes as nlm doesn't know + * about nfsd, but nfsd does know about nlm.. */ switch (nfserr) { case nfs_ok: return 0; + case nfserr_dropit: + return nlm_drop_reply; +#ifdef CONFIG_LOCKD_V4 case nfserr_stale: - return 1; + return nlm4_stale_fh; +#endif default: - return 2; + return nlm_lck_denied; } } diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 81e3a185f951..aa50d89eacd7 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -10,6 +10,11 @@ #define LINUX_LOCKD_BIND_H #include <linux/lockd/nlm.h> +/* need xdr-encoded error codes too, so... 
*/ +#include <linux/lockd/xdr.h> +#ifdef CONFIG_LOCKD_V4 +#include <linux/lockd/xdr4.h> +#endif /* Dummy declarations */ struct svc_rqst; diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index bb0a0f1caa91..66fdae3b490c 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -13,6 +13,8 @@ #include <linux/nfs.h> #include <linux/sunrpc/xdr.h> +struct svc_rqst; + #define NLM_MAXCOOKIELEN 32 #define NLM_MAXSTRLEN 1024 @@ -22,6 +24,8 @@ #define nlm_lck_blocked __constant_htonl(NLM_LCK_BLOCKED) #define nlm_lck_denied_grace_period __constant_htonl(NLM_LCK_DENIED_GRACE_PERIOD) +#define nlm_drop_reply __constant_htonl(30000) + /* Lock info passed via NLM */ struct nlm_lock { char * caller; diff --git a/include/linux/sunrpc/msg_prot.h b/include/linux/sunrpc/msg_prot.h index 1e65f2dd80e5..606cb2165232 100644 --- a/include/linux/sunrpc/msg_prot.h +++ b/include/linux/sunrpc/msg_prot.h @@ -56,7 +56,9 @@ enum rpc_accept_stat { RPC_PROG_MISMATCH = 2, RPC_PROC_UNAVAIL = 3, RPC_GARBAGE_ARGS = 4, - RPC_SYSTEM_ERR = 5 + RPC_SYSTEM_ERR = 5, + /* internal use only */ + RPC_DROP_REPLY = 60000, }; enum rpc_reject_stat { diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 953723b09bc6..ac69e5511606 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -74,6 +74,7 @@ struct xdr_buf { #define rpc_proc_unavail __constant_htonl(RPC_PROC_UNAVAIL) #define rpc_garbage_args __constant_htonl(RPC_GARBAGE_ARGS) #define rpc_system_err __constant_htonl(RPC_SYSTEM_ERR) +#define rpc_drop_reply __constant_htonl(RPC_DROP_REPLY) #define rpc_auth_ok __constant_htonl(RPC_AUTH_OK) #define rpc_autherr_badcred __constant_htonl(RPC_AUTH_BADCRED) diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 2807fa0eab40..eb44ec929ca1 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -828,6 +828,11 @@ svc_process(struct svc_rqst *rqstp) *statp = procp->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); /* Encode reply */ + if (*statp == rpc_drop_reply) { + if (procp->pc_release) + procp->pc_release(rqstp, NULL, rqstp->rq_resp); + goto dropit; + } if (*statp == rpc_success && (xdr = procp->pc_encode) && !xdr(rqstp, resv->iov_base+resv->iov_len, rqstp->rq_resp)) { dprintk("svc: failed to encode reply\n"); -- cgit v1.2.3 From 58ff407bee5a55f9c1188a3f9d70ffc79485183c Mon Sep 17 00:00:00 2001 From: Jan Kara <jack@suse.cz> Date: Tue, 17 Oct 2006 00:10:19 -0700 Subject: [PATCH] Fix IO error reporting on fsync() When IO error happens on metadata buffer, buffer is freed from memory and later fsync() is called, filesystems like ext2 fail to report EIO. We solve the problem by introducing a pointer to associated address space into the buffer_head. When a buffer is removed from a list of metadata buffers associated with an address space, IO error is transferred from the buffer to the address space, so that fsync can later report it. 
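In outline, the mechanism is: the async write completion marks the buffer with a write-IO-error bit, __remove_assoc_queue() transfers that bit to AS_EIO on the buffer's associated mapping, and fsync() reports it from there. A simplified sketch of the reporting end (the helper name is illustrative, not from this patch):

	#include <linux/errno.h>
	#include <linux/pagemap.h>	/* AS_EIO, struct address_space */

	/* Sketch: how an fsync-style path turns the AS_EIO bit, set when an
	 * errored buffer left the metadata list, into a one-shot -EIO. */
	static int report_mapping_error(struct address_space *mapping, int err)
	{
		if (test_and_clear_bit(AS_EIO, &mapping->flags))
			err = -EIO;
		return err;
	}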
Signed-off-by: Jan Kara <jack@suse.cz> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/buffer.c | 11 +++++++++-- include/linux/buffer_head.h | 2 ++ 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/fs/buffer.c b/fs/buffer.c index f65ef8821c73..35527dca1dbc 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -452,6 +452,7 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate) bdevname(bh->b_bdev, b)); } set_bit(AS_EIO, &page->mapping->flags); + set_buffer_write_io_error(bh); clear_buffer_uptodate(bh); SetPageError(page); } @@ -571,6 +572,10 @@ EXPORT_SYMBOL(mark_buffer_async_write); static inline void __remove_assoc_queue(struct buffer_head *bh) { list_del_init(&bh->b_assoc_buffers); + WARN_ON(!bh->b_assoc_map); + if (buffer_write_io_error(bh)) + set_bit(AS_EIO, &bh->b_assoc_map->flags); + bh->b_assoc_map = NULL; } int inode_has_buffers(struct inode *inode) @@ -669,6 +674,7 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) spin_lock(&buffer_mapping->private_lock); list_move_tail(&bh->b_assoc_buffers, &mapping->private_list); + bh->b_assoc_map = mapping; spin_unlock(&buffer_mapping->private_lock); } } @@ -765,7 +771,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) spin_lock(lock); while (!list_empty(list)) { bh = BH_ENTRY(list->next); - list_del_init(&bh->b_assoc_buffers); + __remove_assoc_queue(bh); if (buffer_dirty(bh) || buffer_locked(bh)) { list_add(&bh->b_assoc_buffers, &tmp); if (buffer_dirty(bh)) { @@ -786,7 +792,7 @@ static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) while (!list_empty(&tmp)) { bh = BH_ENTRY(tmp.prev); - __remove_assoc_queue(bh); + list_del_init(&bh->b_assoc_buffers); get_bh(bh); spin_unlock(lock); wait_on_buffer(bh); @@ -1167,6 +1173,7 @@ void __bforget(struct buffer_head *bh) spin_lock(&buffer_mapping->private_lock); list_del_init(&bh->b_assoc_buffers); + bh->b_assoc_map = NULL; spin_unlock(&buffer_mapping->private_lock); } __brelse(bh); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 131ffd37e716..5d9fb0e94156 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -69,6 +69,8 @@ struct buffer_head { bh_end_io_t *b_end_io; /* I/O completion */ void *b_private; /* reserved for b_end_io */ struct list_head b_assoc_buffers; /* associated with another mapping */ + struct address_space *b_assoc_map; /* mapping this buffer is + associated with */ atomic_t b_count; /* users using this buffer_head */ }; -- cgit v1.2.3 From ffc45571dfb4b70e7eda8d97f64a05f5e5a992ac Mon Sep 17 00:00:00 2001 From: Aron Griffis <aron@hp.com> Date: Tue, 17 Oct 2006 00:28:15 -0400 Subject: [IA64] move ioremap/ioremap_nocache under __KERNEL__ I noticed these are declared extern outside of __KERNEL__, but surely they wouldn't be available to userland since they're defined in ioremap.c. Am I missing something here? If I'm right about this, then there's probably a good deal of other stuff in io.h that could move inside __KERNEL__, but at least this is a start. 
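The pattern being applied is the usual one for exported headers; a generic sketch (schematic only, not the actual asm-ia64/io.h layout):

	#ifndef _EXAMPLE_IO_H
	#define _EXAMPLE_IO_H

	/* declarations meaningful to userland would stay out here */

	#ifdef __KERNEL__
	/* kernel-only prototypes: invisible when the header reaches userland */
	extern void __iomem *ioremap(unsigned long offset, unsigned long size);
	extern void iounmap(volatile void __iomem *addr);
	#endif /* __KERNEL__ */

	#endif /* _EXAMPLE_IO_H */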
Signed-off-by: Aron Griffis <aron@hp.com> Signed-off-by: Tony Luck <tony.luck@intel.com> --- include/asm-ia64/io.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-ia64/io.h b/include/asm-ia64/io.h index 43bfff6c6b87..855c30af72a9 100644 --- a/include/asm-ia64/io.h +++ b/include/asm-ia64/io.h @@ -417,6 +417,8 @@ __writeq (unsigned long val, volatile void __iomem *addr) # define outl_p outl #endif +# ifdef __KERNEL__ + extern void __iomem * ioremap(unsigned long offset, unsigned long size); extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size); @@ -430,8 +432,6 @@ iounmap (volatile void __iomem *addr) #define dmi_iounmap(x,l) iounmap(x) #define dmi_alloc(l) kmalloc(l, GFP_ATOMIC) -# ifdef __KERNEL__ - /* * String version of IO memory access ops: */ -- cgit v1.2.3 From c12fb1885787dcc2e20c4b88149e1e607e1293b2 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas <bjorn.helgaas@hp.com> Date: Thu, 12 Oct 2006 16:20:59 -0600 Subject: [IA64] remove unused PAL_CALL_IC_OFF Linux maps PAL instructions with an ITR, but uses a DTC for PAL data. Section 11.10.2.1.3, "Making PAL Procedures Calls in Physical or Virtual Mode," of the SDM (rev 2.2), says we must therefore make all PAL calls with PSR.ic = 1 so that Linux can handle any TLB faults. PAL_CALL_IC_OFF is currently unused, and as long as we use the ITR + DTC strategy, we can't use it. So remove it. I also removed the code in ia64_pal_call_static() that conditionally cleared PSR.ic. Signed-off-by: Bjorn Helgaas <bjorn.helgaas@hp.com> Signed-off-by: Tony Luck <tony.luck@intel.com> --- arch/ia64/kernel/pal.S | 11 +++-------- include/asm-ia64/pal.h | 11 ++--------- 2 files changed, 5 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/pal.S b/arch/ia64/kernel/pal.S index ebaf1e685f5e..5f50e6bbc686 100644 --- a/arch/ia64/kernel/pal.S +++ b/arch/ia64/kernel/pal.S @@ -50,12 +50,10 @@ END(ia64_pal_default_handler) * * in0 Index of PAL service * in1 - in3 Remaining PAL arguments - * in4 1 ==> clear psr.ic, 0 ==> don't clear psr.ic - * */ GLOBAL_ENTRY(ia64_pal_call_static) - .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(5) - alloc loc1 = ar.pfs,5,5,0,0 + .prologue ASM_UNW_PRLG_RP|ASM_UNW_PRLG_PFS, ASM_UNW_PRLG_GRSAVE(4) + alloc loc1 = ar.pfs,4,5,0,0 movl loc2 = pal_entry_point 1: { mov r28 = in0 @@ -64,7 +62,6 @@ GLOBAL_ENTRY(ia64_pal_call_static) } ;; ld8 loc2 = [loc2] // loc2 <- entry point - tbit.nz p6,p7 = in4, 0 adds r8 = 1f-1b,r8 mov loc4=ar.rsc // save RSE configuration ;; @@ -74,13 +71,11 @@ GLOBAL_ENTRY(ia64_pal_call_static) .body mov r30 = in2 -(p6) rsm psr.i | psr.ic mov r31 = in3 mov b7 = loc2 -(p7) rsm psr.i + rsm psr.i ;; -(p6) srlz.i mov rp = r8 br.cond.sptk.many b7 1: mov psr.l = loc3 diff --git a/include/asm-ia64/pal.h b/include/asm-ia64/pal.h index 2c8fd92d0ece..4283ddcc25fb 100644 --- a/include/asm-ia64/pal.h +++ b/include/asm-ia64/pal.h @@ -764,7 +764,7 @@ struct ia64_pal_retval { * (generally 0) MUST be passed. Reserved parameters are not optional * parameters. 
*/ -extern struct ia64_pal_retval ia64_pal_call_static (u64, u64, u64, u64, u64); +extern struct ia64_pal_retval ia64_pal_call_static (u64, u64, u64, u64); extern struct ia64_pal_retval ia64_pal_call_stacked (u64, u64, u64, u64); extern struct ia64_pal_retval ia64_pal_call_phys_static (u64, u64, u64, u64); extern struct ia64_pal_retval ia64_pal_call_phys_stacked (u64, u64, u64, u64); @@ -774,14 +774,7 @@ extern void ia64_load_scratch_fpregs (struct ia64_fpreg *); #define PAL_CALL(iprv,a0,a1,a2,a3) do { \ struct ia64_fpreg fr[6]; \ ia64_save_scratch_fpregs(fr); \ - iprv = ia64_pal_call_static(a0, a1, a2, a3, 0); \ - ia64_load_scratch_fpregs(fr); \ -} while (0) - -#define PAL_CALL_IC_OFF(iprv,a0,a1,a2,a3) do { \ - struct ia64_fpreg fr[6]; \ - ia64_save_scratch_fpregs(fr); \ - iprv = ia64_pal_call_static(a0, a1, a2, a3, 1); \ + iprv = ia64_pal_call_static(a0, a1, a2, a3); \ ia64_load_scratch_fpregs(fr); \ } while (0) -- cgit v1.2.3 From 74d919465a93b6c2b928b29a8ed3e5e41adbfa93 Mon Sep 17 00:00:00 2001 From: Ben Collins <bcollins@ubuntu.com> Date: Wed, 18 Oct 2006 08:55:54 -0400 Subject: [pci_ids] Add Quicknet XJ vendor/device ID's. Signed-off-by: Ben Collins <bcollins@ubuntu.com> --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f069df245469..f3a168f3c9df 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2351,3 +2351,5 @@ #define PCI_DEVICE_ID_RME_DIGI32_PRO 0x9897 #define PCI_DEVICE_ID_RME_DIGI32_8 0x9898 +#define PCI_VENDOR_ID_QUICKNET 0x15E2 +#define PCI_DEVICE_ID_QUICKNET_XJ 0x0500 -- cgit v1.2.3 From f5956f84072804712cb7b663c5c64e9800180833 Mon Sep 17 00:00:00 2001 From: Heiko Carstens <heiko.carstens@de.ibm.com> Date: Wed, 18 Oct 2006 18:30:45 +0200 Subject: [S390] Wire up epoll_pwait syscall. Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com> --- arch/s390/kernel/syscalls.S | 1 + include/asm-s390/unistd.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index e59baec56520..a4ceae3dbcf1 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -320,3 +320,4 @@ SYSCALL(sys_tee,sys_tee,sys_tee_wrapper) SYSCALL(sys_vmsplice,sys_vmsplice,compat_sys_vmsplice_wrapper) NI_SYSCALL /* 310 sys_move_pages */ SYSCALL(sys_getcpu,sys_getcpu,sys_getcpu_wrapper) +SYSCALL(sys_epoll_pwait,sys_epoll_pwait,sys_ni_syscall) diff --git a/include/asm-s390/unistd.h b/include/asm-s390/unistd.h index a19238cbcffa..71d3c21b84f0 100644 --- a/include/asm-s390/unistd.h +++ b/include/asm-s390/unistd.h @@ -249,8 +249,9 @@ #define __NR_vmsplice 309 /* Number 310 is reserved for new sys_move_pages */ #define __NR_getcpu 311 +#define __NR_epoll_pwait 312 -#define NR_syscalls 312 +#define NR_syscalls 313 /* * There are some system calls that are not present on 64 bit, some -- cgit v1.2.3 From 833774849d50a59f58e9bdfc3d9c88e682b3596d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky <schwidefsky@de.ibm.com> Date: Wed, 18 Oct 2006 18:30:51 +0200 Subject: [S390] Fix pte type checking. handle_pte_fault uses pte_present, pte_none and pte_file to find out the type of a pte. That is done without holding the page table lock. This clashes with the way how ptep_clear_flush removes active page table entries from the system. First the ipte instruction is used to invalidate the pte and remove all plt entries for the page. 
The ipte sets the hardware invalid bit without changing any other bit. After the ipte finishes, the pte is cleared. A concurrent fault can observe the previously valid pte with the invalid bit set. With the current encoding of the different pte types an invalidated read-only pte can be misinterpreted as a swap-pte. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com> --- include/asm-s390/pgtable.h | 50 ++++++++++++++++++++++++++++++++++++---------- 1 file changed, 40 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/asm-s390/pgtable.h b/include/asm-s390/pgtable.h index 519f0a5ff181..36bb6dacf008 100644 --- a/include/asm-s390/pgtable.h +++ b/include/asm-s390/pgtable.h @@ -200,18 +200,45 @@ extern char empty_zero_page[PAGE_SIZE]; */ /* Hardware bits in the page table entry */ -#define _PAGE_RO 0x200 /* HW read-only */ -#define _PAGE_INVALID 0x400 /* HW invalid */ +#define _PAGE_RO 0x200 /* HW read-only bit */ +#define _PAGE_INVALID 0x400 /* HW invalid bit */ +#define _PAGE_SWT 0x001 /* SW pte type bit t */ +#define _PAGE_SWX 0x002 /* SW pte type bit x */ -/* Mask and six different types of pages. */ -#define _PAGE_TYPE_MASK 0x601 +/* Six different types of pages. */ #define _PAGE_TYPE_EMPTY 0x400 #define _PAGE_TYPE_NONE 0x401 -#define _PAGE_TYPE_SWAP 0x600 -#define _PAGE_TYPE_FILE 0x601 +#define _PAGE_TYPE_SWAP 0x403 +#define _PAGE_TYPE_FILE 0x601 /* bit 0x002 is used for offset !! */ #define _PAGE_TYPE_RO 0x200 #define _PAGE_TYPE_RW 0x000 +/* + * PTE type bits are rather complicated. handle_pte_fault uses pte_present, + * pte_none and pte_file to find out the pte type WITHOUT holding the page + * table lock. ptep_clear_flush on the other hand uses ptep_clear_flush to + * invalidate a given pte. ipte sets the hw invalid bit and clears all tlbs + * for the page. The page table entry is set to _PAGE_TYPE_EMPTY afterwards. + * This change is done while holding the lock, but the intermediate step + * of a previously valid pte with the hw invalid bit set can be observed by + * handle_pte_fault. That makes it necessary that all valid pte types with + * the hw invalid bit set must be distinguishable from the four pte types + * empty, none, swap and file. + * + * irxt ipte irxt + * _PAGE_TYPE_EMPTY 1000 -> 1000 + * _PAGE_TYPE_NONE 1001 -> 1001 + * _PAGE_TYPE_SWAP 1011 -> 1011 + * _PAGE_TYPE_FILE 11?1 -> 11?1 + * _PAGE_TYPE_RO 0100 -> 1100 + * _PAGE_TYPE_RW 0000 -> 1000 + * + * pte_none is true for bits combinations 1000, 1100 + * pte_present is true for bits combinations 0000, 0010, 0100, 0110, 1001 + * pte_file is true for bits combinations 1101, 1111 + * swap pte is 1011 and 0001, 0011, 0101, 0111, 1010 and 1110 are invalid.
+ */ + #ifndef __s390x__ /* Bits in the segment table entry */ @@ -365,18 +392,21 @@ static inline int pmd_bad(pmd_t pmd) static inline int pte_none(pte_t pte) { - return (pte_val(pte) & _PAGE_TYPE_MASK) == _PAGE_TYPE_EMPTY; + return (pte_val(pte) & _PAGE_INVALID) && !(pte_val(pte) & _PAGE_SWT); } static inline int pte_present(pte_t pte) { - return !(pte_val(pte) & _PAGE_INVALID) || - (pte_val(pte) & _PAGE_TYPE_MASK) == _PAGE_TYPE_NONE; + unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT | _PAGE_SWX; + return (pte_val(pte) & mask) == _PAGE_TYPE_NONE || + (!(pte_val(pte) & _PAGE_INVALID) && + !(pte_val(pte) & _PAGE_SWT)); } static inline int pte_file(pte_t pte) { - return (pte_val(pte) & _PAGE_TYPE_MASK) == _PAGE_TYPE_FILE; + unsigned long mask = _PAGE_RO | _PAGE_INVALID | _PAGE_SWT; + return (pte_val(pte) & mask) == _PAGE_TYPE_FILE; } #define pte_same(a,b) (pte_val(a) == pte_val(b)) -- cgit v1.2.3 From 29f3eb64634cf96903a3cdb56b1f9a80bebad17d Mon Sep 17 00:00:00 2001 From: Alan Cox <alan@lxorguk.ukuu.org.uk> Date: Mon, 16 Oct 2006 16:20:21 -0700 Subject: pci: Additional search functions In order to finish converting to pci_get_* interfaces we need to add a couple of bits of missing functionality. pci_get_bus_and_slot() provides the equivalent to pci_find_slot() (pci_get_slot is already taken as a name for something similar but not the same). pci_get_device_reverse() is the equivalent of pci_find_device_reverse, but with refcounting. Signed-off-by: Alan Cox <alan@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> --- drivers/pci/search.c | 72 ++++++++++++++++++++++++++++++++++++++++++++++++++-- include/linux/pci.h | 3 ++- 2 files changed, 72 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/pci/search.c b/drivers/pci/search.c index d529462d1b53..2f13eba5d5ae 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -139,6 +139,31 @@ struct pci_dev * pci_get_slot(struct pci_bus *bus, unsigned int devfn) return dev; } +/** + * pci_get_bus_and_slot - locate PCI device from a given PCI slot + * @bus: number of PCI bus on which desired PCI device resides + * @devfn: encodes number of PCI slot in which the desired PCI + * device resides and the logical device number within that slot + * in case of multi-function devices. + * + * Given a PCI bus and slot/function number, the desired PCI device + * is located in system global list of PCI devices. If the device + * is found, a pointer to its data structure is returned. If no + * device is found, %NULL is returned. The returned device has its + * reference count bumped by one.
+ */ + +struct pci_dev * pci_get_bus_and_slot(unsigned int bus, unsigned int devfn) +{ + struct pci_dev *dev = NULL; + + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + if (dev->bus->number == bus && dev->devfn == devfn) + return dev; + } + return NULL; +} + /** * pci_find_subsys - begin or continue searching for a PCI device by vendor/subvendor/device/subdevice id * @vendor: PCI vendor id to match, or %PCI_ANY_ID to match all vendor ids @@ -274,6 +299,45 @@ pci_get_device(unsigned int vendor, unsigned int device, struct pci_dev *from) return pci_get_subsys(vendor, device, PCI_ANY_ID, PCI_ANY_ID, from); } +/** + * pci_get_device_reverse - begin or continue searching for a PCI device by vendor/device id + * @vendor: PCI vendor id to match, or %PCI_ANY_ID to match all vendor ids + * @device: PCI device id to match, or %PCI_ANY_ID to match all device ids + * @from: Previous PCI device found in search, or %NULL for new search. + * + * Iterates through the list of known PCI devices in the reverse order of + * pci_get_device. + * If a PCI device is found with a matching @vendor and @device, the reference + * count to the device is incremented and a pointer to its device structure + * is returned Otherwise, %NULL is returned. A new search is initiated by + * passing %NULL as the @from argument. Otherwise if @from is not %NULL, + * searches continue from next device on the global list. The reference + * count for @from is always decremented if it is not %NULL. + */ +struct pci_dev * +pci_get_device_reverse(unsigned int vendor, unsigned int device, struct pci_dev *from) +{ + struct list_head *n; + struct pci_dev *dev; + + WARN_ON(in_interrupt()); + down_read(&pci_bus_sem); + n = from ? from->global_list.prev : pci_devices.prev; + + while (n && (n != &pci_devices)) { + dev = pci_dev_g(n); + if ((vendor == PCI_ANY_ID || dev->vendor == vendor) && + (device == PCI_ANY_ID || dev->device == device)) + goto exit; + n = n->prev; + } + dev = NULL; +exit: + dev = pci_dev_get(dev); + up_read(&pci_bus_sem); + pci_dev_put(from); + return dev; +} /** * pci_find_device_reverse - begin or continue searching for a PCI device by vendor/device id @@ -382,12 +446,16 @@ exit: } EXPORT_SYMBOL(pci_dev_present); -EXPORT_SYMBOL(pci_find_bus); -EXPORT_SYMBOL(pci_find_next_bus); EXPORT_SYMBOL(pci_find_device); EXPORT_SYMBOL(pci_find_device_reverse); EXPORT_SYMBOL(pci_find_slot); +/* For boot time work */ +EXPORT_SYMBOL(pci_find_bus); +EXPORT_SYMBOL(pci_find_next_bus); +/* For everyone */ EXPORT_SYMBOL(pci_get_device); +EXPORT_SYMBOL(pci_get_device_reverse); EXPORT_SYMBOL(pci_get_subsys); EXPORT_SYMBOL(pci_get_slot); +EXPORT_SYMBOL(pci_get_bus_and_slot); EXPORT_SYMBOL(pci_get_class); diff --git a/include/linux/pci.h b/include/linux/pci.h index 5c604f5fad67..09bf88fc80c5 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -452,13 +452,14 @@ struct pci_dev *pci_find_slot (unsigned int bus, unsigned int devfn); int pci_find_capability (struct pci_dev *dev, int cap); int pci_find_next_capability (struct pci_dev *dev, u8 pos, int cap); int pci_find_ext_capability (struct pci_dev *dev, int cap); -struct pci_bus * pci_find_next_bus(const struct pci_bus *from); +struct pci_bus *pci_find_next_bus(const struct pci_bus *from); struct pci_dev *pci_get_device (unsigned int vendor, unsigned int device, struct pci_dev *from); struct pci_dev *pci_get_subsys (unsigned int vendor, unsigned int device, unsigned int ss_vendor, unsigned int ss_device, struct pci_dev *from); struct pci_dev *pci_get_slot (struct 
pci_bus *bus, unsigned int devfn); +struct pci_dev *pci_get_bus_and_slot (unsigned int bus, unsigned int devfn); struct pci_dev *pci_get_class (unsigned int class, struct pci_dev *from); int pci_dev_present(const struct pci_device_id *ids); -- cgit v1.2.3 From 6b4b78fed47e7380dfe9280b154e8b9bfcd4c86c Mon Sep 17 00:00:00 2001 From: Matt Domsch <Matt_Domsch@dell.com> Date: Fri, 29 Sep 2006 15:23:23 -0500 Subject: PCI: optionally sort device lists breadth-first Problem: New Dell PowerEdge servers have 2 embedded ethernet ports, which are labeled NIC1 and NIC2 on the chassis, in the BIOS setup screens, and in the printed documentation. Assuming no other add-in ethernet ports in the system, Linux 2.4 kernels name these eth0 and eth1 respectively. Many people have come to expect this naming. Linux 2.6 kernels name these eth1 and eth0 respectively (backwards from expectations). I also have reports that various Sun and HP servers have similar behavior. Root cause: Linux 2.4 kernels walk the pci_devices list, which happens to be sorted in breadth-first order (or pcbios_find_device order on i386, which most often is breadth-first also). 2.6 kernels have both the pci_devices list and the pci_bus_type.klist_devices list, the latter is what is walked at driver load time to match the pci_id tables; this klist happens to be in depth-first order. On systems where, for physical routing reasons, NIC1 appears on a lower bus number than NIC2, but NIC2's bridge is discovered first in the depth-first ordering, NIC2 will be discovered before NIC1. If the list were sorted breadth-first, NIC1 would be discovered before NIC2. A PowerEdge 1955 system has the following topology which easily exhibits the difference between depth-first and breadth-first device lists. -[0000:00]-+-00.0 Intel Corporation 5000P Chipset Memory Controller Hub +-02.0-[0000:03-08]--+-00.0-[0000:04-07]--+-00.0-[0000:05-06]----00.0-[0000:06]----00.0 Broadcom Corporation NetXtreme II BCM5708S Gigabit Ethernet (labeled NIC2, 2.4 kernel name eth1, 2.6 kernel name eth0) +-1c.0-[0000:01-02]----00.0-[0000:02]----00.0 Broadcom Corporation NetXtreme II BCM5708S Gigabit Ethernet (labeled NIC1, 2.4 kernel name eth0, 2.6 kernel name eth1) Other factors, such as device driver load order and the presence of PCI slots at various points in the bus hierarchy further complicate this problem; I'm not trying to solve those here, just restore the device order, and thus basic behavior, that 2.4 kernels had. Solution: The solution can come in multiple steps. Suggested fix #1: kernel Patch below optionally sorts the two device lists into breadth-first ordering to maintain compatibility with 2.4 kernels. It adds two new command line options: pci=bfsort pci=nobfsort to force the sort order, or not, as you wish. It also adds DMI checks for the specific Dell systems which exhibit "backwards" ordering, to make them "right". Suggested fix #2: udev rules from userland Many people also have the expectation that embedded NICs are always discovered before add-in NICs (which this patch does not try to do). Using the PCI IRQ Routing Table provided by system BIOS, it's easy to determine which PCI devices are embedded, or if add-in, which PCI slot they're in. I'm working on a tool that would allow udev to name ethernet devices in ascending embedded, slot 1 .. slot N order, subsort by PCI bus/dev/fn breadth-first. It'll be possible to use it independent of udev as well for those distributions that don't use udev in their installers. 
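For reference, the user-visible knobs this patch adds are two boot parameters; the DMI quirks below enable bfsort automatically on the listed PowerEdge models unless one of these overrides it on the command line:

	pci=bfsort	sort PCI device lists breadth-first (2.4-compatible order)
	pci=nobfsort	keep the native depth-first discovery order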
Suggested fix #3: system board routing rules One can constrain the system board layout to put NIC1 ahead of NIC2 regardless of breadth-first or depth-first discovery order. This adds a significant level of complexity to board routing, and may not be possible in all instances (witness the above systems from several major manufacturers). I don't want to encourage this particular train of thought too far, at the expense of not doing #1 or #2 above. Feedback appreciated. Patch tested on a Dell PowerEdge 1955 blade with 2.6.18. You'll also note I took some liberty and temporarily break the klist abstraction to simplify and speed up the sort algorithm. I think that's both safe and appropriate in this instance. Signed-off-by: Matt Domsch <Matt_Domsch@dell.com> Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> --- Documentation/kernel-parameters.txt | 5 ++ arch/i386/pci/common.c | 59 +++++++++++++++++++++++- arch/i386/pci/pci.h | 7 +++ drivers/pci/probe.c | 92 +++++++++++++++++++++++++++++++++++++ include/linux/pci.h | 1 + 5 files changed, 162 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index ff571f9298e0..dd00fd556a60 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1231,6 +1231,11 @@ and is between 256 and 4096 characters. It is defined in the file machine check when some devices' config space is read. But various workarounds are disabled and some IOMMU drivers will not work. + bfsort Sort PCI devices into breadth-first order. + This sorting is done to get a device + order compatible with older (<= 2.4) kernels. + nobfsort Don't sort PCI devices into breadth-first order. + pcmv= [HW,PCMCIA] BadgePAD 4 pd. [PARIDE] diff --git a/arch/i386/pci/common.c b/arch/i386/pci/common.c index 68bce194e688..6d5ace845e44 100644 --- a/arch/i386/pci/common.c +++ b/arch/i386/pci/common.c @@ -20,6 +20,7 @@ unsigned int pci_probe = PCI_PROBE_BIOS | PCI_PROBE_CONF1 | PCI_PROBE_CONF2 | PCI_PROBE_MMCONF; +int pci_bf_sort; int pci_routeirq; int pcibios_last_bus = -1; unsigned long pirq_table_addr; @@ -117,6 +118,20 @@ void __devinit pcibios_fixup_bus(struct pci_bus *b) pci_read_bridge_bases(b); } +/* + * Only use DMI information to set this if nothing was passed + * on the kernel command line (which was parsed earlier). 
+ */ + +static int __devinit set_bf_sort(struct dmi_system_id *d) +{ + if (pci_bf_sort == pci_bf_sort_default) { + pci_bf_sort = pci_dmi_bf; + printk(KERN_INFO "PCI: %s detected, enabling pci=bfsort.\n", d->ident); + } + return 0; +} + /* * Enable renumbering of PCI bus# ranges to reach all PCI busses (Cardbus) */ @@ -130,11 +145,11 @@ static int __devinit assign_all_busses(struct dmi_system_id *d) } #endif +static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { +#ifdef __i386__ /* * Laptops which need pci=assign-busses to see Cardbus cards */ -static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { -#ifdef __i386__ { .callback = assign_all_busses, .ident = "Samsung X20 Laptop", @@ -144,6 +159,38 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = { }, }, #endif /* __i386__ */ + { + .callback = set_bf_sort, + .ident = "Dell PowerEdge 1950", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1950"), + }, + }, + { + .callback = set_bf_sort, + .ident = "Dell PowerEdge 1955", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 1955"), + }, + }, + { + .callback = set_bf_sort, + .ident = "Dell PowerEdge 2900", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2900"), + }, + }, + { + .callback = set_bf_sort, + .ident = "Dell PowerEdge 2950", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "Dell"), + DMI_MATCH(DMI_PRODUCT_NAME, "PowerEdge 2950"), + }, + }, {} }; @@ -189,6 +236,8 @@ static int __init pcibios_init(void) pcibios_resource_survey(); + if (pci_bf_sort >= pci_force_bf) + pci_sort_breadthfirst(); #ifdef CONFIG_PCI_BIOS if ((pci_probe & PCI_BIOS_SORT) && !(pci_probe & PCI_NO_SORT)) pcibios_sort(); @@ -203,6 +252,12 @@ char * __devinit pcibios_setup(char *str) if (!strcmp(str, "off")) { pci_probe = 0; return NULL; + } else if (!strcmp(str, "bfsort")) { + pci_bf_sort = pci_force_bf; + return NULL; + } else if (!strcmp(str, "nobfsort")) { + pci_bf_sort = pci_force_nobf; + return NULL; } #ifdef CONFIG_PCI_BIOS else if (!strcmp(str, "bios")) { diff --git a/arch/i386/pci/pci.h b/arch/i386/pci/pci.h index 1814f74569c6..ad065cebd7b9 100644 --- a/arch/i386/pci/pci.h +++ b/arch/i386/pci/pci.h @@ -30,6 +30,13 @@ extern unsigned int pci_probe; extern unsigned long pirq_table_addr; +enum pci_bf_sort_state { + pci_bf_sort_default, + pci_force_nobf, + pci_force_bf, + pci_dmi_bf, +}; + /* pci-i386.c */ extern unsigned int pcibios_max_latency; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index a3b0a5eb5054..e159d6604494 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1067,3 +1067,95 @@ EXPORT_SYMBOL(pci_scan_bridge); EXPORT_SYMBOL(pci_scan_single_device); EXPORT_SYMBOL_GPL(pci_scan_child_bus); #endif + +static int __init pci_sort_bf_cmp(const struct pci_dev *a, const struct pci_dev *b) +{ + if (pci_domain_nr(a->bus) < pci_domain_nr(b->bus)) return -1; + else if (pci_domain_nr(a->bus) > pci_domain_nr(b->bus)) return 1; + + if (a->bus->number < b->bus->number) return -1; + else if (a->bus->number > b->bus->number) return 1; + + if (a->devfn < b->devfn) return -1; + else if (a->devfn > b->devfn) return 1; + + return 0; +} + +/* + * Yes, this forcably breaks the klist abstraction temporarily. It + * just wants to sort the klist, not change reference counts and + * take/drop locks rapidly in the process. 
It does all this while + * holding the lock for the list, so objects can't otherwise be + * added/removed while we're swizzling. + */ +static void __init pci_insertion_sort_klist(struct pci_dev *a, struct list_head *list) +{ + struct list_head *pos; + struct klist_node *n; + struct device *dev; + struct pci_dev *b; + + list_for_each(pos, list) { + n = container_of(pos, struct klist_node, n_node); + dev = container_of(n, struct device, knode_bus); + b = to_pci_dev(dev); + if (pci_sort_bf_cmp(a, b) <= 0) { + list_move_tail(&a->dev.knode_bus.n_node, &b->dev.knode_bus.n_node); + return; + } + } + list_move_tail(&a->dev.knode_bus.n_node, list); +} + +static void __init pci_sort_breadthfirst_klist(void) +{ + LIST_HEAD(sorted_devices); + struct list_head *pos, *tmp; + struct klist_node *n; + struct device *dev; + struct pci_dev *pdev; + + spin_lock(&pci_bus_type.klist_devices.k_lock); + list_for_each_safe(pos, tmp, &pci_bus_type.klist_devices.k_list) { + n = container_of(pos, struct klist_node, n_node); + dev = container_of(n, struct device, knode_bus); + pdev = to_pci_dev(dev); + pci_insertion_sort_klist(pdev, &sorted_devices); + } + list_splice(&sorted_devices, &pci_bus_type.klist_devices.k_list); + spin_unlock(&pci_bus_type.klist_devices.k_lock); +} + +static void __init pci_insertion_sort_devices(struct pci_dev *a, struct list_head *list) +{ + struct pci_dev *b; + + list_for_each_entry(b, list, global_list) { + if (pci_sort_bf_cmp(a, b) <= 0) { + list_move_tail(&a->global_list, &b->global_list); + return; + } + } + list_move_tail(&a->global_list, list); +} + +static void __init pci_sort_breadthfirst_devices(void) +{ + LIST_HEAD(sorted_devices); + struct pci_dev *dev, *tmp; + + down_write(&pci_bus_sem); + list_for_each_entry_safe(dev, tmp, &pci_devices, global_list) { + pci_insertion_sort_devices(dev, &sorted_devices); + } + list_splice(&sorted_devices, &pci_devices); + up_write(&pci_bus_sem); +} + +void __init pci_sort_breadthfirst(void) +{ + pci_sort_breadthfirst_devices(); + pci_sort_breadthfirst_klist(); +} + diff --git a/include/linux/pci.h b/include/linux/pci.h index 09bf88fc80c5..4689e2a699c0 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -443,6 +443,7 @@ extern void pci_remove_bus(struct pci_bus *b); extern void pci_remove_bus_device(struct pci_dev *dev); extern void pci_stop_bus_device(struct pci_dev *dev); void pci_setup_cardbus(struct pci_bus *bus); +extern void pci_sort_breadthfirst(void); /* Generic PCI functions exported to card drivers */ -- cgit v1.2.3 From 7a54f25cef6c763f16c9fd49ae382de162147873 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman <gregkh@suse.de> Date: Fri, 13 Oct 2006 20:05:19 -0700 Subject: PCI Hotplug: move pci_hotplug.h to include/linux/ This makes it possible to build pci hotplug drivers outside of the main kernel tree, and Sam keeps telling me to move local header files to their proper places... 
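With the header now living in include/linux/, an out-of-tree hotplug driver needs nothing beyond the standard external-module kbuild flow. A minimal sketch, using a hypothetical module name "myphp" (nothing below is part of this patch; it only demonstrates that <linux/pci_hotplug.h> now resolves from outside drivers/pci/hotplug):

/* myphp.c - hypothetical out-of-tree stub */
#include <linux/module.h>
#include <linux/init.h>
#include <linux/pci.h>
#include <linux/pci_hotplug.h>	/* previously private to drivers/pci/hotplug */

static int __init myphp_init(void)
{
	printk(KERN_INFO "myphp: PCI_SPEED_UNKNOWN is 0x%x\n",
	       PCI_SPEED_UNKNOWN);
	return 0;
}

static void __exit myphp_exit(void)
{
}

module_init(myphp_init);
module_exit(myphp_exit);
MODULE_LICENSE("GPL");

# Makefile for the external build (the recipe line must be tab-indented)
obj-m := myphp.o
all:
	$(MAKE) -C /lib/modules/$(shell uname -r)/build M=$(PWD) modules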
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de> --- drivers/pci/hotplug/acpi_pcihp.c | 2 +- drivers/pci/hotplug/acpiphp.h | 2 +- drivers/pci/hotplug/acpiphp_core.c | 2 +- drivers/pci/hotplug/acpiphp_glue.c | 2 +- drivers/pci/hotplug/acpiphp_ibm.c | 1 - drivers/pci/hotplug/cpci_hotplug_core.c | 2 +- drivers/pci/hotplug/cpci_hotplug_pci.c | 2 +- drivers/pci/hotplug/cpqphp.h | 1 - drivers/pci/hotplug/cpqphp_core.c | 1 + drivers/pci/hotplug/cpqphp_ctrl.c | 1 + drivers/pci/hotplug/cpqphp_nvram.c | 1 + drivers/pci/hotplug/cpqphp_pci.c | 1 + drivers/pci/hotplug/cpqphp_sysfs.c | 1 + drivers/pci/hotplug/fakephp.c | 2 +- drivers/pci/hotplug/ibmphp.h | 2 +- drivers/pci/hotplug/pci_hotplug.h | 236 -------------------------------- drivers/pci/hotplug/pci_hotplug_core.c | 7 +- drivers/pci/hotplug/pciehp.h | 2 +- drivers/pci/hotplug/pcihp_skeleton.c | 2 +- drivers/pci/hotplug/rpadlpar_sysfs.c | 2 +- drivers/pci/hotplug/rpaphp_core.c | 2 +- drivers/pci/hotplug/sgi_hotplug.c | 2 +- drivers/pci/hotplug/shpchp.h | 3 +- include/linux/pci_hotplug.h | 236 ++++++++++++++++++++++++++++++++ 24 files changed, 258 insertions(+), 257 deletions(-) delete mode 100644 drivers/pci/hotplug/pci_hotplug.h create mode 100644 include/linux/pci_hotplug.h (limited to 'include') diff --git a/drivers/pci/hotplug/acpi_pcihp.c b/drivers/pci/hotplug/acpi_pcihp.c index 51cb9f817c22..270a33cc08f6 100644 --- a/drivers/pci/hotplug/acpi_pcihp.c +++ b/drivers/pci/hotplug/acpi_pcihp.c @@ -29,10 +29,10 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/pci.h> +#include <linux/pci_hotplug.h> #include <acpi/acpi.h> #include <acpi/acpi_bus.h> #include <acpi/actypes.h> -#include "pci_hotplug.h" #define MY_NAME "acpi_pcihp" diff --git a/drivers/pci/hotplug/acpiphp.h b/drivers/pci/hotplug/acpiphp.h index 7fff07e877c7..59c5b242d86d 100644 --- a/drivers/pci/hotplug/acpiphp.h +++ b/drivers/pci/hotplug/acpiphp.h @@ -38,7 +38,7 @@ #include <linux/acpi.h> #include <linux/kobject.h> /* for KOBJ_NAME_LEN */ #include <linux/mutex.h> -#include "pci_hotplug.h" +#include <linux/pci_hotplug.h> #define dbg(format, arg...) 
\ do { \ diff --git a/drivers/pci/hotplug/acpiphp_core.c b/drivers/pci/hotplug/acpiphp_core.c index e2fef60c2d06..c57d9d5ce84e 100644 --- a/drivers/pci/hotplug/acpiphp_core.c +++ b/drivers/pci/hotplug/acpiphp_core.c @@ -37,10 +37,10 @@ #include <linux/kernel.h> #include <linux/pci.h> +#include <linux/pci_hotplug.h> #include <linux/slab.h> #include <linux/smp.h> #include <linux/smp_lock.h> -#include "pci_hotplug.h" #include "acpiphp.h" #define MY_NAME "acpiphp" diff --git a/drivers/pci/hotplug/acpiphp_glue.c b/drivers/pci/hotplug/acpiphp_glue.c index 83e8e4412de5..c44311ac2fd3 100644 --- a/drivers/pci/hotplug/acpiphp_glue.c +++ b/drivers/pci/hotplug/acpiphp_glue.c @@ -45,11 +45,11 @@ #include <linux/kernel.h> #include <linux/pci.h> +#include <linux/pci_hotplug.h> #include <linux/smp_lock.h> #include <linux/mutex.h> #include "../pci.h" -#include "pci_hotplug.h" #include "acpiphp.h" static LIST_HEAD(bridge_list); diff --git a/drivers/pci/hotplug/acpiphp_ibm.c b/drivers/pci/hotplug/acpiphp_ibm.c index d0a07d9ab30c..bd40aee10e16 100644 --- a/drivers/pci/hotplug/acpiphp_ibm.c +++ b/drivers/pci/hotplug/acpiphp_ibm.c @@ -35,7 +35,6 @@ #include <linux/moduleparam.h> #include "acpiphp.h" -#include "pci_hotplug.h" #define DRIVER_VERSION "1.0.1" #define DRIVER_AUTHOR "Irene Zubarev <zubarev@us.ibm.com>, Vernon Mauery <vernux@us.ibm.com>" diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c index d06ab4045134..684551559d44 100644 --- a/drivers/pci/hotplug/cpci_hotplug_core.c +++ b/drivers/pci/hotplug/cpci_hotplug_core.c @@ -29,12 +29,12 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/pci.h> +#include <linux/pci_hotplug.h> #include <linux/init.h> #include <linux/interrupt.h> #include <linux/smp_lock.h> #include <asm/atomic.h> #include <linux/delay.h> -#include "pci_hotplug.h" #include "cpci_hotplug.h" #define DRIVER_AUTHOR "Scott Murray <scottm@somanetworks.com>" diff --git a/drivers/pci/hotplug/cpci_hotplug_pci.c b/drivers/pci/hotplug/cpci_hotplug_pci.c index 4afcaffd031c..7b1beaad2752 100644 --- a/drivers/pci/hotplug/cpci_hotplug_pci.c +++ b/drivers/pci/hotplug/cpci_hotplug_pci.c @@ -26,9 +26,9 @@ #include <linux/module.h> #include <linux/kernel.h> #include <linux/pci.h> +#include <linux/pci_hotplug.h> #include <linux/proc_fs.h> #include "../pci.h" -#include "pci_hotplug.h" #include "cpci_hotplug.h" #define MY_NAME "cpci_hotplug" diff --git a/drivers/pci/hotplug/cpqphp.h b/drivers/pci/hotplug/cpqphp.h index ea040c32f47d..298ad7f3f4f4 100644 --- a/drivers/pci/hotplug/cpqphp.h +++ b/drivers/pci/hotplug/cpqphp.h @@ -28,7 +28,6 @@ #ifndef _CPQPHP_H #define _CPQPHP_H -#include "pci_hotplug.h" #include <linux/interrupt.h> #include <asm/io.h> /* for read? and write? 
functions */ #include <linux/delay.h> /* for delays */ diff --git a/drivers/pci/hotplug/cpqphp_core.c b/drivers/pci/hotplug/cpqphp_core.c index 1fc259913b68..5617cfdadc5c 100644 --- a/drivers/pci/hotplug/cpqphp_core.c +++ b/drivers/pci/hotplug/cpqphp_core.c @@ -37,6 +37,7 @@ #include <linux/slab.h> #include <linux/workqueue.h> #include <linux/pci.h> +#include <linux/pci_hotplug.h> #include <linux/init.h> #include <linux/interrupt.h> diff --git a/drivers/pci/hotplug/cpqphp_ctrl.c b/drivers/pci/hotplug/cpqphp_ctrl.c index 3ec2ad7db49a..79ff6b4de3a6 100644 --- a/drivers/pci/hotplug/cpqphp_ctrl.c +++ b/drivers/pci/hotplug/cpqphp_ctrl.c @@ -36,6 +36,7 @@ #include <linux/wait.h> #include <linux/smp_lock.h> #include <linux/pci.h> +#include <linux/pci_hotplug.h> #include "cpqphp.h" static u32 configure_new_device(struct controller* ctrl, struct pci_func *func, diff --git a/drivers/pci/hotplug/cpqphp_nvram.c b/drivers/pci/hotplug/cpqphp_nvram.c index cf0878917537..298a6cfd8406 100644 --- a/drivers/pci/hotplug/cpqphp_nvram.c +++ b/drivers/pci/hotplug/cpqphp_nvram.c @@ -33,6 +33,7 @@ #include <linux/slab.h> #include <linux/workqueue.h> #include <linux/pci.h> +#include <linux/pci_hotplug.h> #include <linux/init.h> #include <asm/uaccess.h> #include "cpqphp.h" diff --git a/drivers/pci/hotplug/cpqphp_pci.c b/drivers/pci/hotplug/cpqphp_pci.c index 0d9688952f4a..fc7c74d72595 100644 --- a/drivers/pci/hotplug/cpqphp_pci.c +++ b/drivers/pci/hotplug/cpqphp_pci.c @@ -33,6 +33,7 @@ #include <linux/workqueue.h> #include <linux/proc_fs.h> #include <linux/pci.h> +#include <linux/pci_hotplug.h> #include "../pci.h" #include "cpqphp.h" #include "cpqphp_nvram.h" diff --git a/drivers/pci/hotplug/cpqphp_sysfs.c b/drivers/pci/hotplug/cpqphp_sysfs.c index 5bab666cd67e..634f74d919d3 100644 --- a/drivers/pci/hotplug/cpqphp_sysfs.c +++ b/drivers/pci/hotplug/cpqphp_sysfs.c @@ -32,6 +32,7 @@ #include <linux/proc_fs.h> #include <linux/workqueue.h> #include <linux/pci.h> +#include <linux/pci_hotplug.h> #include <linux/debugfs.h> #include "cpqphp.h" diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c index aaeb1129132e..e27907c91d92 100644 --- a/drivers/pci/hotplug/fakephp.c +++ b/drivers/pci/hotplug/fakephp.c @@ -35,10 +35,10 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/pci.h> +#include <linux/pci_hotplug.h> #include <linux/init.h> #include <linux/string.h> #include <linux/slab.h> -#include "pci_hotplug.h" #include "../pci.h" #if !defined(MODULE) diff --git a/drivers/pci/hotplug/ibmphp.h b/drivers/pci/hotplug/ibmphp.h index dba6d8ca9bda..612d96301509 100644 --- a/drivers/pci/hotplug/ibmphp.h +++ b/drivers/pci/hotplug/ibmphp.h @@ -30,7 +30,7 @@ * */ -#include "pci_hotplug.h" +#include <linux/pci_hotplug.h> extern int ibmphp_debug; diff --git a/drivers/pci/hotplug/pci_hotplug.h b/drivers/pci/hotplug/pci_hotplug.h deleted file mode 100644 index a675a05c4091..000000000000 --- a/drivers/pci/hotplug/pci_hotplug.h +++ /dev/null @@ -1,236 +0,0 @@ -/* - * PCI HotPlug Core Functions - * - * Copyright (C) 1995,2001 Compaq Computer Corporation - * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com) - * Copyright (C) 2001 IBM Corp. - * - * All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or (at - * your option) any later version. 
- * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or - * NON INFRINGEMENT. See the GNU General Public License for more - * details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - * - * Send feedback to <kristen.c.accardi@intel.com> - * - */ -#ifndef _PCI_HOTPLUG_H -#define _PCI_HOTPLUG_H - - -/* These values come from the PCI Hotplug Spec */ -enum pci_bus_speed { - PCI_SPEED_33MHz = 0x00, - PCI_SPEED_66MHz = 0x01, - PCI_SPEED_66MHz_PCIX = 0x02, - PCI_SPEED_100MHz_PCIX = 0x03, - PCI_SPEED_133MHz_PCIX = 0x04, - PCI_SPEED_66MHz_PCIX_ECC = 0x05, - PCI_SPEED_100MHz_PCIX_ECC = 0x06, - PCI_SPEED_133MHz_PCIX_ECC = 0x07, - PCI_SPEED_66MHz_PCIX_266 = 0x09, - PCI_SPEED_100MHz_PCIX_266 = 0x0a, - PCI_SPEED_133MHz_PCIX_266 = 0x0b, - PCI_SPEED_66MHz_PCIX_533 = 0x11, - PCI_SPEED_100MHz_PCIX_533 = 0x12, - PCI_SPEED_133MHz_PCIX_533 = 0x13, - PCI_SPEED_UNKNOWN = 0xff, -}; - -/* These values come from the PCI Express Spec */ -enum pcie_link_width { - PCIE_LNK_WIDTH_RESRV = 0x00, - PCIE_LNK_X1 = 0x01, - PCIE_LNK_X2 = 0x02, - PCIE_LNK_X4 = 0x04, - PCIE_LNK_X8 = 0x08, - PCIE_LNK_X12 = 0x0C, - PCIE_LNK_X16 = 0x10, - PCIE_LNK_X32 = 0x20, - PCIE_LNK_WIDTH_UNKNOWN = 0xFF, -}; - -enum pcie_link_speed { - PCIE_2PT5GB = 0x14, - PCIE_LNK_SPEED_UNKNOWN = 0xFF, -}; - -struct hotplug_slot; -struct hotplug_slot_attribute { - struct attribute attr; - ssize_t (*show)(struct hotplug_slot *, char *); - ssize_t (*store)(struct hotplug_slot *, const char *, size_t); -}; -#define to_hotplug_attr(n) container_of(n, struct hotplug_slot_attribute, attr); - -/** - * struct hotplug_slot_ops -the callbacks that the hotplug pci core can use - * @owner: The module owner of this structure - * @enable_slot: Called when the user wants to enable a specific pci slot - * @disable_slot: Called when the user wants to disable a specific pci slot - * @set_attention_status: Called to set the specific slot's attention LED to - * the specified value - * @hardware_test: Called to run a specified hardware test on the specified - * slot. - * @get_power_status: Called to get the current power status of a slot. - * If this field is NULL, the value passed in the struct hotplug_slot_info - * will be used when this value is requested by a user. - * @get_attention_status: Called to get the current attention status of a slot. - * If this field is NULL, the value passed in the struct hotplug_slot_info - * will be used when this value is requested by a user. - * @get_latch_status: Called to get the current latch status of a slot. - * If this field is NULL, the value passed in the struct hotplug_slot_info - * will be used when this value is requested by a user. - * @get_adapter_status: Called to get see if an adapter is present in the slot or not. - * If this field is NULL, the value passed in the struct hotplug_slot_info - * will be used when this value is requested by a user. - * @get_address: Called to get pci address of a slot. - * If this field is NULL, the value passed in the struct hotplug_slot_info - * will be used when this value is requested by a user. - * @get_max_bus_speed: Called to get the max bus speed for a slot. 
- * If this field is NULL, the value passed in the struct hotplug_slot_info - * will be used when this value is requested by a user. - * @get_cur_bus_speed: Called to get the current bus speed for a slot. - * If this field is NULL, the value passed in the struct hotplug_slot_info - * will be used when this value is requested by a user. - * - * The table of function pointers that is passed to the hotplug pci core by a - * hotplug pci driver. These functions are called by the hotplug pci core when - * the user wants to do something to a specific slot (query it for information, - * set an LED, enable / disable power, etc.) - */ -struct hotplug_slot_ops { - struct module *owner; - int (*enable_slot) (struct hotplug_slot *slot); - int (*disable_slot) (struct hotplug_slot *slot); - int (*set_attention_status) (struct hotplug_slot *slot, u8 value); - int (*hardware_test) (struct hotplug_slot *slot, u32 value); - int (*get_power_status) (struct hotplug_slot *slot, u8 *value); - int (*get_attention_status) (struct hotplug_slot *slot, u8 *value); - int (*get_latch_status) (struct hotplug_slot *slot, u8 *value); - int (*get_adapter_status) (struct hotplug_slot *slot, u8 *value); - int (*get_address) (struct hotplug_slot *slot, u32 *value); - int (*get_max_bus_speed) (struct hotplug_slot *slot, enum pci_bus_speed *value); - int (*get_cur_bus_speed) (struct hotplug_slot *slot, enum pci_bus_speed *value); -}; - -/** - * struct hotplug_slot_info - used to notify the hotplug pci core of the state of the slot - * @power: if power is enabled or not (1/0) - * @attention_status: if the attention light is enabled or not (1/0) - * @latch_status: if the latch (if any) is open or closed (1/0) - * @adapter_present: if there is a pci board present in the slot or not (1/0) - * @address: (domain << 16 | bus << 8 | dev) - * - * Used to notify the hotplug pci core of the status of a specific slot. - */ -struct hotplug_slot_info { - u8 power_status; - u8 attention_status; - u8 latch_status; - u8 adapter_status; - u32 address; - enum pci_bus_speed max_bus_speed; - enum pci_bus_speed cur_bus_speed; -}; - -/** - * struct hotplug_slot - used to register a physical slot with the hotplug pci core - * @name: the name of the slot being registered. This string must - * be unique amoung slots registered on this system. - * @ops: pointer to the &struct hotplug_slot_ops to be used for this slot - * @info: pointer to the &struct hotplug_slot_info for the initial values for - * this slot. - * @release: called during pci_hp_deregister to free memory allocated in a - * hotplug_slot structure. - * @private: used by the hotplug pci controller driver to store whatever it - * needs. - */ -struct hotplug_slot { - char *name; - struct hotplug_slot_ops *ops; - struct hotplug_slot_info *info; - void (*release) (struct hotplug_slot *slot); - void *private; - - /* Variables below this are for use only by the hotplug pci core. 
*/ - struct list_head slot_list; - struct kobject kobj; -}; -#define to_hotplug_slot(n) container_of(n, struct hotplug_slot, kobj) - -extern int pci_hp_register (struct hotplug_slot *slot); -extern int pci_hp_deregister (struct hotplug_slot *slot); -extern int __must_check pci_hp_change_slot_info (struct hotplug_slot *slot, - struct hotplug_slot_info *info); -extern struct subsystem pci_hotplug_slots_subsys; - -/* PCI Setting Record (Type 0) */ -struct hpp_type0 { - u32 revision; - u8 cache_line_size; - u8 latency_timer; - u8 enable_serr; - u8 enable_perr; -}; - -/* PCI-X Setting Record (Type 1) */ -struct hpp_type1 { - u32 revision; - u8 max_mem_read; - u8 avg_max_split; - u16 tot_max_split; -}; - -/* PCI Express Setting Record (Type 2) */ -struct hpp_type2 { - u32 revision; - u32 unc_err_mask_and; - u32 unc_err_mask_or; - u32 unc_err_sever_and; - u32 unc_err_sever_or; - u32 cor_err_mask_and; - u32 cor_err_mask_or; - u32 adv_err_cap_and; - u32 adv_err_cap_or; - u16 pci_exp_devctl_and; - u16 pci_exp_devctl_or; - u16 pci_exp_lnkctl_and; - u16 pci_exp_lnkctl_or; - u32 sec_unc_err_sever_and; - u32 sec_unc_err_sever_or; - u32 sec_unc_err_mask_and; - u32 sec_unc_err_mask_or; -}; - -struct hotplug_params { - struct hpp_type0 *t0; /* Type0: NULL if not available */ - struct hpp_type1 *t1; /* Type1: NULL if not available */ - struct hpp_type2 *t2; /* Type2: NULL if not available */ - struct hpp_type0 type0_data; - struct hpp_type1 type1_data; - struct hpp_type2 type2_data; -}; - -#ifdef CONFIG_ACPI -#include <acpi/acpi.h> -#include <acpi/acpi_bus.h> -#include <acpi/actypes.h> -extern acpi_status acpi_run_oshp(acpi_handle handle); -extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, - struct hotplug_params *hpp); -int acpi_root_bridge(acpi_handle handle); -#endif -#endif - diff --git a/drivers/pci/hotplug/pci_hotplug_core.c b/drivers/pci/hotplug/pci_hotplug_core.c index fa666d0cc489..f5d632e72323 100644 --- a/drivers/pci/hotplug/pci_hotplug_core.c +++ b/drivers/pci/hotplug/pci_hotplug_core.c @@ -30,6 +30,8 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/list.h> +#include <linux/kobject.h> +#include <linux/sysfs.h> #include <linux/pagemap.h> #include <linux/slab.h> #include <linux/smp_lock.h> @@ -37,11 +39,8 @@ #include <linux/mount.h> #include <linux/namei.h> #include <linux/pci.h> +#include <linux/pci_hotplug.h> #include <asm/uaccess.h> -#include <linux/kobject.h> -#include <linux/sysfs.h> -#include "pci_hotplug.h" - #define MY_NAME "pci_hotplug" diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 30f021c55fe5..4fb12fcda563 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -31,11 +31,11 @@ #include <linux/types.h> #include <linux/pci.h> +#include <linux/pci_hotplug.h> #include <linux/delay.h> #include <linux/sched.h> /* signal_pending() */ #include <linux/pcieport_if.h> #include <linux/mutex.h> -#include "pci_hotplug.h" #define MY_NAME "pciehp" diff --git a/drivers/pci/hotplug/pcihp_skeleton.c b/drivers/pci/hotplug/pcihp_skeleton.c index 2b9e10e38613..50bcd3fe61da 100644 --- a/drivers/pci/hotplug/pcihp_skeleton.c +++ b/drivers/pci/hotplug/pcihp_skeleton.c @@ -33,8 +33,8 @@ #include <linux/kernel.h> #include <linux/slab.h> #include <linux/pci.h> +#include <linux/pci_hotplug.h> #include <linux/init.h> -#include "pci_hotplug.h" #define SLOT_NAME_SIZE 10 struct slot { diff --git a/drivers/pci/hotplug/rpadlpar_sysfs.c b/drivers/pci/hotplug/rpadlpar_sysfs.c index db69be85b458..6c5be3ff578c 100644 --- 
a/drivers/pci/hotplug/rpadlpar_sysfs.c +++ b/drivers/pci/hotplug/rpadlpar_sysfs.c @@ -14,7 +14,7 @@ */ #include <linux/kobject.h> #include <linux/string.h> -#include "pci_hotplug.h" +#include <linux/pci_hotplug.h> #include "rpadlpar.h" #define DLPAR_KOBJ_NAME "control" diff --git a/drivers/pci/hotplug/rpaphp_core.c b/drivers/pci/hotplug/rpaphp_core.c index 7288a3eccfb3..141486df235b 100644 --- a/drivers/pci/hotplug/rpaphp_core.c +++ b/drivers/pci/hotplug/rpaphp_core.c @@ -26,6 +26,7 @@ #include <linux/module.h> #include <linux/moduleparam.h> #include <linux/pci.h> +#include <linux/pci_hotplug.h> #include <linux/slab.h> #include <linux/smp.h> #include <linux/smp_lock.h> @@ -36,7 +37,6 @@ #include "../pci.h" /* for pci_add_new_bus */ /* and pci_do_scan_bus */ #include "rpaphp.h" -#include "pci_hotplug.h" int debug; static struct semaphore rpaphp_sem; diff --git a/drivers/pci/hotplug/sgi_hotplug.c b/drivers/pci/hotplug/sgi_hotplug.c index f31d83c2c633..b62ad31a9739 100644 --- a/drivers/pci/hotplug/sgi_hotplug.c +++ b/drivers/pci/hotplug/sgi_hotplug.c @@ -13,6 +13,7 @@ #include <linux/kernel.h> #include <linux/module.h> #include <linux/pci.h> +#include <linux/pci_hotplug.h> #include <linux/proc_fs.h> #include <linux/types.h> #include <linux/mutex.h> @@ -29,7 +30,6 @@ #include <asm/sn/types.h> #include "../pci.h" -#include "pci_hotplug.h" MODULE_LICENSE("GPL"); MODULE_AUTHOR("SGI (prarit@sgi.com, dickie@sgi.com, habeck@sgi.com)"); diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h index 7e7d490622e1..ea2087c34149 100644 --- a/drivers/pci/hotplug/shpchp.h +++ b/drivers/pci/hotplug/shpchp.h @@ -31,12 +31,11 @@ #include <linux/types.h> #include <linux/pci.h> +#include <linux/pci_hotplug.h> #include <linux/delay.h> #include <linux/sched.h> /* signal_pending(), struct timer_list */ #include <linux/mutex.h> -#include "pci_hotplug.h" - #if !defined(MODULE) #define MY_NAME "shpchp" #else diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h new file mode 100644 index 000000000000..a675a05c4091 --- /dev/null +++ b/include/linux/pci_hotplug.h @@ -0,0 +1,236 @@ +/* + * PCI HotPlug Core Functions + * + * Copyright (C) 1995,2001 Compaq Computer Corporation + * Copyright (C) 2001 Greg Kroah-Hartman (greg@kroah.com) + * Copyright (C) 2001 IBM Corp. + * + * All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or (at + * your option) any later version. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ * + * Send feedback to <kristen.c.accardi@intel.com> + * + */ +#ifndef _PCI_HOTPLUG_H +#define _PCI_HOTPLUG_H + + +/* These values come from the PCI Hotplug Spec */ +enum pci_bus_speed { + PCI_SPEED_33MHz = 0x00, + PCI_SPEED_66MHz = 0x01, + PCI_SPEED_66MHz_PCIX = 0x02, + PCI_SPEED_100MHz_PCIX = 0x03, + PCI_SPEED_133MHz_PCIX = 0x04, + PCI_SPEED_66MHz_PCIX_ECC = 0x05, + PCI_SPEED_100MHz_PCIX_ECC = 0x06, + PCI_SPEED_133MHz_PCIX_ECC = 0x07, + PCI_SPEED_66MHz_PCIX_266 = 0x09, + PCI_SPEED_100MHz_PCIX_266 = 0x0a, + PCI_SPEED_133MHz_PCIX_266 = 0x0b, + PCI_SPEED_66MHz_PCIX_533 = 0x11, + PCI_SPEED_100MHz_PCIX_533 = 0x12, + PCI_SPEED_133MHz_PCIX_533 = 0x13, + PCI_SPEED_UNKNOWN = 0xff, +}; + +/* These values come from the PCI Express Spec */ +enum pcie_link_width { + PCIE_LNK_WIDTH_RESRV = 0x00, + PCIE_LNK_X1 = 0x01, + PCIE_LNK_X2 = 0x02, + PCIE_LNK_X4 = 0x04, + PCIE_LNK_X8 = 0x08, + PCIE_LNK_X12 = 0x0C, + PCIE_LNK_X16 = 0x10, + PCIE_LNK_X32 = 0x20, + PCIE_LNK_WIDTH_UNKNOWN = 0xFF, +}; + +enum pcie_link_speed { + PCIE_2PT5GB = 0x14, + PCIE_LNK_SPEED_UNKNOWN = 0xFF, +}; + +struct hotplug_slot; +struct hotplug_slot_attribute { + struct attribute attr; + ssize_t (*show)(struct hotplug_slot *, char *); + ssize_t (*store)(struct hotplug_slot *, const char *, size_t); +}; +#define to_hotplug_attr(n) container_of(n, struct hotplug_slot_attribute, attr); + +/** + * struct hotplug_slot_ops -the callbacks that the hotplug pci core can use + * @owner: The module owner of this structure + * @enable_slot: Called when the user wants to enable a specific pci slot + * @disable_slot: Called when the user wants to disable a specific pci slot + * @set_attention_status: Called to set the specific slot's attention LED to + * the specified value + * @hardware_test: Called to run a specified hardware test on the specified + * slot. + * @get_power_status: Called to get the current power status of a slot. + * If this field is NULL, the value passed in the struct hotplug_slot_info + * will be used when this value is requested by a user. + * @get_attention_status: Called to get the current attention status of a slot. + * If this field is NULL, the value passed in the struct hotplug_slot_info + * will be used when this value is requested by a user. + * @get_latch_status: Called to get the current latch status of a slot. + * If this field is NULL, the value passed in the struct hotplug_slot_info + * will be used when this value is requested by a user. + * @get_adapter_status: Called to get see if an adapter is present in the slot or not. + * If this field is NULL, the value passed in the struct hotplug_slot_info + * will be used when this value is requested by a user. + * @get_address: Called to get pci address of a slot. + * If this field is NULL, the value passed in the struct hotplug_slot_info + * will be used when this value is requested by a user. + * @get_max_bus_speed: Called to get the max bus speed for a slot. + * If this field is NULL, the value passed in the struct hotplug_slot_info + * will be used when this value is requested by a user. + * @get_cur_bus_speed: Called to get the current bus speed for a slot. + * If this field is NULL, the value passed in the struct hotplug_slot_info + * will be used when this value is requested by a user. + * + * The table of function pointers that is passed to the hotplug pci core by a + * hotplug pci driver. 
These functions are called by the hotplug pci core when + * the user wants to do something to a specific slot (query it for information, + * set an LED, enable / disable power, etc.) + */ +struct hotplug_slot_ops { + struct module *owner; + int (*enable_slot) (struct hotplug_slot *slot); + int (*disable_slot) (struct hotplug_slot *slot); + int (*set_attention_status) (struct hotplug_slot *slot, u8 value); + int (*hardware_test) (struct hotplug_slot *slot, u32 value); + int (*get_power_status) (struct hotplug_slot *slot, u8 *value); + int (*get_attention_status) (struct hotplug_slot *slot, u8 *value); + int (*get_latch_status) (struct hotplug_slot *slot, u8 *value); + int (*get_adapter_status) (struct hotplug_slot *slot, u8 *value); + int (*get_address) (struct hotplug_slot *slot, u32 *value); + int (*get_max_bus_speed) (struct hotplug_slot *slot, enum pci_bus_speed *value); + int (*get_cur_bus_speed) (struct hotplug_slot *slot, enum pci_bus_speed *value); +}; + +/** + * struct hotplug_slot_info - used to notify the hotplug pci core of the state of the slot + * @power: if power is enabled or not (1/0) + * @attention_status: if the attention light is enabled or not (1/0) + * @latch_status: if the latch (if any) is open or closed (1/0) + * @adapter_present: if there is a pci board present in the slot or not (1/0) + * @address: (domain << 16 | bus << 8 | dev) + * + * Used to notify the hotplug pci core of the status of a specific slot. + */ +struct hotplug_slot_info { + u8 power_status; + u8 attention_status; + u8 latch_status; + u8 adapter_status; + u32 address; + enum pci_bus_speed max_bus_speed; + enum pci_bus_speed cur_bus_speed; +}; + +/** + * struct hotplug_slot - used to register a physical slot with the hotplug pci core + * @name: the name of the slot being registered. This string must + * be unique amoung slots registered on this system. + * @ops: pointer to the &struct hotplug_slot_ops to be used for this slot + * @info: pointer to the &struct hotplug_slot_info for the initial values for + * this slot. + * @release: called during pci_hp_deregister to free memory allocated in a + * hotplug_slot structure. + * @private: used by the hotplug pci controller driver to store whatever it + * needs. + */ +struct hotplug_slot { + char *name; + struct hotplug_slot_ops *ops; + struct hotplug_slot_info *info; + void (*release) (struct hotplug_slot *slot); + void *private; + + /* Variables below this are for use only by the hotplug pci core. 
*/ + struct list_head slot_list; + struct kobject kobj; +}; +#define to_hotplug_slot(n) container_of(n, struct hotplug_slot, kobj) + +extern int pci_hp_register (struct hotplug_slot *slot); +extern int pci_hp_deregister (struct hotplug_slot *slot); +extern int __must_check pci_hp_change_slot_info (struct hotplug_slot *slot, + struct hotplug_slot_info *info); +extern struct subsystem pci_hotplug_slots_subsys; + +/* PCI Setting Record (Type 0) */ +struct hpp_type0 { + u32 revision; + u8 cache_line_size; + u8 latency_timer; + u8 enable_serr; + u8 enable_perr; +}; + +/* PCI-X Setting Record (Type 1) */ +struct hpp_type1 { + u32 revision; + u8 max_mem_read; + u8 avg_max_split; + u16 tot_max_split; +}; + +/* PCI Express Setting Record (Type 2) */ +struct hpp_type2 { + u32 revision; + u32 unc_err_mask_and; + u32 unc_err_mask_or; + u32 unc_err_sever_and; + u32 unc_err_sever_or; + u32 cor_err_mask_and; + u32 cor_err_mask_or; + u32 adv_err_cap_and; + u32 adv_err_cap_or; + u16 pci_exp_devctl_and; + u16 pci_exp_devctl_or; + u16 pci_exp_lnkctl_and; + u16 pci_exp_lnkctl_or; + u32 sec_unc_err_sever_and; + u32 sec_unc_err_sever_or; + u32 sec_unc_err_mask_and; + u32 sec_unc_err_mask_or; +}; + +struct hotplug_params { + struct hpp_type0 *t0; /* Type0: NULL if not available */ + struct hpp_type1 *t1; /* Type1: NULL if not available */ + struct hpp_type2 *t2; /* Type2: NULL if not available */ + struct hpp_type0 type0_data; + struct hpp_type1 type1_data; + struct hpp_type2 type2_data; +}; + +#ifdef CONFIG_ACPI +#include <acpi/acpi.h> +#include <acpi/acpi_bus.h> +#include <acpi/actypes.h> +extern acpi_status acpi_run_oshp(acpi_handle handle); +extern acpi_status acpi_get_hp_params_from_firmware(struct pci_bus *bus, + struct hotplug_params *hpp); +int acpi_root_bridge(acpi_handle handle); +#endif +#endif + -- cgit v1.2.3 From eb409460b1abec0e2a1f9c9d07019f4157a6d6bc Mon Sep 17 00:00:00 2001 From: Lijun Chen <chenli@nortel.com> Date: Mon, 16 Oct 2006 21:59:42 -0700 Subject: [TIPC]: Added subscription cancellation capability This patch allows a TIPC application to cancel an existing topology service subscription by re-requesting the subscription with the TIPC_SUB_CANCEL filter bit set. (All other bits of the cancel request must match the original subscription request.) Signed-off-by: Allan Stephens <allan.stephens@windriver.com> Signed-off-by: Per Liden <per.liden@ericsson.com> Signed-off-by: David S. 
Miller <davem@davemloft.net> --- include/linux/tipc.h | 1 + net/tipc/subscr.c | 99 ++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 85 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/tipc.h b/include/linux/tipc.h index 243a15f54002..bea469455a0c 100644 --- a/include/linux/tipc.h +++ b/include/linux/tipc.h @@ -129,6 +129,7 @@ static inline unsigned int tipc_node(__u32 addr) #define TIPC_SUB_PORTS 0x01 /* filter for port availability */ #define TIPC_SUB_SERVICE 0x02 /* filter for service availability */ +#define TIPC_SUB_CANCEL 0x04 /* cancel a subscription */ #if 0 /* The following filter options are not currently implemented */ #define TIPC_SUB_NO_BIND_EVTS 0x04 /* filter out "publish" events */ diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index c51600ba5f4a..7a918f12a5df 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -155,7 +155,7 @@ void tipc_subscr_report_overlap(struct subscription *sub, sub->seq.upper, found_lower, found_upper); if (!tipc_subscr_overlap(sub, found_lower, found_upper)) return; - if (!must && (sub->filter != TIPC_SUB_PORTS)) + if (!must && !(sub->filter & TIPC_SUB_PORTS)) return; subscr_send_event(sub, found_lower, found_upper, event, port_ref, node); } @@ -176,6 +176,13 @@ static void subscr_timeout(struct subscription *sub) if (subscriber == NULL) return; + /* Validate timeout (in case subscription is being cancelled) */ + + if (sub->timeout == TIPC_WAIT_FOREVER) { + tipc_ref_unlock(subscriber_ref); + return; + } + /* Unlink subscription from name table */ tipc_nametbl_unsubscribe(sub); @@ -198,6 +205,20 @@ static void subscr_timeout(struct subscription *sub) atomic_dec(&topsrv.subscription_count); } +/** + * subscr_del - delete a subscription within a subscription list + * + * Called with subscriber locked. + */ + +static void subscr_del(struct subscription *sub) +{ + tipc_nametbl_unsubscribe(sub); + list_del(&sub->subscription_list); + kfree(sub); + atomic_dec(&topsrv.subscription_count); +} + /** * subscr_terminate - terminate communication with a subscriber * @@ -227,12 +248,9 @@ static void subscr_terminate(struct subscriber *subscriber) k_cancel_timer(&sub->timer); k_term_timer(&sub->timer); } - tipc_nametbl_unsubscribe(sub); - list_del(&sub->subscription_list); - dbg("Term: Removed sub %u,%u,%u from subscriber %x list\n", + dbg("Term: Removing sub %u,%u,%u from subscriber %x list\n", sub->seq.type, sub->seq.lower, sub->seq.upper, subscriber); - kfree(sub); - atomic_dec(&topsrv.subscription_count); + subscr_del(sub); } /* Sever connection to subscriber */ @@ -252,6 +270,49 @@ static void subscr_terminate(struct subscriber *subscriber) kfree(subscriber); } +/** + * subscr_cancel - handle subscription cancellation request + * + * Called with subscriber locked. Routine must temporarily release this lock + * to enable the subscription timeout routine to finish without deadlocking; + * the lock is then reclaimed to allow caller to release it upon return. + * + * Note that fields of 's' use subscriber's endianness! 
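+ * A valid cancellation therefore matches the stored subscription
+ * byte-for-byte (the caller has already cleared the TIPC_SUB_CANCEL
+ * bit before calling in here), which is why a plain memcmp() against
+ * sub->evt.s works below without any htohl() conversion.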
+ */ + +static void subscr_cancel(struct tipc_subscr *s, + struct subscriber *subscriber) +{ + struct subscription *sub; + struct subscription *sub_temp; + int found = 0; + + /* Find first matching subscription, exit if not found */ + + list_for_each_entry_safe(sub, sub_temp, &subscriber->subscription_list, + subscription_list) { + if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) { + found = 1; + break; + } + } + if (!found) + return; + + /* Cancel subscription timer (if used), then delete subscription */ + + if (sub->timeout != TIPC_WAIT_FOREVER) { + sub->timeout = TIPC_WAIT_FOREVER; + spin_unlock_bh(subscriber->lock); + k_cancel_timer(&sub->timer); + k_term_timer(&sub->timer); + spin_lock_bh(subscriber->lock); + } + dbg("Cancel: removing sub %u,%u,%u from subscriber %x list\n", + sub->seq.type, sub->seq.lower, sub->seq.upper, subscriber); + subscr_del(sub); +} + /** * subscr_subscribe - create subscription for subscriber * @@ -263,6 +324,21 @@ static void subscr_subscribe(struct tipc_subscr *s, { struct subscription *sub; + /* Determine/update subscriber's endianness */ + + if (s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE)) + subscriber->swap = 0; + else + subscriber->swap = 1; + + /* Detect & process a subscription cancellation request */ + + if (s->filter & htohl(TIPC_SUB_CANCEL, subscriber->swap)) { + s->filter &= ~htohl(TIPC_SUB_CANCEL, subscriber->swap); + subscr_cancel(s, subscriber); + return; + } + /* Refuse subscription if global limit exceeded */ if (atomic_read(&topsrv.subscription_count) >= tipc_max_subscriptions) { @@ -281,13 +357,6 @@ static void subscr_subscribe(struct tipc_subscr *s, return; } - /* Determine/update subscriber's endianness */ - - if ((s->filter == TIPC_SUB_PORTS) || (s->filter == TIPC_SUB_SERVICE)) - subscriber->swap = 0; - else - subscriber->swap = 1; - /* Initialize subscription object */ memset(sub, 0, sizeof(*sub)); @@ -296,8 +365,8 @@ static void subscr_subscribe(struct tipc_subscr *s, sub->seq.upper = htohl(s->seq.upper, subscriber->swap); sub->timeout = htohl(s->timeout, subscriber->swap); sub->filter = htohl(s->filter, subscriber->swap); - if ((((sub->filter != TIPC_SUB_PORTS) - && (sub->filter != TIPC_SUB_SERVICE))) + if ((!(sub->filter & TIPC_SUB_PORTS) + == !(sub->filter & TIPC_SUB_SERVICE)) || (sub->seq.lower > sub->seq.upper)) { warn("Subscription rejected, illegal request\n"); kfree(sub); -- cgit v1.2.3 From e320af1df4c47305e829e8e1a40e5fad0e5e9fba Mon Sep 17 00:00:00 2001 From: Ville Nuorvala <vnuorval@tcs.hut.fi> Date: Mon, 16 Oct 2006 22:05:55 -0700 Subject: [IPV6]: Remove struct pol_chain. Struct pol_chain has existed since at least the 2.2 kernel, but isn't used anymore. As the IPv6 policy routing is implemented in a totally different way in the current kernel, just get rid of it. Signed-off-by: Ville Nuorvala <vnuorval@tcs.hut.fi> Signed-off-by: David S. 
Miller <davem@davemloft.net> --- include/net/ip6_route.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 6ca6b71dfe0f..c14b70ed4c57 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -36,13 +36,6 @@ struct route_info { #define RT6_LOOKUP_F_REACHABLE 0x2 #define RT6_LOOKUP_F_HAS_SADDR 0x4 -struct pol_chain { - int type; - int priority; - struct fib6_node *rules; - struct pol_chain *next; -}; - extern struct rt6_info ip6_null_entry; #ifdef CONFIG_IPV6_MULTIPLE_TABLES -- cgit v1.2.3 From b52f070c9c3c09ed3b7f699280193aae7e25d816 Mon Sep 17 00:00:00 2001 From: Thomas Graf <tgraf@suug.ch> Date: Wed, 18 Oct 2006 20:26:36 -0700 Subject: [IPv4] fib: Remove unused fib_config members Signed-off-by: Thomas Graf <tgraf@suug.ch> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/ip_fib.h | 5 +---- net/ipv4/fib_frontend.c | 5 ----- 2 files changed, 1 insertion(+), 9 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 82229146bac7..949b932d2f08 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -21,17 +21,14 @@ #include <net/fib_rules.h> struct fib_config { - u8 fc_family; u8 fc_dst_len; - u8 fc_src_len; u8 fc_tos; u8 fc_protocol; u8 fc_scope; u8 fc_type; - /* 1 byte unused */ + /* 3 bytes unused */ u32 fc_table; __be32 fc_dst; - __be32 fc_src; __be32 fc_gw; int fc_oif; u32 fc_flags; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 9c399a70dd5d..af0190d8b6c0 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -482,9 +482,7 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, memset(cfg, 0, sizeof(*cfg)); rtm = nlmsg_data(nlh); - cfg->fc_family = rtm->rtm_family; cfg->fc_dst_len = rtm->rtm_dst_len; - cfg->fc_src_len = rtm->rtm_src_len; cfg->fc_tos = rtm->rtm_tos; cfg->fc_table = rtm->rtm_table; cfg->fc_protocol = rtm->rtm_protocol; @@ -501,9 +499,6 @@ static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, case RTA_DST: cfg->fc_dst = nla_get_be32(attr); break; - case RTA_SRC: - cfg->fc_src = nla_get_be32(attr); - break; case RTA_OIF: cfg->fc_oif = nla_get_u32(attr); break; -- cgit v1.2.3 From ae8064ac32d07f609114d73928cdef803be87134 Mon Sep 17 00:00:00 2001 From: John Heffner <jheffner@psc.edu> Date: Wed, 18 Oct 2006 20:36:48 -0700 Subject: [TCP]: Bound TSO defer time This patch limits the amount of time you will defer sending a TSO segment to less than two clock ticks, or the time between two acks, whichever is longer. On slow links, deferring causes significant bursts. See attached plots, which show RTT through a 1 Mbps link with a 100 ms RTT and ~100 ms queue for (a) non-TSO, (b) currnet TSO, and (c) patched TSO. This burstiness causes significant jitter, tends to overflow queues early (bad for short queues), and makes delay-based congestion control more difficult. Deferring by a couple clock ticks I believe will have a relatively small impact on performance. Signed-off-by: John Heffner <jheffner@psc.edu> Signed-off-by: David S. 
Miller <davem@davemloft.net> --- include/linux/tcp.h | 2 ++ net/ipv4/tcp_output.c | 20 +++++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 0e058a2d1c6d..2d36f6db3706 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -342,6 +342,8 @@ struct tcp_sock { unsigned long last_synq_overflow; + __u32 tso_deferred; + /* Receiver side RTT estimation */ struct { __u32 rtt; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index f22536e32cb1..ca406157724c 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1096,10 +1096,14 @@ static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_ u32 send_win, cong_win, limit, in_flight; if (TCP_SKB_CB(skb)->flags & TCPCB_FLAG_FIN) - return 0; + goto send_now; if (icsk->icsk_ca_state != TCP_CA_Open) - return 0; + goto send_now; + + /* Defer for less than two clock ticks. */ + if (!tp->tso_deferred && ((jiffies<<1)>>1) - (tp->tso_deferred>>1) > 1) + goto send_now; in_flight = tcp_packets_in_flight(tp); @@ -1115,7 +1119,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_ /* If a full-sized TSO skb can be sent, do it. */ if (limit >= 65536) - return 0; + goto send_now; if (sysctl_tcp_tso_win_divisor) { u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); @@ -1125,7 +1129,7 @@ static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_ */ chunk /= sysctl_tcp_tso_win_divisor; if (limit >= chunk) - return 0; + goto send_now; } else { /* Different approach, try not to defer past a single * ACK. Receiver should ACK every other full sized @@ -1133,11 +1137,17 @@ static int tcp_tso_should_defer(struct sock *sk, struct tcp_sock *tp, struct sk_ * then send now. */ if (limit > tcp_max_burst(tp) * tp->mss_cache) - return 0; + goto send_now; } /* Ok, it looks like it is advisable to defer. */ + tp->tso_deferred = 1 | (jiffies<<1); + return 1; + +send_now: + tp->tso_deferred = 0; + return 0; } /* Create a new MTU probe if we are ready. -- cgit v1.2.3 From 6b0022305f80cf249de69e746f6f5ccf7ffc5b7c Mon Sep 17 00:00:00 2001 From: Paul Mundt <lethal@linux-sh.org> Date: Thu, 12 Oct 2006 17:07:45 +0900 Subject: sh: Proper show_stack/show_trace() implementation. This splits out some of the previous show_stack() implementation which was mostly doing the show_trace() work without actually dumping any of the stack contents. This now gets split in to two sections, where we do the fetching of the stack pointer and subsequent stack dumping in show_stack(), while moving the call trace in to show_trace(). 
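In outline, each routine now has exactly one job; a condensed sketch of the resulting structure, distilled from the patch below (the user_mode() early-out and the "Call trace:" banner are omitted, and dump_mem() is the raw hex dumper the patch adds):

/* show_trace(): walk the stack words, print only plausible return addresses. */
void show_trace(struct task_struct *tsk, unsigned long *sp,
		struct pt_regs *regs)
{
	unsigned long addr;

	while (!kstack_end(sp)) {
		addr = *sp++;
		if (kernel_text_address(addr))
			print_ip_sym(addr);
	}
}

/* show_stack(): resolve the task's stack pointer, dump the raw stack
 * contents, then hand off to show_trace() for the call trace.
 */
void show_stack(struct task_struct *tsk, unsigned long *sp)
{
	if (!tsk)
		tsk = current;
	sp = (tsk == current) ? (unsigned long *)current_stack_pointer
			      : (unsigned long *)tsk->thread.sp;

	dump_mem("Stack: ", (unsigned long)sp,
		 THREAD_SIZE + (unsigned long)task_stack_page(tsk));
	show_trace(tsk, sp, NULL);
}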
Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- arch/sh/kernel/process.c | 12 +---- arch/sh/kernel/traps.c | 124 ++++++++++++++++++++++++++------------------- include/asm-sh/processor.h | 2 + 3 files changed, 77 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 91516dca4a85..a52b13ac6b7f 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -105,7 +105,7 @@ void show_regs(struct pt_regs * regs) { printk("\n"); printk("Pid : %d, Comm: %20s\n", current->pid, current->comm); - print_symbol("PC is at %s\n", regs->pc); + print_symbol("PC is at %s\n", instruction_pointer(regs)); printk("PC : %08lx SP : %08lx SR : %08lx ", regs->pc, regs->regs[15], regs->sr); #ifdef CONFIG_MMU @@ -130,15 +130,7 @@ void show_regs(struct pt_regs * regs) printk("MACH: %08lx MACL: %08lx GBR : %08lx PR : %08lx\n", regs->mach, regs->macl, regs->gbr, regs->pr); - /* - * If we're in kernel mode, dump the stack too.. - */ - if (!user_mode(regs)) { - extern void show_task(unsigned long *sp); - unsigned long sp = regs->regs[15]; - - show_task((unsigned long *)sp); - } + show_trace(NULL, (unsigned long *)regs->regs[15], regs); } /* diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index c2c597e09482..ffe127f09f3e 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -1,16 +1,15 @@ -/* $Id: traps.c,v 1.17 2004/05/02 01:46:30 sugioka Exp $ - * - * linux/arch/sh/traps.c +/* + * 'traps.c' handles hardware traps and faults after we have saved some + * state in 'entry.S'. * * SuperH version: Copyright (C) 1999 Niibe Yutaka * Copyright (C) 2000 Philipp Rumpf * Copyright (C) 2000 David Howells - * Copyright (C) 2002, 2003 Paul Mundt - */ - -/* - * 'Traps.c' handles hardware traps and faults after we have saved some - * state in 'entry.S'. + * Copyright (C) 2002 - 2006 Paul Mundt + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. */ #include <linux/sched.h> #include <linux/kernel.h> @@ -53,13 +52,32 @@ #define TRAP_ILLEGAL_SLOT_INST 13 #endif -/* - * These constants are for searching for possible module text - * segments. VMALLOC_OFFSET comes from mm/vmalloc.c; MODULE_RANGE is - * a guess of how much space is likely to be vmalloced. 
- */ -#define VMALLOC_OFFSET (8*1024*1024) -#define MODULE_RANGE (8*1024*1024) +static void dump_mem(const char *str, unsigned long bottom, unsigned long top) +{ + unsigned long p; + int i; + + printk("%s(0x%08lx to 0x%08lx)\n", str, bottom, top); + + for (p = bottom & ~31; p < top; ) { + printk("%04lx: ", p & 0xffff); + + for (i = 0; i < 8; i++, p += 4) { + unsigned int val; + + if (p < bottom || p >= top) + printk(" "); + else { + if (__get_user(val, (unsigned int __user *)p)) { + printk("\n"); + return; + } + printk("%08x ", val); + } + } + printk("\n"); + } +} DEFINE_SPINLOCK(die_lock); @@ -69,14 +87,28 @@ void die(const char * str, struct pt_regs * regs, long err) console_verbose(); spin_lock_irq(&die_lock); + bust_spinlocks(1); + printk("%s: %04lx [#%d]\n", str, err & 0xffff, ++die_counter); + CHK_REMOTE_DEBUG(regs); + print_modules(); show_regs(regs); + + printk("Process: %s (pid: %d, stack limit = %p)\n", + current->comm, current->pid, task_stack_page(current) + 1); + + if (!user_mode(regs) || in_interrupt()) + dump_mem("Stack: ", regs->regs[15], THREAD_SIZE + + (unsigned long)task_stack_page(current)); + + bust_spinlocks(0); spin_unlock_irq(&die_lock); do_exit(SIGSEGV); } -static inline void die_if_kernel(const char * str, struct pt_regs * regs, long err) +static inline void die_if_kernel(const char *str, struct pt_regs *regs, + long err) { if (!user_mode(regs)) die(str, regs, err); @@ -93,8 +125,7 @@ static int handle_unaligned_notify_count = 10; */ static int die_if_no_fixup(const char * str, struct pt_regs * regs, long err) { - if (!user_mode(regs)) - { + if (!user_mode(regs)) { const struct exception_table_entry *fixup; fixup = search_exception_tables(regs->pc); if (fixup) { @@ -734,52 +765,43 @@ void __init trap_init(void) per_cpu_trap_init(); } -void show_stack(struct task_struct *tsk, unsigned long *sp) +void show_trace(struct task_struct *tsk, unsigned long *sp, + struct pt_regs *regs) { - unsigned long *stack, addr; - unsigned long module_start = VMALLOC_START; - unsigned long module_end = VMALLOC_END; - int i = 1; + unsigned long addr; - if (!tsk) - tsk = current; - if (tsk == current) - sp = (unsigned long *)current_stack_pointer; - else - sp = (unsigned long *)tsk->thread.sp; - - stack = sp; + if (regs && user_mode(regs)) + return; printk("\nCall trace: "); #ifdef CONFIG_KALLSYMS printk("\n"); #endif - while (!kstack_end(stack)) { - addr = *stack++; - if (((addr >= (unsigned long)_text) && - (addr <= (unsigned long)_etext)) || - ((addr >= module_start) && (addr <= module_end))) { - /* - * For 80-columns display, 6 entry is maximum. - * NOTE: '[<8c00abcd>] ' consumes 13 columns . 
- */ -#ifndef CONFIG_KALLSYMS - if (i && ((i % 6) == 0)) - printk("\n "); -#endif - printk("[<%08lx>] ", addr); - print_symbol("%s\n", addr); - i++; - } + while (!kstack_end(sp)) { + addr = *sp++; + if (kernel_text_address(addr)) + print_ip_sym(addr); } printk("\n"); } -void show_task(unsigned long *sp) +void show_stack(struct task_struct *tsk, unsigned long *sp) { - show_stack(NULL, sp); + unsigned long stack; + + if (!tsk) + tsk = current; + if (tsk == current) + sp = (unsigned long *)current_stack_pointer; + else + sp = (unsigned long *)tsk->thread.sp; + + stack = (unsigned long)sp; + dump_mem("Stack: ", stack, THREAD_SIZE + + (unsigned long)task_stack_page(tsk)); + show_trace(tsk, sp, NULL); } void dump_stack(void) diff --git a/include/asm-sh/processor.h b/include/asm-sh/processor.h index 474773853cd1..45bb74e35d32 100644 --- a/include/asm-sh/processor.h +++ b/include/asm-sh/processor.h @@ -255,6 +255,8 @@ extern void save_fpu(struct task_struct *__tsk, struct pt_regs *regs); */ #define thread_saved_pc(tsk) (tsk->thread.pc) +void show_trace(struct task_struct *tsk, unsigned long *sp, + struct pt_regs *regs); extern unsigned long get_wchan(struct task_struct *p); #define KSTK_EIP(tsk) ((tsk)->thread.pc) -- cgit v1.2.3 From 4a58eaca7ca68abea37d6d2a4ea7deb394906183 Mon Sep 17 00:00:00 2001 From: Paul Mundt <lethal@linux-sh.org> Date: Thu, 19 Oct 2006 16:15:13 +0900 Subject: sh: Remove board-specific ide.h headers. The driver that these were using never made it in to drivers/ide, so kill off the rest of the cruft. These will have to be reworked for board-specific platform devices through libata when they're added back through the setup code. Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- include/asm-sh/hp6xx/ide.h | 8 -------- include/asm-sh/hs7751rvoip/ide.h | 8 -------- include/asm-sh/irq.h | 19 ------------------- include/asm-sh/landisk/ide.h | 14 -------------- include/asm-sh/r7780rp/ide.h | 8 -------- include/asm-sh/rts7751r2d/ide.h | 8 -------- include/asm-sh/sh03/ide.h | 7 ------- 7 files changed, 72 deletions(-) delete mode 100644 include/asm-sh/hp6xx/ide.h delete mode 100644 include/asm-sh/hs7751rvoip/ide.h delete mode 100644 include/asm-sh/landisk/ide.h delete mode 100644 include/asm-sh/r7780rp/ide.h delete mode 100644 include/asm-sh/rts7751r2d/ide.h delete mode 100644 include/asm-sh/sh03/ide.h (limited to 'include') diff --git a/include/asm-sh/hp6xx/ide.h b/include/asm-sh/hp6xx/ide.h deleted file mode 100644 index 570395a5ebe5..000000000000 --- a/include/asm-sh/hp6xx/ide.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __ASM_SH_HP6XX_IDE_H -#define __ASM_SH_HP6XX_IDE_H - -#define IRQ_CFCARD 93 -#define IRQ_PCMCIA 94 - -#endif /* __ASM_SH_HP6XX_IDE_H */ - diff --git a/include/asm-sh/hs7751rvoip/ide.h b/include/asm-sh/hs7751rvoip/ide.h deleted file mode 100644 index 65ad1d0f763b..000000000000 --- a/include/asm-sh/hs7751rvoip/ide.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __ASM_SH_HS7751RVOIP_IDE_H -#define __ASM_SH_HS7751RVOIP_IDE_H - -/* Nothing to see here.. 
*/ -#include <asm/hs7751rvoip/hs7751rvoip.h> - -#endif /* __ASM_SH_HS7751RVOIP_IDE_H */ - diff --git a/include/asm-sh/irq.h b/include/asm-sh/irq.h index 28996f9c58cc..1837bdbf8e54 100644 --- a/include/asm-sh/irq.h +++ b/include/asm-sh/irq.h @@ -14,16 +14,6 @@ #include <asm/machvec.h> #include <asm/ptrace.h> /* for pt_regs */ -#if defined(CONFIG_SH_HP6XX) || \ - defined(CONFIG_SH_RTS7751R2D) || \ - defined(CONFIG_SH_HS7751RVOIP) || \ - defined(CONFIG_SH_HS7751RVOIP) || \ - defined(CONFIG_SH_SH03) || \ - defined(CONFIG_SH_R7780RP) || \ - defined(CONFIG_SH_LANDISK) -#include <asm/mach/ide.h> -#endif - #ifndef CONFIG_CPU_SUBTYPE_SH7780 #define INTC_DMAC0_MSK 0 @@ -38,15 +28,6 @@ #define INTC_IPRD 0xffd00010UL #endif -#ifdef CONFIG_IDE -# ifndef IRQ_CFCARD -# define IRQ_CFCARD 14 -# endif -# ifndef IRQ_PCMCIA -# define IRQ_PCMCIA 15 -# endif -#endif - #define TIMER_IRQ 16 #define TIMER_IPR_ADDR INTC_IPRA #define TIMER_IPR_POS 3 diff --git a/include/asm-sh/landisk/ide.h b/include/asm-sh/landisk/ide.h deleted file mode 100644 index 6490e28415ed..000000000000 --- a/include/asm-sh/landisk/ide.h +++ /dev/null @@ -1,14 +0,0 @@ -/* - * modifed by kogiidena - * 2005.03.03 - */ - -#ifndef __ASM_SH_LANDISK_IDE_H -#define __ASM_SH_LANDISK_IDE_H - -/* Nothing to see here.. */ -#include <asm/landisk/iodata_landisk.h> -#define IRQ_CFCARD IRQ_FATA /* CF Card IRQ */ -#define IRQ_PCMCIA IRQ_ATA /* PCMCIA IRQ */ - -#endif /* __ASM_SH_LANDISK_IDE_H */ diff --git a/include/asm-sh/r7780rp/ide.h b/include/asm-sh/r7780rp/ide.h deleted file mode 100644 index a1ed78e0f617..000000000000 --- a/include/asm-sh/r7780rp/ide.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __ASM_SH_R7780RP_IDE_H -#define __ASM_SH_R7780RP_IDE_H - -/* Nothing to see here.. */ -#include <asm/mach/r7780rp.h> - -#endif /* __ASM_SH_R7780RP_IDE_H */ - diff --git a/include/asm-sh/rts7751r2d/ide.h b/include/asm-sh/rts7751r2d/ide.h deleted file mode 100644 index 416f96b407cb..000000000000 --- a/include/asm-sh/rts7751r2d/ide.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef __ASM_SH_RTS7751R2D_IDE_H -#define __ASM_SH_RTS7751R2D_IDE_H - -/* Nothing to see here.. */ -#include <asm/rts7751r2d/rts7751r2d.h> - -#endif /* __ASM_SH_RTS7751R2D_IDE_H */ - diff --git a/include/asm-sh/sh03/ide.h b/include/asm-sh/sh03/ide.h deleted file mode 100644 index 73ee92e5c79e..000000000000 --- a/include/asm-sh/sh03/ide.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef __ASM_SH_SH03_IDE_H -#define __ASM_SH_SH03_IDE_H - -#define IRQ_CFCARD 8 -#define IRQ_PCMCIA 8 - -#endif /* __ASM_SH_SH03_IDE_H */ -- cgit v1.2.3 From 082c44d20eb4c6c4aa60ae7429ea184854cb0610 Mon Sep 17 00:00:00 2001 From: Paul Mundt <lethal@linux-sh.org> Date: Thu, 19 Oct 2006 16:16:18 +0900 Subject: sh: Cleanup board header directories. Now with the ide.h mess sorted out, most of these boards don't need their own directory. Move the headers out, and update the driver paths. 
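As the earlier ide.h removal notes, boards that want these CF/IDE interfaces back are expected to register them as platform devices from their setup code. A hypothetical sketch only: the register window, the IRQ number, and the "pata_platform" libata driver name are invented/assumed for illustration, not taken from these patches:

/* Hypothetical board setup fragment; all resource values are placeholders. */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/ioport.h>
#include <linux/platform_device.h>

static struct resource cf_resources[] = {
	{
		.start	= 0xb4000000,	/* placeholder CF register window */
		.end	= 0xb400000f,
		.flags	= IORESOURCE_MEM,
	}, {
		.start	= 93,		/* placeholder CF card IRQ */
		.end	= 93,
		.flags	= IORESOURCE_IRQ,
	},
};

static struct platform_device cf_device = {
	.name		= "pata_platform",	/* assumed libata driver name */
	.id		= -1,
	.resource	= cf_resources,
	.num_resources	= ARRAY_SIZE(cf_resources),
};

static int __init board_devices_setup(void)
{
	return platform_device_register(&cf_device);
}
__initcall(board_devices_setup);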
Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- arch/sh/boards/hp6xx/hp6xx_apm.c | 2 +- arch/sh/boards/hp6xx/pm.c | 2 +- arch/sh/boards/hp6xx/setup.c | 2 +- arch/sh/boards/renesas/hs7751rvoip/io.c | 2 +- arch/sh/boards/renesas/hs7751rvoip/irq.c | 2 +- arch/sh/boards/renesas/hs7751rvoip/setup.c | 7 +- arch/sh/boards/renesas/r7780rp/io.c | 2 +- arch/sh/boards/renesas/r7780rp/irq.c | 3 +- arch/sh/boards/renesas/r7780rp/setup.c | 2 +- arch/sh/boards/renesas/rts7751r2d/io.c | 4 +- arch/sh/boards/renesas/rts7751r2d/irq.c | 6 +- arch/sh/boards/renesas/rts7751r2d/led.c | 15 +-- arch/sh/boards/renesas/rts7751r2d/setup.c | 2 +- arch/sh/boards/shmin/setup.c | 2 +- arch/sh/cchips/voyagergx/irq.c | 22 +--- arch/sh/drivers/pci/ops-r7780rp.c | 2 +- arch/sh/drivers/pci/ops-rts7751r2d.c | 24 ++-- drivers/input/touchscreen/hp680_ts_input.c | 2 +- drivers/video/hitfb.c | 1 - include/asm-sh/edosk7705/io.h | 30 ----- include/asm-sh/hp6xx/hp6xx.h | 80 ------------- include/asm-sh/hp6xx/io.h | 10 -- include/asm-sh/hs7751rvoip/hs7751rvoip.h | 54 --------- include/asm-sh/r7780rp/r7780rp.h | 177 ----------------------------- include/asm-sh/rts7751r2d/rts7751r2d.h | 74 ------------ include/asm-sh/shmin/shmin.h | 9 -- sound/oss/sh_dac_audio.c | 2 +- 27 files changed, 36 insertions(+), 504 deletions(-) delete mode 100644 include/asm-sh/edosk7705/io.h delete mode 100644 include/asm-sh/hp6xx/hp6xx.h delete mode 100644 include/asm-sh/hp6xx/io.h delete mode 100644 include/asm-sh/hs7751rvoip/hs7751rvoip.h delete mode 100644 include/asm-sh/r7780rp/r7780rp.h delete mode 100644 include/asm-sh/rts7751r2d/rts7751r2d.h delete mode 100644 include/asm-sh/shmin/shmin.h (limited to 'include') diff --git a/arch/sh/boards/hp6xx/hp6xx_apm.c b/arch/sh/boards/hp6xx/hp6xx_apm.c index 219179114f0f..d146cdaa0b8b 100644 --- a/arch/sh/boards/hp6xx/hp6xx_apm.c +++ b/arch/sh/boards/hp6xx/hp6xx_apm.c @@ -14,7 +14,7 @@ #include <asm/io.h> #include <asm/apm.h> #include <asm/adc.h> -#include <asm/hp6xx/hp6xx.h> +#include <asm/hp6xx.h> #define SH7709_PGDR 0xa400012c diff --git a/arch/sh/boards/hp6xx/pm.c b/arch/sh/boards/hp6xx/pm.c index 83d327212064..d1947732fb3e 100644 --- a/arch/sh/boards/hp6xx/pm.c +++ b/arch/sh/boards/hp6xx/pm.c @@ -12,7 +12,7 @@ #include <linux/time.h> #include <asm/io.h> #include <asm/hd64461.h> -#include <asm/hp6xx/hp6xx.h> +#include <asm/hp6xx.h> #include <asm/cpu/dac.h> #include <asm/pm.h> diff --git a/arch/sh/boards/hp6xx/setup.c b/arch/sh/boards/hp6xx/setup.c index 2d3a5b4faf58..b5a96649ed26 100644 --- a/arch/sh/boards/hp6xx/setup.c +++ b/arch/sh/boards/hp6xx/setup.c @@ -13,7 +13,7 @@ #include <asm/hd64461.h> #include <asm/io.h> #include <asm/irq.h> -#include <asm/hp6xx/hp6xx.h> +#include <asm/hp6xx.h> #include <asm/cpu/dac.h> #define SCPCR 0xa4000116 diff --git a/arch/sh/boards/renesas/hs7751rvoip/io.c b/arch/sh/boards/renesas/hs7751rvoip/io.c index 51f3f6574210..bb9aa0d62852 100644 --- a/arch/sh/boards/renesas/hs7751rvoip/io.c +++ b/arch/sh/boards/renesas/hs7751rvoip/io.c @@ -15,7 +15,7 @@ #include <linux/module.h> #include <linux/pci.h> #include <asm/io.h> -#include <asm/hs7751rvoip/hs7751rvoip.h> +#include <asm/hs7751rvoip.h> #include <asm/addrspace.h> extern void *area6_io8_base; /* Area 6 8bit I/O Base address */ diff --git a/arch/sh/boards/renesas/hs7751rvoip/irq.c b/arch/sh/boards/renesas/hs7751rvoip/irq.c index c617b188258a..943f93aa6052 100644 --- a/arch/sh/boards/renesas/hs7751rvoip/irq.c +++ b/arch/sh/boards/renesas/hs7751rvoip/irq.c @@ -14,7 +14,7 @@ #include <linux/irq.h> #include <asm/io.h> #include 
<asm/irq.h> -#include <asm/hs7751rvoip/hs7751rvoip.h> +#include <asm/hs7751rvoip.h> static int mask_pos[] = {8, 9, 10, 11, 12, 13, 0, 1, 2, 3, 4, 5, 6, 7}; diff --git a/arch/sh/boards/renesas/hs7751rvoip/setup.c b/arch/sh/boards/renesas/hs7751rvoip/setup.c index 0414c15c3458..1d997ffd7931 100644 --- a/arch/sh/boards/renesas/hs7751rvoip/setup.c +++ b/arch/sh/boards/renesas/hs7751rvoip/setup.c @@ -10,15 +10,10 @@ #include <linux/init.h> #include <linux/irq.h> #include <linux/mm.h> -#include <linux/vmalloc.h> -#include <linux/hdreg.h> -#include <linux/ide.h> #include <linux/pm.h> +#include <asm/hs7751rvoip.h> #include <asm/io.h> -#include <asm/hs7751rvoip/hs7751rvoip.h> #include <asm/machvec.h> -#include <asm/rtc.h> -#include <asm/irq.h> static void __init hs7751rvoip_init_irq(void) { diff --git a/arch/sh/boards/renesas/r7780rp/io.c b/arch/sh/boards/renesas/r7780rp/io.c index db92d6e6ae99..311ccccba718 100644 --- a/arch/sh/boards/renesas/r7780rp/io.c +++ b/arch/sh/boards/renesas/r7780rp/io.c @@ -11,7 +11,7 @@ #include <linux/pci.h> #include <linux/kernel.h> #include <linux/types.h> -#include <asm/r7780rp/r7780rp.h> +#include <asm/r7780rp.h> #include <asm/addrspace.h> #include <asm/io.h> diff --git a/arch/sh/boards/renesas/r7780rp/irq.c b/arch/sh/boards/renesas/r7780rp/irq.c index b544772cbc72..cbb4ea28e9ad 100644 --- a/arch/sh/boards/renesas/r7780rp/irq.c +++ b/arch/sh/boards/renesas/r7780rp/irq.c @@ -10,7 +10,8 @@ */ #include <linux/init.h> #include <linux/irq.h> -#include <asm/io.h> +#include <linux/io.h> +#include <asm/r7780rp.h> #ifdef CONFIG_SH_R7780MP static int mask_pos[] = {12, 11, 9, 14, 15, 8, 13, 6, 5, 4, 3, 2, 0, 0, 1, 0}; diff --git a/arch/sh/boards/renesas/r7780rp/setup.c b/arch/sh/boards/renesas/r7780rp/setup.c index b941aa0aa34e..c331caeb694b 100644 --- a/arch/sh/boards/renesas/r7780rp/setup.c +++ b/arch/sh/boards/renesas/r7780rp/setup.c @@ -13,7 +13,7 @@ #include <linux/init.h> #include <linux/platform_device.h> #include <asm/machvec.h> -#include <asm/r7780rp/r7780rp.h> +#include <asm/r7780rp.h> #include <asm/clock.h> #include <asm/io.h> diff --git a/arch/sh/boards/renesas/rts7751r2d/io.c b/arch/sh/boards/renesas/rts7751r2d/io.c index 135aa0b5e62d..f2507a804979 100644 --- a/arch/sh/boards/renesas/rts7751r2d/io.c +++ b/arch/sh/boards/renesas/rts7751r2d/io.c @@ -11,8 +11,8 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/pci.h> -#include <asm/rts7751r2d/rts7751r2d.h> -#include <asm/io.h> +#include <linux/io.h> +#include <asm/rts7751r2d.h> #include <asm/addrspace.h> /* diff --git a/arch/sh/boards/renesas/rts7751r2d/irq.c b/arch/sh/boards/renesas/rts7751r2d/irq.c index c915e7a3693a..cb0eb20d1b43 100644 --- a/arch/sh/boards/renesas/rts7751r2d/irq.c +++ b/arch/sh/boards/renesas/rts7751r2d/irq.c @@ -8,12 +8,10 @@ * Modified for RTS7751R2D by * Atom Create Engineering Co., Ltd. 2002. */ - #include <linux/init.h> #include <linux/irq.h> -#include <asm/io.h> -#include <asm/irq.h> -#include <asm/rts7751r2d/rts7751r2d.h> +#include <linux/io.h> +#include <asm/rts7751r2d.h> #if defined(CONFIG_RTS7751R2D_REV11) static int mask_pos[] = {11, 9, 8, 12, 10, 6, 5, 4, 7, 14, 13, 0, 0, 0, 0}; diff --git a/arch/sh/boards/renesas/rts7751r2d/led.c b/arch/sh/boards/renesas/rts7751r2d/led.c index a7ce66c1e4f0..509f548bdce0 100644 --- a/arch/sh/boards/renesas/rts7751r2d/led.c +++ b/arch/sh/boards/renesas/rts7751r2d/led.c @@ -8,13 +8,9 @@ * * This file contains Renesas Technology Sales RTS7751R2D specific LED code. 
*/ - -#include <asm/io.h> -#include <asm/rts7751r2d/rts7751r2d.h> - -#ifdef CONFIG_HEARTBEAT - +#include <linux/io.h> #include <linux/sched.h> +#include <asm/rts7751r2d.h> /* Cycle the LED's in the clasic Knightriger/Sun pattern */ void heartbeat_rts7751r2d(void) @@ -46,10 +42,3 @@ void heartbeat_rts7751r2d(void) else bit--; } -#endif /* CONFIG_HEARTBEAT */ - -void rts7751r2d_led(unsigned short value) -{ - ctrl_outw(value, PA_OUTPORT); -} - diff --git a/arch/sh/boards/renesas/rts7751r2d/setup.c b/arch/sh/boards/renesas/rts7751r2d/setup.c index 20597a6e6702..5c042d35ec91 100644 --- a/arch/sh/boards/renesas/rts7751r2d/setup.c +++ b/arch/sh/boards/renesas/rts7751r2d/setup.c @@ -12,9 +12,9 @@ #include <linux/platform_device.h> #include <linux/serial_8250.h> #include <linux/pm.h> -#include <asm/io.h> #include <asm/machvec.h> #include <asm/mach/rts7751r2d.h> +#include <asm/io.h> #include <asm/voyagergx.h> extern void heartbeat_rts7751r2d(void); diff --git a/arch/sh/boards/shmin/setup.c b/arch/sh/boards/shmin/setup.c index 2f0c19706cf9..a31a1d1e2681 100644 --- a/arch/sh/boards/shmin/setup.c +++ b/arch/sh/boards/shmin/setup.c @@ -7,7 +7,7 @@ */ #include <linux/init.h> #include <asm/machvec.h> -#include <asm/shmin/shmin.h> +#include <asm/shmin.h> #include <asm/clock.h> #include <asm/irq.h> #include <asm/io.h> diff --git a/arch/sh/cchips/voyagergx/irq.c b/arch/sh/cchips/voyagergx/irq.c index bf1b28feca06..f7ea700d05ae 100644 --- a/arch/sh/cchips/voyagergx/irq.c +++ b/arch/sh/cchips/voyagergx/irq.c @@ -17,29 +17,18 @@ Copyright 2003 (c) Lineo uSolutions,Inc. */ -/* -------------------------------------------------------------------- */ - -#undef DEBUG - -#include <linux/sched.h> -#include <linux/module.h> -#include <linux/kernel.h> -#include <linux/param.h> -#include <linux/ioport.h> #include <linux/interrupt.h> #include <linux/init.h> -#include <linux/irq.h> - -#include <asm/io.h> -#include <asm/irq.h> +#include <linux/io.h> #include <asm/voyagergx.h> +#include <asm/rts7751r2d.h> static void disable_voyagergx_irq(unsigned int irq) { unsigned long val; unsigned long mask = 1 << (irq - VOYAGER_IRQ_BASE); - pr_debug("disable_voyagergx_irq(%d): mask=%x\n", irq, mask); + pr_debug("disable_voyagergx_irq(%d): mask=%lx\n", irq, mask); val = inl(VOYAGER_INT_MASK); val &= ~mask; outl(val, VOYAGER_INT_MASK); @@ -50,7 +39,7 @@ static void enable_voyagergx_irq(unsigned int irq) unsigned long val; unsigned long mask = 1 << (irq - VOYAGER_IRQ_BASE); - pr_debug("disable_voyagergx_irq(%d): mask=%x\n", irq, mask); + pr_debug("disable_voyagergx_irq(%d): mask=%lx\n", irq, mask); val = inl(VOYAGER_INT_MASK); val |= mask; outl(val, VOYAGER_INT_MASK); @@ -137,7 +126,7 @@ int voyagergx_irq_demux(int irq) } else { printk("Unexpected IRQ irq = %d status = 0x%08lx\n", irq, val); } - pr_debug("voyagergx_irq_demux %d \n", i); + pr_debug("voyagergx_irq_demux %ld\n", i); #else for (bit = 1, i = 0 ; i < VOYAGER_IRQ_NUM ; bit <<= 1, i++) if (val & bit) @@ -185,4 +174,3 @@ void __init setup_voyagergx_irq(void) setup_irq(IRQ_VOYAGER, &irq0); } - diff --git a/arch/sh/drivers/pci/ops-r7780rp.c b/arch/sh/drivers/pci/ops-r7780rp.c index 6e3ba9c65b40..eeea1577e112 100644 --- a/arch/sh/drivers/pci/ops-r7780rp.c +++ b/arch/sh/drivers/pci/ops-r7780rp.c @@ -13,7 +13,7 @@ #include <linux/init.h> #include <linux/delay.h> #include <linux/pci.h> -#include <asm/r7780rp/r7780rp.h> +#include <asm/r7780rp.h> #include <asm/io.h> #include "pci-sh4.h" diff --git a/arch/sh/drivers/pci/ops-rts7751r2d.c b/arch/sh/drivers/pci/ops-rts7751r2d.c index 
b68824c8b81e..4a518d948049 100644 --- a/arch/sh/drivers/pci/ops-rts7751r2d.c +++ b/arch/sh/drivers/pci/ops-rts7751r2d.c @@ -10,28 +10,24 @@ * * PCI initialization for the Renesas SH7751R RTS7751R2D board */ - #include <linux/kernel.h> #include <linux/types.h> #include <linux/init.h> -#include <linux/delay.h> #include <linux/pci.h> -#include <linux/module.h> -#include <asm/rts7751r2d/rts7751r2d.h> -#include <asm/io.h> +#include <linux/io.h> +#include <asm/rts7751r2d.h> #include "pci-sh4.h" +static u8 rts7751r2d_irq_tab[] __initdata = { + IRQ_PCISLOT1, + IRQ_PCISLOT2, + IRQ_PCMCIA, + IRQ_PCIETH, +}; + int __init pcibios_map_platform_irq(struct pci_dev *pdev, u8 slot, u8 pin) { - switch (slot) { - case 0: return IRQ_PCISLOT1; /* PCI Extend slot #1 */ - case 1: return IRQ_PCISLOT2; /* PCI Extend slot #2 */ - case 2: return IRQ_PCMCIA; /* PCI Cardbus Bridge */ - case 3: return IRQ_PCIETH; /* Realtek Ethernet controller */ - default: - printk("PCI: Bad IRQ mapping request for slot %d\n", slot); - return -1; - } + return rts7751r2d_irq_tab[slot]; } static struct resource sh7751_io_resource = { diff --git a/drivers/input/touchscreen/hp680_ts_input.c b/drivers/input/touchscreen/hp680_ts_input.c index e31c6c55b2e2..58fca316786c 100644 --- a/drivers/input/touchscreen/hp680_ts_input.c +++ b/drivers/input/touchscreen/hp680_ts_input.c @@ -6,7 +6,7 @@ #include <asm/io.h> #include <asm/delay.h> #include <asm/adc.h> -#include <asm/hp6xx/hp6xx.h> +#include <asm/hp6xx.h> #define MODNAME "hp680_ts_input" diff --git a/drivers/video/hitfb.c b/drivers/video/hitfb.c index 3afb472763c0..3dc49424dc75 100644 --- a/drivers/video/hitfb.c +++ b/drivers/video/hitfb.c @@ -29,7 +29,6 @@ #include <asm/io.h> #include <asm/hd64461.h> #include <asm/cpu/dac.h> -#include <asm/hp6xx/hp6xx.h> #define WIDTH 640 diff --git a/include/asm-sh/edosk7705/io.h b/include/asm-sh/edosk7705/io.h deleted file mode 100644 index a1089a65bc36..000000000000 --- a/include/asm-sh/edosk7705/io.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * include/asm-sh/edosk7705/io.h - * - * Modified version of io_se.h for the EDOSK7705 specific functions. - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - * - * IO functions for an Hitachi EDOSK7705 development board - */ - -#ifndef __ASM_SH_EDOSK7705_IO_H -#define __ASM_SH_EDOSK7705_IO_H - -#include <asm/io_generic.h> - -extern unsigned char sh_edosk7705_inb(unsigned long port); -extern unsigned int sh_edosk7705_inl(unsigned long port); - -extern void sh_edosk7705_outb(unsigned char value, unsigned long port); -extern void sh_edosk7705_outl(unsigned int value, unsigned long port); - -extern void sh_edosk7705_insb(unsigned long port, void *addr, unsigned long count); -extern void sh_edosk7705_insl(unsigned long port, void *addr, unsigned long count); -extern void sh_edosk7705_outsb(unsigned long port, const void *addr, unsigned long count); -extern void sh_edosk7705_outsl(unsigned long port, const void *addr, unsigned long count); - -extern unsigned long sh_edosk7705_isa_port2addr(unsigned long offset); - -#endif /* __ASM_SH_EDOSK7705_IO_H */ diff --git a/include/asm-sh/hp6xx/hp6xx.h b/include/asm-sh/hp6xx/hp6xx.h deleted file mode 100644 index f35134c159dd..000000000000 --- a/include/asm-sh/hp6xx/hp6xx.h +++ /dev/null @@ -1,80 +0,0 @@ -#ifndef __ASM_SH_HP6XX_H -#define __ASM_SH_HP6XX_H - -/* - * Copyright (C) 2003, 2004, 2005 Andriy Skulysh - * - * This file is subject to the terms and conditions of the GNU General Public - * License. 
See the file "COPYING" in the main directory of this archive - * for more details. - * - */ - -#define HP680_BTN_IRQ IRQ0_IRQ -#define HP680_TS_IRQ IRQ3_IRQ -#define HP680_HD64461_IRQ IRQ4_IRQ - -#define DAC_LCD_BRIGHTNESS 0 -#define DAC_SPEAKER_VOLUME 1 - -#define PGDR_OPENED 0x01 -#define PGDR_MAIN_BATTERY_OUT 0x04 -#define PGDR_PLAY_BUTTON 0x08 -#define PGDR_REWIND_BUTTON 0x10 -#define PGDR_RECORD_BUTTON 0x20 - -#define PHDR_TS_PEN_DOWN 0x08 - -#define PJDR_LED_BLINK 0x02 - -#define PKDR_LED_GREEN 0x10 - -#define SCPDR_TS_SCAN_ENABLE 0x20 -#define SCPDR_TS_SCAN_Y 0x02 -#define SCPDR_TS_SCAN_X 0x01 - -#define SCPCR_TS_ENABLE 0x405 -#define SCPCR_TS_MASK 0xc0f - -#define ADC_CHANNEL_TS_Y 1 -#define ADC_CHANNEL_TS_X 2 -#define ADC_CHANNEL_BATTERY 3 -#define ADC_CHANNEL_BACKUP 4 -#define ADC_CHANNEL_CHARGE 5 - -#define HD64461_GPADR_SPEAKER 0x01 -#define HD64461_GPADR_PCMCIA0 (0x02|0x08) - -#define HD64461_GPBDR_LCDOFF 0x01 -#define HD64461_GPBDR_LCD_CONTRAST_MASK 0x78 -#define HD64461_GPBDR_LED_RED 0x80 - -#include <asm/hd64461.h> -#include <asm/io.h> - -#define PJDR 0xa4000130 -#define PKDR 0xa4000132 - -static inline void hp6xx_led_red(int on) -{ - u16 v16; - v16 = ctrl_inw(CONFIG_HD64461_IOBASE + HD64461_GPBDR - 0x10000); - if (on) - ctrl_outw(v16 & (~HD64461_GPBDR_LED_RED), CONFIG_HD64461_IOBASE + HD64461_GPBDR - 0x10000); - else - ctrl_outw(v16 | HD64461_GPBDR_LED_RED, CONFIG_HD64461_IOBASE + HD64461_GPBDR - 0x10000); -} - -static inline void hp6xx_led_green(int on) -{ - u8 v8; - - v8 = ctrl_inb(PKDR); - if (on) - ctrl_outb(v8 & (~PKDR_LED_GREEN), PKDR); - else - ctrl_outb(v8 | PKDR_LED_GREEN, PKDR); -} - - -#endif /* __ASM_SH_HP6XX_H */ diff --git a/include/asm-sh/hp6xx/io.h b/include/asm-sh/hp6xx/io.h deleted file mode 100644 index 2044476ab199..000000000000 --- a/include/asm-sh/hp6xx/io.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __ASM_SH_HP6XX_IO_H -#define __ASM_SH_HP6XX_IO_H - -/* - * Nothing special here.. just use the generic cchip io routines. - */ -#include <asm/hd64461.h> - -#endif /* __ASM_SH_HP6XX_IO_H */ - diff --git a/include/asm-sh/hs7751rvoip/hs7751rvoip.h b/include/asm-sh/hs7751rvoip/hs7751rvoip.h deleted file mode 100644 index c4cff9d33927..000000000000 --- a/include/asm-sh/hs7751rvoip/hs7751rvoip.h +++ /dev/null @@ -1,54 +0,0 @@ -#ifndef __ASM_SH_RENESAS_HS7751RVOIP_H -#define __ASM_SH_RENESAS_HS7751RVOIP_H - -/* - * linux/include/asm-sh/hs7751rvoip/hs7751rvoip.h - * - * Copyright (C) 2000 Atom Create Engineering Co., Ltd. - * - * Renesas Technology Sales HS7751RVoIP support - */ - -/* Box specific addresses. 
*/ - -#define PA_BCR 0xa4000000 /* FPGA */ -#define PA_SLICCNTR1 0xa4000006 /* SLIC PIO Control 1 */ -#define PA_SLICCNTR2 0xa4000008 /* SLIC PIO Control 2 */ -#define PA_DMACNTR 0xa400000a /* USB DMA Control */ -#define PA_INPORTR 0xa400000c /* Input Port Register */ -#define PA_OUTPORTR 0xa400000e /* Output Port Reguster */ -#define PA_VERREG 0xa4000014 /* FPGA Version Register */ - -#define PA_IDE_OFFSET 0x1f0 /* CF IDE Offset */ - -#define IRLCNTR1 (PA_BCR + 0) /* Interrupt Control Register1 */ -#define IRLCNTR2 (PA_BCR + 2) /* Interrupt Control Register2 */ -#define IRLCNTR3 (PA_BCR + 4) /* Interrupt Control Register3 */ -#define IRLCNTR4 (PA_BCR + 16) /* Interrupt Control Register4 */ -#define IRLCNTR5 (PA_BCR + 18) /* Interrupt Control Register5 */ - -#define IRQ_PCIETH 6 /* PCI Ethernet IRQ */ -#define IRQ_PCIHUB 7 /* PCI Ethernet Hub IRQ */ -#define IRQ_USBCOM 8 /* USB Comunication IRQ */ -#define IRQ_USBCON 9 /* USB Connect IRQ */ -#define IRQ_USBDMA 10 /* USB DMA IRQ */ -#define IRQ_CFCARD 11 /* CF Card IRQ */ -#define IRQ_PCMCIA 12 /* PCMCIA IRQ */ -#define IRQ_PCISLOT 13 /* PCI Slot #1 IRQ */ -#define IRQ_ONHOOK1 0 /* ON HOOK1 IRQ */ -#define IRQ_OFFHOOK1 1 /* OFF HOOK1 IRQ */ -#define IRQ_ONHOOK2 2 /* ON HOOK2 IRQ */ -#define IRQ_OFFHOOK2 3 /* OFF HOOK2 IRQ */ -#define IRQ_RINGING 4 /* Ringing IRQ */ -#define IRQ_CODEC 5 /* CODEC IRQ */ - -#define __IO_PREFIX hs7751rvoip -#include <asm/io_generic.h> - -/* arch/sh/boards/renesas/hs7751rvoip/irq.c */ -void init_hs7751rvoip_IRQ(void); - -/* arch/sh/boards/renesas/hs7751rvoip/io.c */ -void *hs7751rvoip_ioremap(unsigned long, unsigned long); - -#endif /* __ASM_SH_RENESAS_HS7751RVOIP */ diff --git a/include/asm-sh/r7780rp/r7780rp.h b/include/asm-sh/r7780rp/r7780rp.h deleted file mode 100644 index f95d9dba31a2..000000000000 --- a/include/asm-sh/r7780rp/r7780rp.h +++ /dev/null @@ -1,177 +0,0 @@ -#ifndef __ASM_SH_RENESAS_R7780RP_H -#define __ASM_SH_RENESAS_R7780RP_H - -/* - * linux/include/asm-sh/r7780rp.h - * - * Copyright (C) 2000 Atom Create Engineering Co., Ltd. - * - * Renesas Solutions Highlander R7780RP support - */ - -/* Box specific addresses. 
*/ -#if defined(CONFIG_SH_R7780MP) -#define PA_BCR 0xa4000000 /* FPGA */ -#define PA_IRLMSK (PA_BCR+0x0000) /* Interrupt Mask control */ -#define PA_IRLMON (PA_BCR+0x0002) /* Interrupt Status control */ -#define PA_IRLPRI1 (PA_BCR+0x0004) /* Interrupt Priorty 1 */ -#define PA_IRLPRI2 (PA_BCR+0x0006) /* Interrupt Priorty 2 */ -#define PA_IRLPRI3 (PA_BCR+0x0008) /* Interrupt Priorty 3 */ -#define PA_IRLPRI4 (PA_BCR+0x000a) /* Interrupt Priorty 4 */ -#define PA_RSTCTL (PA_BCR+0x000c) /* Reset Control */ -#define PA_PCIBD (PA_BCR+0x000e) /* PCI Board detect control */ -#define PA_PCICD (PA_BCR+0x0010) /* PCI Conector detect control */ -#define PA_EXTGIO (PA_BCR+0x0016) /* Extension GPIO Control */ -#define PA_IVDRMON (PA_BCR+0x0018) /* iVDR Moniter control */ -#define PA_IVDRCTL (PA_BCR+0x001a) /* iVDR control */ -#define PA_OBLED (PA_BCR+0x001c) /* On Board LED control */ -#define PA_OBSW (PA_BCR+0x001e) /* On Board Switch control */ -#define PA_AUDIOSEL (PA_BCR+0x0020) /* Sound Interface Select control */ -#define PA_EXTPLR (PA_BCR+0x001e) /* Extention Pin Polarity control */ -#define PA_TPCTL (PA_BCR+0x0100) /* Touch Panel Access control */ -#define PA_TPDCKCTL (PA_BCR+0x0102) /* Touch Panel Access data control */ -#define PA_TPCTLCLR (PA_BCR+0x0104) /* Touch Panel Access control */ -#define PA_TPXPOS (PA_BCR+0x0106) /* Touch Panel X position control */ -#define PA_TPYPOS (PA_BCR+0x0108) /* Touch Panel Y position control */ -#define PA_DBSW (PA_BCR+0x0200) /* Debug Board Switch control */ -#define PA_CFCTL (PA_BCR+0x0300) /* CF Timing control */ -#define PA_CFPOW (PA_BCR+0x0302) /* CF Power control */ -#define PA_CFCDINTCLR (PA_BCR+0x0304) /* CF Insert Interrupt clear */ -#define PA_SCSMR0 (PA_BCR+0x0400) /* SCIF0 Serial mode control */ -#define PA_SCBRR0 (PA_BCR+0x0404) /* SCIF0 Bit rate control */ -#define PA_SCSCR0 (PA_BCR+0x0408) /* SCIF0 Serial control */ -#define PA_SCFTDR0 (PA_BCR+0x040c) /* SCIF0 Send FIFO control */ -#define PA_SCFSR0 (PA_BCR+0x0410) /* SCIF0 Serial status control */ -#define PA_SCFRDR0 (PA_BCR+0x0414) /* SCIF0 Receive FIFO control */ -#define PA_SCFCR0 (PA_BCR+0x0418) /* SCIF0 FIFO control */ -#define PA_SCTFDR0 (PA_BCR+0x041c) /* SCIF0 Send FIFO data control */ -#define PA_SCRFDR0 (PA_BCR+0x0420) /* SCIF0 Receive FIFO data control */ -#define PA_SCSPTR0 (PA_BCR+0x0424) /* SCIF0 Serial Port control */ -#define PA_SCLSR0 (PA_BCR+0x0428) /* SCIF0 Line Status control */ -#define PA_SCRER0 (PA_BCR+0x042c) /* SCIF0 Serial Error control */ -#define PA_SCSMR1 (PA_BCR+0x0500) /* SCIF1 Serial mode control */ -#define PA_SCBRR1 (PA_BCR+0x0504) /* SCIF1 Bit rate control */ -#define PA_SCSCR1 (PA_BCR+0x0508) /* SCIF1 Serial control */ -#define PA_SCFTDR1 (PA_BCR+0x050c) /* SCIF1 Send FIFO control */ -#define PA_SCFSR1 (PA_BCR+0x0510) /* SCIF1 Serial status control */ -#define PA_SCFRDR1 (PA_BCR+0x0514) /* SCIF1 Receive FIFO control */ -#define PA_SCFCR1 (PA_BCR+0x0518) /* SCIF1 FIFO control */ -#define PA_SCTFDR1 (PA_BCR+0x051c) /* SCIF1 Send FIFO data control */ -#define PA_SCRFDR1 (PA_BCR+0x0520) /* SCIF1 Receive FIFO data control */ -#define PA_SCSPTR1 (PA_BCR+0x0524) /* SCIF1 Serial Port control */ -#define PA_SCLSR1 (PA_BCR+0x0528) /* SCIF1 Line Status control */ -#define PA_SCRER1 (PA_BCR+0x052c) /* SCIF1 Serial Error control */ -#define PA_ICCR (PA_BCR+0x0600) /* Serial control */ -#define PA_SAR (PA_BCR+0x0602) /* Serial Slave control */ -#define PA_MDR (PA_BCR+0x0604) /* Serial Mode control */ -#define PA_ADR1 (PA_BCR+0x0606) /* Serial Address1 control */ -#define 
PA_DAR1 (PA_BCR+0x0646) /* Serial Data1 control */ -#define PA_VERREG (PA_BCR+0x0700) /* FPGA Version Register */ -#define PA_POFF (PA_BCR+0x0800) /* System Power Off control */ -#define PA_PMR (PA_BCR+0x0900) /* */ - -#define PA_AX88796L 0xa4100400 /* AX88796L Area */ -#define PA_SC1602BSLB 0xa6000000 /* SC1602BSLB Area */ -#define PA_AREA5_IO 0xb4000000 /* Area 5 IO Memory */ -#define PA_AREA6_IO 0xb8000000 /* Area 6 IO Memory */ -#define PA_IDE_OFFSET 0x1f0 /* CF IDE Offset */ -#define AX88796L_IO_BASE 0x1000 /* AX88796L IO Base Address */ - -#define IRLCNTR1 (PA_BCR + 0) /* Interrupt Control Register1 */ - -#define IRQ_PCISLOT1 65 /* PCI Slot #1 IRQ */ -#define IRQ_PCISLOT2 66 /* PCI Slot #2 IRQ */ -#define IRQ_PCISLOT3 67 /* PCI Slot #3 IRQ */ -#define IRQ_PCISLOT4 68 /* PCI Slot #4 IRQ */ -#define IRQ_CFCARD 1 /* CF Card IRQ */ -// #define IRQ_CFINST 0 /* CF Card Insert IRQ */ -#define IRQ_TP 2 /* Touch Panel IRQ */ -#define IRQ_SCI1 3 /* SCI1 IRQ */ -#define IRQ_SCI0 4 /* SCI0 IRQ */ -#define IRQ_2SERIAL 5 /* Serial IRQ */ -#define IRQ_RTC 6 /* RTC A / B IRQ */ -#define IRQ_EXTENTION6 7 /* EXT6n IRQ */ -#define IRQ_EXTENTION5 8 /* EXT5n IRQ */ -#define IRQ_EXTENTION4 9 /* EXT4n IRQ */ -#define IRQ_EXTENTION2 10 /* EXT2n IRQ */ -#define IRQ_EXTENTION1 11 /* EXT1n IRQ */ -#define IRQ_ONETH 13 /* On board Ethernet IRQ */ -#define IRQ_PSW 14 /* Push Switch IRQ */ - -#else /* R7780RP */ - -#define PA_BCR 0xa5000000 /* FPGA */ -#define PA_IRLMSK (PA_BCR+0x0000) /* Interrupt Mask control */ -#define PA_IRLMON (PA_BCR+0x0002) /* Interrupt Status control */ -#define PA_SDPOW (PA_BCR+0x0004) /* SD Power control */ -#define PA_RSTCTL (PA_BCR+0x0006) /* Device Reset control */ -#define PA_PCIBD (PA_BCR+0x0008) /* PCI Board detect control */ -#define PA_PCICD (PA_BCR+0x000a) /* PCI Conector detect control */ -#define PA_ZIGIO1 (PA_BCR+0x000c) /* Zigbee IO control 1 */ -#define PA_ZIGIO2 (PA_BCR+0x000e) /* Zigbee IO control 2 */ -#define PA_ZIGIO3 (PA_BCR+0x0010) /* Zigbee IO control 3 */ -#define PA_ZIGIO4 (PA_BCR+0x0012) /* Zigbee IO control 4 */ -#define PA_IVDRMON (PA_BCR+0x0014) /* iVDR Moniter control */ -#define PA_IVDRCTL (PA_BCR+0x0016) /* iVDR control */ -#define PA_OBLED (PA_BCR+0x0018) /* On Board LED control */ -#define PA_OBSW (PA_BCR+0x001a) /* On Board Switch control */ -#define PA_AUDIOSEL (PA_BCR+0x001c) /* Sound Interface Select control */ -#define PA_EXTPLR (PA_BCR+0x001e) /* Extention Pin Polarity control */ -#define PA_TPCTL (PA_BCR+0x0100) /* Touch Panel Access control */ -#define PA_TPDCKCTL (PA_BCR+0x0102) /* Touch Panel Access data control */ -#define PA_TPCTLCLR (PA_BCR+0x0104) /* Touch Panel Access control */ -#define PA_TPXPOS (PA_BCR+0x0106) /* Touch Panel X position control */ -#define PA_TPYPOS (PA_BCR+0x0108) /* Touch Panel Y position control */ -#define PA_DBDET (PA_BCR+0x0200) /* Debug Board detect control */ -#define PA_DBDISPCTL (PA_BCR+0x0202) /* Debug Board Dot timing control */ -#define PA_DBSW (PA_BCR+0x0204) /* Debug Board Switch control */ -#define PA_CFCTL (PA_BCR+0x0300) /* CF Timing control */ -#define PA_CFPOW (PA_BCR+0x0302) /* CF Power control */ -#define PA_CFCDINTCLR (PA_BCR+0x0304) /* CF Insert Interrupt clear */ -#define PA_SCSMR (PA_BCR+0x0400) /* SCIF Serial mode control */ -#define PA_SCBRR (PA_BCR+0x0402) /* SCIF Bit rate control */ -#define PA_SCSCR (PA_BCR+0x0404) /* SCIF Serial control */ -#define PA_SCFDTR (PA_BCR+0x0406) /* SCIF Send FIFO control */ -#define PA_SCFSR (PA_BCR+0x0408) /* SCIF Serial status control */ -#define PA_SCFRDR 
(PA_BCR+0x040a) /* SCIF Receive FIFO control */ -#define PA_SCFCR (PA_BCR+0x040c) /* SCIF FIFO control */ -#define PA_SCFDR (PA_BCR+0x040e) /* SCIF FIFO data control */ -#define PA_SCLSR (PA_BCR+0x0412) /* SCIF Line Status control */ -#define PA_ICCR (PA_BCR+0x0500) /* Serial control */ -#define PA_SAR (PA_BCR+0x0502) /* Serial Slave control */ -#define PA_MDR (PA_BCR+0x0504) /* Serial Mode control */ -#define PA_ADR1 (PA_BCR+0x0506) /* Serial Address1 control */ -#define PA_DAR1 (PA_BCR+0x0546) /* Serial Data1 control */ -#define PA_VERREG (PA_BCR+0x0600) /* FPGA Version Register */ - -#define PA_AX88796L 0xa5800400 /* AX88796L Area */ -#define PA_SC1602BSLB 0xa6000000 /* SC1602BSLB Area */ -#define PA_AREA5_IO 0xb4000000 /* Area 5 IO Memory */ -#define PA_AREA6_IO 0xb8000000 /* Area 6 IO Memory */ -#define PA_IDE_OFFSET 0x1f0 /* CF IDE Offset */ -#define AX88796L_IO_BASE 0x1000 /* AX88796L IO Base Address */ - -#define IRLCNTR1 (PA_BCR + 0) /* Interrupt Control Register1 */ - -#define IRQ_PCISLOT1 0 /* PCI Slot #1 IRQ */ -#define IRQ_PCISLOT2 1 /* PCI Slot #2 IRQ */ -#define IRQ_PCISLOT3 2 /* PCI Slot #3 IRQ */ -#define IRQ_PCISLOT4 3 /* PCI Slot #4 IRQ */ -#define IRQ_CFCARD 4 /* CF Card IRQ */ -#define IRQ_CFINST 5 /* CF Card Insert IRQ */ -#define IRQ_M66596 6 /* M66596 IRQ */ -#define IRQ_SDCARD 7 /* SD Card IRQ */ -#define IRQ_TUCHPANEL 8 /* Touch Panel IRQ */ -#define IRQ_SCI 9 /* SCI IRQ */ -#define IRQ_2SERIAL 10 /* Serial IRQ */ -#define IRQ_EXTENTION 11 /* EXTn IRQ */ -#define IRQ_ONETH 12 /* On board Ethernet IRQ */ -#define IRQ_PSW 13 /* Push Switch IRQ */ -#define IRQ_ZIGBEE 14 /* Ziggbee IO IRQ */ - -#endif /* CONFIG_SH_R7780MP */ - -#define __IO_PREFIX r7780rp -#include <asm/io_generic.h> - -#endif /* __ASM_SH_RENESAS_R7780RP */ diff --git a/include/asm-sh/rts7751r2d/rts7751r2d.h b/include/asm-sh/rts7751r2d/rts7751r2d.h deleted file mode 100644 index 796b8fcb81a8..000000000000 --- a/include/asm-sh/rts7751r2d/rts7751r2d.h +++ /dev/null @@ -1,74 +0,0 @@ -#ifndef __ASM_SH_RENESAS_RTS7751R2D_H -#define __ASM_SH_RENESAS_RTS7751R2D_H - -/* - * linux/include/asm-sh/renesas_rts7751r2d.h - * - * Copyright (C) 2000 Atom Create Engineering Co., Ltd. - * - * Renesas Technology Sales RTS7751R2D support - */ - -/* Box specific addresses. 
*/ - -#define PA_BCR 0xa4000000 /* FPGA */ -#define PA_IRLMON 0xa4000002 /* Interrupt Status control */ -#define PA_CFCTL 0xa4000004 /* CF Timing control */ -#define PA_CFPOW 0xa4000006 /* CF Power control */ -#define PA_DISPCTL 0xa4000008 /* Display Timing control */ -#define PA_SDMPOW 0xa400000a /* SD Power control */ -#define PA_RTCCE 0xa400000c /* RTC(9701) Enable control */ -#define PA_PCICD 0xa400000e /* PCI Extention detect control */ -#define PA_VOYAGERRTS 0xa4000020 /* VOYAGER Reset control */ -#if defined(CONFIG_RTS7751R2D_REV11) -#define PA_AXRST 0xa4000022 /* AX_LAN Reset control */ -#define PA_CFRST 0xa4000024 /* CF Reset control */ -#define PA_ADMRTS 0xa4000026 /* SD Reset control */ -#define PA_EXTRST 0xa4000028 /* Extention Reset control */ -#define PA_CFCDINTCLR 0xa400002a /* CF Insert Interrupt clear */ -#else -#define PA_CFRST 0xa4000022 /* CF Reset control */ -#define PA_ADMRTS 0xa4000024 /* SD Reset control */ -#define PA_EXTRST 0xa4000026 /* Extention Reset control */ -#define PA_CFCDINTCLR 0xa4000028 /* CF Insert Interrupt clear */ -#define PA_KEYCTLCLR 0xa400002a /* Key Interrupt clear */ -#endif -#define PA_POWOFF 0xa4000030 /* Board Power OFF control */ -#define PA_VERREG 0xa4000032 /* FPGA Version Register */ -#define PA_INPORT 0xa4000034 /* KEY Input Port control */ -#define PA_OUTPORT 0xa4000036 /* LED control */ -#define PA_DMPORT 0xa4000038 /* DM270 Output Port control */ - -#define PA_AX88796L 0xaa000400 /* AX88796L Area */ -#define PA_VOYAGER 0xab000000 /* VOYAGER GX Area */ -#define PA_IDE_OFFSET 0x1f0 /* CF IDE Offset */ -#define AX88796L_IO_BASE 0x1000 /* AX88796L IO Base Address */ - -#define IRLCNTR1 (PA_BCR + 0) /* Interrupt Control Register1 */ - -#if defined(CONFIG_RTS7751R2D_REV11) -#define IRQ_PCIETH 0 /* PCI Ethernet IRQ */ -#define IRQ_CFCARD 1 /* CF Card IRQ */ -#define IRQ_CFINST 2 /* CF Card Insert IRQ */ -#define IRQ_PCMCIA 3 /* PCMCIA IRQ */ -#define IRQ_VOYAGER 4 /* VOYAGER IRQ */ -#define IRQ_ONETH 5 /* On board Ethernet IRQ */ -#else -#define IRQ_KEYIN 0 /* Key Input IRQ */ -#define IRQ_PCIETH 1 /* PCI Ethernet IRQ */ -#define IRQ_CFCARD 2 /* CF Card IRQ */ -#define IRQ_CFINST 3 /* CF Card Insert IRQ */ -#define IRQ_PCMCIA 4 /* PCMCIA IRQ */ -#define IRQ_VOYAGER 5 /* VOYAGER IRQ */ -#endif -#define IRQ_RTCALM 6 /* RTC Alarm IRQ */ -#define IRQ_RTCTIME 7 /* RTC Timer IRQ */ -#define IRQ_SDCARD 8 /* SD Card IRQ */ -#define IRQ_PCISLOT1 9 /* PCI Slot #1 IRQ */ -#define IRQ_PCISLOT2 10 /* PCI Slot #2 IRQ */ -#define IRQ_EXTENTION 11 /* EXTn IRQ */ - -#define __IO_PREFIX rts7751r2d -#include <asm/io_generic.h> - -#endif /* __ASM_SH_RENESAS_RTS7751R2D */ diff --git a/include/asm-sh/shmin/shmin.h b/include/asm-sh/shmin/shmin.h deleted file mode 100644 index 36ba138a81fb..000000000000 --- a/include/asm-sh/shmin/shmin.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __ASM_SH_SHMIN_H -#define __ASM_SH_SHMIN_H - -#define SHMIN_IO_BASE 0xb0000000UL - -#define SHMIN_NE_IRQ IRQ2_IRQ -#define SHMIN_NE_BASE 0x300 - -#endif diff --git a/sound/oss/sh_dac_audio.c b/sound/oss/sh_dac_audio.c index 3b3b4da8cfd3..51f554154c48 100644 --- a/sound/oss/sh_dac_audio.c +++ b/sound/oss/sh_dac_audio.c @@ -26,7 +26,7 @@ #include <asm/cpu/dac.h> #include <asm/cpu/timer.h> #include <asm/machvec.h> -#include <asm/hp6xx/hp6xx.h> +#include <asm/hp6xx.h> #include <asm/hd64461.h> #define MODNAME "sh_dac_audio" -- cgit v1.2.3 From 1f666587dbf6bc660b23d8dd8abb6c572ce3eae5 Mon Sep 17 00:00:00 2001 From: Paul Mundt <lethal@linux-sh.org> Date: Thu, 19 Oct 2006 16:20:25 +0900 Subject: sh: 
Fix exception_handling_table alignment. With the recent change ripping out interrupt_table, explicit padding of the table was missing, causing bad things to happen when manually inserting handlers in to the table. This problem particularly showed up in relation to do_fpu_state_restore() which was inserted quite deeply in to the table and ended up scribbling over a slab object. Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- arch/sh/kernel/cpu/sh3/ex.S | 9 ++++++++- arch/sh/kernel/cpu/sh4/ex.S | 9 ++++++++- arch/sh/kernel/traps.c | 43 +++++++++++++++++++++---------------------- include/asm-sh/system.h | 7 +++++++ 4 files changed, 44 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/arch/sh/kernel/cpu/sh3/ex.S b/arch/sh/kernel/cpu/sh3/ex.S index 6be46f0686b7..ba3082d640b5 100644 --- a/arch/sh/kernel/cpu/sh3/ex.S +++ b/arch/sh/kernel/cpu/sh3/ex.S @@ -4,7 +4,7 @@ * The SH-3 exception vector table. * Copyright (C) 1999, 2000, 2002 Niibe Yutaka - * Copyright (C) 2003 Paul Mundt + * Copyright (C) 2003 - 2006 Paul Mundt * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive @@ -49,3 +49,10 @@ ENTRY(nmi_slot) #endif ENTRY(user_break_point_trap) .long break_point_trap /* 1E0 */ + + /* + * Pad the remainder of the table out, exceptions residing in far + * away offsets can be manually inserted in to their appropriate + * location via set_exception_table_{evt,vec}(). + */ + .balign 4096,0,4096 diff --git a/arch/sh/kernel/cpu/sh4/ex.S b/arch/sh/kernel/cpu/sh4/ex.S index 3f4cd043e900..ac8ab57413cc 100644 --- a/arch/sh/kernel/cpu/sh4/ex.S +++ b/arch/sh/kernel/cpu/sh4/ex.S @@ -4,7 +4,7 @@ * The SH-4 exception vector table. * Copyright (C) 1999, 2000, 2002 Niibe Yutaka - * Copyright (C) 2003 Paul Mundt + * Copyright (C) 2003 - 2006 Paul Mundt * * This file is subject to the terms and conditions of the GNU General Public * License. See the file "COPYING" in the main directory of this archive @@ -53,3 +53,10 @@ ENTRY(nmi_slot) #endif ENTRY(user_break_point_trap) .long break_point_trap /* 1E0 */ + + /* + * Pad the remainder of the table out, exceptions residing in far + * away offsets can be manually inserted in to their appropriate + * location via set_exception_table_{evt,vec}(). + */ + .balign 4096,0,4096 diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index ffe127f09f3e..53dfa55f3156 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -11,27 +11,15 @@ * License. See the file "COPYING" in the main directory of this archive * for more details. 
*/ -#include <linux/sched.h> #include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> #include <linux/ptrace.h> -#include <linux/timer.h> -#include <linux/mm.h> -#include <linux/smp.h> -#include <linux/smp_lock.h> #include <linux/init.h> -#include <linux/delay.h> #include <linux/spinlock.h> #include <linux/module.h> #include <linux/kallsyms.h> - +#include <linux/io.h> #include <asm/system.h> #include <asm/uaccess.h> -#include <asm/io.h> -#include <asm/atomic.h> -#include <asm/processor.h> -#include <asm/sections.h> #ifdef CONFIG_SH_KGDB #include <asm/kgdb.h> @@ -581,7 +569,10 @@ int is_dsp_inst(struct pt_regs *regs) #define is_dsp_inst(regs) (0) #endif /* CONFIG_SH_DSP */ -extern int do_fpu_inst(unsigned short, struct pt_regs*); +/* arch/sh/kernel/cpu/sh4/fpu.c */ +extern int do_fpu_inst(unsigned short, struct pt_regs *); +extern asmlinkage void do_fpu_state_restore(unsigned long r4, unsigned long r5, + unsigned long r6, unsigned long r7, struct pt_regs regs); asmlinkage void do_reserved_inst(unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7, @@ -740,14 +731,20 @@ void __init per_cpu_trap_init(void) : "memory"); } -void __init trap_init(void) +void *set_exception_table_vec(unsigned int vec, void *handler) { extern void *exception_handling_table[]; + void *old_handler; + + old_handler = exception_handling_table[vec]; + exception_handling_table[vec] = handler; + return old_handler; +} - exception_handling_table[TRAP_RESERVED_INST] - = (void *)do_reserved_inst; - exception_handling_table[TRAP_ILLEGAL_SLOT_INST] - = (void *)do_illegal_slot_inst; +void __init trap_init(void) +{ + set_exception_table_vec(TRAP_RESERVED_INST, do_reserved_inst); + set_exception_table_vec(TRAP_ILLEGAL_SLOT_INST, do_illegal_slot_inst); #if defined(CONFIG_CPU_SH4) && !defined(CONFIG_SH_FPU) || \ defined(CONFIG_SH_FPU_EMU) @@ -756,9 +753,11 @@ void __init trap_init(void) * reserved. They'll be handled in the math-emu case, or faulted on * otherwise. */ - /* entry 64 corresponds to EXPEVT=0x800 */ - exception_handling_table[64] = (void *)do_reserved_inst; - exception_handling_table[65] = (void *)do_illegal_slot_inst; + set_exception_table_evt(0x800, do_reserved_inst); + set_exception_table_evt(0x820, do_illegal_slot_inst); +#elif defined(CONFIG_SH_FPU) + set_exception_table_evt(0x800, do_fpu_state_restore); + set_exception_table_evt(0x820, do_fpu_state_restore); #endif /* Setup VBR for boot cpu */ diff --git a/include/asm-sh/system.h b/include/asm-sh/system.h index 6c1f8fde5ac4..3340126f4e0f 100644 --- a/include/asm-sh/system.h +++ b/include/asm-sh/system.h @@ -353,6 +353,13 @@ static inline unsigned long __cmpxchg(volatile void * ptr, unsigned long old, (unsigned long)_n_, sizeof(*(ptr))); \ }) +extern void *set_exception_table_vec(unsigned int vec, void *handler); + +static inline void *set_exception_table_evt(unsigned int evt, void *handler) +{ + return set_exception_table_vec(evt >> 5, handler); +} + /* XXX * disable hlt during certain critical i/o operations */ -- cgit v1.2.3 From c2a560f5334c55da1e8bfa17586cc1d4e7f8ed85 Mon Sep 17 00:00:00 2001 From: Paul Mundt <lethal@linux-sh.org> Date: Thu, 19 Oct 2006 17:31:22 +0900 Subject: sh: Add some missing board headers. Some of these were dropped in the header directory rework, add the few missing ones back in. 
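The restored hp6xx.h below carries more than register definitions; it also provides small inline LED helpers, and note that passing 1 clears the corresponding port bit, so the helpers' logic suggests the LEDs are wired active-low. A minimal, purely hypothetical consumer, assuming <linux/delay.h> for mdelay():

	#include <linux/delay.h>
	#include <asm/hp6xx.h>

	/* Illustrative only: blink the green LED once. */
	static void hp6xx_flash_green(void)
	{
		hp6xx_led_green(1);	/* clears PKDR_LED_GREEN: LED on */
		mdelay(100);
		hp6xx_led_green(0);	/* sets PKDR_LED_GREEN: LED off */
	}
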
Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- include/asm-sh/edosk7705.h | 30 ++++++++ include/asm-sh/hp6xx.h | 80 ++++++++++++++++++++ include/asm-sh/hs7751rvoip.h | 54 ++++++++++++++ include/asm-sh/r7780rp.h | 173 +++++++++++++++++++++++++++++++++++++++++++ include/asm-sh/rts7751r2d.h | 74 ++++++++++++++++++ include/asm-sh/shmin.h | 9 +++ 6 files changed, 420 insertions(+) create mode 100644 include/asm-sh/edosk7705.h create mode 100644 include/asm-sh/hp6xx.h create mode 100644 include/asm-sh/hs7751rvoip.h create mode 100644 include/asm-sh/r7780rp.h create mode 100644 include/asm-sh/rts7751r2d.h create mode 100644 include/asm-sh/shmin.h (limited to 'include') diff --git a/include/asm-sh/edosk7705.h b/include/asm-sh/edosk7705.h new file mode 100644 index 000000000000..a1089a65bc36 --- /dev/null +++ b/include/asm-sh/edosk7705.h @@ -0,0 +1,30 @@ +/* + * include/asm-sh/edosk7705/io.h + * + * Modified version of io_se.h for the EDOSK7705 specific functions. + * + * May be copied or modified under the terms of the GNU General Public + * License. See linux/COPYING for more information. + * + * IO functions for an Hitachi EDOSK7705 development board + */ + +#ifndef __ASM_SH_EDOSK7705_IO_H +#define __ASM_SH_EDOSK7705_IO_H + +#include <asm/io_generic.h> + +extern unsigned char sh_edosk7705_inb(unsigned long port); +extern unsigned int sh_edosk7705_inl(unsigned long port); + +extern void sh_edosk7705_outb(unsigned char value, unsigned long port); +extern void sh_edosk7705_outl(unsigned int value, unsigned long port); + +extern void sh_edosk7705_insb(unsigned long port, void *addr, unsigned long count); +extern void sh_edosk7705_insl(unsigned long port, void *addr, unsigned long count); +extern void sh_edosk7705_outsb(unsigned long port, const void *addr, unsigned long count); +extern void sh_edosk7705_outsl(unsigned long port, const void *addr, unsigned long count); + +extern unsigned long sh_edosk7705_isa_port2addr(unsigned long offset); + +#endif /* __ASM_SH_EDOSK7705_IO_H */ diff --git a/include/asm-sh/hp6xx.h b/include/asm-sh/hp6xx.h new file mode 100644 index 000000000000..f35134c159dd --- /dev/null +++ b/include/asm-sh/hp6xx.h @@ -0,0 +1,80 @@ +#ifndef __ASM_SH_HP6XX_H +#define __ASM_SH_HP6XX_H + +/* + * Copyright (C) 2003, 2004, 2005 Andriy Skulysh + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. 
+ * + */ + +#define HP680_BTN_IRQ IRQ0_IRQ +#define HP680_TS_IRQ IRQ3_IRQ +#define HP680_HD64461_IRQ IRQ4_IRQ + +#define DAC_LCD_BRIGHTNESS 0 +#define DAC_SPEAKER_VOLUME 1 + +#define PGDR_OPENED 0x01 +#define PGDR_MAIN_BATTERY_OUT 0x04 +#define PGDR_PLAY_BUTTON 0x08 +#define PGDR_REWIND_BUTTON 0x10 +#define PGDR_RECORD_BUTTON 0x20 + +#define PHDR_TS_PEN_DOWN 0x08 + +#define PJDR_LED_BLINK 0x02 + +#define PKDR_LED_GREEN 0x10 + +#define SCPDR_TS_SCAN_ENABLE 0x20 +#define SCPDR_TS_SCAN_Y 0x02 +#define SCPDR_TS_SCAN_X 0x01 + +#define SCPCR_TS_ENABLE 0x405 +#define SCPCR_TS_MASK 0xc0f + +#define ADC_CHANNEL_TS_Y 1 +#define ADC_CHANNEL_TS_X 2 +#define ADC_CHANNEL_BATTERY 3 +#define ADC_CHANNEL_BACKUP 4 +#define ADC_CHANNEL_CHARGE 5 + +#define HD64461_GPADR_SPEAKER 0x01 +#define HD64461_GPADR_PCMCIA0 (0x02|0x08) + +#define HD64461_GPBDR_LCDOFF 0x01 +#define HD64461_GPBDR_LCD_CONTRAST_MASK 0x78 +#define HD64461_GPBDR_LED_RED 0x80 + +#include <asm/hd64461.h> +#include <asm/io.h> + +#define PJDR 0xa4000130 +#define PKDR 0xa4000132 + +static inline void hp6xx_led_red(int on) +{ + u16 v16; + v16 = ctrl_inw(CONFIG_HD64461_IOBASE + HD64461_GPBDR - 0x10000); + if (on) + ctrl_outw(v16 & (~HD64461_GPBDR_LED_RED), CONFIG_HD64461_IOBASE + HD64461_GPBDR - 0x10000); + else + ctrl_outw(v16 | HD64461_GPBDR_LED_RED, CONFIG_HD64461_IOBASE + HD64461_GPBDR - 0x10000); +} + +static inline void hp6xx_led_green(int on) +{ + u8 v8; + + v8 = ctrl_inb(PKDR); + if (on) + ctrl_outb(v8 & (~PKDR_LED_GREEN), PKDR); + else + ctrl_outb(v8 | PKDR_LED_GREEN, PKDR); +} + + +#endif /* __ASM_SH_HP6XX_H */ diff --git a/include/asm-sh/hs7751rvoip.h b/include/asm-sh/hs7751rvoip.h new file mode 100644 index 000000000000..c4cff9d33927 --- /dev/null +++ b/include/asm-sh/hs7751rvoip.h @@ -0,0 +1,54 @@ +#ifndef __ASM_SH_RENESAS_HS7751RVOIP_H +#define __ASM_SH_RENESAS_HS7751RVOIP_H + +/* + * linux/include/asm-sh/hs7751rvoip/hs7751rvoip.h + * + * Copyright (C) 2000 Atom Create Engineering Co., Ltd. + * + * Renesas Technology Sales HS7751RVoIP support + */ + +/* Box specific addresses. 
*/ + +#define PA_BCR 0xa4000000 /* FPGA */ +#define PA_SLICCNTR1 0xa4000006 /* SLIC PIO Control 1 */ +#define PA_SLICCNTR2 0xa4000008 /* SLIC PIO Control 2 */ +#define PA_DMACNTR 0xa400000a /* USB DMA Control */ +#define PA_INPORTR 0xa400000c /* Input Port Register */ +#define PA_OUTPORTR 0xa400000e /* Output Port Reguster */ +#define PA_VERREG 0xa4000014 /* FPGA Version Register */ + +#define PA_IDE_OFFSET 0x1f0 /* CF IDE Offset */ + +#define IRLCNTR1 (PA_BCR + 0) /* Interrupt Control Register1 */ +#define IRLCNTR2 (PA_BCR + 2) /* Interrupt Control Register2 */ +#define IRLCNTR3 (PA_BCR + 4) /* Interrupt Control Register3 */ +#define IRLCNTR4 (PA_BCR + 16) /* Interrupt Control Register4 */ +#define IRLCNTR5 (PA_BCR + 18) /* Interrupt Control Register5 */ + +#define IRQ_PCIETH 6 /* PCI Ethernet IRQ */ +#define IRQ_PCIHUB 7 /* PCI Ethernet Hub IRQ */ +#define IRQ_USBCOM 8 /* USB Comunication IRQ */ +#define IRQ_USBCON 9 /* USB Connect IRQ */ +#define IRQ_USBDMA 10 /* USB DMA IRQ */ +#define IRQ_CFCARD 11 /* CF Card IRQ */ +#define IRQ_PCMCIA 12 /* PCMCIA IRQ */ +#define IRQ_PCISLOT 13 /* PCI Slot #1 IRQ */ +#define IRQ_ONHOOK1 0 /* ON HOOK1 IRQ */ +#define IRQ_OFFHOOK1 1 /* OFF HOOK1 IRQ */ +#define IRQ_ONHOOK2 2 /* ON HOOK2 IRQ */ +#define IRQ_OFFHOOK2 3 /* OFF HOOK2 IRQ */ +#define IRQ_RINGING 4 /* Ringing IRQ */ +#define IRQ_CODEC 5 /* CODEC IRQ */ + +#define __IO_PREFIX hs7751rvoip +#include <asm/io_generic.h> + +/* arch/sh/boards/renesas/hs7751rvoip/irq.c */ +void init_hs7751rvoip_IRQ(void); + +/* arch/sh/boards/renesas/hs7751rvoip/io.c */ +void *hs7751rvoip_ioremap(unsigned long, unsigned long); + +#endif /* __ASM_SH_RENESAS_HS7751RVOIP */ diff --git a/include/asm-sh/r7780rp.h b/include/asm-sh/r7780rp.h new file mode 100644 index 000000000000..ddd67b60bb43 --- /dev/null +++ b/include/asm-sh/r7780rp.h @@ -0,0 +1,173 @@ +#ifndef __ASM_SH_RENESAS_R7780RP_H +#define __ASM_SH_RENESAS_R7780RP_H + +/* + * linux/include/asm-sh/r7780rp.h + * + * Copyright (C) 2000 Atom Create Engineering Co., Ltd. + * + * Renesas Solutions Highlander R7780RP support + */ + +/* Box specific addresses. 
*/ +#if defined(CONFIG_SH_R7780MP) +#define PA_BCR 0xa4000000 /* FPGA */ +#define PA_IRLMSK (PA_BCR+0x0000) /* Interrupt Mask control */ +#define PA_IRLMON (PA_BCR+0x0002) /* Interrupt Status control */ +#define PA_IRLPRI1 (PA_BCR+0x0004) /* Interrupt Priorty 1 */ +#define PA_IRLPRI2 (PA_BCR+0x0006) /* Interrupt Priorty 2 */ +#define PA_IRLPRI3 (PA_BCR+0x0008) /* Interrupt Priorty 3 */ +#define PA_IRLPRI4 (PA_BCR+0x000a) /* Interrupt Priorty 4 */ +#define PA_RSTCTL (PA_BCR+0x000c) /* Reset Control */ +#define PA_PCIBD (PA_BCR+0x000e) /* PCI Board detect control */ +#define PA_PCICD (PA_BCR+0x0010) /* PCI Conector detect control */ +#define PA_EXTGIO (PA_BCR+0x0016) /* Extension GPIO Control */ +#define PA_IVDRMON (PA_BCR+0x0018) /* iVDR Moniter control */ +#define PA_IVDRCTL (PA_BCR+0x001a) /* iVDR control */ +#define PA_OBLED (PA_BCR+0x001c) /* On Board LED control */ +#define PA_OBSW (PA_BCR+0x001e) /* On Board Switch control */ +#define PA_AUDIOSEL (PA_BCR+0x0020) /* Sound Interface Select control */ +#define PA_EXTPLR (PA_BCR+0x001e) /* Extention Pin Polarity control */ +#define PA_TPCTL (PA_BCR+0x0100) /* Touch Panel Access control */ +#define PA_TPDCKCTL (PA_BCR+0x0102) /* Touch Panel Access data control */ +#define PA_TPCTLCLR (PA_BCR+0x0104) /* Touch Panel Access control */ +#define PA_TPXPOS (PA_BCR+0x0106) /* Touch Panel X position control */ +#define PA_TPYPOS (PA_BCR+0x0108) /* Touch Panel Y position control */ +#define PA_DBSW (PA_BCR+0x0200) /* Debug Board Switch control */ +#define PA_CFCTL (PA_BCR+0x0300) /* CF Timing control */ +#define PA_CFPOW (PA_BCR+0x0302) /* CF Power control */ +#define PA_CFCDINTCLR (PA_BCR+0x0304) /* CF Insert Interrupt clear */ +#define PA_SCSMR0 (PA_BCR+0x0400) /* SCIF0 Serial mode control */ +#define PA_SCBRR0 (PA_BCR+0x0404) /* SCIF0 Bit rate control */ +#define PA_SCSCR0 (PA_BCR+0x0408) /* SCIF0 Serial control */ +#define PA_SCFTDR0 (PA_BCR+0x040c) /* SCIF0 Send FIFO control */ +#define PA_SCFSR0 (PA_BCR+0x0410) /* SCIF0 Serial status control */ +#define PA_SCFRDR0 (PA_BCR+0x0414) /* SCIF0 Receive FIFO control */ +#define PA_SCFCR0 (PA_BCR+0x0418) /* SCIF0 FIFO control */ +#define PA_SCTFDR0 (PA_BCR+0x041c) /* SCIF0 Send FIFO data control */ +#define PA_SCRFDR0 (PA_BCR+0x0420) /* SCIF0 Receive FIFO data control */ +#define PA_SCSPTR0 (PA_BCR+0x0424) /* SCIF0 Serial Port control */ +#define PA_SCLSR0 (PA_BCR+0x0428) /* SCIF0 Line Status control */ +#define PA_SCRER0 (PA_BCR+0x042c) /* SCIF0 Serial Error control */ +#define PA_SCSMR1 (PA_BCR+0x0500) /* SCIF1 Serial mode control */ +#define PA_SCBRR1 (PA_BCR+0x0504) /* SCIF1 Bit rate control */ +#define PA_SCSCR1 (PA_BCR+0x0508) /* SCIF1 Serial control */ +#define PA_SCFTDR1 (PA_BCR+0x050c) /* SCIF1 Send FIFO control */ +#define PA_SCFSR1 (PA_BCR+0x0510) /* SCIF1 Serial status control */ +#define PA_SCFRDR1 (PA_BCR+0x0514) /* SCIF1 Receive FIFO control */ +#define PA_SCFCR1 (PA_BCR+0x0518) /* SCIF1 FIFO control */ +#define PA_SCTFDR1 (PA_BCR+0x051c) /* SCIF1 Send FIFO data control */ +#define PA_SCRFDR1 (PA_BCR+0x0520) /* SCIF1 Receive FIFO data control */ +#define PA_SCSPTR1 (PA_BCR+0x0524) /* SCIF1 Serial Port control */ +#define PA_SCLSR1 (PA_BCR+0x0528) /* SCIF1 Line Status control */ +#define PA_SCRER1 (PA_BCR+0x052c) /* SCIF1 Serial Error control */ +#define PA_ICCR (PA_BCR+0x0600) /* Serial control */ +#define PA_SAR (PA_BCR+0x0602) /* Serial Slave control */ +#define PA_MDR (PA_BCR+0x0604) /* Serial Mode control */ +#define PA_ADR1 (PA_BCR+0x0606) /* Serial Address1 control */ +#define 
PA_DAR1 (PA_BCR+0x0646) /* Serial Data1 control */ +#define PA_VERREG (PA_BCR+0x0700) /* FPGA Version Register */ +#define PA_POFF (PA_BCR+0x0800) /* System Power Off control */ +#define PA_PMR (PA_BCR+0x0900) /* */ + +#define PA_AX88796L 0xa4100400 /* AX88796L Area */ +#define PA_SC1602BSLB 0xa6000000 /* SC1602BSLB Area */ +#define PA_IDE_OFFSET 0x1f0 /* CF IDE Offset */ +#define AX88796L_IO_BASE 0x1000 /* AX88796L IO Base Address */ + +#define IRLCNTR1 (PA_BCR + 0) /* Interrupt Control Register1 */ + +#define IRQ_PCISLOT1 65 /* PCI Slot #1 IRQ */ +#define IRQ_PCISLOT2 66 /* PCI Slot #2 IRQ */ +#define IRQ_PCISLOT3 67 /* PCI Slot #3 IRQ */ +#define IRQ_PCISLOT4 68 /* PCI Slot #4 IRQ */ +#define IRQ_CFCARD 1 /* CF Card IRQ */ +// #define IRQ_CFINST 0 /* CF Card Insert IRQ */ +#define IRQ_TP 2 /* Touch Panel IRQ */ +#define IRQ_SCI1 3 /* SCI1 IRQ */ +#define IRQ_SCI0 4 /* SCI0 IRQ */ +#define IRQ_2SERIAL 5 /* Serial IRQ */ +#define IRQ_RTC 6 /* RTC A / B IRQ */ +#define IRQ_EXTENTION6 7 /* EXT6n IRQ */ +#define IRQ_EXTENTION5 8 /* EXT5n IRQ */ +#define IRQ_EXTENTION4 9 /* EXT4n IRQ */ +#define IRQ_EXTENTION2 10 /* EXT2n IRQ */ +#define IRQ_EXTENTION1 11 /* EXT1n IRQ */ +#define IRQ_ONETH 13 /* On board Ethernet IRQ */ +#define IRQ_PSW 14 /* Push Switch IRQ */ + +#else /* R7780RP */ + +#define PA_BCR 0xa5000000 /* FPGA */ +#define PA_IRLMSK (PA_BCR+0x0000) /* Interrupt Mask control */ +#define PA_IRLMON (PA_BCR+0x0002) /* Interrupt Status control */ +#define PA_SDPOW (PA_BCR+0x0004) /* SD Power control */ +#define PA_RSTCTL (PA_BCR+0x0006) /* Device Reset control */ +#define PA_PCIBD (PA_BCR+0x0008) /* PCI Board detect control */ +#define PA_PCICD (PA_BCR+0x000a) /* PCI Conector detect control */ +#define PA_ZIGIO1 (PA_BCR+0x000c) /* Zigbee IO control 1 */ +#define PA_ZIGIO2 (PA_BCR+0x000e) /* Zigbee IO control 2 */ +#define PA_ZIGIO3 (PA_BCR+0x0010) /* Zigbee IO control 3 */ +#define PA_ZIGIO4 (PA_BCR+0x0012) /* Zigbee IO control 4 */ +#define PA_IVDRMON (PA_BCR+0x0014) /* iVDR Moniter control */ +#define PA_IVDRCTL (PA_BCR+0x0016) /* iVDR control */ +#define PA_OBLED (PA_BCR+0x0018) /* On Board LED control */ +#define PA_OBSW (PA_BCR+0x001a) /* On Board Switch control */ +#define PA_AUDIOSEL (PA_BCR+0x001c) /* Sound Interface Select control */ +#define PA_EXTPLR (PA_BCR+0x001e) /* Extention Pin Polarity control */ +#define PA_TPCTL (PA_BCR+0x0100) /* Touch Panel Access control */ +#define PA_TPDCKCTL (PA_BCR+0x0102) /* Touch Panel Access data control */ +#define PA_TPCTLCLR (PA_BCR+0x0104) /* Touch Panel Access control */ +#define PA_TPXPOS (PA_BCR+0x0106) /* Touch Panel X position control */ +#define PA_TPYPOS (PA_BCR+0x0108) /* Touch Panel Y position control */ +#define PA_DBDET (PA_BCR+0x0200) /* Debug Board detect control */ +#define PA_DBDISPCTL (PA_BCR+0x0202) /* Debug Board Dot timing control */ +#define PA_DBSW (PA_BCR+0x0204) /* Debug Board Switch control */ +#define PA_CFCTL (PA_BCR+0x0300) /* CF Timing control */ +#define PA_CFPOW (PA_BCR+0x0302) /* CF Power control */ +#define PA_CFCDINTCLR (PA_BCR+0x0304) /* CF Insert Interrupt clear */ +#define PA_SCSMR (PA_BCR+0x0400) /* SCIF Serial mode control */ +#define PA_SCBRR (PA_BCR+0x0402) /* SCIF Bit rate control */ +#define PA_SCSCR (PA_BCR+0x0404) /* SCIF Serial control */ +#define PA_SCFDTR (PA_BCR+0x0406) /* SCIF Send FIFO control */ +#define PA_SCFSR (PA_BCR+0x0408) /* SCIF Serial status control */ +#define PA_SCFRDR (PA_BCR+0x040a) /* SCIF Receive FIFO control */ +#define PA_SCFCR (PA_BCR+0x040c) /* SCIF FIFO control */ 
+#define PA_SCFDR (PA_BCR+0x040e) /* SCIF FIFO data control */ +#define PA_SCLSR (PA_BCR+0x0412) /* SCIF Line Status control */ +#define PA_ICCR (PA_BCR+0x0500) /* Serial control */ +#define PA_SAR (PA_BCR+0x0502) /* Serial Slave control */ +#define PA_MDR (PA_BCR+0x0504) /* Serial Mode control */ +#define PA_ADR1 (PA_BCR+0x0506) /* Serial Address1 control */ +#define PA_DAR1 (PA_BCR+0x0546) /* Serial Data1 control */ +#define PA_VERREG (PA_BCR+0x0600) /* FPGA Version Register */ + +#define PA_AX88796L 0xa5800400 /* AX88796L Area */ +#define PA_SC1602BSLB 0xa6000000 /* SC1602BSLB Area */ +#define PA_IDE_OFFSET 0x1f0 /* CF IDE Offset */ +#define AX88796L_IO_BASE 0x1000 /* AX88796L IO Base Address */ + +#define IRLCNTR1 (PA_BCR + 0) /* Interrupt Control Register1 */ + +#define IRQ_PCISLOT1 0 /* PCI Slot #1 IRQ */ +#define IRQ_PCISLOT2 1 /* PCI Slot #2 IRQ */ +#define IRQ_PCISLOT3 2 /* PCI Slot #3 IRQ */ +#define IRQ_PCISLOT4 3 /* PCI Slot #4 IRQ */ +#define IRQ_CFCARD 4 /* CF Card IRQ */ +#define IRQ_CFINST 5 /* CF Card Insert IRQ */ +#define IRQ_M66596 6 /* M66596 IRQ */ +#define IRQ_SDCARD 7 /* SD Card IRQ */ +#define IRQ_TUCHPANEL 8 /* Touch Panel IRQ */ +#define IRQ_SCI 9 /* SCI IRQ */ +#define IRQ_2SERIAL 10 /* Serial IRQ */ +#define IRQ_EXTENTION 11 /* EXTn IRQ */ +#define IRQ_ONETH 12 /* On board Ethernet IRQ */ +#define IRQ_PSW 13 /* Push Switch IRQ */ +#define IRQ_ZIGBEE 14 /* Ziggbee IO IRQ */ + +#endif /* CONFIG_SH_R7780MP */ + +#define __IO_PREFIX r7780rp +#include <asm/io_generic.h> + +#endif /* __ASM_SH_RENESAS_R7780RP */ diff --git a/include/asm-sh/rts7751r2d.h b/include/asm-sh/rts7751r2d.h new file mode 100644 index 000000000000..796b8fcb81a8 --- /dev/null +++ b/include/asm-sh/rts7751r2d.h @@ -0,0 +1,74 @@ +#ifndef __ASM_SH_RENESAS_RTS7751R2D_H +#define __ASM_SH_RENESAS_RTS7751R2D_H + +/* + * linux/include/asm-sh/renesas_rts7751r2d.h + * + * Copyright (C) 2000 Atom Create Engineering Co., Ltd. + * + * Renesas Technology Sales RTS7751R2D support + */ + +/* Box specific addresses. 
*/ + +#define PA_BCR 0xa4000000 /* FPGA */ +#define PA_IRLMON 0xa4000002 /* Interrupt Status control */ +#define PA_CFCTL 0xa4000004 /* CF Timing control */ +#define PA_CFPOW 0xa4000006 /* CF Power control */ +#define PA_DISPCTL 0xa4000008 /* Display Timing control */ +#define PA_SDMPOW 0xa400000a /* SD Power control */ +#define PA_RTCCE 0xa400000c /* RTC(9701) Enable control */ +#define PA_PCICD 0xa400000e /* PCI Extention detect control */ +#define PA_VOYAGERRTS 0xa4000020 /* VOYAGER Reset control */ +#if defined(CONFIG_RTS7751R2D_REV11) +#define PA_AXRST 0xa4000022 /* AX_LAN Reset control */ +#define PA_CFRST 0xa4000024 /* CF Reset control */ +#define PA_ADMRTS 0xa4000026 /* SD Reset control */ +#define PA_EXTRST 0xa4000028 /* Extention Reset control */ +#define PA_CFCDINTCLR 0xa400002a /* CF Insert Interrupt clear */ +#else +#define PA_CFRST 0xa4000022 /* CF Reset control */ +#define PA_ADMRTS 0xa4000024 /* SD Reset control */ +#define PA_EXTRST 0xa4000026 /* Extention Reset control */ +#define PA_CFCDINTCLR 0xa4000028 /* CF Insert Interrupt clear */ +#define PA_KEYCTLCLR 0xa400002a /* Key Interrupt clear */ +#endif +#define PA_POWOFF 0xa4000030 /* Board Power OFF control */ +#define PA_VERREG 0xa4000032 /* FPGA Version Register */ +#define PA_INPORT 0xa4000034 /* KEY Input Port control */ +#define PA_OUTPORT 0xa4000036 /* LED control */ +#define PA_DMPORT 0xa4000038 /* DM270 Output Port control */ + +#define PA_AX88796L 0xaa000400 /* AX88796L Area */ +#define PA_VOYAGER 0xab000000 /* VOYAGER GX Area */ +#define PA_IDE_OFFSET 0x1f0 /* CF IDE Offset */ +#define AX88796L_IO_BASE 0x1000 /* AX88796L IO Base Address */ + +#define IRLCNTR1 (PA_BCR + 0) /* Interrupt Control Register1 */ + +#if defined(CONFIG_RTS7751R2D_REV11) +#define IRQ_PCIETH 0 /* PCI Ethernet IRQ */ +#define IRQ_CFCARD 1 /* CF Card IRQ */ +#define IRQ_CFINST 2 /* CF Card Insert IRQ */ +#define IRQ_PCMCIA 3 /* PCMCIA IRQ */ +#define IRQ_VOYAGER 4 /* VOYAGER IRQ */ +#define IRQ_ONETH 5 /* On board Ethernet IRQ */ +#else +#define IRQ_KEYIN 0 /* Key Input IRQ */ +#define IRQ_PCIETH 1 /* PCI Ethernet IRQ */ +#define IRQ_CFCARD 2 /* CF Card IRQ */ +#define IRQ_CFINST 3 /* CF Card Insert IRQ */ +#define IRQ_PCMCIA 4 /* PCMCIA IRQ */ +#define IRQ_VOYAGER 5 /* VOYAGER IRQ */ +#endif +#define IRQ_RTCALM 6 /* RTC Alarm IRQ */ +#define IRQ_RTCTIME 7 /* RTC Timer IRQ */ +#define IRQ_SDCARD 8 /* SD Card IRQ */ +#define IRQ_PCISLOT1 9 /* PCI Slot #1 IRQ */ +#define IRQ_PCISLOT2 10 /* PCI Slot #2 IRQ */ +#define IRQ_EXTENTION 11 /* EXTn IRQ */ + +#define __IO_PREFIX rts7751r2d +#include <asm/io_generic.h> + +#endif /* __ASM_SH_RENESAS_RTS7751R2D */ diff --git a/include/asm-sh/shmin.h b/include/asm-sh/shmin.h new file mode 100644 index 000000000000..36ba138a81fb --- /dev/null +++ b/include/asm-sh/shmin.h @@ -0,0 +1,9 @@ +#ifndef __ASM_SH_SHMIN_H +#define __ASM_SH_SHMIN_H + +#define SHMIN_IO_BASE 0xb0000000UL + +#define SHMIN_NE_IRQ IRQ2_IRQ +#define SHMIN_NE_BASE 0x300 + +#endif -- cgit v1.2.3 From 94399ea62fc1047eded76b45b972e7850a800a1b Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp> Date: Wed, 18 Oct 2006 23:27:29 +0900 Subject: [MIPS] More vr41xx pt_regs fixups Signed-off-by: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp> Signed-off-by: Ralf Baechle <ralf@linux-mips.org> --- include/asm-mips/vr41xx/vr41xx.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-mips/vr41xx/vr41xx.h b/include/asm-mips/vr41xx/vr41xx.h index dd3eb3dc5886..88b492f6ea9c 100644 --- 
a/include/asm-mips/vr41xx/vr41xx.h +++ b/include/asm-mips/vr41xx/vr41xx.h @@ -75,7 +75,7 @@ extern void vr41xx_mask_clock(vr41xx_clock_t clock); * Interrupt Control Unit */ extern int vr41xx_set_intassign(unsigned int irq, unsigned char intassign); -extern int cascade_irq(unsigned int irq, int (*get_irq)(unsigned int, struct pt_regs *)); +extern int cascade_irq(unsigned int irq, int (*get_irq)(unsigned int)); #define PIUINT_COMMAND 0x0040 #define PIUINT_DATA 0x0020 -- cgit v1.2.3 From d2bcf87d0fcdc10d1be65b03fd032bec05efe49f Mon Sep 17 00:00:00 2001 From: Ralf Baechle <ralf@linux-mips.org> Date: Wed, 18 Oct 2006 23:52:17 +0100 Subject: [MIPS] Reserve syscall numbers for kexec_load. Signed-off-by: Ralf Baechle <ralf@linux-mips.org> --- arch/mips/kernel/scall32-o32.S | 3 ++- arch/mips/kernel/scall64-64.S | 1 + arch/mips/kernel/scall64-n32.S | 1 + arch/mips/kernel/scall64-o32.S | 1 + include/asm-mips/unistd.h | 15 +++++++++------ 5 files changed, 14 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 61362e6fa9ec..720fac3435d5 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -652,7 +652,8 @@ einval: li v0, -EINVAL sys sys_vmsplice 4 sys sys_move_pages 6 sys sys_set_robust_list 2 - sys sys_get_robust_list 3 + sys sys_get_robust_list 3 /* 4310 */ + sys sys_ni_syscall 0 .endm /* We pre-compute the number of _instruction_ bytes needed to diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 6c7b5ed0ea6e..3a34f62c8b1b 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -468,3 +468,4 @@ sys_call_table: PTR sys_move_pages PTR sys_set_robust_list PTR sys_get_robust_list + PTR sys_ni_syscall /* 5270 */ diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 8c453f8ffea6..67b92a1d6c72 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -394,3 +394,4 @@ EXPORT(sysn32_call_table) PTR sys_move_pages PTR compat_sys_set_robust_list PTR compat_sys_get_robust_list + PTR sys_ni_syscall diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index d105917d6d93..2875c4a3fa58 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -516,4 +516,5 @@ sys_call_table: PTR compat_sys_move_pages PTR compat_sys_set_robust_list PTR compat_sys_get_robust_list /* 4310 */ + PTR sys_ni_syscall .size sys_call_table,.-sys_call_table diff --git a/include/asm-mips/unistd.h b/include/asm-mips/unistd.h index 685c91467e63..30240a445dbb 100644 --- a/include/asm-mips/unistd.h +++ b/include/asm-mips/unistd.h @@ -331,16 +331,17 @@ #define __NR_move_pages (__NR_Linux + 308) #define __NR_set_robust_list (__NR_Linux + 309) #define __NR_get_robust_list (__NR_Linux + 310) +#define __NR_kexec_load (__NR_Linux + 311) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 310 +#define __NR_Linux_syscalls 311 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 310 +#define __NR_O32_Linux_syscalls 311 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -618,16 +619,17 @@ #define __NR_move_pages (__NR_Linux + 267) #define __NR_set_robust_list (__NR_Linux + 268) #define __NR_get_robust_list (__NR_Linux + 269) +#define __NR_kexec_load (__NR_Linux + 270) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 269 +#define __NR_Linux_syscalls 270 #endif /* _MIPS_SIM == 
_MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 269 +#define __NR_64_Linux_syscalls 270 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -909,16 +911,17 @@ #define __NR_move_pages (__NR_Linux + 271) #define __NR_set_robust_list (__NR_Linux + 272) #define __NR_get_robust_list (__NR_Linux + 273) +#define __NR_kexec_load (__NR_Linux + 274) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 273 +#define __NR_Linux_syscalls 274 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define __NR_N32_Linux_syscalls 273 +#define __NR_N32_Linux_syscalls 274 #ifdef __KERNEL__ -- cgit v1.2.3 From d89e36d8df547fde2beaea82211954868da2282d Mon Sep 17 00:00:00 2001 From: Ralf Baechle <ralf@linux-mips.org> Date: Thu, 19 Oct 2006 14:21:47 +0100 Subject: [MIPS] Fix iounmap argument to const volatile. With the existing prototype the following code: const void __iomem *io = ioremap(); x = readb(io); iounmap(io); did result in a warning. Signed-off-by: Ralf Baechle <ralf@linux-mips.org> --- arch/mips/mm/ioremap.c | 2 +- include/asm-mips/io.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c index 3101d1db5592..cea7d0ea36e4 100644 --- a/arch/mips/mm/ioremap.c +++ b/arch/mips/mm/ioremap.c @@ -176,7 +176,7 @@ void __iomem * __ioremap(phys_t phys_addr, phys_t size, unsigned long flags) #define IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == CKSEG1) -void __iounmap(volatile void __iomem *addr) +void __iounmap(const volatile void __iomem *addr) { struct vm_struct *p; diff --git a/include/asm-mips/io.h b/include/asm-mips/io.h index c2d124badbe5..bc5f3c53155f 100644 --- a/include/asm-mips/io.h +++ b/include/asm-mips/io.h @@ -172,7 +172,7 @@ extern unsigned long isa_slot_offset; #define page_to_phys(page) ((dma_addr_t)page_to_pfn(page) << PAGE_SHIFT) extern void __iomem * __ioremap(phys_t offset, phys_t size, unsigned long flags); -extern void __iounmap(volatile void __iomem *addr); +extern void __iounmap(const volatile void __iomem *addr); static inline void __iomem * __ioremap_mode(phys_t offset, unsigned long size, unsigned long flags) @@ -279,7 +279,7 @@ static inline void __iomem * __ioremap_mode(phys_t offset, unsigned long size, #define ioremap_uncached_accelerated(offset, size) \ __ioremap_mode((offset), (size), _CACHE_UNCACHED_ACCELERATED) -static inline void iounmap(volatile void __iomem *addr) +static inline void iounmap(const volatile void __iomem *addr) { #define __IS_KSEG1(addr) (((unsigned long)(addr) & ~0x1fffffffUL) == CKSEG1) -- cgit v1.2.3 From 62752ee198dca9209b7dee504763e51b11e9e0ca Mon Sep 17 00:00:00 2001 From: Mark Fasheh <mark.fasheh@oracle.com> Date: Tue, 17 Oct 2006 10:31:38 +0200 Subject: [PATCH] Take i_mutex in splice_from_pipe() The splice_actor may be calling ->prepare_write() and ->commit_write(). We want i_mutex on the inode being written to before calling those so that we don't race i_size changes. The double locking behavior is done elsewhere in splice.c, and if we eventually want _nolock variants of generic_file_splice_write(), fs modules might have to replicate the nasty locking code. We introduce inode_double_lock() and inode_double_unlock() to consolidate the locking rules into one set of functions. 
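For illustration, this is roughly what a caller of the new helpers looks like; a minimal sketch only (error handling and the surrounding splice logic are elided), not the full splice_from_pipe() conversion:

	struct inode *inode = out->f_mapping->host;

	/*
	 * The helpers tolerate either inode being NULL or both being the
	 * same inode, and order the two i_mutex acquisitions by inode
	 * address with the I_MUTEX_PARENT/I_MUTEX_CHILD annotations.
	 */
	inode_double_lock(inode, pipe->inode);
	/* ... move data with both i_mutexes held ... */
	inode_double_unlock(inode, pipe->inode);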
Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com> Signed-off-by: Jens Axboe <jens.axboe@oracle.com> --- fs/inode.c | 36 ++++++++++++++++++++++++++++++++++++ fs/splice.c | 24 +++++++++++------------- include/linux/fs.h | 3 +++ 3 files changed, 50 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/inode.c b/fs/inode.c index d9a21d122926..26cdb115ce67 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1306,6 +1306,42 @@ void wake_up_inode(struct inode *inode) wake_up_bit(&inode->i_state, __I_LOCK); } +/* + * We rarely want to lock two inodes that do not have a parent/child + * relationship (such as directory, child inode) simultaneously. The + * vast majority of file systems should be able to get along fine + * without this. Do not use these functions except as a last resort. + */ +void inode_double_lock(struct inode *inode1, struct inode *inode2) +{ + if (inode1 == NULL || inode2 == NULL || inode1 == inode2) { + if (inode1) + mutex_lock(&inode1->i_mutex); + else if (inode2) + mutex_lock(&inode2->i_mutex); + return; + } + + if (inode1 < inode2) { + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_CHILD); + } else { + mutex_lock_nested(&inode2->i_mutex, I_MUTEX_PARENT); + mutex_lock_nested(&inode1->i_mutex, I_MUTEX_CHILD); + } +} +EXPORT_SYMBOL(inode_double_lock); + +void inode_double_unlock(struct inode *inode1, struct inode *inode2) +{ + if (inode1) + mutex_unlock(&inode1->i_mutex); + + if (inode2 && inode2 != inode1) + mutex_unlock(&inode2->i_mutex); +} +EXPORT_SYMBOL(inode_double_unlock); + static __initdata unsigned long ihash_entries; static int __init set_ihash_entries(char *str) { diff --git a/fs/splice.c b/fs/splice.c index a567010b62ac..c1072b6940c3 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -713,6 +713,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, { int ret, do_wakeup, err; struct splice_desc sd; + struct inode *inode = out->f_mapping->host; ret = 0; do_wakeup = 0; @@ -722,8 +723,13 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, sd.file = out; sd.pos = *ppos; - if (pipe->inode) - mutex_lock(&pipe->inode->i_mutex); + /* + * The actor worker might be calling ->prepare_write and + * ->commit_write. Most of the time, these expect i_mutex to + * be held. Since this may result in an ABBA deadlock with + * pipe->inode, we have to order lock acquiry here. + */ + inode_double_lock(inode, pipe->inode); for (;;) { if (pipe->nrbufs) { @@ -797,8 +803,7 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, pipe_wait(pipe); } - if (pipe->inode) - mutex_unlock(&pipe->inode->i_mutex); + inode_double_unlock(inode, pipe->inode); if (do_wakeup) { smp_mb(); @@ -1400,13 +1405,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, * grabbing by inode address. Otherwise two different processes * could deadlock (one doing tee from A -> B, the other from B -> A). 
*/ - if (ipipe->inode < opipe->inode) { - mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_CHILD); - } else { - mutex_lock_nested(&opipe->inode->i_mutex, I_MUTEX_PARENT); - mutex_lock_nested(&ipipe->inode->i_mutex, I_MUTEX_CHILD); - } + inode_double_lock(ipipe->inode, opipe->inode); do { if (!opipe->readers) { @@ -1450,8 +1449,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, i++; } while (len); - mutex_unlock(&ipipe->inode->i_mutex); - mutex_unlock(&opipe->inode->i_mutex); + inode_double_unlock(ipipe->inode, opipe->inode); /* * If we put data in the output pipe, wakeup any potential readers. diff --git a/include/linux/fs.h b/include/linux/fs.h index 661c7c572149..853a02f23936 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -623,6 +623,9 @@ enum inode_i_mutex_lock_class I_MUTEX_QUOTA }; +extern void inode_double_lock(struct inode *inode1, struct inode *inode2); +extern void inode_double_unlock(struct inode *inode1, struct inode *inode2); + /* * NOTE: in a 32bit arch with a preemptable kernel and * an UP compile the i_size_read/write must be atomic -- cgit v1.2.3 From 6da61809822c22634a3de2dcb3c60283b836a88a Mon Sep 17 00:00:00 2001 From: Mark Fasheh <mark.fasheh@oracle.com> Date: Tue, 17 Oct 2006 18:43:07 +0200 Subject: [PATCH] Introduce generic_file_splice_write_nolock() This allows file systems to manage their own i_mutex locking while still re-using the generic_file_splice_write() logic. OCFS2 in particular wants this so that it can order cluster locks within i_mutex. Signed-off-by: Mark Fasheh <mark.fasheh@oracle.com> Signed-off-by: Jens Axboe <jens.axboe@oracle.com> --- fs/splice.c | 80 ++++++++++++++++++++++++++++++++++++++++++++---------- include/linux/fs.h | 2 ++ 2 files changed, 68 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/fs/splice.c b/fs/splice.c index c1072b6940c3..68e20e65c6e1 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -707,13 +707,12 @@ out_ret: * key here is the 'actor' worker passed in that actually moves the data * to the wanted destination. See pipe_to_file/pipe_to_sendpage above. */ -ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, - loff_t *ppos, size_t len, unsigned int flags, - splice_actor *actor) +static ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, + struct file *out, loff_t *ppos, size_t len, + unsigned int flags, splice_actor *actor) { int ret, do_wakeup, err; struct splice_desc sd; - struct inode *inode = out->f_mapping->host; ret = 0; do_wakeup = 0; @@ -723,14 +722,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, sd.file = out; sd.pos = *ppos; - /* - * The actor worker might be calling ->prepare_write and - * ->commit_write. Most of the time, these expect i_mutex to - * be held. Since this may result in an ABBA deadlock with - * pipe->inode, we have to order lock acquiry here. 
- */ - inode_double_lock(inode, pipe->inode); - for (;;) { if (pipe->nrbufs) { struct pipe_buffer *buf = pipe->bufs + pipe->curbuf; @@ -803,8 +794,6 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, pipe_wait(pipe); } - inode_double_unlock(inode, pipe->inode); - if (do_wakeup) { smp_mb(); if (waitqueue_active(&pipe->wait)) @@ -815,6 +804,69 @@ ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, return ret; } +ssize_t splice_from_pipe(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags, + splice_actor *actor) +{ + ssize_t ret; + struct inode *inode = out->f_mapping->host; + + /* + * The actor worker might be calling ->prepare_write and + * ->commit_write. Most of the time, these expect i_mutex to + * be held. Since this may result in an ABBA deadlock with + * pipe->inode, we have to order lock acquiry here. + */ + inode_double_lock(inode, pipe->inode); + ret = __splice_from_pipe(pipe, out, ppos, len, flags, actor); + inode_double_unlock(inode, pipe->inode); + + return ret; +} + +/** + * generic_file_splice_write_nolock - generic_file_splice_write without mutexes + * @pipe: pipe info + * @out: file to write to + * @len: number of bytes to splice + * @flags: splice modifier flags + * + * Will either move or copy pages (determined by @flags options) from + * the given pipe inode to the given file. The caller is responsible + * for acquiring i_mutex on both inodes. + * + */ +ssize_t +generic_file_splice_write_nolock(struct pipe_inode_info *pipe, struct file *out, + loff_t *ppos, size_t len, unsigned int flags) +{ + struct address_space *mapping = out->f_mapping; + struct inode *inode = mapping->host; + ssize_t ret; + int err; + + ret = __splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file); + if (ret > 0) { + *ppos += ret; + + /* + * If file or inode is SYNC and we actually wrote some data, + * sync it. + */ + if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) { + err = generic_osync_inode(inode, mapping, + OSYNC_METADATA|OSYNC_DATA); + + if (err) + ret = err; + } + } + + return ret; +} + +EXPORT_SYMBOL(generic_file_splice_write_nolock); + /** * generic_file_splice_write - splice data from a pipe to a file * @pipe: pipe info diff --git a/include/linux/fs.h b/include/linux/fs.h index 853a02f23936..d695ba2346a3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1758,6 +1758,8 @@ extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); +extern ssize_t generic_file_splice_write_nolock(struct pipe_inode_info *, + struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, struct file *out, loff_t *, size_t len, unsigned int flags); extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, -- cgit v1.2.3 From 01de85e057328ecbef36e108673b1e81059d54c1 Mon Sep 17 00:00:00 2001 From: Jens Axboe <jens.axboe@oracle.com> Date: Tue, 17 Oct 2006 19:50:36 +0200 Subject: [PATCH] Add lockless helpers for remove_suid() Right now users have to grab i_mutex before calling remove_suid(), in the unlikely event that a call to ->setattr() may be needed. Split up the function into two parts: - One to check if we need to remove suid - One to actually remove it The first we can call lockless. 
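The intended calling pattern, sketched under the assumption that the caller does not already hold i_mutex (error handling abbreviated):

	int err = 0;
	int kill = should_remove_suid(dentry);	/* lockless check */

	if (unlikely(kill)) {
		/* i_mutex is only needed if ->setattr() must actually run */
		mutex_lock(&dentry->d_inode->i_mutex);
		err = __remove_suid(dentry, kill);
		mutex_unlock(&dentry->d_inode->i_mutex);
	}

remove_suid() itself keeps its old behaviour as a combination of the two.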
Signed-off-by: Jens Axboe <jens.axboe@oracle.com> --- include/linux/fs.h | 2 ++ mm/filemap.c | 30 ++++++++++++++++++++++-------- 2 files changed, 24 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index d695ba2346a3..2fe6e3f900ba 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1712,6 +1712,8 @@ extern void __iget(struct inode * inode); extern void clear_inode(struct inode *); extern void destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); +extern int __remove_suid(struct dentry *, int); +extern int should_remove_suid(struct dentry *); extern int remove_suid(struct dentry *); extern void remove_dquot_ref(struct super_block *, int, struct list_head *); diff --git a/mm/filemap.c b/mm/filemap.c index 3464b681f844..7c7addb9333c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1884,11 +1884,10 @@ repeat: * if suid or (sgid and xgrp) * remove privs */ -int remove_suid(struct dentry *dentry) +int should_remove_suid(struct dentry *dentry) { mode_t mode = dentry->d_inode->i_mode; int kill = 0; - int result = 0; /* suid always must be killed */ if (unlikely(mode & S_ISUID)) @@ -1901,13 +1900,28 @@ if (unlikely((mode & S_ISGID) && (mode & S_IXGRP))) kill |= ATTR_KILL_SGID; - if (unlikely(kill && !capable(CAP_FSETID))) { - struct iattr newattrs; + if (unlikely(kill && !capable(CAP_FSETID))) + return kill; - newattrs.ia_valid = ATTR_FORCE | kill; - result = notify_change(dentry, &newattrs); - } - return result; + return 0; +} + +int __remove_suid(struct dentry *dentry, int kill) +{ + struct iattr newattrs; + + newattrs.ia_valid = ATTR_FORCE | kill; + return notify_change(dentry, &newattrs); +} + +int remove_suid(struct dentry *dentry) +{ + int kill = should_remove_suid(dentry); + + if (unlikely(kill)) + return __remove_suid(dentry, kill); + + return 0; } EXPORT_SYMBOL(remove_suid); -- cgit v1.2.3 From 66a740572d7bcb18469e71cb014bfed3ff75a773 Mon Sep 17 00:00:00 2001 From: Paul Mundt <lethal@linux-sh.org> Date: Fri, 20 Oct 2006 15:30:55 +0900 Subject: sh: Convert INTC2 to IRQ table registration. Currently the INTC2 code contains a fixed IRQ table that it iterates through to set the handler type; we move this into the CPU subtype setup code instead and allow for submitting the table that way. This drops the ST40 tables, as nothing has been happening with those processors, while converting the only existing users to use the new table directly (SH7760 and SH7780). Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- arch/sh/kernel/cpu/irq/intc2.c | 164 +++++----------------------------- arch/sh/kernel/cpu/sh4/setup-sh7760.c | 63 +++++++++++++ arch/sh/kernel/cpu/sh4/setup-sh7780.c | 27 ++++++ include/asm-sh/irq-sh7780.h | 10 --- include/asm-sh/irq.h | 2 +- include/asm-sh/r7780rp.h | 2 - 6 files changed, 115 insertions(+), 153 deletions(-) (limited to 'include') diff --git a/arch/sh/kernel/cpu/irq/intc2.c b/arch/sh/kernel/cpu/irq/intc2.c index 212884adca03..74ca576a7ce5 100644 --- a/arch/sh/kernel/cpu/irq/intc2.c +++ b/arch/sh/kernel/cpu/irq/intc2.c @@ -11,10 +11,9 @@ * Hitachi 7751, the STM ST40 STB1, SH7760, and SH7780. 
*/ #include <linux/kernel.h> -#include <linux/init.h> #include <linux/irq.h> +#include <linux/io.h> #include <asm/system.h> -#include <asm/io.h> static void disable_intc2_irq(unsigned int irq) { @@ -45,151 +44,36 @@ static struct irq_chip intc2_irq_chip = { * PIO1 which is INTPRI00[19,16] and INTMSK00[13] * would be: ^ ^ ^ ^ * | | | | - * make_intc2_irq(84, 0, 16, 0, 13); + * { 84, 0, 16, 0, 13 }, + * + * in the intc2_data table. */ -void make_intc2_irq(struct intc2_data *p) +void make_intc2_irq(struct intc2_data *table, unsigned int nr_irqs) { - unsigned int flags; - unsigned long ipr; - - disable_irq_nosync(p->irq); - - /* Set the priority level */ - local_irq_save(flags); - - ipr = ctrl_inl(INTC2_BASE + INTC2_INTPRI_OFFSET + p->ipr_offset); - ipr &= ~(0xf << p->ipr_shift); - ipr |= p->priority << p->ipr_shift; - ctrl_outl(ipr, INTC2_BASE + INTC2_INTPRI_OFFSET + p->ipr_offset); - - local_irq_restore(flags); + int i; - set_irq_chip_and_handler_name(p->irq, &intc2_irq_chip, - handle_level_irq, "level"); - set_irq_chip_data(p->irq, p); + for (i = 0; i < nr_irqs; i++) { + unsigned long ipr, flags; + struct intc2_data *p = table + i; - enable_intc2_irq(p->irq); -} + disable_irq_nosync(p->irq); -static struct intc2_data intc2_irq_table[] = { -#if defined(CONFIG_CPU_SUBTYPE_ST40) - {64, 0, 0, 0, 0, 13}, /* PCI serr */ - {65, 0, 4, 0, 1, 13}, /* PCI err */ - {66, 0, 4, 0, 2, 13}, /* PCI ad */ - {67, 0, 4, 0, 3, 13}, /* PCI pwd down */ - {72, 0, 8, 0, 5, 13}, /* DMAC INT0 */ - {73, 0, 8, 0, 6, 13}, /* DMAC INT1 */ - {74, 0, 8, 0, 7, 13}, /* DMAC INT2 */ - {75, 0, 8, 0, 8, 13}, /* DMAC INT3 */ - {76, 0, 8, 0, 9, 13}, /* DMAC INT4 */ - {78, 0, 8, 0, 11, 13}, /* DMAC ERR */ - {80, 0, 12, 0, 12, 13}, /* PIO0 */ - {84, 0, 16, 0, 13, 13}, /* PIO1 */ - {88, 0, 20, 0, 14, 13}, /* PIO2 */ - {112, 4, 0, 4, 0, 13}, /* Mailbox */ - #ifdef CONFIG_CPU_SUBTYPE_ST40GX1 - {116, 4, 4, 4, 4, 13}, /* SSC0 */ - {120, 4, 8, 4, 8, 13}, /* IR Blaster */ - {124, 4, 12, 4, 12, 13}, /* USB host */ - {128, 4, 16, 4, 16, 13}, /* Video processor BLITTER */ - {132, 4, 20, 4, 20, 13}, /* UART0 */ - {134, 4, 20, 4, 22, 13}, /* UART2 */ - {136, 4, 24, 4, 24, 13}, /* IO_PIO0 */ - {140, 4, 28, 4, 28, 13}, /* EMPI */ - {144, 8, 0, 8, 0, 13}, /* MAFE */ - {148, 8, 4, 8, 4, 13}, /* PWM */ - {152, 8, 8, 8, 8, 13}, /* SSC1 */ - {156, 8, 12, 8, 12, 13}, /* IO_PIO1 */ - {160, 8, 16, 8, 16, 13}, /* USB target */ - {164, 8, 20, 8, 20, 13}, /* UART1 */ - {168, 8, 24, 8, 24, 13}, /* Teletext */ - {172, 8, 28, 8, 28, 13}, /* VideoSync VTG */ - {173, 8, 28, 8, 29, 13}, /* VideoSync DVP0 */ - {174, 8, 28, 8, 30, 13}, /* VideoSync DVP1 */ -#endif -#elif defined(CONFIG_CPU_SUBTYPE_SH7760) -/* - * SH7760 INTC2-Style interrupts, vectors IRQ48-111 INTEVT 0x800-0xFE0 - */ - /* INTPRIO0 | INTMSK0 */ - {48, 0, 28, 0, 31, 3}, /* IRQ 4 */ - {49, 0, 24, 0, 30, 3}, /* IRQ 3 */ - {50, 0, 20, 0, 29, 3}, /* IRQ 2 */ - {51, 0, 16, 0, 28, 3}, /* IRQ 1 */ - /* 52-55 (INTEVT 0x880-0x8E0) unused/reserved */ - /* INTPRIO4 | INTMSK0 */ - {56, 4, 28, 0, 25, 3}, /* HCAN2_CHAN0 */ - {57, 4, 24, 0, 24, 3}, /* HCAN2_CHAN1 */ - {58, 4, 20, 0, 23, 3}, /* I2S_CHAN0 */ - {59, 4, 16, 0, 22, 3}, /* I2S_CHAN1 */ - {60, 4, 12, 0, 21, 3}, /* AC97_CHAN0 */ - {61, 4, 8, 0, 20, 3}, /* AC97_CHAN1 */ - {62, 4, 4, 0, 19, 3}, /* I2C_CHAN0 */ - {63, 4, 0, 0, 18, 3}, /* I2C_CHAN1 */ - /* INTPRIO8 | INTMSK0 */ - {52, 8, 16, 0, 11, 3}, /* SCIF0_ERI_IRQ */ - {53, 8, 16, 0, 10, 3}, /* SCIF0_RXI_IRQ */ - {54, 8, 16, 0, 9, 3}, /* SCIF0_BRI_IRQ */ - {55, 8, 16, 0, 8, 3}, /* SCIF0_TXI_IRQ */ - 
{64, 8, 28, 0, 17, 3}, /* USBHI_IRQ */ - {65, 8, 24, 0, 16, 3}, /* LCDC */ - /* 66, 67 unused */ - {68, 8, 20, 0, 14, 13}, /* DMABRGI0_IRQ */ - {69, 8, 20, 0, 13, 13}, /* DMABRGI1_IRQ */ - {70, 8, 20, 0, 12, 13}, /* DMABRGI2_IRQ */ - /* 71 unused */ - {72, 8, 12, 0, 7, 3}, /* SCIF1_ERI_IRQ */ - {73, 8, 12, 0, 6, 3}, /* SCIF1_RXI_IRQ */ - {74, 8, 12, 0, 5, 3}, /* SCIF1_BRI_IRQ */ - {75, 8, 12, 0, 4, 3}, /* SCIF1_TXI_IRQ */ - {76, 8, 8, 0, 3, 3}, /* SCIF2_ERI_IRQ */ - {77, 8, 8, 0, 2, 3}, /* SCIF2_RXI_IRQ */ - {78, 8, 8, 0, 1, 3}, /* SCIF2_BRI_IRQ */ - {79, 8, 8, 0, 0, 3}, /* SCIF2_TXI_IRQ */ - /* | INTMSK4 */ - {80, 8, 4, 4, 23, 3}, /* SIM_ERI */ - {81, 8, 4, 4, 22, 3}, /* SIM_RXI */ - {82, 8, 4, 4, 21, 3}, /* SIM_TXI */ - {83, 8, 4, 4, 20, 3}, /* SIM_TEI */ - {84, 8, 0, 4, 19, 3}, /* HSPII */ - /* INTPRIOC | INTMSK4 */ - /* 85-87 unused/reserved */ - {88, 12, 20, 4, 18, 3}, /* MMCI0 */ - {89, 12, 20, 4, 17, 3}, /* MMCI1 */ - {90, 12, 20, 4, 16, 3}, /* MMCI2 */ - {91, 12, 20, 4, 15, 3}, /* MMCI3 */ - {92, 12, 12, 4, 6, 3}, /* MFI (unsure, bug? in my 7760 manual*/ - /* 93-107 reserved/undocumented */ - {108,12, 4, 4, 1, 3}, /* ADC */ - {109,12, 0, 4, 0, 3}, /* CMTI */ - /* 110-111 reserved/unused */ -#elif defined(CONFIG_CPU_SUBTYPE_SH7780) - { TIMER_IRQ, 0, 24, 0, INTC_TMU0_MSK, 2}, - { 21, 1, 0, 0, INTC_RTC_MSK, TIMER_PRIORITY }, - { 22, 1, 1, 0, INTC_RTC_MSK, TIMER_PRIORITY }, - { 23, 1, 2, 0, INTC_RTC_MSK, TIMER_PRIORITY }, - { SCIF0_ERI_IRQ, 8, 24, 0, INTC_SCIF0_MSK, SCIF0_PRIORITY }, - { SCIF0_RXI_IRQ, 8, 24, 0, INTC_SCIF0_MSK, SCIF0_PRIORITY }, - { SCIF0_BRI_IRQ, 8, 24, 0, INTC_SCIF0_MSK, SCIF0_PRIORITY }, - { SCIF0_TXI_IRQ, 8, 24, 0, INTC_SCIF0_MSK, SCIF0_PRIORITY }, + /* Set the priority level */ + local_irq_save(flags); - { SCIF1_ERI_IRQ, 8, 16, 0, INTC_SCIF1_MSK, SCIF1_PRIORITY }, - { SCIF1_RXI_IRQ, 8, 16, 0, INTC_SCIF1_MSK, SCIF1_PRIORITY }, - { SCIF1_BRI_IRQ, 8, 16, 0, INTC_SCIF1_MSK, SCIF1_PRIORITY }, - { SCIF1_TXI_IRQ, 8, 16, 0, INTC_SCIF1_MSK, SCIF1_PRIORITY }, + ipr = ctrl_inl(INTC2_BASE + INTC2_INTPRI_OFFSET + + p->ipr_offset); + ipr &= ~(0xf << p->ipr_shift); + ipr |= p->priority << p->ipr_shift; + ctrl_outl(ipr, INTC2_BASE + INTC2_INTPRI_OFFSET + + p->ipr_offset); - { PCIC0_IRQ, 0x10, 8, 0, INTC_PCIC0_MSK, PCIC0_PRIORITY }, - { PCIC1_IRQ, 0x10, 0, 0, INTC_PCIC1_MSK, PCIC1_PRIORITY }, - { PCIC2_IRQ, 0x14, 24, 0, INTC_PCIC2_MSK, PCIC2_PRIORITY }, - { PCIC3_IRQ, 0x14, 16, 0, INTC_PCIC3_MSK, PCIC3_PRIORITY }, - { PCIC4_IRQ, 0x14, 8, 0, INTC_PCIC4_MSK, PCIC4_PRIORITY }, -#endif -}; + local_irq_restore(flags); -void __init init_IRQ_intc2(void) -{ - int i; + set_irq_chip_and_handler_name(p->irq, &intc2_irq_chip, + handle_level_irq, "level"); + set_irq_chip_data(p->irq, p); - for (i = 0; i < ARRAY_SIZE(intc2_irq_table); i++) - make_intc2_irq(intc2_irq_table + i); + enable_intc2_irq(p->irq); + } } diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7760.c b/arch/sh/kernel/cpu/sh4/setup-sh7760.c index 97f1c9af35d6..07e5377bf550 100644 --- a/arch/sh/kernel/cpu/sh4/setup-sh7760.c +++ b/arch/sh/kernel/cpu/sh4/setup-sh7760.c @@ -51,3 +51,66 @@ static int __init sh7760_devices_setup(void) ARRAY_SIZE(sh7760_devices)); } __initcall(sh7760_devices_setup); + +/* + * SH7760 INTC2-Style interrupts, vectors IRQ48-111 INTEVT 0x800-0xFE0 + */ +static struct intc2_data intc2_irq_table[] = { + /* INTPRIO0 | INTMSK0 */ + {48, 0, 28, 0, 31, 3}, /* IRQ 4 */ + {49, 0, 24, 0, 30, 3}, /* IRQ 3 */ + {50, 0, 20, 0, 29, 3}, /* IRQ 2 */ + {51, 0, 16, 0, 28, 3}, /* IRQ 1 */ + /* 52-55 (INTEVT 0x880-0x8E0) 
unused/reserved */ + /* INTPRIO4 | INTMSK0 */ + {56, 4, 28, 0, 25, 3}, /* HCAN2_CHAN0 */ + {57, 4, 24, 0, 24, 3}, /* HCAN2_CHAN1 */ + {58, 4, 20, 0, 23, 3}, /* I2S_CHAN0 */ + {59, 4, 16, 0, 22, 3}, /* I2S_CHAN1 */ + {60, 4, 12, 0, 21, 3}, /* AC97_CHAN0 */ + {61, 4, 8, 0, 20, 3}, /* AC97_CHAN1 */ + {62, 4, 4, 0, 19, 3}, /* I2C_CHAN0 */ + {63, 4, 0, 0, 18, 3}, /* I2C_CHAN1 */ + /* INTPRIO8 | INTMSK0 */ + {52, 8, 16, 0, 11, 3}, /* SCIF0_ERI_IRQ */ + {53, 8, 16, 0, 10, 3}, /* SCIF0_RXI_IRQ */ + {54, 8, 16, 0, 9, 3}, /* SCIF0_BRI_IRQ */ + {55, 8, 16, 0, 8, 3}, /* SCIF0_TXI_IRQ */ + {64, 8, 28, 0, 17, 3}, /* USBHI_IRQ */ + {65, 8, 24, 0, 16, 3}, /* LCDC */ + /* 66, 67 unused */ + {68, 8, 20, 0, 14, 13}, /* DMABRGI0_IRQ */ + {69, 8, 20, 0, 13, 13}, /* DMABRGI1_IRQ */ + {70, 8, 20, 0, 12, 13}, /* DMABRGI2_IRQ */ + /* 71 unused */ + {72, 8, 12, 0, 7, 3}, /* SCIF1_ERI_IRQ */ + {73, 8, 12, 0, 6, 3}, /* SCIF1_RXI_IRQ */ + {74, 8, 12, 0, 5, 3}, /* SCIF1_BRI_IRQ */ + {75, 8, 12, 0, 4, 3}, /* SCIF1_TXI_IRQ */ + {76, 8, 8, 0, 3, 3}, /* SCIF2_ERI_IRQ */ + {77, 8, 8, 0, 2, 3}, /* SCIF2_RXI_IRQ */ + {78, 8, 8, 0, 1, 3}, /* SCIF2_BRI_IRQ */ + {79, 8, 8, 0, 0, 3}, /* SCIF2_TXI_IRQ */ + /* | INTMSK4 */ + {80, 8, 4, 4, 23, 3}, /* SIM_ERI */ + {81, 8, 4, 4, 22, 3}, /* SIM_RXI */ + {82, 8, 4, 4, 21, 3}, /* SIM_TXI */ + {83, 8, 4, 4, 20, 3}, /* SIM_TEI */ + {84, 8, 0, 4, 19, 3}, /* HSPII */ + /* INTPRIOC | INTMSK4 */ + /* 85-87 unused/reserved */ + {88, 12, 20, 4, 18, 3}, /* MMCI0 */ + {89, 12, 20, 4, 17, 3}, /* MMCI1 */ + {90, 12, 20, 4, 16, 3}, /* MMCI2 */ + {91, 12, 20, 4, 15, 3}, /* MMCI3 */ + {92, 12, 12, 4, 6, 3}, /* MFI (unsure, bug? in my 7760 manual*/ + /* 93-107 reserved/undocumented */ + {108,12, 4, 4, 1, 3}, /* ADC */ + {109,12, 0, 4, 0, 3}, /* CMTI */ + /* 110-111 reserved/unused */ +}; + +void __init init_IRQ_intc2(void) +{ + make_intc2_irq(intc2_irq_table, ARRAY_SIZE(intc2_irq_table)); +} diff --git a/arch/sh/kernel/cpu/sh4/setup-sh7780.c b/arch/sh/kernel/cpu/sh4/setup-sh7780.c index 72493f259edc..814ddb226531 100644 --- a/arch/sh/kernel/cpu/sh4/setup-sh7780.c +++ b/arch/sh/kernel/cpu/sh4/setup-sh7780.c @@ -77,3 +77,30 @@ static int __init sh7780_devices_setup(void) ARRAY_SIZE(sh7780_devices)); } __initcall(sh7780_devices_setup); + +static struct intc2_data intc2_irq_table[] = { + { TIMER_IRQ, 0, 24, 0, INTC_TMU0_MSK, 2 }, + { 21, 1, 0, 0, INTC_RTC_MSK, TIMER_PRIORITY }, + { 22, 1, 1, 0, INTC_RTC_MSK, TIMER_PRIORITY }, + { 23, 1, 2, 0, INTC_RTC_MSK, TIMER_PRIORITY }, + { SCIF0_ERI_IRQ, 8, 24, 0, INTC_SCIF0_MSK, SCIF0_PRIORITY }, + { SCIF0_RXI_IRQ, 8, 24, 0, INTC_SCIF0_MSK, SCIF0_PRIORITY }, + { SCIF0_BRI_IRQ, 8, 24, 0, INTC_SCIF0_MSK, SCIF0_PRIORITY }, + { SCIF0_TXI_IRQ, 8, 24, 0, INTC_SCIF0_MSK, SCIF0_PRIORITY }, + + { SCIF1_ERI_IRQ, 8, 16, 0, INTC_SCIF1_MSK, SCIF1_PRIORITY }, + { SCIF1_RXI_IRQ, 8, 16, 0, INTC_SCIF1_MSK, SCIF1_PRIORITY }, + { SCIF1_BRI_IRQ, 8, 16, 0, INTC_SCIF1_MSK, SCIF1_PRIORITY }, + { SCIF1_TXI_IRQ, 8, 16, 0, INTC_SCIF1_MSK, SCIF1_PRIORITY }, + + { PCIC0_IRQ, 0x10, 8, 0, INTC_PCIC0_MSK, PCIC0_PRIORITY }, + { PCIC1_IRQ, 0x10, 0, 0, INTC_PCIC1_MSK, PCIC1_PRIORITY }, + { PCIC2_IRQ, 0x14, 24, 0, INTC_PCIC2_MSK, PCIC2_PRIORITY }, + { PCIC3_IRQ, 0x14, 16, 0, INTC_PCIC3_MSK, PCIC3_PRIORITY }, + { PCIC4_IRQ, 0x14, 8, 0, INTC_PCIC4_MSK, PCIC4_PRIORITY }, +}; + +void __init init_IRQ_intc2(void) +{ + make_intc2_irq(intc2_irq_table, ARRAY_SIZE(intc2_irq_table)); +} diff --git a/include/asm-sh/irq-sh7780.h b/include/asm-sh/irq-sh7780.h index 895c5780e454..19912ae6a7f7 100644 --- 
a/include/asm-sh/irq-sh7780.h +++ b/include/asm-sh/irq-sh7780.h @@ -6,16 +6,6 @@ * * Copyright (C) 2004 Takashi SHUDO <shudo@hitachi-ul.co.jp> */ - -#ifdef CONFIG_IDE -# ifndef IRQ_CFCARD -# define IRQ_CFCARD 14 -# endif -# ifndef IRQ_PCMCIA -# define IRQ_PCMCIA 15 -# endif -#endif - #define INTC_BASE 0xffd00000 #define INTC_ICR0 (INTC_BASE+0x0) #define INTC_ICR1 (INTC_BASE+0x1c) diff --git a/include/asm-sh/irq.h b/include/asm-sh/irq.h index 1837bdbf8e54..7596ab83e0d4 100644 --- a/include/asm-sh/irq.h +++ b/include/asm-sh/irq.h @@ -685,7 +685,7 @@ struct intc2_data { unsigned char priority; }; -void make_intc2_irq(struct intc2_data *); +void make_intc2_irq(struct intc2_data *, unsigned int nr_irqs); void init_IRQ_intc2(void); #endif diff --git a/include/asm-sh/r7780rp.h b/include/asm-sh/r7780rp.h index ddd67b60bb43..c18f648a7995 100644 --- a/include/asm-sh/r7780rp.h +++ b/include/asm-sh/r7780rp.h @@ -81,7 +81,6 @@ #define IRQ_PCISLOT2 66 /* PCI Slot #2 IRQ */ #define IRQ_PCISLOT3 67 /* PCI Slot #3 IRQ */ #define IRQ_PCISLOT4 68 /* PCI Slot #4 IRQ */ -#define IRQ_CFCARD 1 /* CF Card IRQ */ // #define IRQ_CFINST 0 /* CF Card Insert IRQ */ #define IRQ_TP 2 /* Touch Panel IRQ */ #define IRQ_SCI1 3 /* SCI1 IRQ */ @@ -153,7 +152,6 @@ #define IRQ_PCISLOT2 1 /* PCI Slot #2 IRQ */ #define IRQ_PCISLOT3 2 /* PCI Slot #3 IRQ */ #define IRQ_PCISLOT4 3 /* PCI Slot #4 IRQ */ -#define IRQ_CFCARD 4 /* CF Card IRQ */ #define IRQ_CFINST 5 /* CF Card Insert IRQ */ #define IRQ_M66596 6 /* M66596 IRQ */ #define IRQ_SDCARD 7 /* SD Card IRQ */ -- cgit v1.2.3 From 78d79423179c0efc7ec34b55d287e7be4ca07da6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet <dada1@cosmosbay.com> Date: Fri, 20 Oct 2006 00:28:35 -0700 Subject: [IPV4] inet_peer: Group together avl_left, avl_right, v4daddr to speedup lookups on some CPUS Lot of routers/embedded devices still use CPUS with 16/32 bytes cache lines. (486, Pentium, ... PIII) It makes sense to group together fields used at lookup time so they fit in one cache line. This reduce cache footprint and speedup lookups. Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/inetpeer.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index f13cc0c2b163..aa10a8178e70 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -17,14 +17,15 @@ struct inet_peer { + /* group together avl_left,avl_right,v4daddr to speedup lookups */ struct inet_peer *avl_left, *avl_right; + __be32 v4daddr; /* peer's address */ + __u16 avl_height; + __u16 ip_id_count; /* IP ID for the next packet */ struct inet_peer *unused_next, **unused_prevp; __u32 dtime; /* the time of last use of not * referenced entries */ atomic_t refcnt; - __be32 v4daddr; /* peer's address */ - __u16 avl_height; - __u16 ip_id_count; /* IP ID for the next packet */ atomic_t rid; /* Frag reception counter */ __u32 tcp_ts; unsigned long tcp_ts_stamp; -- cgit v1.2.3 From 6cf431d77c3e917399a847e3a7ec239d5163056b Mon Sep 17 00:00:00 2001 From: David Woodhouse <dwmw2@infradead.org> Date: Fri, 20 Oct 2006 00:29:33 -0700 Subject: [SPARC]: Clean up asm-sparc/elf.h pollution in userspace. We don't need to export sparc_elf_hwcap() to userspace, and it doesn't build there. Remove it by moving it inside #ifdef __KERNEL__, along with some other things which don't need to be exported. 
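The resulting layout, reduced to a schematic (not the complete header): the ABI-visible register types stay unconditional, while anything with kernel-only dependencies moves behind the guard:

	/* pure ABI, visible to userspace */
	typedef unsigned long elf_greg_t;
	#define ELF_NGREG 38
	typedef elf_greg_t elf_gregset_t[ELF_NGREG];

	#ifdef __KERNEL__
	#include <asm/mbus.h>		/* kernel-internal headers */
	#include <asm/uaccess.h>
	#define SET_PERSONALITY(ex, ibcs2) \
		set_personality((ibcs2) ? PER_SVR4 : PER_LINUX)
	#endif /* __KERNEL__ */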
Signed-off-by: David Woodhouse <dwmw2@infradead.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/asm-sparc/elf.h | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/asm-sparc/elf.h b/include/asm-sparc/elf.h index 83a3dd15a6ed..aaf6ef40ee2f 100644 --- a/include/asm-sparc/elf.h +++ b/include/asm-sparc/elf.h @@ -8,11 +8,6 @@ #include <asm/ptrace.h> -#ifdef __KERNEL__ -#include <asm/mbus.h> -#include <asm/uaccess.h> -#endif - /* * Sparc section types */ @@ -77,6 +72,23 @@ typedef unsigned long elf_greg_t; #define ELF_NGREG 38 typedef elf_greg_t elf_gregset_t[ELF_NGREG]; +typedef struct { + union { + unsigned long pr_regs[32]; + double pr_dregs[16]; + } pr_fr; + unsigned long __unused; + unsigned long pr_fsr; + unsigned char pr_qcnt; + unsigned char pr_q_entrysize; + unsigned char pr_en; + unsigned int pr_q[64]; +} elf_fpregset_t; + +#ifdef __KERNEL__ +#include <asm/mbus.h> +#include <asm/uaccess.h> + /* Format is: * G0 --> G7 * O0 --> O7 @@ -99,20 +111,7 @@ do { unsigned long *dest = &(__elf_regs[0]); \ dest[34] = src->npc; \ dest[35] = src->y; \ dest[36] = dest[37] = 0; /* XXX */ \ -} while(0); /* Janitors: Don't touch this colon. */ - -typedef struct { - union { - unsigned long pr_regs[32]; - double pr_dregs[16]; - } pr_fr; - unsigned long __unused; - unsigned long pr_fsr; - unsigned char pr_qcnt; - unsigned char pr_q_entrysize; - unsigned char pr_en; - unsigned int pr_q[64]; -} elf_fpregset_t; +} while(0); /* Janitors: Don't touch this semicolon. */ #define ELF_CORE_COPY_TASK_REGS(__tsk, __elf_regs) \ ({ ELF_CORE_COPY_REGS((*(__elf_regs)), (__tsk)->thread.kregs); 1; }) @@ -165,8 +164,8 @@ typedef struct { #define ELF_PLATFORM (NULL) -#ifdef __KERNEL__ #define SET_PERSONALITY(ex, ibcs2) set_personality((ibcs2)?PER_SVR4:PER_LINUX) -#endif + +#endif /* __KERNEL__ */ #endif /* !(__ASMSPARC_ELF_H) */ -- cgit v1.2.3 From 79e2de4bc53d7ca2a8eedee49e4a92479b4b530e Mon Sep 17 00:00:00 2001 From: Thomas Maier <balagi@justmail.de> Date: Thu, 19 Oct 2006 23:28:15 -0700 Subject: [PATCH] export clear_queue_congested and set_queue_congested Export the clear_queue_congested() and set_queue_congested() functions located in ll_rw_blk.c The functions are renamed to blk_clear_queue_congested() and blk_set_queue_congested(). (needed in the pktcdvd driver's bio write congestion control) Signed-off-by: Thomas Maier <balagi@justmail.de> Cc: Peter Osterlund <petero2@telia.com> Cc: Jens Axboe <jens.axboe@oracle.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- block/ll_rw_blk.c | 20 ++++++++++---------- include/linux/blkdev.h | 2 ++ 2 files changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index c847e17e5caa..132a858ce2c5 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -117,7 +117,7 @@ static void blk_queue_congestion_threshold(struct request_queue *q) * congested queues, and wake up anyone who was waiting for requests to be * put back. */ -static void clear_queue_congested(request_queue_t *q, int rw) +void blk_clear_queue_congested(request_queue_t *q, int rw) { enum bdi_state bit; wait_queue_head_t *wqh = &congestion_wqh[rw]; @@ -128,18 +128,20 @@ static void clear_queue_congested(request_queue_t *q, int rw) if (waitqueue_active(wqh)) wake_up(wqh); } +EXPORT_SYMBOL(blk_clear_queue_congested); /* * A queue has just entered congestion. 
Flag that in the queue's VM-visible * state flags and increment the global gounter of congested queues. */ -static void set_queue_congested(request_queue_t *q, int rw) +void blk_set_queue_congested(request_queue_t *q, int rw) { enum bdi_state bit; bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; set_bit(bit, &q->backing_dev_info.state); } +EXPORT_SYMBOL(blk_set_queue_congested); /** * blk_get_backing_dev_info - get the address of a queue's backing_dev_info @@ -159,7 +161,6 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) ret = &q->backing_dev_info; return ret; } - EXPORT_SYMBOL(blk_get_backing_dev_info); void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data) @@ -167,7 +168,6 @@ void blk_queue_activity_fn(request_queue_t *q, activity_fn *fn, void *data) q->activity_fn = fn; q->activity_data = data; } - EXPORT_SYMBOL(blk_queue_activity_fn); /** @@ -2067,7 +2067,7 @@ static void __freed_request(request_queue_t *q, int rw) struct request_list *rl = &q->rq; if (rl->count[rw] < queue_congestion_off_threshold(q)) - clear_queue_congested(q, rw); + blk_clear_queue_congested(q, rw); if (rl->count[rw] + 1 <= q->nr_requests) { if (waitqueue_active(&rl->wait[rw])) @@ -2137,7 +2137,7 @@ static struct request *get_request(request_queue_t *q, int rw, struct bio *bio, } } } - set_queue_congested(q, rw); + blk_set_queue_congested(q, rw); } /* @@ -3765,14 +3765,14 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) blk_queue_congestion_threshold(q); if (rl->count[READ] >= queue_congestion_on_threshold(q)) - set_queue_congested(q, READ); + blk_set_queue_congested(q, READ); else if (rl->count[READ] < queue_congestion_off_threshold(q)) - clear_queue_congested(q, READ); + blk_clear_queue_congested(q, READ); if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) - set_queue_congested(q, WRITE); + blk_set_queue_congested(q, WRITE); else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) - clear_queue_congested(q, WRITE); + blk_clear_queue_congested(q, WRITE); if (rl->count[READ] >= q->nr_requests) { blk_set_queue_full(q, READ); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d370d2cfe138..9575e3a5ff2a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -651,6 +651,8 @@ extern void blk_recount_segments(request_queue_t *, struct bio *); extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *); extern int sg_scsi_ioctl(struct file *, struct request_queue *, struct gendisk *, struct scsi_ioctl_command __user *); +extern void blk_clear_queue_congested(request_queue_t *q, int rw); +extern void blk_set_queue_congested(request_queue_t *q, int rw); extern void blk_start_queue(request_queue_t *q); extern void blk_stop_queue(request_queue_t *q); extern void blk_sync_queue(struct request_queue *q); -- cgit v1.2.3 From 3fcfab16c5b86eaa3db3a9a31adba550c5b67141 Mon Sep 17 00:00:00 2001 From: Andrew Morton <akpm@osdl.org> Date: Thu, 19 Oct 2006 23:28:16 -0700 Subject: [PATCH] separate bdi congestion functions from queue congestion functions Separate out the concept of "queue congestion" from "backing-dev congestion". Congestion is a backing-dev concept, not a queue concept. The blk_* congestion functions are retained, as wrappers around the core backing-dev congestion functions. This proper layering is needed so that NFS can cleanly use the congestion functions, and so that CONFIG_BLOCK=n actually links. 
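The layering after the split, sketched: the congestion state and waitqueues live in mm/backing-dev.c and operate purely on a backing_dev_info, while the block layer keeps its old spelling as trivial inline wrappers:

	/* block wrapper (include/linux/blkdev.h) */
	static inline void blk_set_queue_congested(request_queue_t *q, int rw)
	{
		set_bdi_congested(&q->backing_dev_info, rw);
	}

	/* any writer can now throttle without a request queue, e.g. */
	congestion_wait(WRITE, HZ/10);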
Cc: "Thomas Maier" <balagi@justmail.de> Cc: "Jens Axboe" <jens.axboe@oracle.com> Cc: Trond Myklebust <trond.myklebust@fys.uio.no> Cc: David Howells <dhowells@redhat.com> Cc: Peter Osterlund <petero2@telia.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- arch/i386/lib/usercopy.c | 3 +- block/ll_rw_blk.c | 71 --------------------------------------------- drivers/md/dm-crypt.c | 3 +- fs/fat/file.c | 3 +- fs/nfs/write.c | 4 ++- fs/reiserfs/journal.c | 3 +- fs/xfs/linux-2.6/kmem.c | 5 ++-- fs/xfs/linux-2.6/xfs_buf.c | 3 +- include/linux/backing-dev.h | 7 +++++ include/linux/blkdev.h | 24 ++++++++++++--- include/linux/writeback.h | 1 - mm/Makefile | 3 +- mm/backing-dev.c | 69 +++++++++++++++++++++++++++++++++++++++++++ mm/page-writeback.c | 17 +++-------- mm/page_alloc.c | 5 ++-- mm/shmem.c | 3 +- mm/vmscan.c | 6 ++-- 17 files changed, 126 insertions(+), 104 deletions(-) create mode 100644 mm/backing-dev.c (limited to 'include') diff --git a/arch/i386/lib/usercopy.c b/arch/i386/lib/usercopy.c index 258df6b4d7d7..d22cfc9d656c 100644 --- a/arch/i386/lib/usercopy.c +++ b/arch/i386/lib/usercopy.c @@ -9,6 +9,7 @@ #include <linux/highmem.h> #include <linux/blkdev.h> #include <linux/module.h> +#include <linux/backing-dev.h> #include <asm/uaccess.h> #include <asm/mmx.h> @@ -741,7 +742,7 @@ survive: if (retval == -ENOMEM && is_init(current)) { up_read(¤t->mm->mmap_sem); - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); goto survive; } diff --git a/block/ll_rw_blk.c b/block/ll_rw_blk.c index 132a858ce2c5..136066583c68 100644 --- a/block/ll_rw_blk.c +++ b/block/ll_rw_blk.c @@ -56,11 +56,6 @@ static kmem_cache_t *requestq_cachep; */ static kmem_cache_t *iocontext_cachep; -static wait_queue_head_t congestion_wqh[2] = { - __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), - __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) - }; - /* * Controlling structure to kblockd */ @@ -112,37 +107,6 @@ static void blk_queue_congestion_threshold(struct request_queue *q) q->nr_congestion_off = nr; } -/* - * A queue has just exitted congestion. Note this in the global counter of - * congested queues, and wake up anyone who was waiting for requests to be - * put back. - */ -void blk_clear_queue_congested(request_queue_t *q, int rw) -{ - enum bdi_state bit; - wait_queue_head_t *wqh = &congestion_wqh[rw]; - - bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; - clear_bit(bit, &q->backing_dev_info.state); - smp_mb__after_clear_bit(); - if (waitqueue_active(wqh)) - wake_up(wqh); -} -EXPORT_SYMBOL(blk_clear_queue_congested); - -/* - * A queue has just entered congestion. Flag that in the queue's VM-visible - * state flags and increment the global gounter of congested queues. - */ -void blk_set_queue_congested(request_queue_t *q, int rw) -{ - enum bdi_state bit; - - bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; - set_bit(bit, &q->backing_dev_info.state); -} -EXPORT_SYMBOL(blk_set_queue_congested); - /** * blk_get_backing_dev_info - get the address of a queue's backing_dev_info * @bdev: device @@ -2755,41 +2719,6 @@ void blk_end_sync_rq(struct request *rq, int error) } EXPORT_SYMBOL(blk_end_sync_rq); -/** - * blk_congestion_wait - wait for a queue to become uncongested - * @rw: READ or WRITE - * @timeout: timeout in jiffies - * - * Waits for up to @timeout jiffies for a queue (any queue) to exit congestion. - * If no queues are congested then just wait for the next request to be - * returned. 
- */ -long blk_congestion_wait(int rw, long timeout) -{ - long ret; - DEFINE_WAIT(wait); - wait_queue_head_t *wqh = &congestion_wqh[rw]; - - prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); - ret = io_schedule_timeout(timeout); - finish_wait(wqh, &wait); - return ret; -} - -EXPORT_SYMBOL(blk_congestion_wait); - -/** - * blk_congestion_end - wake up sleepers on a congestion queue - * @rw: READ or WRITE - */ -void blk_congestion_end(int rw) -{ - wait_queue_head_t *wqh = &congestion_wqh[rw]; - - if (waitqueue_active(wqh)) - wake_up(wqh); -} - /* * Has to be called with the request spinlock acquired */ diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 655d816760e5..a625576fdeeb 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -16,6 +16,7 @@ #include <linux/slab.h> #include <linux/crypto.h> #include <linux/workqueue.h> +#include <linux/backing-dev.h> #include <asm/atomic.h> #include <linux/scatterlist.h> #include <asm/page.h> @@ -602,7 +603,7 @@ static void process_write(struct crypt_io *io) /* out of memory -> run queues */ if (remaining) - blk_congestion_wait(bio_data_dir(clone), HZ/100); + congestion_wait(bio_data_dir(clone), HZ/100); } } diff --git a/fs/fat/file.c b/fs/fat/file.c index f4b8f8b3fbdd..8337451e7897 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -13,6 +13,7 @@ #include <linux/smp_lock.h> #include <linux/buffer_head.h> #include <linux/writeback.h> +#include <linux/backing-dev.h> #include <linux/blkdev.h> int fat_generic_ioctl(struct inode *inode, struct file *filp, @@ -118,7 +119,7 @@ static int fat_file_release(struct inode *inode, struct file *filp) if ((filp->f_mode & FMODE_WRITE) && MSDOS_SB(inode->i_sb)->options.flush) { fat_flush_inodes(inode->i_sb, inode, NULL); - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); } return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index f6675d2c386c..ca92ac36fe9d 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -57,6 +57,8 @@ #include <linux/nfs_fs.h> #include <linux/nfs_mount.h> #include <linux/nfs_page.h> +#include <linux/backing-dev.h> + #include <asm/uaccess.h> #include <linux/smp_lock.h> @@ -395,7 +397,7 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc) out: clear_bit(BDI_write_congested, &bdi->state); wake_up_all(&nfs_write_congestion); - writeback_congestion_end(); + congestion_end(WRITE); return err; } diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index ad8cbc49883a..85ce23268302 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -53,6 +53,7 @@ #include <linux/workqueue.h> #include <linux/writeback.h> #include <linux/blkdev.h> +#include <linux/backing-dev.h> /* gets a struct reiserfs_journal_list * from a list head */ #define JOURNAL_LIST_ENTRY(h) (list_entry((h), struct reiserfs_journal_list, \ @@ -970,7 +971,7 @@ int reiserfs_async_progress_wait(struct super_block *s) DEFINE_WAIT(wait); struct reiserfs_journal *j = SB_JOURNAL(s); if (atomic_read(&j->j_async_throttle)) - blk_congestion_wait(WRITE, HZ / 10); + congestion_wait(WRITE, HZ / 10); return 0; } diff --git a/fs/xfs/linux-2.6/kmem.c b/fs/xfs/linux-2.6/kmem.c index d59737589815..004baf600611 100644 --- a/fs/xfs/linux-2.6/kmem.c +++ b/fs/xfs/linux-2.6/kmem.c @@ -21,6 +21,7 @@ #include <linux/highmem.h> #include <linux/swap.h> #include <linux/blkdev.h> +#include <linux/backing-dev.h> #include "time.h" #include "kmem.h" @@ -53,7 +54,7 @@ kmem_alloc(size_t size, unsigned int __nocast flags) printk(KERN_ERR "XFS: possible memory allocation " "deadlock in 
%s (mode:0x%x)\n", __FUNCTION__, lflags); - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); } while (1); } @@ -131,7 +132,7 @@ kmem_zone_alloc(kmem_zone_t *zone, unsigned int __nocast flags) printk(KERN_ERR "XFS: possible memory allocation " "deadlock in %s (mode:0x%x)\n", __FUNCTION__, lflags); - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); } while (1); } diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index 9bbadafdcb00..db5f5a3608ca 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -30,6 +30,7 @@ #include <linux/hash.h> #include <linux/kthread.h> #include <linux/migrate.h> +#include <linux/backing-dev.h> #include "xfs_linux.h" STATIC kmem_zone_t *xfs_buf_zone; @@ -395,7 +396,7 @@ _xfs_buf_lookup_pages( XFS_STATS_INC(xb_page_retries); xfsbufd_wakeup(0, gfp_mask); - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); goto retry; } diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f7a1390d67f5..7011d6255593 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -10,6 +10,8 @@ #include <asm/atomic.h> +struct page; + /* * Bits in backing_dev_info.state */ @@ -88,6 +90,11 @@ static inline int bdi_rw_congested(struct backing_dev_info *bdi) (1 << BDI_write_congested)); } +void clear_bdi_congested(struct backing_dev_info *bdi, int rw); +void set_bdi_congested(struct backing_dev_info *bdi, int rw); +long congestion_wait(int rw, long timeout); +void congestion_end(int rw); + #define bdi_cap_writeback_dirty(bdi) \ (!((bdi)->capabilities & BDI_CAP_NO_WRITEBACK)) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9575e3a5ff2a..7bfcde2d5578 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -651,8 +651,26 @@ extern void blk_recount_segments(request_queue_t *, struct bio *); extern int scsi_cmd_ioctl(struct file *, struct gendisk *, unsigned int, void __user *); extern int sg_scsi_ioctl(struct file *, struct request_queue *, struct gendisk *, struct scsi_ioctl_command __user *); -extern void blk_clear_queue_congested(request_queue_t *q, int rw); -extern void blk_set_queue_congested(request_queue_t *q, int rw); + +/* + * A queue has just exitted congestion. Note this in the global counter of + * congested queues, and wake up anyone who was waiting for requests to be + * put back. + */ +static inline void blk_clear_queue_congested(request_queue_t *q, int rw) +{ + clear_bdi_congested(&q->backing_dev_info, rw); +} + +/* + * A queue has just entered congestion. Flag that in the queue's VM-visible + * state flags and increment the global gounter of congested queues. 
+ */ +static inline void blk_set_queue_congested(request_queue_t *q, int rw) +{ + set_bdi_congested(&q->backing_dev_info, rw); +} + extern void blk_start_queue(request_queue_t *q); extern void blk_stop_queue(request_queue_t *q); extern void blk_sync_queue(struct request_queue *q); @@ -767,10 +785,8 @@ extern int blk_queue_init_tags(request_queue_t *, int, struct blk_queue_tag *); extern void blk_queue_free_tags(request_queue_t *); extern int blk_queue_resize_tags(request_queue_t *, int); extern void blk_queue_invalidate_tags(request_queue_t *); -extern long blk_congestion_wait(int rw, long timeout); extern struct blk_queue_tag *blk_init_tags(int); extern void blk_free_tags(struct blk_queue_tag *); -extern void blk_congestion_end(int rw); static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, int tag) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a341c8032866..fc35e6bdfb93 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -85,7 +85,6 @@ int wakeup_pdflush(long nr_pages); void laptop_io_completion(void); void laptop_sync_completion(void); void throttle_vm_writeout(void); -void writeback_congestion_end(void); /* These are exported to sysctl. */ extern int dirty_background_ratio; diff --git a/mm/Makefile b/mm/Makefile index 12b3a4eee88d..f3c077eb0b8e 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -10,7 +10,8 @@ mmu-$(CONFIG_MMU) := fremap.o highmem.o madvise.o memory.o mincore.o \ obj-y := bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \ page_alloc.o page-writeback.o pdflush.o \ readahead.o swap.o truncate.o vmscan.o \ - prio_tree.o util.o mmzone.o vmstat.o $(mmu-y) + prio_tree.o util.o mmzone.o vmstat.o backing-dev.o \ + $(mmu-y) ifeq ($(CONFIG_MMU)$(CONFIG_BLOCK),yy) obj-y += bounce.o diff --git a/mm/backing-dev.c b/mm/backing-dev.c new file mode 100644 index 000000000000..f50a2811f9dc --- /dev/null +++ b/mm/backing-dev.c @@ -0,0 +1,69 @@ + +#include <linux/wait.h> +#include <linux/backing-dev.h> +#include <linux/fs.h> +#include <linux/sched.h> +#include <linux/module.h> + +static wait_queue_head_t congestion_wqh[2] = { + __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[0]), + __WAIT_QUEUE_HEAD_INITIALIZER(congestion_wqh[1]) + }; + + +void clear_bdi_congested(struct backing_dev_info *bdi, int rw) +{ + enum bdi_state bit; + wait_queue_head_t *wqh = &congestion_wqh[rw]; + + bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; + clear_bit(bit, &bdi->state); + smp_mb__after_clear_bit(); + if (waitqueue_active(wqh)) + wake_up(wqh); +} +EXPORT_SYMBOL(clear_bdi_congested); + +void set_bdi_congested(struct backing_dev_info *bdi, int rw) +{ + enum bdi_state bit; + + bit = (rw == WRITE) ? BDI_write_congested : BDI_read_congested; + set_bit(bit, &bdi->state); +} +EXPORT_SYMBOL(set_bdi_congested); + +/** + * congestion_wait - wait for a backing_dev to become uncongested + * @rw: READ or WRITE + * @timeout: timeout in jiffies + * + * Waits for up to @timeout jiffies for a backing_dev (any backing_dev) to exit + * write congestion. If no backing_devs are congested then just wait for the + * next write to be completed. 
+ */ +long congestion_wait(int rw, long timeout) +{ + long ret; + DEFINE_WAIT(wait); + wait_queue_head_t *wqh = &congestion_wqh[rw]; + + prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); + ret = io_schedule_timeout(timeout); + finish_wait(wqh, &wait); + return ret; +} +EXPORT_SYMBOL(congestion_wait); + +/** + * congestion_end - wake up sleepers on a congested backing_dev_info + * @rw: READ or WRITE + */ +void congestion_end(int rw) +{ + wait_queue_head_t *wqh = &congestion_wqh[rw]; + + if (waitqueue_active(wqh)) + wake_up(wqh); +} +EXPORT_SYMBOL(congestion_end); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index a0f339057449..8d9b19f239c3 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -222,7 +222,7 @@ static void balance_dirty_pages(struct address_space *mapping) if (pages_written >= write_chunk) break; /* We've done our duty */ } - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); } if (nr_reclaimable + global_page_state(NR_WRITEBACK) @@ -314,7 +314,7 @@ void throttle_vm_writeout(void) if (global_page_state(NR_UNSTABLE_NFS) + global_page_state(NR_WRITEBACK) <= dirty_thresh) break; - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); } } @@ -351,7 +351,7 @@ static void background_writeout(unsigned long _min_pages) min_pages -= MAX_WRITEBACK_PAGES - wbc.nr_to_write; if (wbc.nr_to_write > 0 || wbc.pages_skipped > 0) { /* Wrote less than expected */ - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); if (!wbc.encountered_congestion) break; } @@ -422,7 +422,7 @@ static void wb_kupdate(unsigned long arg) writeback_inodes(&wbc); if (wbc.nr_to_write > 0) { if (wbc.encountered_congestion) - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); else break; /* All the old data is written */ } @@ -955,15 +955,6 @@ int test_set_page_writeback(struct page *page) } EXPORT_SYMBOL(test_set_page_writeback); -/* - * Wakes up tasks that are being throttled due to writeback congestion - */ -void writeback_congestion_end(void) -{ - blk_congestion_end(WRITE); -} -EXPORT_SYMBOL(writeback_congestion_end); - /* * Return true if any of the pages in the mapping are marged with the * passed tag. 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 40db96a655d0..afee38f04d84 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -39,6 +39,7 @@ #include <linux/stop_machine.h> #include <linux/sort.h> #include <linux/pfn.h> +#include <linux/backing-dev.h> #include <asm/tlbflush.h> #include <asm/div64.h> @@ -1050,7 +1051,7 @@ nofail_alloc: if (page) goto got_pg; if (gfp_mask & __GFP_NOFAIL) { - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); goto nofail_alloc; } } @@ -1113,7 +1114,7 @@ rebalance: do_retry = 1; } if (do_retry) { - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); goto rebalance; } diff --git a/mm/shmem.c b/mm/shmem.c index b378f66cf2f9..4959535fc14c 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -48,6 +48,7 @@ #include <linux/ctype.h> #include <linux/migrate.h> #include <linux/highmem.h> +#include <linux/backing-dev.h> #include <asm/uaccess.h> #include <asm/div64.h> @@ -1131,7 +1132,7 @@ repeat: page_cache_release(swappage); if (error == -ENOMEM) { /* let kswapd refresh zone for GFP_ATOMICs */ - blk_congestion_wait(WRITE, HZ/50); + congestion_wait(WRITE, HZ/50); } goto repeat; } diff --git a/mm/vmscan.c b/mm/vmscan.c index af73c14f9d88..f05527bf792b 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1059,7 +1059,7 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) /* Take a nap, wait for some writeback to complete */ if (sc.nr_scanned && priority < DEF_PRIORITY - 2) - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); } /* top priority shrink_caches still had more to do? don't OOM, then */ if (!sc.all_unreclaimable) @@ -1214,7 +1214,7 @@ scan: * another pass across the zones. */ if (total_scanned && priority < DEF_PRIORITY - 2) - blk_congestion_wait(WRITE, HZ/10); + congestion_wait(WRITE, HZ/10); /* * We do this so kswapd doesn't build up large priorities for @@ -1458,7 +1458,7 @@ unsigned long shrink_all_memory(unsigned long nr_pages) goto out; if (sc.nr_scanned && prio < DEF_PRIORITY - 2) - blk_congestion_wait(WRITE, HZ / 10); + congestion_wait(WRITE, HZ / 10); } lru_pages = 0; -- cgit v1.2.3 From 34e856e6a522a8fc0feba7497f5b05aeaa13d473 Mon Sep 17 00:00:00 2001 From: Ralf Baechle <ralf@linux-mips.org> Date: Thu, 19 Oct 2006 23:28:17 -0700 Subject: [PATCH] Make <linux/personality.h> userspace proof <linux/personality.h> contains the constants for personality(2) but also some definitions that are useless or even harmful in userspace such as the personality() macro. Signed-off-by: Ralf Baechle <ralf@linux-mips.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/personality.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/personality.h b/include/linux/personality.h index 80d780e5a8f5..bf4cf2080e5c 100644 --- a/include/linux/personality.h +++ b/include/linux/personality.h @@ -1,6 +1,8 @@ #ifndef _LINUX_PERSONALITY_H #define _LINUX_PERSONALITY_H +#ifdef __KERNEL__ + /* * Handling of different ABIs (personalities). */ @@ -12,6 +14,8 @@ extern int register_exec_domain(struct exec_domain *); extern int unregister_exec_domain(struct exec_domain *); extern int __set_personality(unsigned long); +#endif /* __KERNEL__ */ + /* * Flags for bug emulation. * @@ -71,6 +75,7 @@ enum { PER_MASK = 0x00ff, }; +#ifdef __KERNEL__ /* * Description of an execution domain. @@ -111,4 +116,6 @@ struct exec_domain { #define set_personality(pers) \ ((current->personality == pers) ? 
0 : __set_personality(pers)) +#endif /* __KERNEL__ */ + #endif /* _LINUX_PERSONALITY_H */ -- cgit v1.2.3 From 02a5323d8060d7259277e9e2936fd02129dc0984 Mon Sep 17 00:00:00 2001 From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> Date: Thu, 19 Oct 2006 23:28:20 -0700 Subject: [PATCH] uml: remove some leftover PPC code I happened to notice that this code is a leftover and should be removed - since there are sporadic efforts to revive the PPC port, such cleanups are not useless. Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> Cc: Jeff Dike <jdike@addtoit.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/asm-um/archparam-ppc.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include') diff --git a/include/asm-um/archparam-ppc.h b/include/asm-um/archparam-ppc.h index 172cd6ffacc4..4269d8a37b4f 100644 --- a/include/asm-um/archparam-ppc.h +++ b/include/asm-um/archparam-ppc.h @@ -1,15 +1,6 @@ #ifndef __UM_ARCHPARAM_PPC_H #define __UM_ARCHPARAM_PPC_H -/********* Bits for asm-um/hw_irq.h **********/ - -struct hw_interrupt_type; - -/********* Bits for asm-um/hardirq.h **********/ - -#define irq_enter(cpu, irq) hardirq_enter(cpu) -#define irq_exit(cpu, irq) hardirq_exit(cpu) - /********* Bits for asm-um/string.h **********/ #define __HAVE_ARCH_STRRCHR -- cgit v1.2.3 From 145fc655a1ceabda76cf2ad74f7cf96863c65b65 Mon Sep 17 00:00:00 2001 From: Ingo Molnar <mingo@elte.hu> Date: Thu, 19 Oct 2006 23:28:28 -0700 Subject: [PATCH] genirq: clean up irq-flow-type naming, fix Re-add the set_irq_chip_and_handler() prototype; it's still widely used. Signed-off-by: Ingo Molnar <mingo@elte.hu> Cc: Olaf Hering <olaf@aepfle.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/irq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/irq.h b/include/linux/irq.h index 775f5a7da493..52fc4052a0ae 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -321,6 +321,9 @@ extern int can_request_irq(unsigned int irq, unsigned long irqflags); extern struct irq_chip no_irq_chip; extern struct irq_chip dummy_irq_chip; +extern void +set_irq_chip_and_handler(unsigned int irq, struct irq_chip *chip, + irq_flow_handler_t handle); extern void set_irq_chip_and_handler_name(unsigned int irq, struct irq_chip *chip, irq_flow_handler_t handle, const char *name); -- cgit v1.2.3 From 8ac773b4f73afa6fd66695131103944b975d5d5c Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan <adobriyan@gmail.com> Date: Thu, 19 Oct 2006 23:28:32 -0700 Subject: [PATCH] OOM killer meets userspace headers Although mm.h is not an exported header, it does contain one thing which is part of the userspace ABI -- the value that disables the OOM killer for a given process. So, a) create and export include/linux/oom.h, b) move the OOM_DISABLE define there, and c) turn the bounding values of /proc/$PID/oom_adj into defines and export them too. Note: mass __KERNEL__ removal will be done later. 
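With the constants exported, a userspace tool can spell the magic value symbolically instead of hard-coding -17; a sketch (the oom_disable() helper here is hypothetical, and error handling is minimal):

	#include <linux/oom.h>
	#include <stdio.h>
	#include <sys/types.h>

	/* disable the OOM killer for one process via procfs */
	static int oom_disable(pid_t pid)
	{
		char path[64];
		FILE *f;

		snprintf(path, sizeof(path), "/proc/%d/oom_adj", (int)pid);
		f = fopen(path, "w");
		if (!f)
			return -1;
		fprintf(f, "%d\n", OOM_DISABLE);	/* was a bare -17 */
		return fclose(f);
	}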
Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: David Woodhouse <dwmw2@infradead.org> Cc: Christoph Hellwig <hch@infradead.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/proc/base.c | 4 +++- include/linux/Kbuild | 1 + include/linux/mm.h | 3 --- include/linux/oom.h | 10 ++++++++++ mm/oom_kill.c | 1 + 5 files changed, 15 insertions(+), 4 deletions(-) create mode 100644 include/linux/oom.h (limited to 'include') diff --git a/fs/proc/base.c b/fs/proc/base.c index 26a8f8416b79..8df27401d292 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -72,6 +72,7 @@ #include <linux/audit.h> #include <linux/poll.h> #include <linux/nsproxy.h> +#include <linux/oom.h> #include "internal.h" /* NOTE: @@ -689,7 +690,8 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf, if (copy_from_user(buffer, buf, count)) return -EFAULT; oom_adjust = simple_strtol(buffer, &end, 0); - if ((oom_adjust < -16 || oom_adjust > 15) && oom_adjust != OOM_DISABLE) + if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) && + oom_adjust != OOM_DISABLE) return -EINVAL; if (*end == '\n') end++; diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 5114ff18101d..a1155a2beb32 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -120,6 +120,7 @@ header-y += netrom.h header-y += nfs2.h header-y += nfs4_mount.h header-y += nfs_mount.h +header-y += oom.h header-y += param.h header-y += pci_ids.h header-y += pci_regs.h diff --git a/include/linux/mm.h b/include/linux/mm.h index 5a6068ff5556..d538de901965 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1115,9 +1115,6 @@ int in_gate_area_no_task(unsigned long addr); #define in_gate_area(task, addr) ({(void)task; in_gate_area_no_task(addr);}) #endif /* __HAVE_ARCH_GATE_AREA */ -/* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */ -#define OOM_DISABLE -17 - int drop_caches_sysctl_handler(struct ctl_table *, int, struct file *, void __user *, size_t *, loff_t *); unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask, diff --git a/include/linux/oom.h b/include/linux/oom.h new file mode 100644 index 000000000000..ad76463629a0 --- /dev/null +++ b/include/linux/oom.h @@ -0,0 +1,10 @@ +#ifndef __INCLUDE_LINUX_OOM_H +#define __INCLUDE_LINUX_OOM_H + +/* /proc/<pid>/oom_adj set to -17 protects from the oom-killer */ +#define OOM_DISABLE (-17) +/* inclusive */ +#define OOM_ADJUST_MIN (-16) +#define OOM_ADJUST_MAX 15 + +#endif diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 20f41b082e16..2e3ce3a928b9 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -15,6 +15,7 @@ * kernel subsystems and hints as to where to find out what things do. */ +#include <linux/oom.h> #include <linux/mm.h> #include <linux/sched.h> #include <linux/swap.h> -- cgit v1.2.3 From 8c7c7c9bf39470c9689ad43cae3142cf948f4cfb Mon Sep 17 00:00:00 2001 From: Ralf Baechle <ralf@linux-mips.org> Date: Thu, 19 Oct 2006 23:28:34 -0700 Subject: [PATCH] Fix warnings for WARN_ON if CONFIG_BUG is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In most cases the return value of WARN_ON() is ignored. 
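Typical call sites (hypothetical examples, not taken from the affected files) either discard the result or branch on it:

	WARN_ON(ptr == NULL);		/* result ignored */

	if (WARN_ON(count > limit))	/* result used */
		return -EINVAL;

It is the first, value-discarding style that trips up the old one-line stub.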
If the generic definition for the !CONFIG_BUG case is used this will result in a warning: CC kernel/sched.o In file included from include/linux/bio.h:25, from include/linux/blkdev.h:14, from kernel/sched.c:39: include/linux/ioprio.h: In function ‘task_ioprio’: include/linux/ioprio.h:50: warning: statement with no effect kernel/sched.c: In function ‘context_switch’: kernel/sched.c:1834: warning: statement with no effect Signed-off-by: Ralf Baechle <ralf@linux-mips.org> Cc: Jeremy Fitzhardinge <jeremy@goop.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/asm-generic/bug.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h index 1d9573cf4a0b..c92ae0f166ff 100644 --- a/include/asm-generic/bug.h +++ b/include/asm-generic/bug.h @@ -37,7 +37,10 @@ #endif #ifndef HAVE_ARCH_WARN_ON -#define WARN_ON(condition) unlikely((condition)) +#define WARN_ON(condition) ({ \ + typeof(condition) __ret_warn_on = (condition); \ + unlikely(__ret_warn_on); \ +}) #endif #endif -- cgit v1.2.3 From cd9ae2b6a75bb1fa0d370929c2d7a7da1ed719d9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust <Trond.Myklebust@netapp.com> Date: Thu, 19 Oct 2006 23:28:40 -0700 Subject: [PATCH] NFS: Deal with failure of invalidate_inode_pages2() If invalidate_inode_pages2() fails, then it should in principle just be because the current process was signalled. In that case, we just want to ensure that the inode's page cache remains marked as invalid. Also add a helper to allow the O_DIRECT code to simply mark the page cache as invalid once it is finished writing, instead of calling invalidate_inode_pages2() itself. Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/nfs/dir.c | 6 ++++-- fs/nfs/direct.c | 13 ++----------- fs/nfs/inode.c | 28 +++++++++++++++++++++++----- include/linux/nfs_fs.h | 1 + 4 files changed, 30 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 481f8892a919..58d44057813e 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -203,8 +203,10 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) * Note: assumes we have exclusive access to this mapping either * through inode->i_mutex or some other mechanism. 
*/ - if (page->index == 0) - invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1); + if (page->index == 0 && invalidate_inode_pages2_range(inode->i_mapping, PAGE_CACHE_SIZE, -1) < 0) { + /* Should never happen */ + nfs_zap_mapping(inode, inode->i_mapping); + } unlock_page(page); return 0; error: diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 1e873fcab947..bdfabf854a51 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -497,6 +497,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode if (dreq->commit_data != NULL) nfs_commit_free(dreq->commit_data); nfs_direct_free_writedata(dreq); + nfs_zap_mapping(inode, inode->i_mapping); nfs_direct_complete(dreq); } } @@ -517,6 +518,7 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode { nfs_end_data_update(inode); nfs_direct_free_writedata(dreq); + nfs_zap_mapping(inode, inode->i_mapping); nfs_direct_complete(dreq); } #endif @@ -830,17 +832,6 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, retval = nfs_direct_write(iocb, (unsigned long) buf, count, pos); - /* - * XXX: nfs_end_data_update() already ensures this file's - * cached data is subsequently invalidated. Do we really - * need to call invalidate_inode_pages2() again here? - * - * For aio writes, this invalidation will almost certainly - * occur before the writes complete. Kind of racey. - */ - if (mapping->nrpages) - invalidate_inode_pages2(mapping); - if (retval > 0) iocb->ki_pos = pos + retval; diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index bc9376ca86cd..9979ad1cf8eb 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -131,6 +131,15 @@ void nfs_zap_caches(struct inode *inode) spin_unlock(&inode->i_lock); } +void nfs_zap_mapping(struct inode *inode, struct address_space *mapping) +{ + if (mapping->nrpages != 0) { + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA; + spin_unlock(&inode->i_lock); + } +} + static void nfs_zap_acl_cache(struct inode *inode) { void (*clear_acl_cache)(struct inode *); @@ -671,13 +680,20 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) if ((nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE) || nfs_attribute_timeout(inode)) ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (ret < 0) + goto out; if (nfsi->cache_validity & NFS_INO_INVALID_DATA) { - nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); - if (S_ISREG(inode->i_mode)) - nfs_sync_mapping(mapping); - invalidate_inode_pages2(mapping); - + if (mapping->nrpages != 0) { + if (S_ISREG(inode->i_mode)) { + ret = nfs_sync_mapping(mapping); + if (ret < 0) + goto out; + } + ret = invalidate_inode_pages2(mapping); + if (ret < 0) + goto out; + } spin_lock(&inode->i_lock); nfsi->cache_validity &= ~NFS_INO_INVALID_DATA; if (S_ISDIR(inode->i_mode)) { @@ -687,10 +703,12 @@ int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping) } spin_unlock(&inode->i_lock); + nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE); dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", inode->i_sb->s_id, (long long)NFS_FILEID(inode)); } +out: return ret; } diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 76ff54846ada..6b2de1be5815 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -290,6 +290,7 @@ static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long * linux/fs/nfs/inode.c */ extern int nfs_sync_mapping(struct address_space *mapping); +extern void nfs_zap_mapping(struct inode 
*inode, struct address_space *mapping); extern void nfs_zap_caches(struct inode *); extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, struct nfs_fattr *); -- cgit v1.2.3 From 7111c66e4e70588c9602035a4996c9cdc2087d2d Mon Sep 17 00:00:00 2001 From: Al Viro <viro@ftp.linux.org.uk> Date: Thu, 19 Oct 2006 23:28:45 -0700 Subject: [PATCH] fix svc_procfunc declaration svc_procfunc instances return __be32, not int Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no> Acked-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/lockd/svc4proc.c | 40 ++++++++++++++++++++-------------------- fs/lockd/svcproc.c | 40 ++++++++++++++++++++-------------------- fs/nfs/callback_xdr.c | 4 ++-- fs/nfsd/nfs2acl.c | 10 +++++----- fs/nfsd/nfs3acl.c | 6 +++--- fs/nfsd/nfs3proc.c | 44 ++++++++++++++++++++++---------------------- fs/nfsd/nfs4proc.c | 4 ++-- fs/nfsd/nfsproc.c | 32 ++++++++++++++++---------------- include/linux/sunrpc/svc.h | 2 +- 9 files changed, 91 insertions(+), 91 deletions(-) (limited to 'include') diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 399ad11b97be..4e719860b4bf 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -68,7 +68,7 @@ no_locks: /* * NULL: Test for presence of service */ -static int +static __be32 nlm4svc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) { dprintk("lockd: NULL called\n"); @@ -78,7 +78,7 @@ nlm4svc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) /* * TEST: Check for conflicting lock */ -static int +static __be32 nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_res *resp) { @@ -107,7 +107,7 @@ nlm4svc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, return rpc_success; } -static int +static __be32 nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_res *resp) { @@ -150,7 +150,7 @@ nlm4svc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, return rpc_success; } -static int +static __be32 nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_res *resp) { @@ -183,7 +183,7 @@ nlm4svc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, /* * UNLOCK: release a lock */ -static int +static __be32 nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_res *resp) { @@ -217,7 +217,7 @@ nlm4svc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, * GRANTED: A server calls us to tell that a process' lock request * was granted */ -static int +static __be32 nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_res *resp) { @@ -253,12 +253,12 @@ static const struct rpc_call_ops nlm4svc_callback_ops = { * because we send the callback before the reply proper. I hope this * doesn't break any clients. 
*/ -static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp, - int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res *)) +static __be32 nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp, + __be32 (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res *)) { struct nlm_host *host; struct nlm_rqst *call; - int stat; + __be32 stat; host = nlmsvc_lookup_host(rqstp, argp->lock.caller, @@ -282,35 +282,35 @@ static int nlm4svc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *a return rpc_success; } -static int nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp, +static __be32 nlm4svc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { dprintk("lockd: TEST_MSG called\n"); return nlm4svc_callback(rqstp, NLMPROC_TEST_RES, argp, nlm4svc_proc_test); } -static int nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, +static __be32 nlm4svc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { dprintk("lockd: LOCK_MSG called\n"); return nlm4svc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlm4svc_proc_lock); } -static int nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp, +static __be32 nlm4svc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { dprintk("lockd: CANCEL_MSG called\n"); return nlm4svc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlm4svc_proc_cancel); } -static int nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, +static __be32 nlm4svc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { dprintk("lockd: UNLOCK_MSG called\n"); return nlm4svc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlm4svc_proc_unlock); } -static int nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp, +static __be32 nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { dprintk("lockd: GRANTED_MSG called\n"); @@ -320,7 +320,7 @@ static int nlm4svc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *arg /* * SHARE: create a DOS share or alter existing share. */ -static int +static __be32 nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_res *resp) { @@ -353,7 +353,7 @@ nlm4svc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, /* * UNSHARE: Release a DOS share. 
*/ -static int +static __be32 nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_res *resp) { @@ -386,7 +386,7 @@ nlm4svc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, /* * NM_LOCK: Create an unmonitored lock */ -static int +static __be32 nlm4svc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_res *resp) { @@ -399,7 +399,7 @@ nlm4svc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp, /* * FREE_ALL: Release all locks and shares held by client */ -static int +static __be32 nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { @@ -417,7 +417,7 @@ nlm4svc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp, /* * SM_NOTIFY: private callback from statd (not part of official NLM proto) */ -static int +static __be32 nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, void *resp) { @@ -446,7 +446,7 @@ nlm4svc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, /* * client sent a GRANTED_RES, let's remove the associated block */ -static int +static __be32 nlm4svc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp, void *resp) { diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index 6a931f4ab75c..db8d85c32d29 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -96,7 +96,7 @@ no_locks: /* * NULL: Test for presence of service */ -static int +static __be32 nlmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) { dprintk("lockd: NULL called\n"); @@ -106,7 +106,7 @@ nlmsvc_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) /* * TEST: Check for conflicting lock */ -static int +static __be32 nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_res *resp) { @@ -136,7 +136,7 @@ nlmsvc_proc_test(struct svc_rqst *rqstp, struct nlm_args *argp, return rpc_success; } -static int +static __be32 nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_res *resp) { @@ -179,7 +179,7 @@ nlmsvc_proc_lock(struct svc_rqst *rqstp, struct nlm_args *argp, return rpc_success; } -static int +static __be32 nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_res *resp) { @@ -212,7 +212,7 @@ nlmsvc_proc_cancel(struct svc_rqst *rqstp, struct nlm_args *argp, /* * UNLOCK: release a lock */ -static int +static __be32 nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_res *resp) { @@ -246,7 +246,7 @@ nlmsvc_proc_unlock(struct svc_rqst *rqstp, struct nlm_args *argp, * GRANTED: A server calls us to tell that a process' lock request * was granted */ -static int +static __be32 nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_res *resp) { @@ -282,12 +282,12 @@ static const struct rpc_call_ops nlmsvc_callback_ops = { * because we send the callback before the reply proper. I hope this * doesn't break any clients. 
*/ -static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp, - int (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res *)) +static __be32 nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *argp, + __be32 (*func)(struct svc_rqst *, struct nlm_args *, struct nlm_res *)) { struct nlm_host *host; struct nlm_rqst *call; - int stat; + __be32 stat; host = nlmsvc_lookup_host(rqstp, argp->lock.caller, @@ -311,28 +311,28 @@ static int nlmsvc_callback(struct svc_rqst *rqstp, u32 proc, struct nlm_args *ar return rpc_success; } -static int nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp, +static __be32 nlmsvc_proc_test_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { dprintk("lockd: TEST_MSG called\n"); return nlmsvc_callback(rqstp, NLMPROC_TEST_RES, argp, nlmsvc_proc_test); } -static int nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, +static __be32 nlmsvc_proc_lock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { dprintk("lockd: LOCK_MSG called\n"); return nlmsvc_callback(rqstp, NLMPROC_LOCK_RES, argp, nlmsvc_proc_lock); } -static int nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp, +static __be32 nlmsvc_proc_cancel_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { dprintk("lockd: CANCEL_MSG called\n"); return nlmsvc_callback(rqstp, NLMPROC_CANCEL_RES, argp, nlmsvc_proc_cancel); } -static int +static __be32 nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { @@ -340,7 +340,7 @@ nlmsvc_proc_unlock_msg(struct svc_rqst *rqstp, struct nlm_args *argp, return nlmsvc_callback(rqstp, NLMPROC_UNLOCK_RES, argp, nlmsvc_proc_unlock); } -static int +static __be32 nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { @@ -351,7 +351,7 @@ nlmsvc_proc_granted_msg(struct svc_rqst *rqstp, struct nlm_args *argp, /* * SHARE: create a DOS share or alter existing share. */ -static int +static __be32 nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_res *resp) { @@ -384,7 +384,7 @@ nlmsvc_proc_share(struct svc_rqst *rqstp, struct nlm_args *argp, /* * UNSHARE: Release a DOS share. 
*/ -static int +static __be32 nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_res *resp) { @@ -417,7 +417,7 @@ nlmsvc_proc_unshare(struct svc_rqst *rqstp, struct nlm_args *argp, /* * NM_LOCK: Create an unmonitored lock */ -static int +static __be32 nlmsvc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_res *resp) { @@ -430,7 +430,7 @@ nlmsvc_proc_nm_lock(struct svc_rqst *rqstp, struct nlm_args *argp, /* * FREE_ALL: Release all locks and shares held by client */ -static int +static __be32 nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp, void *resp) { @@ -448,7 +448,7 @@ nlmsvc_proc_free_all(struct svc_rqst *rqstp, struct nlm_args *argp, /* * SM_NOTIFY: private callback from statd (not part of official NLM proto) */ -static int +static __be32 nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, void *resp) { @@ -477,7 +477,7 @@ nlmsvc_proc_sm_notify(struct svc_rqst *rqstp, struct nlm_reboot *argp, /* * client sent a GRANTED_RES, let's remove the associated block */ -static int +static __be32 nlmsvc_proc_granted_res(struct svc_rqst *rqstp, struct nlm_res *argp, void *resp) { diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c index 29f932192054..5998d0c71757 100644 --- a/fs/nfs/callback_xdr.c +++ b/fs/nfs/callback_xdr.c @@ -36,7 +36,7 @@ struct callback_op { static struct callback_op callback_ops[]; -static int nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp) +static __be32 nfs4_callback_null(struct svc_rqst *rqstp, void *argp, void *resp) { return htonl(NFS4_OK); } @@ -399,7 +399,7 @@ static unsigned process_op(struct svc_rqst *rqstp, /* * Decode, process and encode a COMPOUND */ -static int nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp) +static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *resp) { struct cb_compound_hdr_arg hdr_arg; struct cb_compound_hdr_res hdr_res; diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 9187755661df..8d48616882c1 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -21,7 +21,7 @@ /* * NULL call. */ -static int +static __be32 nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) { return nfs_ok; @@ -30,7 +30,7 @@ nfsacld_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) /* * Get the Access and/or Default ACL of a file. */ -static int nfsacld_proc_getacl(struct svc_rqst * rqstp, +static __be32 nfsacld_proc_getacl(struct svc_rqst * rqstp, struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp) { svc_fh *fh; @@ -97,7 +97,7 @@ fail: /* * Set the Access and/or Default ACL of a file. 
*/ -static int nfsacld_proc_setacl(struct svc_rqst * rqstp, +static __be32 nfsacld_proc_setacl(struct svc_rqst * rqstp, struct nfsd3_setaclargs *argp, struct nfsd_attrstat *resp) { @@ -128,7 +128,7 @@ static int nfsacld_proc_setacl(struct svc_rqst * rqstp, /* * Check file attributes */ -static int nfsacld_proc_getattr(struct svc_rqst * rqstp, +static __be32 nfsacld_proc_getattr(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, struct nfsd_attrstat *resp) { dprintk("nfsd: GETATTR %s\n", SVCFH_fmt(&argp->fh)); @@ -140,7 +140,7 @@ static int nfsacld_proc_getattr(struct svc_rqst * rqstp, /* * Check file access */ -static int nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp, +static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp, struct nfsd3_accessres *resp) { int nfserr; diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index d4bdc00c1169..ed6e2c27b5e8 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -19,7 +19,7 @@ /* * NULL call. */ -static int +static __be32 nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) { return nfs_ok; @@ -28,7 +28,7 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) /* * Get the Access and/or Default ACL of a file. */ -static int nfsd3_proc_getacl(struct svc_rqst * rqstp, +static __be32 nfsd3_proc_getacl(struct svc_rqst * rqstp, struct nfsd3_getaclargs *argp, struct nfsd3_getaclres *resp) { svc_fh *fh; @@ -93,7 +93,7 @@ fail: /* * Set the Access and/or Default ACL of a file. */ -static int nfsd3_proc_setacl(struct svc_rqst * rqstp, +static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp, struct nfsd3_setaclargs *argp, struct nfsd3_attrstat *resp) { diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index a5ebc7dbb384..a12663fdfe16 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -43,7 +43,7 @@ static int nfs3_ftypes[] = { /* * NULL call. */ -static int +static __be32 nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) { return nfs_ok; @@ -52,7 +52,7 @@ nfsd3_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) /* * Get a file's attributes */ -static int +static __be32 nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, struct nfsd3_attrstat *resp) { @@ -76,7 +76,7 @@ nfsd3_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, /* * Set a file's attributes */ -static int +static __be32 nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp, struct nfsd3_attrstat *resp) { @@ -94,7 +94,7 @@ nfsd3_proc_setattr(struct svc_rqst *rqstp, struct nfsd3_sattrargs *argp, /* * Look up a path name component */ -static int +static __be32 nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, struct nfsd3_diropres *resp) { @@ -118,7 +118,7 @@ nfsd3_proc_lookup(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, /* * Check file access */ -static int +static __be32 nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp, struct nfsd3_accessres *resp) { @@ -137,7 +137,7 @@ nfsd3_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessargs *argp, /* * Read a symlink. */ -static int +static __be32 nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp, struct nfsd3_readlinkres *resp) { @@ -155,7 +155,7 @@ nfsd3_proc_readlink(struct svc_rqst *rqstp, struct nfsd3_readlinkargs *argp, /* * Read a portion of a file. 
*/ -static int +static __be32 nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, struct nfsd3_readres *resp) { @@ -195,7 +195,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp, /* * Write data to a file */ -static int +static __be32 nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, struct nfsd3_writeres *resp) { @@ -223,7 +223,7 @@ nfsd3_proc_write(struct svc_rqst *rqstp, struct nfsd3_writeargs *argp, * At least in theory; we'll see how it fares in practice when the * first reports about SunOS compatibility problems start to pour in... */ -static int +static __be32 nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, struct nfsd3_diropres *resp) { @@ -265,7 +265,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, /* * Make directory. This operation is not idempotent. */ -static int +static __be32 nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, struct nfsd3_diropres *resp) { @@ -285,7 +285,7 @@ nfsd3_proc_mkdir(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, RETURN_STATUS(nfserr); } -static int +static __be32 nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp, struct nfsd3_diropres *resp) { @@ -307,7 +307,7 @@ nfsd3_proc_symlink(struct svc_rqst *rqstp, struct nfsd3_symlinkargs *argp, /* * Make socket/fifo/device. */ -static int +static __be32 nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp, struct nfsd3_diropres *resp) { @@ -343,7 +343,7 @@ nfsd3_proc_mknod(struct svc_rqst *rqstp, struct nfsd3_mknodargs *argp, /* * Remove file/fifo/socket etc. */ -static int +static __be32 nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, struct nfsd3_attrstat *resp) { @@ -363,7 +363,7 @@ nfsd3_proc_remove(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, /* * Remove a directory */ -static int +static __be32 nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, struct nfsd3_attrstat *resp) { @@ -379,7 +379,7 @@ nfsd3_proc_rmdir(struct svc_rqst *rqstp, struct nfsd3_diropargs *argp, RETURN_STATUS(nfserr); } -static int +static __be32 nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp, struct nfsd3_renameres *resp) { @@ -401,7 +401,7 @@ nfsd3_proc_rename(struct svc_rqst *rqstp, struct nfsd3_renameargs *argp, RETURN_STATUS(nfserr); } -static int +static __be32 nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp, struct nfsd3_linkres *resp) { @@ -424,7 +424,7 @@ nfsd3_proc_link(struct svc_rqst *rqstp, struct nfsd3_linkargs *argp, /* * Read a portion of a directory. */ -static int +static __be32 nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, struct nfsd3_readdirres *resp) { @@ -459,7 +459,7 @@ nfsd3_proc_readdir(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, * Read a portion of a directory, including file handles and attrs. * For now, we choose to ignore the dircount parameter. 
*/ -static int +static __be32 nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, struct nfsd3_readdirres *resp) { @@ -517,7 +517,7 @@ nfsd3_proc_readdirplus(struct svc_rqst *rqstp, struct nfsd3_readdirargs *argp, /* * Get file system stats */ -static int +static __be32 nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, struct nfsd3_fsstatres *resp) { @@ -534,7 +534,7 @@ nfsd3_proc_fsstat(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, /* * Get file system info */ -static int +static __be32 nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, struct nfsd3_fsinfores *resp) { @@ -576,7 +576,7 @@ nfsd3_proc_fsinfo(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, /* * Get pathconf info for the specified file */ -static int +static __be32 nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, struct nfsd3_pathconfres *resp) { @@ -619,7 +619,7 @@ nfsd3_proc_pathconf(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, /* * Commit a file (range) to stable storage. */ -static int +static __be32 nfsd3_proc_commit(struct svc_rqst * rqstp, struct nfsd3_commitargs *argp, struct nfsd3_commitres *resp) { diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d1fac6872c44..795ad6c5cb2c 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -715,7 +715,7 @@ out_kfree: /* * NULL call. */ -static int +static __be32 nfsd4_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) { return nfs_ok; @@ -731,7 +731,7 @@ static inline void nfsd4_increment_op_stats(u32 opnum) /* * COMPOUND call. */ -static int +static __be32 nfsd4_proc_compound(struct svc_rqst *rqstp, struct nfsd4_compoundargs *args, struct nfsd4_compoundres *resp) diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 9ee1dab5d44a..09030afd7249 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -30,7 +30,7 @@ typedef struct svc_buf svc_buf; #define NFSDDBG_FACILITY NFSDDBG_PROC -static int +static __be32 nfsd_proc_null(struct svc_rqst *rqstp, void *argp, void *resp) { return nfs_ok; @@ -56,7 +56,7 @@ nfsd_return_dirop(int err, struct nfsd_diropres *resp) * Get a file's attributes * N.B. After this call resp->fh needs an fh_put */ -static int +static __be32 nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, struct nfsd_attrstat *resp) { @@ -72,7 +72,7 @@ nfsd_proc_getattr(struct svc_rqst *rqstp, struct nfsd_fhandle *argp, * Set a file's attributes * N.B. After this call resp->fh needs an fh_put */ -static int +static __be32 nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp, struct nfsd_attrstat *resp) { @@ -92,7 +92,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp, struct nfsd_sattrargs *argp, * doesn't exist yet. * N.B. After this call resp->fh needs an fh_put */ -static int +static __be32 nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp, struct nfsd_diropres *resp) { @@ -112,7 +112,7 @@ nfsd_proc_lookup(struct svc_rqst *rqstp, struct nfsd_diropargs *argp, /* * Read a symlink. */ -static int +static __be32 nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp, struct nfsd_readlinkres *resp) { @@ -132,7 +132,7 @@ nfsd_proc_readlink(struct svc_rqst *rqstp, struct nfsd_readlinkargs *argp, * Read a portion of a file. * N.B. 
After this call resp->fh needs an fh_put */ -static int +static __be32 nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, struct nfsd_readres *resp) { @@ -172,7 +172,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp, * Write data to a file * N.B. After this call resp->fh needs an fh_put */ -static int +static __be32 nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, struct nfsd_attrstat *resp) { @@ -197,7 +197,7 @@ nfsd_proc_write(struct svc_rqst *rqstp, struct nfsd_writeargs *argp, * and the actual create() call in compliance with VFS protocols. * N.B. After this call _both_ argp->fh and resp->fh need an fh_put */ -static int +static __be32 nfsd_proc_create(struct svc_rqst *rqstp, struct nfsd_createargs *argp, struct nfsd_diropres *resp) { @@ -348,7 +348,7 @@ done: return nfsd_return_dirop(nfserr, resp); } -static int +static __be32 nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp, void *resp) { @@ -363,7 +363,7 @@ nfsd_proc_remove(struct svc_rqst *rqstp, struct nfsd_diropargs *argp, return nfserr; } -static int +static __be32 nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp, void *resp) { @@ -381,7 +381,7 @@ nfsd_proc_rename(struct svc_rqst *rqstp, struct nfsd_renameargs *argp, return nfserr; } -static int +static __be32 nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp, void *resp) { @@ -401,7 +401,7 @@ nfsd_proc_link(struct svc_rqst *rqstp, struct nfsd_linkargs *argp, return nfserr; } -static int +static __be32 nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp, void *resp) { @@ -430,7 +430,7 @@ nfsd_proc_symlink(struct svc_rqst *rqstp, struct nfsd_symlinkargs *argp, * Make directory. This operation is not idempotent. * N.B. After this call resp->fh needs an fh_put */ -static int +static __be32 nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp, struct nfsd_diropres *resp) { @@ -454,7 +454,7 @@ nfsd_proc_mkdir(struct svc_rqst *rqstp, struct nfsd_createargs *argp, /* * Remove a directory */ -static int +static __be32 nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp, void *resp) { @@ -470,7 +470,7 @@ nfsd_proc_rmdir(struct svc_rqst *rqstp, struct nfsd_diropargs *argp, /* * Read a portion of a directory. 
*/ -static int +static __be32 nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp, struct nfsd_readdirres *resp) { @@ -509,7 +509,7 @@ nfsd_proc_readdir(struct svc_rqst *rqstp, struct nfsd_readdirargs *argp, /* * Get file system info */ -static int +static __be32 nfsd_proc_statfs(struct svc_rqst * rqstp, struct nfsd_fhandle *argp, struct nfsd_statfsres *resp) { diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 9c9a8ad92477..965d6c20086e 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -335,7 +335,7 @@ struct svc_version { /* * RPC procedure info */ -typedef int (*svc_procfunc)(struct svc_rqst *, void *argp, void *resp); +typedef __be32 (*svc_procfunc)(struct svc_rqst *, void *argp, void *resp); struct svc_procedure { svc_procfunc pc_func; /* process the request */ kxdrproc_t pc_decode; /* XDR decode args */ -- cgit v1.2.3 From 52921e02a4f4163a7b1f4b5dde71e1debc71de4a Mon Sep 17 00:00:00 2001 From: Al Viro <viro@ftp.linux.org.uk> Date: Thu, 19 Oct 2006 23:28:46 -0700 Subject: [PATCH] lockd endianness annotations Signed-off-by: Alexey Dobriyan <adobriyan@gmail.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no> Acked-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/lockd/clntlock.c | 4 +-- fs/lockd/mon.c | 12 +++---- fs/lockd/svc4proc.c | 4 +-- fs/lockd/svclock.c | 10 +++--- fs/lockd/svcproc.c | 8 ++--- fs/lockd/svcshare.c | 4 +-- fs/lockd/svcsubs.c | 4 +-- fs/lockd/xdr.c | 76 +++++++++++++++++++++--------------------- fs/lockd/xdr4.c | 80 ++++++++++++++++++++++----------------------- include/linux/lockd/lockd.h | 12 +++---- include/linux/lockd/share.h | 4 +-- include/linux/lockd/xdr.h | 26 +++++++-------- include/linux/lockd/xdr4.h | 26 +++++++-------- 13 files changed, 135 insertions(+), 135 deletions(-) (limited to 'include') diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c index e8c7765419e8..b85a0ad2cfb6 100644 --- a/fs/lockd/clntlock.c +++ b/fs/lockd/clntlock.c @@ -100,12 +100,12 @@ int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout) /* * The server lockd has called us back to tell us the lock was granted */ -u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock) +__be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *lock) { const struct file_lock *fl = &lock->fl; const struct nfs_fh *fh = &lock->fh; struct nlm_wait *block; - u32 res = nlm_lck_denied; + __be32 res = nlm_lck_denied; /* * Look up blocked request based on arguments. diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c index e0179f8c327f..eb243edf8932 100644 --- a/fs/lockd/mon.c +++ b/fs/lockd/mon.c @@ -148,8 +148,8 @@ nsm_create(void) * XDR functions for NSM. 
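 * (Context, not from the original patch: XDR fields are big-endian on
 * the wire, which is what the __be32 annotations below capture.  Sparse
 * treats __be32 as a restricted "bitwise" type, so conversions must go
 * through the usual helpers, roughly:
 *
 *	__be32 wire = htonl(host_val);	(host order to big-endian)
 *	u32 host = ntohl(*p++);		(big-endian to host order)
 *
 * and any unconverted mixing of u32 and __be32 now draws a sparse
 * warning, which is the point of these annotation patches.)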
*/ -static u32 * -xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp) +static __be32 * +xdr_encode_common(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp) { char buffer[20], *name; @@ -176,7 +176,7 @@ xdr_encode_common(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp) } static int -xdr_encode_mon(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp) +xdr_encode_mon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp) { p = xdr_encode_common(rqstp, p, argp); if (IS_ERR(p)) @@ -192,7 +192,7 @@ xdr_encode_mon(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp) } static int -xdr_encode_unmon(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp) +xdr_encode_unmon(struct rpc_rqst *rqstp, __be32 *p, struct nsm_args *argp) { p = xdr_encode_common(rqstp, p, argp); if (IS_ERR(p)) @@ -202,7 +202,7 @@ xdr_encode_unmon(struct rpc_rqst *rqstp, u32 *p, struct nsm_args *argp) } static int -xdr_decode_stat_res(struct rpc_rqst *rqstp, u32 *p, struct nsm_res *resp) +xdr_decode_stat_res(struct rpc_rqst *rqstp, __be32 *p, struct nsm_res *resp) { resp->status = ntohl(*p++); resp->state = ntohl(*p++); @@ -212,7 +212,7 @@ xdr_decode_stat_res(struct rpc_rqst *rqstp, u32 *p, struct nsm_res *resp) } static int -xdr_decode_stat(struct rpc_rqst *rqstp, u32 *p, struct nsm_res *resp) +xdr_decode_stat(struct rpc_rqst *rqstp, __be32 *p, struct nsm_res *resp) { resp->state = ntohl(*p++); return 0; diff --git a/fs/lockd/svc4proc.c b/fs/lockd/svc4proc.c index 4e719860b4bf..0ce5c81ff507 100644 --- a/fs/lockd/svc4proc.c +++ b/fs/lockd/svc4proc.c @@ -24,14 +24,14 @@ /* * Obtain client and file from arguments */ -static u32 +static __be32 nlm4svc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_host **hostp, struct nlm_file **filp) { struct nlm_host *host = NULL; struct nlm_file *file = NULL; struct nlm_lock *lock = &argp->lock; - u32 error = 0; + __be32 error = 0; /* nfsd callbacks must have been installed for this procedure */ if (!nlmsvc_ops) diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index 814c6064c9e0..7e219b938552 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -334,13 +334,13 @@ static void nlmsvc_freegrantargs(struct nlm_rqst *call) * Attempt to establish a lock, and if it can't be granted, block it * if required. */ -u32 +__be32 nlmsvc_lock(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_lock *lock, int wait, struct nlm_cookie *cookie) { struct nlm_block *block, *newblock = NULL; int error; - u32 ret; + __be32 ret; dprintk("lockd: nlmsvc_lock(%s/%ld, ty=%d, pi=%d, %Ld-%Ld, bl=%d)\n", file->f_file->f_dentry->d_inode->i_sb->s_id, @@ -415,7 +415,7 @@ out: /* * Test for presence of a conflicting lock. */ -u32 +__be32 nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock, struct nlm_lock *conflock) { @@ -448,7 +448,7 @@ nlmsvc_testlock(struct nlm_file *file, struct nlm_lock *lock, * afterwards. In this case the block will still be there, and hence * must be removed. */ -u32 +__be32 nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock) { int error; @@ -476,7 +476,7 @@ nlmsvc_unlock(struct nlm_file *file, struct nlm_lock *lock) * be in progress. * The calling procedure must check whether the file can be closed. 
*/ -u32 +__be32 nlmsvc_cancel_blocked(struct nlm_file *file, struct nlm_lock *lock) { struct nlm_block *block; diff --git a/fs/lockd/svcproc.c b/fs/lockd/svcproc.c index db8d85c32d29..32e99a6e8dca 100644 --- a/fs/lockd/svcproc.c +++ b/fs/lockd/svcproc.c @@ -22,8 +22,8 @@ #define NLMDBG_FACILITY NLMDBG_CLIENT #ifdef CONFIG_LOCKD_V4 -static u32 -cast_to_nlm(u32 status, u32 vers) +static __be32 +cast_to_nlm(__be32 status, u32 vers) { /* Note: status is assumed to be in network byte order !!! */ if (vers != 4){ @@ -52,14 +52,14 @@ cast_to_nlm(u32 status, u32 vers) /* * Obtain client and file from arguments */ -static u32 +static __be32 nlmsvc_retrieve_args(struct svc_rqst *rqstp, struct nlm_args *argp, struct nlm_host **hostp, struct nlm_file **filp) { struct nlm_host *host = NULL; struct nlm_file *file = NULL; struct nlm_lock *lock = &argp->lock; - u32 error = 0; + __be32 error = 0; /* nfsd callbacks must have been installed for this procedure */ if (!nlmsvc_ops) diff --git a/fs/lockd/svcshare.c b/fs/lockd/svcshare.c index b9926ce8782e..6220dc2a3f2c 100644 --- a/fs/lockd/svcshare.c +++ b/fs/lockd/svcshare.c @@ -23,7 +23,7 @@ nlm_cmp_owner(struct nlm_share *share, struct xdr_netobj *oh) && !memcmp(share->s_owner.data, oh->data, oh->len); } -u32 +__be32 nlmsvc_share_file(struct nlm_host *host, struct nlm_file *file, struct nlm_args *argp) { @@ -64,7 +64,7 @@ update: /* * Delete a share. */ -u32 +__be32 nlmsvc_unshare_file(struct nlm_host *host, struct nlm_file *file, struct nlm_args *argp) { diff --git a/fs/lockd/svcsubs.c b/fs/lockd/svcsubs.c index 7dac96e6c82c..e83024e16042 100644 --- a/fs/lockd/svcsubs.c +++ b/fs/lockd/svcsubs.c @@ -78,14 +78,14 @@ static inline unsigned int file_hash(struct nfs_fh *f) * This is not quite right, but for now, we assume the client performs * the proper R/W checking. 
*/ -u32 +__be32 nlm_lookup_file(struct svc_rqst *rqstp, struct nlm_file **result, struct nfs_fh *f) { struct hlist_node *pos; struct nlm_file *file; unsigned int hash; - u32 nfserr; + __be32 nfserr; nlm_debug_print_fh("nlm_file_lookup", f); diff --git a/fs/lockd/xdr.c b/fs/lockd/xdr.c index 61c46facf257..b7c949256e5a 100644 --- a/fs/lockd/xdr.c +++ b/fs/lockd/xdr.c @@ -43,7 +43,7 @@ loff_t_to_s32(loff_t offset) /* * XDR functions for basic NLM types */ -static u32 *nlm_decode_cookie(u32 *p, struct nlm_cookie *c) +static __be32 *nlm_decode_cookie(__be32 *p, struct nlm_cookie *c) { unsigned int len; @@ -69,8 +69,8 @@ static u32 *nlm_decode_cookie(u32 *p, struct nlm_cookie *c) return p; } -static inline u32 * -nlm_encode_cookie(u32 *p, struct nlm_cookie *c) +static inline __be32 * +nlm_encode_cookie(__be32 *p, struct nlm_cookie *c) { *p++ = htonl(c->len); memcpy(p, c->data, c->len); @@ -78,8 +78,8 @@ nlm_encode_cookie(u32 *p, struct nlm_cookie *c) return p; } -static u32 * -nlm_decode_fh(u32 *p, struct nfs_fh *f) +static __be32 * +nlm_decode_fh(__be32 *p, struct nfs_fh *f) { unsigned int len; @@ -95,8 +95,8 @@ nlm_decode_fh(u32 *p, struct nfs_fh *f) return p + XDR_QUADLEN(NFS2_FHSIZE); } -static inline u32 * -nlm_encode_fh(u32 *p, struct nfs_fh *f) +static inline __be32 * +nlm_encode_fh(__be32 *p, struct nfs_fh *f) { *p++ = htonl(NFS2_FHSIZE); memcpy(p, f->data, NFS2_FHSIZE); @@ -106,20 +106,20 @@ nlm_encode_fh(u32 *p, struct nfs_fh *f) /* * Encode and decode owner handle */ -static inline u32 * -nlm_decode_oh(u32 *p, struct xdr_netobj *oh) +static inline __be32 * +nlm_decode_oh(__be32 *p, struct xdr_netobj *oh) { return xdr_decode_netobj(p, oh); } -static inline u32 * -nlm_encode_oh(u32 *p, struct xdr_netobj *oh) +static inline __be32 * +nlm_encode_oh(__be32 *p, struct xdr_netobj *oh) { return xdr_encode_netobj(p, oh); } -static u32 * -nlm_decode_lock(u32 *p, struct nlm_lock *lock) +static __be32 * +nlm_decode_lock(__be32 *p, struct nlm_lock *lock) { struct file_lock *fl = &lock->fl; s32 start, len, end; @@ -153,8 +153,8 @@ nlm_decode_lock(u32 *p, struct nlm_lock *lock) /* * Encode a lock as part of an NLM call */ -static u32 * -nlm_encode_lock(u32 *p, struct nlm_lock *lock) +static __be32 * +nlm_encode_lock(__be32 *p, struct nlm_lock *lock) { struct file_lock *fl = &lock->fl; __s32 start, len; @@ -184,8 +184,8 @@ nlm_encode_lock(u32 *p, struct nlm_lock *lock) /* * Encode result of a TEST/TEST_MSG call */ -static u32 * -nlm_encode_testres(u32 *p, struct nlm_res *resp) +static __be32 * +nlm_encode_testres(__be32 *p, struct nlm_res *resp) { s32 start, len; @@ -221,7 +221,7 @@ nlm_encode_testres(u32 *p, struct nlm_res *resp) * First, the server side XDR functions */ int -nlmsvc_decode_testargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) +nlmsvc_decode_testargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp) { u32 exclusive; @@ -238,7 +238,7 @@ nlmsvc_decode_testargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) } int -nlmsvc_encode_testres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) +nlmsvc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp) { if (!(p = nlm_encode_testres(p, resp))) return 0; @@ -246,7 +246,7 @@ nlmsvc_encode_testres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) } int -nlmsvc_decode_lockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) +nlmsvc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp) { u32 exclusive; @@ -266,7 +266,7 @@ nlmsvc_decode_lockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) } int 
-nlmsvc_decode_cancargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) +nlmsvc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp) { u32 exclusive; @@ -282,7 +282,7 @@ nlmsvc_decode_cancargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) } int -nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) +nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp) { if (!(p = nlm_decode_cookie(p, &argp->cookie)) || !(p = nlm_decode_lock(p, &argp->lock))) @@ -292,7 +292,7 @@ nlmsvc_decode_unlockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) } int -nlmsvc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) +nlmsvc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp) { struct nlm_lock *lock = &argp->lock; @@ -313,7 +313,7 @@ nlmsvc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) } int -nlmsvc_encode_shareres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) +nlmsvc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp) { if (!(p = nlm_encode_cookie(p, &resp->cookie))) return 0; @@ -323,7 +323,7 @@ nlmsvc_encode_shareres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) } int -nlmsvc_encode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) +nlmsvc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp) { if (!(p = nlm_encode_cookie(p, &resp->cookie))) return 0; @@ -332,7 +332,7 @@ nlmsvc_encode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) } int -nlmsvc_decode_notify(struct svc_rqst *rqstp, u32 *p, struct nlm_args *argp) +nlmsvc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp) { struct nlm_lock *lock = &argp->lock; @@ -344,7 +344,7 @@ nlmsvc_decode_notify(struct svc_rqst *rqstp, u32 *p, struct nlm_args *argp) } int -nlmsvc_decode_reboot(struct svc_rqst *rqstp, u32 *p, struct nlm_reboot *argp) +nlmsvc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp) { if (!(p = xdr_decode_string_inplace(p, &argp->mon, &argp->len, SM_MAXSTRLEN))) return 0; @@ -357,7 +357,7 @@ nlmsvc_decode_reboot(struct svc_rqst *rqstp, u32 *p, struct nlm_reboot *argp) } int -nlmsvc_decode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) +nlmsvc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp) { if (!(p = nlm_decode_cookie(p, &resp->cookie))) return 0; @@ -366,13 +366,13 @@ nlmsvc_decode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) } int -nlmsvc_decode_void(struct svc_rqst *rqstp, u32 *p, void *dummy) +nlmsvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy) { return xdr_argsize_check(rqstp, p); } int -nlmsvc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy) +nlmsvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy) { return xdr_ressize_check(rqstp, p); } @@ -389,7 +389,7 @@ nlmclt_decode_void(struct rpc_rqst *req, u32 *p, void *ptr) #endif static int -nlmclt_encode_testargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) +nlmclt_encode_testargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp) { struct nlm_lock *lock = &argp->lock; @@ -403,7 +403,7 @@ nlmclt_encode_testargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) } static int -nlmclt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) +nlmclt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) { if (!(p = nlm_decode_cookie(p, &resp->cookie))) return -EIO; @@ -438,7 +438,7 @@ nlmclt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) static int 
-nlmclt_encode_lockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) +nlmclt_encode_lockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp) { struct nlm_lock *lock = &argp->lock; @@ -455,7 +455,7 @@ nlmclt_encode_lockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) } static int -nlmclt_encode_cancargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) +nlmclt_encode_cancargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp) { struct nlm_lock *lock = &argp->lock; @@ -470,7 +470,7 @@ nlmclt_encode_cancargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) } static int -nlmclt_encode_unlockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) +nlmclt_encode_unlockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp) { struct nlm_lock *lock = &argp->lock; @@ -483,7 +483,7 @@ nlmclt_encode_unlockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) } static int -nlmclt_encode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) +nlmclt_encode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) { if (!(p = nlm_encode_cookie(p, &resp->cookie))) return -EIO; @@ -493,7 +493,7 @@ nlmclt_encode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) } static int -nlmclt_encode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) +nlmclt_encode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) { if (!(p = nlm_encode_testres(p, resp))) return -EIO; @@ -502,7 +502,7 @@ nlmclt_encode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) } static int -nlmclt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) +nlmclt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) { if (!(p = nlm_decode_cookie(p, &resp->cookie))) return -EIO; diff --git a/fs/lockd/xdr4.c b/fs/lockd/xdr4.c index 36eb175ec335..f4c0b2b9f75a 100644 --- a/fs/lockd/xdr4.c +++ b/fs/lockd/xdr4.c @@ -44,8 +44,8 @@ loff_t_to_s64(loff_t offset) /* * XDR functions for basic NLM types */ -static u32 * -nlm4_decode_cookie(u32 *p, struct nlm_cookie *c) +static __be32 * +nlm4_decode_cookie(__be32 *p, struct nlm_cookie *c) { unsigned int len; @@ -71,8 +71,8 @@ nlm4_decode_cookie(u32 *p, struct nlm_cookie *c) return p; } -static u32 * -nlm4_encode_cookie(u32 *p, struct nlm_cookie *c) +static __be32 * +nlm4_encode_cookie(__be32 *p, struct nlm_cookie *c) { *p++ = htonl(c->len); memcpy(p, c->data, c->len); @@ -80,8 +80,8 @@ nlm4_encode_cookie(u32 *p, struct nlm_cookie *c) return p; } -static u32 * -nlm4_decode_fh(u32 *p, struct nfs_fh *f) +static __be32 * +nlm4_decode_fh(__be32 *p, struct nfs_fh *f) { memset(f->data, 0, sizeof(f->data)); f->size = ntohl(*p++); @@ -95,8 +95,8 @@ nlm4_decode_fh(u32 *p, struct nfs_fh *f) return p + XDR_QUADLEN(f->size); } -static u32 * -nlm4_encode_fh(u32 *p, struct nfs_fh *f) +static __be32 * +nlm4_encode_fh(__be32 *p, struct nfs_fh *f) { *p++ = htonl(f->size); if (f->size) p[XDR_QUADLEN(f->size)-1] = 0; /* don't leak anything */ @@ -107,20 +107,20 @@ nlm4_encode_fh(u32 *p, struct nfs_fh *f) /* * Encode and decode owner handle */ -static u32 * -nlm4_decode_oh(u32 *p, struct xdr_netobj *oh) +static __be32 * +nlm4_decode_oh(__be32 *p, struct xdr_netobj *oh) { return xdr_decode_netobj(p, oh); } -static u32 * -nlm4_encode_oh(u32 *p, struct xdr_netobj *oh) +static __be32 * +nlm4_encode_oh(__be32 *p, struct xdr_netobj *oh) { return xdr_encode_netobj(p, oh); } -static u32 * -nlm4_decode_lock(u32 *p, struct nlm_lock *lock) +static __be32 * +nlm4_decode_lock(__be32 *p, struct nlm_lock *lock) { struct file_lock *fl = &lock->fl; __s64 len, start, end; @@ -153,8 +153,8 @@ 
nlm4_decode_lock(u32 *p, struct nlm_lock *lock) /* * Encode a lock as part of an NLM call */ -static u32 * -nlm4_encode_lock(u32 *p, struct nlm_lock *lock) +static __be32 * +nlm4_encode_lock(__be32 *p, struct nlm_lock *lock) { struct file_lock *fl = &lock->fl; __s64 start, len; @@ -185,8 +185,8 @@ nlm4_encode_lock(u32 *p, struct nlm_lock *lock) /* * Encode result of a TEST/TEST_MSG call */ -static u32 * -nlm4_encode_testres(u32 *p, struct nlm_res *resp) +static __be32 * +nlm4_encode_testres(__be32 *p, struct nlm_res *resp) { s64 start, len; @@ -227,7 +227,7 @@ nlm4_encode_testres(u32 *p, struct nlm_res *resp) * First, the server side XDR functions */ int -nlm4svc_decode_testargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) +nlm4svc_decode_testargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp) { u32 exclusive; @@ -244,7 +244,7 @@ nlm4svc_decode_testargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) } int -nlm4svc_encode_testres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) +nlm4svc_encode_testres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp) { if (!(p = nlm4_encode_testres(p, resp))) return 0; @@ -252,7 +252,7 @@ nlm4svc_encode_testres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) } int -nlm4svc_decode_lockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) +nlm4svc_decode_lockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp) { u32 exclusive; @@ -272,7 +272,7 @@ nlm4svc_decode_lockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) } int -nlm4svc_decode_cancargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) +nlm4svc_decode_cancargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp) { u32 exclusive; @@ -288,7 +288,7 @@ nlm4svc_decode_cancargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) } int -nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) +nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp) { if (!(p = nlm4_decode_cookie(p, &argp->cookie)) || !(p = nlm4_decode_lock(p, &argp->lock))) @@ -298,7 +298,7 @@ nlm4svc_decode_unlockargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) } int -nlm4svc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) +nlm4svc_decode_shareargs(struct svc_rqst *rqstp, __be32 *p, nlm_args *argp) { struct nlm_lock *lock = &argp->lock; @@ -319,7 +319,7 @@ nlm4svc_decode_shareargs(struct svc_rqst *rqstp, u32 *p, nlm_args *argp) } int -nlm4svc_encode_shareres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) +nlm4svc_encode_shareres(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp) { if (!(p = nlm4_encode_cookie(p, &resp->cookie))) return 0; @@ -329,7 +329,7 @@ nlm4svc_encode_shareres(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) } int -nlm4svc_encode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) +nlm4svc_encode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp) { if (!(p = nlm4_encode_cookie(p, &resp->cookie))) return 0; @@ -338,7 +338,7 @@ nlm4svc_encode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) } int -nlm4svc_decode_notify(struct svc_rqst *rqstp, u32 *p, struct nlm_args *argp) +nlm4svc_decode_notify(struct svc_rqst *rqstp, __be32 *p, struct nlm_args *argp) { struct nlm_lock *lock = &argp->lock; @@ -350,7 +350,7 @@ nlm4svc_decode_notify(struct svc_rqst *rqstp, u32 *p, struct nlm_args *argp) } int -nlm4svc_decode_reboot(struct svc_rqst *rqstp, u32 *p, struct nlm_reboot *argp) +nlm4svc_decode_reboot(struct svc_rqst *rqstp, __be32 *p, struct nlm_reboot *argp) { if (!(p = xdr_decode_string_inplace(p, 
&argp->mon, &argp->len, SM_MAXSTRLEN))) return 0; @@ -363,7 +363,7 @@ nlm4svc_decode_reboot(struct svc_rqst *rqstp, u32 *p, struct nlm_reboot *argp) } int -nlm4svc_decode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) +nlm4svc_decode_res(struct svc_rqst *rqstp, __be32 *p, struct nlm_res *resp) { if (!(p = nlm4_decode_cookie(p, &resp->cookie))) return 0; @@ -372,13 +372,13 @@ nlm4svc_decode_res(struct svc_rqst *rqstp, u32 *p, struct nlm_res *resp) } int -nlm4svc_decode_void(struct svc_rqst *rqstp, u32 *p, void *dummy) +nlm4svc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy) { return xdr_argsize_check(rqstp, p); } int -nlm4svc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy) +nlm4svc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy) { return xdr_ressize_check(rqstp, p); } @@ -388,14 +388,14 @@ nlm4svc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy) */ #ifdef NLMCLNT_SUPPORT_SHARES static int -nlm4clt_decode_void(struct rpc_rqst *req, u32 *p, void *ptr) +nlm4clt_decode_void(struct rpc_rqst *req, __be32 *p, void *ptr) { return 0; } #endif static int -nlm4clt_encode_testargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) +nlm4clt_encode_testargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp) { struct nlm_lock *lock = &argp->lock; @@ -409,7 +409,7 @@ nlm4clt_encode_testargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) } static int -nlm4clt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) +nlm4clt_decode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) { if (!(p = nlm4_decode_cookie(p, &resp->cookie))) return -EIO; @@ -444,7 +444,7 @@ nlm4clt_decode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) static int -nlm4clt_encode_lockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) +nlm4clt_encode_lockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp) { struct nlm_lock *lock = &argp->lock; @@ -461,7 +461,7 @@ nlm4clt_encode_lockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) } static int -nlm4clt_encode_cancargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) +nlm4clt_encode_cancargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp) { struct nlm_lock *lock = &argp->lock; @@ -476,7 +476,7 @@ nlm4clt_encode_cancargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) } static int -nlm4clt_encode_unlockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) +nlm4clt_encode_unlockargs(struct rpc_rqst *req, __be32 *p, nlm_args *argp) { struct nlm_lock *lock = &argp->lock; @@ -489,7 +489,7 @@ nlm4clt_encode_unlockargs(struct rpc_rqst *req, u32 *p, nlm_args *argp) } static int -nlm4clt_encode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) +nlm4clt_encode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) { if (!(p = nlm4_encode_cookie(p, &resp->cookie))) return -EIO; @@ -499,7 +499,7 @@ nlm4clt_encode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) } static int -nlm4clt_encode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) +nlm4clt_encode_testres(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) { if (!(p = nlm4_encode_testres(p, resp))) return -EIO; @@ -508,7 +508,7 @@ nlm4clt_encode_testres(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) } static int -nlm4clt_decode_res(struct rpc_rqst *req, u32 *p, struct nlm_res *resp) +nlm4clt_decode_res(struct rpc_rqst *req, __be32 *p, struct nlm_res *resp) { if (!(p = nlm4_decode_cookie(p, &resp->cookie))) return -EIO; diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 2909619c0295..862d9730a60d 100644 --- 
a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -154,7 +154,7 @@ int nlm_async_reply(struct nlm_rqst *, u32, const struct rpc_call_ops *); struct nlm_wait * nlmclnt_prepare_block(struct nlm_host *host, struct file_lock *fl); void nlmclnt_finish_block(struct nlm_wait *block); int nlmclnt_block(struct nlm_wait *block, struct nlm_rqst *req, long timeout); -u32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *); +__be32 nlmclnt_grant(const struct sockaddr_in *addr, const struct nlm_lock *); void nlmclnt_recovery(struct nlm_host *); int nlmclnt_reclaim(struct nlm_host *, struct file_lock *); void nlmclnt_next_cookie(struct nlm_cookie *); @@ -184,12 +184,12 @@ typedef int (*nlm_host_match_fn_t)(struct nlm_host *cur, struct nlm_host *ref) /* * Server-side lock handling */ -u32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, +__be32 nlmsvc_lock(struct svc_rqst *, struct nlm_file *, struct nlm_lock *, int, struct nlm_cookie *); -u32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *); -u32 nlmsvc_testlock(struct nlm_file *, struct nlm_lock *, +__be32 nlmsvc_unlock(struct nlm_file *, struct nlm_lock *); +__be32 nlmsvc_testlock(struct nlm_file *, struct nlm_lock *, struct nlm_lock *); -u32 nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *); +__be32 nlmsvc_cancel_blocked(struct nlm_file *, struct nlm_lock *); unsigned long nlmsvc_retry_blocked(void); void nlmsvc_traverse_blocks(struct nlm_host *, struct nlm_file *, nlm_host_match_fn_t match); @@ -198,7 +198,7 @@ void nlmsvc_grant_reply(struct nlm_cookie *, u32); /* * File handling for the server personality */ -u32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **, +__be32 nlm_lookup_file(struct svc_rqst *, struct nlm_file **, struct nfs_fh *); void nlm_release_file(struct nlm_file *); void nlmsvc_mark_resources(void); diff --git a/include/linux/lockd/share.h b/include/linux/lockd/share.h index cd7816e74c05..630c5bf69b07 100644 --- a/include/linux/lockd/share.h +++ b/include/linux/lockd/share.h @@ -21,9 +21,9 @@ struct nlm_share { u32 s_mode; /* deny mode */ }; -u32 nlmsvc_share_file(struct nlm_host *, struct nlm_file *, +__be32 nlmsvc_share_file(struct nlm_host *, struct nlm_file *, struct nlm_args *); -u32 nlmsvc_unshare_file(struct nlm_host *, struct nlm_file *, +__be32 nlmsvc_unshare_file(struct nlm_host *, struct nlm_file *, struct nlm_args *); void nlmsvc_traverse_shares(struct nlm_host *, struct nlm_file *, nlm_host_match_fn_t); diff --git a/include/linux/lockd/xdr.h b/include/linux/lockd/xdr.h index 66fdae3b490c..29e7d9fc9dad 100644 --- a/include/linux/lockd/xdr.h +++ b/include/linux/lockd/xdr.h @@ -90,19 +90,19 @@ struct nlm_reboot { */ #define NLMSVC_XDRSIZE sizeof(struct nlm_args) -int nlmsvc_decode_testargs(struct svc_rqst *, u32 *, struct nlm_args *); -int nlmsvc_encode_testres(struct svc_rqst *, u32 *, struct nlm_res *); -int nlmsvc_decode_lockargs(struct svc_rqst *, u32 *, struct nlm_args *); -int nlmsvc_decode_cancargs(struct svc_rqst *, u32 *, struct nlm_args *); -int nlmsvc_decode_unlockargs(struct svc_rqst *, u32 *, struct nlm_args *); -int nlmsvc_encode_res(struct svc_rqst *, u32 *, struct nlm_res *); -int nlmsvc_decode_res(struct svc_rqst *, u32 *, struct nlm_res *); -int nlmsvc_encode_void(struct svc_rqst *, u32 *, void *); -int nlmsvc_decode_void(struct svc_rqst *, u32 *, void *); -int nlmsvc_decode_shareargs(struct svc_rqst *, u32 *, struct nlm_args *); -int nlmsvc_encode_shareres(struct svc_rqst *, u32 *, struct nlm_res *); -int nlmsvc_decode_notify(struct svc_rqst *, u32 
*, struct nlm_args *); -int nlmsvc_decode_reboot(struct svc_rqst *, u32 *, struct nlm_reboot *); +int nlmsvc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_encode_void(struct svc_rqst *, __be32 *, void *); +int nlmsvc_decode_void(struct svc_rqst *, __be32 *, void *); +int nlmsvc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlmsvc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlmsvc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *); /* int nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *); int nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *); diff --git a/include/linux/lockd/xdr4.h b/include/linux/lockd/xdr4.h index 3cc1ae25009b..dd12b4c9e613 100644 --- a/include/linux/lockd/xdr4.h +++ b/include/linux/lockd/xdr4.h @@ -23,19 +23,19 @@ -int nlm4svc_decode_testargs(struct svc_rqst *, u32 *, struct nlm_args *); -int nlm4svc_encode_testres(struct svc_rqst *, u32 *, struct nlm_res *); -int nlm4svc_decode_lockargs(struct svc_rqst *, u32 *, struct nlm_args *); -int nlm4svc_decode_cancargs(struct svc_rqst *, u32 *, struct nlm_args *); -int nlm4svc_decode_unlockargs(struct svc_rqst *, u32 *, struct nlm_args *); -int nlm4svc_encode_res(struct svc_rqst *, u32 *, struct nlm_res *); -int nlm4svc_decode_res(struct svc_rqst *, u32 *, struct nlm_res *); -int nlm4svc_encode_void(struct svc_rqst *, u32 *, void *); -int nlm4svc_decode_void(struct svc_rqst *, u32 *, void *); -int nlm4svc_decode_shareargs(struct svc_rqst *, u32 *, struct nlm_args *); -int nlm4svc_encode_shareres(struct svc_rqst *, u32 *, struct nlm_res *); -int nlm4svc_decode_notify(struct svc_rqst *, u32 *, struct nlm_args *); -int nlm4svc_decode_reboot(struct svc_rqst *, u32 *, struct nlm_reboot *); +int nlm4svc_decode_testargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_encode_testres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_decode_lockargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_decode_cancargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_decode_unlockargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_encode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_decode_res(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_encode_void(struct svc_rqst *, __be32 *, void *); +int nlm4svc_decode_void(struct svc_rqst *, __be32 *, void *); +int nlm4svc_decode_shareargs(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_encode_shareres(struct svc_rqst *, __be32 *, struct nlm_res *); +int nlm4svc_decode_notify(struct svc_rqst *, __be32 *, struct nlm_args *); +int nlm4svc_decode_reboot(struct svc_rqst *, __be32 *, struct nlm_reboot *); /* int nlmclt_encode_testargs(struct rpc_rqst *, u32 *, struct nlm_args *); int nlmclt_encode_lockargs(struct rpc_rqst *, u32 *, struct nlm_args *); -- cgit v1.2.3 From 0dbb4c6799cf8fa8c5ba1926153a30960117477d Mon Sep 17 00:00:00 2001 From: Al Viro 
<viro@ftp.linux.org.uk> Date: Thu, 19 Oct 2006 23:28:49 -0700 Subject: [PATCH] xdr annotations: NFS readdir entries on-the-wire data is big-endian [in large part pulled from Alexey's patch] Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no> Acked-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/nfs/dir.c | 6 +++--- fs/nfs/internal.h | 6 +++--- fs/nfs/nfs2xdr.c | 4 ++-- fs/nfs/nfs3xdr.c | 4 ++-- fs/nfs/nfs4_fs.h | 2 +- fs/nfs/nfs4proc.c | 4 ++-- fs/nfs/nfs4xdr.c | 2 +- include/linux/nfs_xdr.h | 2 +- 8 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index c86a1ead4772..4133ef5264e5 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -142,12 +142,12 @@ nfs_opendir(struct inode *inode, struct file *filp) return res; } -typedef u32 * (*decode_dirent_t)(u32 *, struct nfs_entry *, int); +typedef __be32 * (*decode_dirent_t)(__be32 *, struct nfs_entry *, int); typedef struct { struct file *file; struct page *page; unsigned long page_index; - u32 *ptr; + __be32 *ptr; u64 *dir_cookie; loff_t current_index; struct nfs_entry *entry; @@ -220,7 +220,7 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page *page) static inline int dir_decode(nfs_readdir_descriptor_t *desc) { - u32 *p = desc->ptr; + __be32 *p = desc->ptr; p = desc->decode(p, desc->entry, desc->plus); if (IS_ERR(p)) return PTR_ERR(p); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index bea0b016bd70..d205466233f6 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -93,15 +93,15 @@ extern void nfs_destroy_directcache(void); /* nfs2xdr.c */ extern int nfs_stat_to_errno(int); extern struct rpc_procinfo nfs_procedures[]; -extern u32 * nfs_decode_dirent(u32 *, struct nfs_entry *, int); +extern __be32 * nfs_decode_dirent(__be32 *, struct nfs_entry *, int); /* nfs3xdr.c */ extern struct rpc_procinfo nfs3_procedures[]; -extern u32 *nfs3_decode_dirent(u32 *, struct nfs_entry *, int); +extern __be32 *nfs3_decode_dirent(__be32 *, struct nfs_entry *, int); /* nfs4xdr.c */ #ifdef CONFIG_NFS_V4 -extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); +extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); #endif /* nfs4proc.c */ diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 1d801e30c40e..3be4e72a0227 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -468,8 +468,8 @@ err_unmap: goto out; } -u32 * -nfs_decode_dirent(u32 *p, struct nfs_entry *entry, int plus) +__be32 * +nfs_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) { if (!*p++) { if (!*p) diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index b4e740e4494a..0ace092d126f 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -583,8 +583,8 @@ err_unmap: goto out; } -u32 * -nfs3_decode_dirent(u32 *p, struct nfs_entry *entry, int plus) +__be32 * +nfs3_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) { struct nfs_entry old = *entry; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 61095fe4b5ca..6f346677332d 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -212,7 +212,7 @@ extern void nfs_free_seqid(struct nfs_seqid *seqid); extern const nfs4_stateid zero_stateid; /* nfs4xdr.c */ -extern uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus); +extern __be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus); extern struct rpc_procinfo nfs4_procedures[]; 
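The whole run of patches above and below is mechanical for a reason: __be32 only pays off once it is applied everywhere a wire word travels, so that sparse can type-check the entire path from decode to encode. As an aside from the patches themselves, here is a minimal, self-contained sketch of the machinery; the __CHECKER__ guard and attribute names are the real ones from <linux/types.h>, the rest is simplified for illustration:

#ifdef __CHECKER__			/* defined only when sparse runs */
#define __bitwise	__attribute__((bitwise))
#define __force		__attribute__((force))
#else
#define __bitwise
#define __force
#endif

typedef unsigned int u32;
typedef u32 __bitwise __be32;		/* 32-bit big-endian wire word */

/* The only sanctioned conversions; little-endian build shown. */
static inline __be32 cpu_to_be32(u32 x)
{
	return (__force __be32)__builtin_bswap32(x);
}

static inline u32 be32_to_cpu(__be32 x)
{
	return __builtin_bswap32((__force u32)x);
}

static void demo(__be32 *p, u32 host)
{
	*p = cpu_to_be32(host);		/* clean */
	/* *p = host; -- sparse warns (different base types);
	 *             gcc stays silent				*/
}

To gcc the typedef collapses to a plain u32, so the annotations cost nothing at run time; only sparse sees the distinct type.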
struct nfs4_mount_data; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 7421bcb3b728..8d09b47c91b9 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -141,7 +141,7 @@ const u32 nfs4_fs_locations_bitmap[2] = { static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, struct nfs4_readdir_arg *readdir) { - u32 *start, *p; + __be32 *start, *p; BUG_ON(readdir->count < 80); if (cookie > 2) { @@ -162,7 +162,7 @@ static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, * when talking to the server, we always send cookie 0 * instead of 1 or 2. */ - start = p = (u32 *)kmap_atomic(*readdir->pages, KM_USER0); + start = p = kmap_atomic(*readdir->pages, KM_USER0); if (cookie == 0) { *p++ = xdr_one; /* next */ diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e284123b9774..0cf3fa312a33 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -4421,7 +4421,7 @@ out: return status; } -uint32_t *nfs4_decode_dirent(uint32_t *p, struct nfs_entry *entry, int plus) +__be32 *nfs4_decode_dirent(__be32 *p, struct nfs_entry *entry, int plus) { uint32_t bitmap[2] = {0}; uint32_t len; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index dc5397d9d23c..ac8058bc8f94 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -811,7 +811,7 @@ struct nfs_rpc_ops { int (*pathconf) (struct nfs_server *, struct nfs_fh *, struct nfs_pathconf *); int (*set_capabilities)(struct nfs_server *, struct nfs_fh *); - u32 * (*decode_dirent)(u32 *, struct nfs_entry *, int plus); + __be32 *(*decode_dirent)(__be32 *, struct nfs_entry *, int plus); void (*read_setup) (struct nfs_read_data *); int (*read_done) (struct rpc_task *, struct nfs_read_data *); void (*write_setup) (struct nfs_write_data *, int how); -- cgit v1.2.3 From bc4785cd475a11ba125df7af674e16c6ea1cfc30 Mon Sep 17 00:00:00 2001 From: Al Viro <viro@ftp.linux.org.uk> Date: Thu, 19 Oct 2006 23:28:51 -0700 Subject: [PATCH] nfs: verifier is network-endian Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no> Acked-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/nfs/nfs3proc.c | 2 +- fs/nfs/nfs4proc.c | 6 +++--- include/linux/nfs_fs.h | 2 +- include/linux/nfs_xdr.h | 8 ++++---- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c index 3b234d4601e7..e5f128ffc32d 100644 --- a/fs/nfs/nfs3proc.c +++ b/fs/nfs/nfs3proc.c @@ -668,7 +668,7 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred, { struct inode *dir = dentry->d_inode; struct nfs_fattr dir_attr; - u32 *verf = NFS_COOKIEVERF(dir); + __be32 *verf = NFS_COOKIEVERF(dir); struct nfs3_readdirargs arg = { .fh = NFS_FH(dir), .cookie = cookie, diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 8d09b47c91b9..8118036cc449 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -138,7 +138,7 @@ const u32 nfs4_fs_locations_bitmap[2] = { | FATTR4_WORD1_MOUNTED_ON_FILEID }; -static void nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, +static void nfs4_setup_readdir(u64 cookie, __be32 *verifier, struct dentry *dentry, struct nfs4_readdir_arg *readdir) { __be32 *start, *p; @@ -2915,11 +2915,11 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program, unsigned short po .rpc_resp = clp, .rpc_cred = cred, }; - u32 *p; + __be32 *p; int loop = 0; int status; - p = (u32*)sc_verifier.data; + p = 
(__be32*)sc_verifier.data; *p++ = htonl((u32)clp->cl_boot_time.tv_sec); *p = htonl((u32)clp->cl_boot_time.tv_nsec); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 6b2de1be5815..45228c1a1195 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -157,7 +157,7 @@ struct nfs_inode { * This is the cookie verifier used for NFSv3 readdir * operations */ - __u32 cookieverf[2]; + __be32 cookieverf[2]; /* * This is the list of dirty unwritten pages. diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ac8058bc8f94..768c1ad5ff6f 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -266,7 +266,7 @@ struct nfs_writeargs { struct nfs_writeverf { enum nfs3_stable_how committed; - __u32 verifier[2]; + __be32 verifier[2]; }; struct nfs_writeres { @@ -420,7 +420,7 @@ struct nfs3_createargs { unsigned int len; struct iattr * sattr; enum nfs3_createmode createmode; - __u32 verifier[2]; + __be32 verifier[2]; }; struct nfs3_mkdirargs { @@ -467,7 +467,7 @@ struct nfs3_linkargs { struct nfs3_readdirargs { struct nfs_fh * fh; __u64 cookie; - __u32 verf[2]; + __be32 verf[2]; int plus; unsigned int count; struct page ** pages; @@ -503,7 +503,7 @@ struct nfs3_linkres { struct nfs3_readdirres { struct nfs_fattr * dir_attr; - __u32 * verf; + __be32 * verf; int plus; }; -- cgit v1.2.3 From 63f103111fdfc3cba00e4c94921d32362f375d93 Mon Sep 17 00:00:00 2001 From: Al Viro <viro@ftp.linux.org.uk> Date: Thu, 19 Oct 2006 23:28:54 -0700 Subject: [PATCH] nfsd: nfserrno() endianness annotations Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no> Acked-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/nfsd/nfsproc.c | 7 +++---- include/linux/nfsd/export.h | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c index 09030afd7249..03ab6822291f 100644 --- a/fs/nfsd/nfsproc.c +++ b/fs/nfsd/nfsproc.c @@ -579,11 +579,11 @@ struct svc_version nfsd_version2 = { /* * Map errnos to NFS errnos. 
*/ -int +__be32 nfserrno (int errno) { static struct { - int nfserr; + __be32 nfserr; int syserr; } nfs_errtbl[] = { { nfs_ok, 0 }, @@ -615,11 +615,10 @@ nfserrno (int errno) { nfserr_badname, -ESRCH }, { nfserr_io, -ETXTBSY }, { nfserr_notsupp, -EOPNOTSUPP }, - { -1, -EIO } }; int i; - for (i = 0; nfs_errtbl[i].nfserr != -1; i++) { + for (i = 0; i < ARRAY_SIZE(nfs_errtbl); i++) { if (nfs_errtbl[i].syserr == errno) return nfs_errtbl[i].nfserr; } diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 6e78ea969f49..27666f5b8b53 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -118,7 +118,7 @@ struct svc_export * exp_parent(struct auth_domain *clp, int exp_rootfh(struct auth_domain *, char *path, struct knfsd_fh *, int maxsize); int exp_pseudoroot(struct auth_domain *, struct svc_fh *fhp, struct cache_req *creq); -int nfserrno(int errno); +__be32 nfserrno(int errno); extern struct cache_detail svc_export_cache; -- cgit v1.2.3 From 83b11340d683a67a77e35a5ffb5ad4afbf0be4e5 Mon Sep 17 00:00:00 2001 From: Al Viro <viro@ftp.linux.org.uk> Date: Thu, 19 Oct 2006 23:28:55 -0700 Subject: [PATCH] nfsfh simple endianness annotations Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no> Acked-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/nfsd/nfsfh.c | 10 +++++----- include/linux/nfsd/nfsfh.h | 6 +++--- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 501d83884530..727ab3bd450d 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -76,7 +76,7 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry) * comment in the NFSv3 spec says this is incorrect (implementation notes for * the write call). */ -static inline int +static inline __be32 nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type) { /* Type can be negative when creating hardlinks - not to a dir */ @@ -110,13 +110,13 @@ nfsd_mode_check(struct svc_rqst *rqstp, umode_t mode, int type) * This is only called at the start of an nfsproc call, so fhp points to * a svc_fh which is all 0 except for the over-the-wire file handle. */ -u32 +__be32 fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access) { struct knfsd_fh *fh = &fhp->fh_handle; struct svc_export *exp = NULL; struct dentry *dentry; - u32 error = 0; + __be32 error = 0; dprintk("nfsd: fh_verify(%s)\n", SVCFH_fmt(fhp)); @@ -315,7 +315,7 @@ static inline void _fh_update_old(struct dentry *dentry, fh->ofh_dirino = 0; } -int +__be32 fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, struct svc_fh *ref_fh) { /* ref_fh is a reference file handle. @@ -451,7 +451,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, st * Update file handle information after changing a dentry. 
* This is only called by nfsd_create, nfsd_create_v3 and nfsd_proc_create */ -int +__be32 fh_update(struct svc_fh *fhp) { struct dentry *dentry; diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h index 069257ea99a0..749bad1ca16f 100644 --- a/include/linux/nfsd/nfsfh.h +++ b/include/linux/nfsd/nfsfh.h @@ -209,9 +209,9 @@ extern char * SVCFH_fmt(struct svc_fh *fhp); /* * Function prototypes */ -u32 fh_verify(struct svc_rqst *, struct svc_fh *, int, int); -int fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *); -int fh_update(struct svc_fh *); +__be32 fh_verify(struct svc_rqst *, struct svc_fh *, int, int); +__be32 fh_compose(struct svc_fh *, struct svc_export *, struct dentry *, struct svc_fh *); +__be32 fh_update(struct svc_fh *); void fh_put(struct svc_fh *); static __inline__ struct svc_fh * -- cgit v1.2.3 From 131a21c2177c267ab259fcd06947c6f593a7de8e Mon Sep 17 00:00:00 2001 From: Al Viro <viro@ftp.linux.org.uk> Date: Thu, 19 Oct 2006 23:28:56 -0700 Subject: [PATCH] xdr annotations: NFSv2 server Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no> Acked-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/nfsd/nfs2acl.c | 18 ++++++------ fs/nfsd/nfsxdr.c | 72 +++++++++++++++++++++++------------------------ include/linux/nfsd/nfsd.h | 2 +- include/linux/nfsd/xdr.h | 50 ++++++++++++++++---------------- 4 files changed, 71 insertions(+), 71 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c index 8d48616882c1..fd5397d8c62a 100644 --- a/fs/nfsd/nfs2acl.c +++ b/fs/nfsd/nfs2acl.c @@ -158,7 +158,7 @@ static __be32 nfsacld_proc_access(struct svc_rqst *rqstp, struct nfsd3_accessarg /* * XDR decode functions */ -static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p, +static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_getaclargs *argp) { if (!(p = nfs2svc_decode_fh(p, &argp->fh))) @@ -169,7 +169,7 @@ static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p, } -static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p, +static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_setaclargs *argp) { struct kvec *head = rqstp->rq_arg.head; @@ -194,7 +194,7 @@ static int nfsaclsvc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p, return (n > 0); } -static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, u32 *p, +static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *argp) { if (!(p = nfs2svc_decode_fh(p, &argp->fh))) @@ -202,7 +202,7 @@ static int nfsaclsvc_decode_fhandleargs(struct svc_rqst *rqstp, u32 *p, return xdr_argsize_check(rqstp, p); } -static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, u32 *p, +static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_accessargs *argp) { if (!(p = nfs2svc_decode_fh(p, &argp->fh))) @@ -217,7 +217,7 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, u32 *p, */ /* GETACL */ -static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p, +static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_getaclres *resp) { struct dentry *dentry = resp->fh.fh_dentry; @@ -259,7 +259,7 @@ static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, u32 *p, return 1; } -static int nfsaclsvc_encode_attrstatres(struct 
svc_rqst *rqstp, u32 *p, +static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, __be32 *p, struct nfsd_attrstat *resp) { p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); @@ -267,7 +267,7 @@ static int nfsaclsvc_encode_attrstatres(struct svc_rqst *rqstp, u32 *p, } /* ACCESS */ -static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, u32 *p, +static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_accessres *resp) { p = nfs2svc_encode_fattr(rqstp, p, &resp->fh); @@ -278,7 +278,7 @@ static int nfsaclsvc_encode_accessres(struct svc_rqst *rqstp, u32 *p, /* * XDR release functions */ -static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, u32 *p, +static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_getaclres *resp) { fh_put(&resp->fh); @@ -287,7 +287,7 @@ static int nfsaclsvc_release_getacl(struct svc_rqst *rqstp, u32 *p, return 1; } -static int nfsaclsvc_release_fhandle(struct svc_rqst *rqstp, u32 *p, +static int nfsaclsvc_release_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *resp) { fh_put(&resp->fh); diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c index 1135c0d14557..56ebb1443e0e 100644 --- a/fs/nfsd/nfsxdr.c +++ b/fs/nfsd/nfsxdr.c @@ -37,8 +37,8 @@ static u32 nfs_ftypes[] = { /* * XDR functions for basic NFS types */ -static u32 * -decode_fh(u32 *p, struct svc_fh *fhp) +static __be32 * +decode_fh(__be32 *p, struct svc_fh *fhp) { fh_init(fhp, NFS_FHSIZE); memcpy(&fhp->fh_handle.fh_base, p, NFS_FHSIZE); @@ -50,13 +50,13 @@ decode_fh(u32 *p, struct svc_fh *fhp) } /* Helper function for NFSv2 ACL code */ -u32 *nfs2svc_decode_fh(u32 *p, struct svc_fh *fhp) +__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp) { return decode_fh(p, fhp); } -static inline u32 * -encode_fh(u32 *p, struct svc_fh *fhp) +static inline __be32 * +encode_fh(__be32 *p, struct svc_fh *fhp) { memcpy(p, &fhp->fh_handle.fh_base, NFS_FHSIZE); return p + (NFS_FHSIZE>> 2); @@ -66,8 +66,8 @@ encode_fh(u32 *p, struct svc_fh *fhp) * Decode a file name and make sure that the path contains * no slashes or null bytes. 
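decode_filename() and decode_pathname() just below both walk the same XDR layout: a big-endian length word, then the bytes, padded out to a 4-byte boundary. A stand-alone sketch of that walk, with userspace types and an illustrative name (the rounding is the same as the kernel's XDR_QUADLEN macro):

#include <stdint.h>
#include <arpa/inet.h>			/* ntohl() */

#define XDR_QUADLEN(l)	(((l) + 3) >> 2)	/* bytes -> whole 4-byte words */

/*
 * Parse an XDR string in place: return the advanced cursor, or NULL
 * if the claimed length exceeds what the caller will accept.
 */
static uint32_t *decode_string(uint32_t *p, char **name,
			       unsigned int *len, unsigned int maxlen)
{
	*len = ntohl(*p++);		/* length travels big-endian */
	if (*len > maxlen)
		return NULL;
	*name = (char *)p;		/* payload stays where it is */
	return p + XDR_QUADLEN(*len);	/* skip payload plus padding */
}

The NULL-or-advanced-pointer convention is exactly what lets callers chain checks in the style seen throughout these files: if (!(p = decode_fh(p, ...)) || !(p = decode_filename(p, ...))) return 0;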
*/ -static inline u32 * -decode_filename(u32 *p, char **namp, int *lenp) +static inline __be32 * +decode_filename(__be32 *p, char **namp, int *lenp) { char *name; int i; @@ -82,8 +82,8 @@ decode_filename(u32 *p, char **namp, int *lenp) return p; } -static inline u32 * -decode_pathname(u32 *p, char **namp, int *lenp) +static inline __be32 * +decode_pathname(__be32 *p, char **namp, int *lenp) { char *name; int i; @@ -98,8 +98,8 @@ decode_pathname(u32 *p, char **namp, int *lenp) return p; } -static inline u32 * -decode_sattr(u32 *p, struct iattr *iap) +static inline __be32 * +decode_sattr(__be32 *p, struct iattr *iap) { u32 tmp, tmp1; @@ -151,8 +151,8 @@ decode_sattr(u32 *p, struct iattr *iap) return p; } -static u32 * -encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp, +static __be32 * +encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { struct dentry *dentry = fhp->fh_dentry; @@ -195,7 +195,7 @@ encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp, } /* Helper function for NFSv2 ACL code */ -u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { struct kstat stat; vfs_getattr(fhp->fh_export->ex_mnt, fhp->fh_dentry, &stat); @@ -206,13 +206,13 @@ u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) * XDR decode functions */ int -nfssvc_decode_void(struct svc_rqst *rqstp, u32 *p, void *dummy) +nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy) { return xdr_argsize_check(rqstp, p); } int -nfssvc_decode_fhandle(struct svc_rqst *rqstp, u32 *p, struct nfsd_fhandle *args) +nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args) { if (!(p = decode_fh(p, &args->fh))) return 0; @@ -220,7 +220,7 @@ nfssvc_decode_fhandle(struct svc_rqst *rqstp, u32 *p, struct nfsd_fhandle *args) } int -nfssvc_decode_sattrargs(struct svc_rqst *rqstp, u32 *p, +nfssvc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_sattrargs *args) { if (!(p = decode_fh(p, &args->fh)) @@ -231,7 +231,7 @@ nfssvc_decode_sattrargs(struct svc_rqst *rqstp, u32 *p, } int -nfssvc_decode_diropargs(struct svc_rqst *rqstp, u32 *p, +nfssvc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_diropargs *args) { if (!(p = decode_fh(p, &args->fh)) @@ -242,7 +242,7 @@ nfssvc_decode_diropargs(struct svc_rqst *rqstp, u32 *p, } int -nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p, +nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readargs *args) { unsigned int len; @@ -273,7 +273,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, u32 *p, } int -nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, +nfssvc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_writeargs *args) { unsigned int len; @@ -303,7 +303,7 @@ nfssvc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, } int -nfssvc_decode_createargs(struct svc_rqst *rqstp, u32 *p, +nfssvc_decode_createargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_createargs *args) { if (!(p = decode_fh(p, &args->fh)) @@ -315,7 +315,7 @@ nfssvc_decode_createargs(struct svc_rqst *rqstp, u32 *p, } int -nfssvc_decode_renameargs(struct svc_rqst *rqstp, u32 *p, +nfssvc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_renameargs *args) { if (!(p = decode_fh(p, &args->ffh)) @@ -328,7 +328,7 @@ nfssvc_decode_renameargs(struct svc_rqst *rqstp, u32 *p, } int -nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, 
u32 *p, struct nfsd_readlinkargs *args) +nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkargs *args) { if (!(p = decode_fh(p, &args->fh))) return 0; @@ -338,7 +338,7 @@ nfssvc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, struct nfsd_readlinka } int -nfssvc_decode_linkargs(struct svc_rqst *rqstp, u32 *p, +nfssvc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_linkargs *args) { if (!(p = decode_fh(p, &args->ffh)) @@ -350,7 +350,7 @@ nfssvc_decode_linkargs(struct svc_rqst *rqstp, u32 *p, } int -nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p, +nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_symlinkargs *args) { if (!(p = decode_fh(p, &args->ffh)) @@ -363,7 +363,7 @@ nfssvc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p, } int -nfssvc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p, +nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readdirargs *args) { if (!(p = decode_fh(p, &args->fh))) @@ -382,13 +382,13 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p, * XDR encode functions */ int -nfssvc_encode_void(struct svc_rqst *rqstp, u32 *p, void *dummy) +nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p, void *dummy) { return xdr_ressize_check(rqstp, p); } int -nfssvc_encode_attrstat(struct svc_rqst *rqstp, u32 *p, +nfssvc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p, struct nfsd_attrstat *resp) { p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); @@ -396,7 +396,7 @@ nfssvc_encode_attrstat(struct svc_rqst *rqstp, u32 *p, } int -nfssvc_encode_diropres(struct svc_rqst *rqstp, u32 *p, +nfssvc_encode_diropres(struct svc_rqst *rqstp, __be32 *p, struct nfsd_diropres *resp) { p = encode_fh(p, &resp->fh); @@ -405,7 +405,7 @@ nfssvc_encode_diropres(struct svc_rqst *rqstp, u32 *p, } int -nfssvc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p, +nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readlinkres *resp) { *p++ = htonl(resp->len); @@ -421,7 +421,7 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p, } int -nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p, +nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readres *resp) { p = encode_fattr(rqstp, p, &resp->fh, &resp->stat); @@ -440,7 +440,7 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, u32 *p, } int -nfssvc_encode_readdirres(struct svc_rqst *rqstp, u32 *p, +nfssvc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p, struct nfsd_readdirres *resp) { xdr_ressize_check(rqstp, p); @@ -453,7 +453,7 @@ nfssvc_encode_readdirres(struct svc_rqst *rqstp, u32 *p, } int -nfssvc_encode_statfsres(struct svc_rqst *rqstp, u32 *p, +nfssvc_encode_statfsres(struct svc_rqst *rqstp, __be32 *p, struct nfsd_statfsres *resp) { struct kstatfs *stat = &resp->stats; @@ -471,7 +471,7 @@ nfssvc_encode_entry(struct readdir_cd *ccd, const char *name, int namlen, loff_t offset, ino_t ino, unsigned int d_type) { struct nfsd_readdirres *cd = container_of(ccd, struct nfsd_readdirres, common); - u32 *p = cd->buffer; + __be32 *p = cd->buffer; int buflen, slen; /* @@ -497,7 +497,7 @@ nfssvc_encode_entry(struct readdir_cd *ccd, const char *name, *p++ = htonl((u32) ino); /* file id */ p = xdr_encode_array(p, name, namlen);/* name length & name */ cd->offset = p; /* remember pointer */ - *p++ = ~(u32) 0; /* offset of next entry */ + *p++ = htonl(~0U); /* offset of next entry */ cd->buflen = buflen; cd->buffer = p; @@ -509,7 +509,7 @@ nfssvc_encode_entry(struct readdir_cd *ccd, const char *name, * 
XDR release functions */ int -nfssvc_release_fhandle(struct svc_rqst *rqstp, u32 *p, +nfssvc_release_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *resp) { fh_put(&resp->fh); diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index d0d4aae7085f..2f75160a5824 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -50,7 +50,7 @@ * Callback function for readdir */ struct readdir_cd { - int err; /* 0, nfserr, or nfserr_eof */ + __be32 err; /* 0, nfserr, or nfserr_eof */ }; typedef int (*encode_dent_fn)(struct readdir_cd *, const char *, int, loff_t, ino_t, unsigned int); diff --git a/include/linux/nfsd/xdr.h b/include/linux/nfsd/xdr.h index 0e53de87d886..877192d3ae79 100644 --- a/include/linux/nfsd/xdr.h +++ b/include/linux/nfsd/xdr.h @@ -81,7 +81,7 @@ struct nfsd_readdirargs { struct svc_fh fh; __u32 cookie; __u32 count; - u32 * buffer; + __be32 * buffer; }; struct nfsd_attrstat { @@ -108,9 +108,9 @@ struct nfsd_readdirres { int count; struct readdir_cd common; - u32 * buffer; + __be32 * buffer; int buflen; - u32 * offset; + __be32 * offset; }; struct nfsd_statfsres { @@ -135,43 +135,43 @@ union nfsd_xdrstore { #define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore) -int nfssvc_decode_void(struct svc_rqst *, u32 *, void *); -int nfssvc_decode_fhandle(struct svc_rqst *, u32 *, struct nfsd_fhandle *); -int nfssvc_decode_sattrargs(struct svc_rqst *, u32 *, +int nfssvc_decode_void(struct svc_rqst *, __be32 *, void *); +int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *); +int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *, struct nfsd_sattrargs *); -int nfssvc_decode_diropargs(struct svc_rqst *, u32 *, +int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *, struct nfsd_diropargs *); -int nfssvc_decode_readargs(struct svc_rqst *, u32 *, +int nfssvc_decode_readargs(struct svc_rqst *, __be32 *, struct nfsd_readargs *); -int nfssvc_decode_writeargs(struct svc_rqst *, u32 *, +int nfssvc_decode_writeargs(struct svc_rqst *, __be32 *, struct nfsd_writeargs *); -int nfssvc_decode_createargs(struct svc_rqst *, u32 *, +int nfssvc_decode_createargs(struct svc_rqst *, __be32 *, struct nfsd_createargs *); -int nfssvc_decode_renameargs(struct svc_rqst *, u32 *, +int nfssvc_decode_renameargs(struct svc_rqst *, __be32 *, struct nfsd_renameargs *); -int nfssvc_decode_readlinkargs(struct svc_rqst *, u32 *, +int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *, struct nfsd_readlinkargs *); -int nfssvc_decode_linkargs(struct svc_rqst *, u32 *, +int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *, struct nfsd_linkargs *); -int nfssvc_decode_symlinkargs(struct svc_rqst *, u32 *, +int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *, struct nfsd_symlinkargs *); -int nfssvc_decode_readdirargs(struct svc_rqst *, u32 *, +int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *, struct nfsd_readdirargs *); -int nfssvc_encode_void(struct svc_rqst *, u32 *, void *); -int nfssvc_encode_attrstat(struct svc_rqst *, u32 *, struct nfsd_attrstat *); -int nfssvc_encode_diropres(struct svc_rqst *, u32 *, struct nfsd_diropres *); -int nfssvc_encode_readlinkres(struct svc_rqst *, u32 *, struct nfsd_readlinkres *); -int nfssvc_encode_readres(struct svc_rqst *, u32 *, struct nfsd_readres *); -int nfssvc_encode_statfsres(struct svc_rqst *, u32 *, struct nfsd_statfsres *); -int nfssvc_encode_readdirres(struct svc_rqst *, u32 *, struct nfsd_readdirres *); +int nfssvc_encode_void(struct svc_rqst *, __be32 *, void *); +int 
nfssvc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd_attrstat *); +int nfssvc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd_diropres *); +int nfssvc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd_readlinkres *); +int nfssvc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd_readres *); +int nfssvc_encode_statfsres(struct svc_rqst *, __be32 *, struct nfsd_statfsres *); +int nfssvc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd_readdirres *); int nfssvc_encode_entry(struct readdir_cd *, const char *name, int namlen, loff_t offset, ino_t ino, unsigned int); -int nfssvc_release_fhandle(struct svc_rqst *, u32 *, struct nfsd_fhandle *); +int nfssvc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *); /* Helper functions for NFSv2 ACL code */ -u32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp); -u32 *nfs2svc_decode_fh(u32 *p, struct svc_fh *fhp); +__be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp); +__be32 *nfs2svc_decode_fh(__be32 *p, struct svc_fh *fhp); #endif /* LINUX_NFSD_H */ -- cgit v1.2.3 From 91f07168cef8e99dd16f608fbc703e7a5af0237f Mon Sep 17 00:00:00 2001 From: Al Viro <viro@ftp.linux.org.uk> Date: Thu, 19 Oct 2006 23:28:57 -0700 Subject: [PATCH] xdr annotations: NFSv3 server Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no> Acked-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/nfsd/nfs3acl.c | 10 ++-- fs/nfsd/nfs3xdr.c | 126 +++++++++++++++++++++++----------------------- include/linux/nfsd/xdr3.h | 114 ++++++++++++++++++++--------------------- 3 files changed, 125 insertions(+), 125 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c index ed6e2c27b5e8..78b2c83d00c5 100644 --- a/fs/nfsd/nfs3acl.c +++ b/fs/nfsd/nfs3acl.c @@ -122,7 +122,7 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst * rqstp, /* * XDR decode functions */ -static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p, +static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_getaclargs *args) { if (!(p = nfs3svc_decode_fh(p, &args->fh))) @@ -133,7 +133,7 @@ static int nfs3svc_decode_getaclargs(struct svc_rqst *rqstp, u32 *p, } -static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p, +static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_setaclargs *args) { struct kvec *head = rqstp->rq_arg.head; @@ -163,7 +163,7 @@ static int nfs3svc_decode_setaclargs(struct svc_rqst *rqstp, u32 *p, */ /* GETACL */ -static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p, +static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_getaclres *resp) { struct dentry *dentry = resp->fh.fh_dentry; @@ -208,7 +208,7 @@ static int nfs3svc_encode_getaclres(struct svc_rqst *rqstp, u32 *p, } /* SETACL */ -static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, u32 *p, +static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_attrstat *resp) { p = nfs3svc_encode_post_op_attr(rqstp, p, &resp->fh); @@ -219,7 +219,7 @@ static int nfs3svc_encode_setaclres(struct svc_rqst *rqstp, u32 *p, /* * XDR release functions */ -static int nfs3svc_release_getacl(struct svc_rqst *rqstp, u32 *p, +static int nfs3svc_release_getacl(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_getaclres *resp) { fh_put(&resp->fh); diff 
--git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 247d518248bf..b4baca3053c3 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -42,23 +42,23 @@ static u32 nfs3_ftypes[] = { /* * XDR functions for basic NFS types */ -static inline u32 * -encode_time3(u32 *p, struct timespec *time) +static inline __be32 * +encode_time3(__be32 *p, struct timespec *time) { *p++ = htonl((u32) time->tv_sec); *p++ = htonl(time->tv_nsec); return p; } -static inline u32 * -decode_time3(u32 *p, struct timespec *time) +static inline __be32 * +decode_time3(__be32 *p, struct timespec *time) { time->tv_sec = ntohl(*p++); time->tv_nsec = ntohl(*p++); return p; } -static inline u32 * -decode_fh(u32 *p, struct svc_fh *fhp) +static inline __be32 * +decode_fh(__be32 *p, struct svc_fh *fhp) { unsigned int size; fh_init(fhp, NFS3_FHSIZE); @@ -72,13 +72,13 @@ decode_fh(u32 *p, struct svc_fh *fhp) } /* Helper function for NFSv3 ACL code */ -u32 *nfs3svc_decode_fh(u32 *p, struct svc_fh *fhp) +__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp) { return decode_fh(p, fhp); } -static inline u32 * -encode_fh(u32 *p, struct svc_fh *fhp) +static inline __be32 * +encode_fh(__be32 *p, struct svc_fh *fhp) { unsigned int size = fhp->fh_handle.fh_size; *p++ = htonl(size); @@ -91,8 +91,8 @@ encode_fh(u32 *p, struct svc_fh *fhp) * Decode a file name and make sure that the path contains * no slashes or null bytes. */ -static inline u32 * -decode_filename(u32 *p, char **namp, int *lenp) +static inline __be32 * +decode_filename(__be32 *p, char **namp, int *lenp) { char *name; int i; @@ -107,8 +107,8 @@ decode_filename(u32 *p, char **namp, int *lenp) return p; } -static inline u32 * -decode_sattr3(u32 *p, struct iattr *iap) +static inline __be32 * +decode_sattr3(__be32 *p, struct iattr *iap) { u32 tmp; @@ -153,8 +153,8 @@ decode_sattr3(u32 *p, struct iattr *iap) return p; } -static inline u32 * -encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp, +static inline __be32 * +encode_fattr3(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp, struct kstat *stat) { struct dentry *dentry = fhp->fh_dentry; @@ -186,8 +186,8 @@ encode_fattr3(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp, return p; } -static inline u32 * -encode_saved_post_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +static inline __be32 * +encode_saved_post_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { struct inode *inode = fhp->fh_dentry->d_inode; @@ -224,8 +224,8 @@ encode_saved_post_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) * The inode may be NULL if the call failed because of a stale file * handle. In this case, no attributes are returned. 
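encode_time3() and decode_time3(), converted at the top of this hunk, are the simplest instances of the cursor-threading style used throughout these XDR files. A self-contained version for reference, with userspace types; the seconds field is deliberately truncated to 32 bits, as it is on the wire:

#include <stdint.h>
#include <time.h>
#include <arpa/inet.h>

/* nfstime3 on the wire: seconds, then nanoseconds, both big-endian. */
static uint32_t *encode_time3(uint32_t *p, const struct timespec *t)
{
	*p++ = htonl((uint32_t)t->tv_sec);	/* truncated to 32 bits */
	*p++ = htonl((uint32_t)t->tv_nsec);
	return p;		/* caller continues at the new cursor */
}

static uint32_t *decode_time3(uint32_t *p, struct timespec *t)
{
	t->tv_sec = ntohl(*p++);
	t->tv_nsec = ntohl(*p++);
	return p;
}

Under the annotated kernel types both cursors would be __be32 *, which is precisely what stops a caller from doing arithmetic on a word it forgot to byte-swap.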
*/ -static u32 * -encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +static __be32 * +encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { struct dentry *dentry = fhp->fh_dentry; if (dentry && dentry->d_inode != NULL) { @@ -243,8 +243,8 @@ encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) } /* Helper for NFSv3 ACLs */ -u32 * -nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +__be32 * +nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { return encode_post_op_attr(rqstp, p, fhp); } @@ -252,8 +252,8 @@ nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) /* * Enocde weak cache consistency data */ -static u32 * -encode_wcc_data(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) +static __be32 * +encode_wcc_data(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp) { struct dentry *dentry = fhp->fh_dentry; @@ -278,7 +278,7 @@ encode_wcc_data(struct svc_rqst *rqstp, u32 *p, struct svc_fh *fhp) * XDR decode functions */ int -nfs3svc_decode_fhandle(struct svc_rqst *rqstp, u32 *p, struct nfsd_fhandle *args) +nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd_fhandle *args) { if (!(p = decode_fh(p, &args->fh))) return 0; @@ -286,7 +286,7 @@ nfs3svc_decode_fhandle(struct svc_rqst *rqstp, u32 *p, struct nfsd_fhandle *args } int -nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, u32 *p, +nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_sattrargs *args) { if (!(p = decode_fh(p, &args->fh)) @@ -303,7 +303,7 @@ nfs3svc_decode_sattrargs(struct svc_rqst *rqstp, u32 *p, } int -nfs3svc_decode_diropargs(struct svc_rqst *rqstp, u32 *p, +nfs3svc_decode_diropargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_diropargs *args) { if (!(p = decode_fh(p, &args->fh)) @@ -314,7 +314,7 @@ nfs3svc_decode_diropargs(struct svc_rqst *rqstp, u32 *p, } int -nfs3svc_decode_accessargs(struct svc_rqst *rqstp, u32 *p, +nfs3svc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_accessargs *args) { if (!(p = decode_fh(p, &args->fh))) @@ -325,7 +325,7 @@ nfs3svc_decode_accessargs(struct svc_rqst *rqstp, u32 *p, } int -nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p, +nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readargs *args) { unsigned int len; @@ -355,7 +355,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p, } int -nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, +nfs3svc_decode_writeargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_writeargs *args) { unsigned int len, v, hdr; @@ -393,7 +393,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, } int -nfs3svc_decode_createargs(struct svc_rqst *rqstp, u32 *p, +nfs3svc_decode_createargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_createargs *args) { if (!(p = decode_fh(p, &args->fh)) @@ -417,7 +417,7 @@ nfs3svc_decode_createargs(struct svc_rqst *rqstp, u32 *p, return xdr_argsize_check(rqstp, p); } int -nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, u32 *p, +nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_createargs *args) { if (!(p = decode_fh(p, &args->fh)) @@ -429,7 +429,7 @@ nfs3svc_decode_mkdirargs(struct svc_rqst *rqstp, u32 *p, } int -nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, u32 *p, +nfs3svc_decode_symlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_symlinkargs *args) { unsigned int len; @@ -481,7 +481,7 @@ nfs3svc_decode_symlinkargs(struct svc_rqst 
*rqstp, u32 *p, } int -nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, u32 *p, +nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_mknodargs *args) { if (!(p = decode_fh(p, &args->fh)) @@ -505,7 +505,7 @@ nfs3svc_decode_mknodargs(struct svc_rqst *rqstp, u32 *p, } int -nfs3svc_decode_renameargs(struct svc_rqst *rqstp, u32 *p, +nfs3svc_decode_renameargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_renameargs *args) { if (!(p = decode_fh(p, &args->ffh)) @@ -518,7 +518,7 @@ nfs3svc_decode_renameargs(struct svc_rqst *rqstp, u32 *p, } int -nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, +nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readlinkargs *args) { if (!(p = decode_fh(p, &args->fh))) @@ -530,7 +530,7 @@ nfs3svc_decode_readlinkargs(struct svc_rqst *rqstp, u32 *p, } int -nfs3svc_decode_linkargs(struct svc_rqst *rqstp, u32 *p, +nfs3svc_decode_linkargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_linkargs *args) { if (!(p = decode_fh(p, &args->ffh)) @@ -542,7 +542,7 @@ nfs3svc_decode_linkargs(struct svc_rqst *rqstp, u32 *p, } int -nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p, +nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readdirargs *args) { if (!(p = decode_fh(p, &args->fh))) @@ -562,7 +562,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p, } int -nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p, +nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readdirargs *args) { int len, pn; @@ -590,7 +590,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p, } int -nfs3svc_decode_commitargs(struct svc_rqst *rqstp, u32 *p, +nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_commitargs *args) { if (!(p = decode_fh(p, &args->fh))) @@ -609,14 +609,14 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, u32 *p, * will work properly. 
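The status fields that switch from __u32 to __be32 in the structures further down carry exactly the kind of value nfserrno() produces in the earlier patch: a constant already in network byte order. A sketch of that table-driven mapping, mirroring the ARRAY_SIZE() loop the patch introduced; note the kernel pre-converts its nfserr_* constants at compile time, whereas this portable version converts on return:

#include <errno.h>
#include <stddef.h>
#include <stdint.h>
#include <arpa/inet.h>

#define ARRAY_SIZE(a)	(sizeof(a) / sizeof((a)[0]))

static uint32_t nfserrno_sketch(int err)
{
	static const struct {
		uint32_t nfserr;	/* host order here; swapped below */
		int	 syserr;
	} tbl[] = {
		{  0, 0       },	/* nfs_ok */
		{  1, -EPERM  },	/* nfserr_perm */
		{  5, -EIO    },	/* nfserr_io */
		{ 13, -EACCES },	/* nfserr_acces */
	};
	size_t i;

	for (i = 0; i < ARRAY_SIZE(tbl); i++)
		if (tbl[i].syserr == err)
			return htonl(tbl[i].nfserr);
	return htonl(5);		/* fall back to nfserr_io */
}

Dropping the old { -1, -EIO } sentinel in favour of ARRAY_SIZE() matters once nfserr is __be32: -1 is not a valid value of a bitwise type, so the sentinel comparison itself would no longer type-check under sparse.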
*/ int -nfs3svc_encode_voidres(struct svc_rqst *rqstp, u32 *p, void *dummy) +nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy) { return xdr_ressize_check(rqstp, p); } /* GETATTR */ int -nfs3svc_encode_attrstat(struct svc_rqst *rqstp, u32 *p, +nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_attrstat *resp) { if (resp->status == 0) @@ -626,7 +626,7 @@ nfs3svc_encode_attrstat(struct svc_rqst *rqstp, u32 *p, /* SETATTR, REMOVE, RMDIR */ int -nfs3svc_encode_wccstat(struct svc_rqst *rqstp, u32 *p, +nfs3svc_encode_wccstat(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_attrstat *resp) { p = encode_wcc_data(rqstp, p, &resp->fh); @@ -635,7 +635,7 @@ nfs3svc_encode_wccstat(struct svc_rqst *rqstp, u32 *p, /* LOOKUP */ int -nfs3svc_encode_diropres(struct svc_rqst *rqstp, u32 *p, +nfs3svc_encode_diropres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_diropres *resp) { if (resp->status == 0) { @@ -648,7 +648,7 @@ nfs3svc_encode_diropres(struct svc_rqst *rqstp, u32 *p, /* ACCESS */ int -nfs3svc_encode_accessres(struct svc_rqst *rqstp, u32 *p, +nfs3svc_encode_accessres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_accessres *resp) { p = encode_post_op_attr(rqstp, p, &resp->fh); @@ -659,7 +659,7 @@ nfs3svc_encode_accessres(struct svc_rqst *rqstp, u32 *p, /* READLINK */ int -nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p, +nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readlinkres *resp) { p = encode_post_op_attr(rqstp, p, &resp->fh); @@ -680,7 +680,7 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, u32 *p, /* READ */ int -nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p, +nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readres *resp) { p = encode_post_op_attr(rqstp, p, &resp->fh); @@ -704,7 +704,7 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, u32 *p, /* WRITE */ int -nfs3svc_encode_writeres(struct svc_rqst *rqstp, u32 *p, +nfs3svc_encode_writeres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_writeres *resp) { p = encode_wcc_data(rqstp, p, &resp->fh); @@ -719,7 +719,7 @@ nfs3svc_encode_writeres(struct svc_rqst *rqstp, u32 *p, /* CREATE, MKDIR, SYMLINK, MKNOD */ int -nfs3svc_encode_createres(struct svc_rqst *rqstp, u32 *p, +nfs3svc_encode_createres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_diropres *resp) { if (resp->status == 0) { @@ -733,7 +733,7 @@ nfs3svc_encode_createres(struct svc_rqst *rqstp, u32 *p, /* RENAME */ int -nfs3svc_encode_renameres(struct svc_rqst *rqstp, u32 *p, +nfs3svc_encode_renameres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_renameres *resp) { p = encode_wcc_data(rqstp, p, &resp->ffh); @@ -743,7 +743,7 @@ nfs3svc_encode_renameres(struct svc_rqst *rqstp, u32 *p, /* LINK */ int -nfs3svc_encode_linkres(struct svc_rqst *rqstp, u32 *p, +nfs3svc_encode_linkres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_linkres *resp) { p = encode_post_op_attr(rqstp, p, &resp->fh); @@ -753,7 +753,7 @@ nfs3svc_encode_linkres(struct svc_rqst *rqstp, u32 *p, /* READDIR */ int -nfs3svc_encode_readdirres(struct svc_rqst *rqstp, u32 *p, +nfs3svc_encode_readdirres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_readdirres *resp) { p = encode_post_op_attr(rqstp, p, &resp->fh); @@ -776,8 +776,8 @@ nfs3svc_encode_readdirres(struct svc_rqst *rqstp, u32 *p, return xdr_ressize_check(rqstp, p); } -static inline u32 * -encode_entry_baggage(struct nfsd3_readdirres *cd, u32 *p, const char *name, +static inline __be32 * +encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const 
char *name, int namlen, ino_t ino) { *p++ = xdr_one; /* mark entry present */ @@ -790,8 +790,8 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, u32 *p, const char *name, return p; } -static inline u32 * -encode_entryplus_baggage(struct nfsd3_readdirres *cd, u32 *p, +static inline __be32 * +encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, struct svc_fh *fhp) { p = encode_post_op_attr(cd->rqstp, p, fhp); @@ -853,7 +853,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, { struct nfsd3_readdirres *cd = container_of(ccd, struct nfsd3_readdirres, common); - u32 *p = cd->buffer; + __be32 *p = cd->buffer; caddr_t curr_page_addr = NULL; int pn; /* current page number */ int slen; /* string (name) length */ @@ -919,7 +919,7 @@ encode_entry(struct readdir_cd *ccd, const char *name, } else if (cd->rqstp->rq_respages[pn+1] != NULL) { /* temporarily encode entry into next page, then move back to * current and next page in rq_respages[] */ - u32 *p1, *tmp; + __be32 *p1, *tmp; int len1, len2; /* grab next page for temporary storage of entry */ @@ -1009,7 +1009,7 @@ nfs3svc_encode_entry_plus(struct readdir_cd *cd, const char *name, /* FSSTAT */ int -nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, u32 *p, +nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_fsstatres *resp) { struct kstatfs *s = &resp->stats; @@ -1031,7 +1031,7 @@ nfs3svc_encode_fsstatres(struct svc_rqst *rqstp, u32 *p, /* FSINFO */ int -nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, u32 *p, +nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_fsinfores *resp) { *p++ = xdr_zero; /* no post_op_attr */ @@ -1055,7 +1055,7 @@ nfs3svc_encode_fsinfores(struct svc_rqst *rqstp, u32 *p, /* PATHCONF */ int -nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, u32 *p, +nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_pathconfres *resp) { *p++ = xdr_zero; /* no post_op_attr */ @@ -1074,7 +1074,7 @@ nfs3svc_encode_pathconfres(struct svc_rqst *rqstp, u32 *p, /* COMMIT */ int -nfs3svc_encode_commitres(struct svc_rqst *rqstp, u32 *p, +nfs3svc_encode_commitres(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_commitres *resp) { p = encode_wcc_data(rqstp, p, &resp->fh); @@ -1090,7 +1090,7 @@ nfs3svc_encode_commitres(struct svc_rqst *rqstp, u32 *p, * XDR release functions */ int -nfs3svc_release_fhandle(struct svc_rqst *rqstp, u32 *p, +nfs3svc_release_fhandle(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_attrstat *resp) { fh_put(&resp->fh); @@ -1098,7 +1098,7 @@ nfs3svc_release_fhandle(struct svc_rqst *rqstp, u32 *p, } int -nfs3svc_release_fhandle2(struct svc_rqst *rqstp, u32 *p, +nfs3svc_release_fhandle2(struct svc_rqst *rqstp, __be32 *p, struct nfsd3_fhandle_pair *resp) { fh_put(&resp->fh1); diff --git a/include/linux/nfsd/xdr3.h b/include/linux/nfsd/xdr3.h index 474d882dc2f3..79963867b0d7 100644 --- a/include/linux/nfsd/xdr3.h +++ b/include/linux/nfsd/xdr3.h @@ -51,7 +51,7 @@ struct nfsd3_createargs { int len; int createmode; struct iattr attrs; - __u32 * verf; + __be32 * verf; }; struct nfsd3_mknodargs { @@ -98,8 +98,8 @@ struct nfsd3_readdirargs { __u64 cookie; __u32 dircount; __u32 count; - __u32 * verf; - u32 * buffer; + __be32 * verf; + __be32 * buffer; }; struct nfsd3_commitargs { @@ -122,79 +122,79 @@ struct nfsd3_setaclargs { }; struct nfsd3_attrstat { - __u32 status; + __be32 status; struct svc_fh fh; struct kstat stat; }; /* LOOKUP, CREATE, MKDIR, SYMLINK, MKNOD */ struct nfsd3_diropres { - __u32 status; + __be32 status; struct svc_fh dirfh; struct 
svc_fh fh; }; struct nfsd3_accessres { - __u32 status; + __be32 status; struct svc_fh fh; __u32 access; }; struct nfsd3_readlinkres { - __u32 status; + __be32 status; struct svc_fh fh; __u32 len; }; struct nfsd3_readres { - __u32 status; + __be32 status; struct svc_fh fh; unsigned long count; int eof; }; struct nfsd3_writeres { - __u32 status; + __be32 status; struct svc_fh fh; unsigned long count; int committed; }; struct nfsd3_renameres { - __u32 status; + __be32 status; struct svc_fh ffh; struct svc_fh tfh; }; struct nfsd3_linkres { - __u32 status; + __be32 status; struct svc_fh tfh; struct svc_fh fh; }; struct nfsd3_readdirres { - __u32 status; + __be32 status; struct svc_fh fh; int count; - __u32 verf[2]; + __be32 verf[2]; struct readdir_cd common; - u32 * buffer; + __be32 * buffer; int buflen; - u32 * offset; - u32 * offset1; + __be32 * offset; + __be32 * offset1; struct svc_rqst * rqstp; }; struct nfsd3_fsstatres { - __u32 status; + __be32 status; struct kstatfs stats; __u32 invarsec; }; struct nfsd3_fsinfores { - __u32 status; + __be32 status; __u32 f_rtmax; __u32 f_rtpref; __u32 f_rtmult; @@ -207,7 +207,7 @@ struct nfsd3_fsinfores { }; struct nfsd3_pathconfres { - __u32 status; + __be32 status; __u32 p_link_max; __u32 p_name_max; __u32 p_no_trunc; @@ -217,12 +217,12 @@ struct nfsd3_pathconfres { }; struct nfsd3_commitres { - __u32 status; + __be32 status; struct svc_fh fh; }; struct nfsd3_getaclres { - __u32 status; + __be32 status; struct svc_fh fh; int mask; struct posix_acl *acl_access; @@ -266,70 +266,70 @@ union nfsd3_xdrstore { #define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore) -int nfs3svc_decode_fhandle(struct svc_rqst *, u32 *, struct nfsd_fhandle *); -int nfs3svc_decode_sattrargs(struct svc_rqst *, u32 *, +int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *, struct nfsd_fhandle *); +int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *, struct nfsd3_sattrargs *); -int nfs3svc_decode_diropargs(struct svc_rqst *, u32 *, +int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *, struct nfsd3_diropargs *); -int nfs3svc_decode_accessargs(struct svc_rqst *, u32 *, +int nfs3svc_decode_accessargs(struct svc_rqst *, __be32 *, struct nfsd3_accessargs *); -int nfs3svc_decode_readargs(struct svc_rqst *, u32 *, +int nfs3svc_decode_readargs(struct svc_rqst *, __be32 *, struct nfsd3_readargs *); -int nfs3svc_decode_writeargs(struct svc_rqst *, u32 *, +int nfs3svc_decode_writeargs(struct svc_rqst *, __be32 *, struct nfsd3_writeargs *); -int nfs3svc_decode_createargs(struct svc_rqst *, u32 *, +int nfs3svc_decode_createargs(struct svc_rqst *, __be32 *, struct nfsd3_createargs *); -int nfs3svc_decode_mkdirargs(struct svc_rqst *, u32 *, +int nfs3svc_decode_mkdirargs(struct svc_rqst *, __be32 *, struct nfsd3_createargs *); -int nfs3svc_decode_mknodargs(struct svc_rqst *, u32 *, +int nfs3svc_decode_mknodargs(struct svc_rqst *, __be32 *, struct nfsd3_mknodargs *); -int nfs3svc_decode_renameargs(struct svc_rqst *, u32 *, +int nfs3svc_decode_renameargs(struct svc_rqst *, __be32 *, struct nfsd3_renameargs *); -int nfs3svc_decode_readlinkargs(struct svc_rqst *, u32 *, +int nfs3svc_decode_readlinkargs(struct svc_rqst *, __be32 *, struct nfsd3_readlinkargs *); -int nfs3svc_decode_linkargs(struct svc_rqst *, u32 *, +int nfs3svc_decode_linkargs(struct svc_rqst *, __be32 *, struct nfsd3_linkargs *); -int nfs3svc_decode_symlinkargs(struct svc_rqst *, u32 *, +int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *, struct nfsd3_symlinkargs *); -int nfs3svc_decode_readdirargs(struct svc_rqst 
*, u32 *, +int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *, struct nfsd3_readdirargs *); -int nfs3svc_decode_readdirplusargs(struct svc_rqst *, u32 *, +int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *, struct nfsd3_readdirargs *); -int nfs3svc_decode_commitargs(struct svc_rqst *, u32 *, +int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *, struct nfsd3_commitargs *); -int nfs3svc_encode_voidres(struct svc_rqst *, u32 *, void *); -int nfs3svc_encode_attrstat(struct svc_rqst *, u32 *, +int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *, void *); +int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *, struct nfsd3_attrstat *); -int nfs3svc_encode_wccstat(struct svc_rqst *, u32 *, +int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *, struct nfsd3_attrstat *); -int nfs3svc_encode_diropres(struct svc_rqst *, u32 *, +int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *, struct nfsd3_diropres *); -int nfs3svc_encode_accessres(struct svc_rqst *, u32 *, +int nfs3svc_encode_accessres(struct svc_rqst *, __be32 *, struct nfsd3_accessres *); -int nfs3svc_encode_readlinkres(struct svc_rqst *, u32 *, +int nfs3svc_encode_readlinkres(struct svc_rqst *, __be32 *, struct nfsd3_readlinkres *); -int nfs3svc_encode_readres(struct svc_rqst *, u32 *, struct nfsd3_readres *); -int nfs3svc_encode_writeres(struct svc_rqst *, u32 *, struct nfsd3_writeres *); -int nfs3svc_encode_createres(struct svc_rqst *, u32 *, +int nfs3svc_encode_readres(struct svc_rqst *, __be32 *, struct nfsd3_readres *); +int nfs3svc_encode_writeres(struct svc_rqst *, __be32 *, struct nfsd3_writeres *); +int nfs3svc_encode_createres(struct svc_rqst *, __be32 *, struct nfsd3_diropres *); -int nfs3svc_encode_renameres(struct svc_rqst *, u32 *, +int nfs3svc_encode_renameres(struct svc_rqst *, __be32 *, struct nfsd3_renameres *); -int nfs3svc_encode_linkres(struct svc_rqst *, u32 *, +int nfs3svc_encode_linkres(struct svc_rqst *, __be32 *, struct nfsd3_linkres *); -int nfs3svc_encode_readdirres(struct svc_rqst *, u32 *, +int nfs3svc_encode_readdirres(struct svc_rqst *, __be32 *, struct nfsd3_readdirres *); -int nfs3svc_encode_fsstatres(struct svc_rqst *, u32 *, +int nfs3svc_encode_fsstatres(struct svc_rqst *, __be32 *, struct nfsd3_fsstatres *); -int nfs3svc_encode_fsinfores(struct svc_rqst *, u32 *, +int nfs3svc_encode_fsinfores(struct svc_rqst *, __be32 *, struct nfsd3_fsinfores *); -int nfs3svc_encode_pathconfres(struct svc_rqst *, u32 *, +int nfs3svc_encode_pathconfres(struct svc_rqst *, __be32 *, struct nfsd3_pathconfres *); -int nfs3svc_encode_commitres(struct svc_rqst *, u32 *, +int nfs3svc_encode_commitres(struct svc_rqst *, __be32 *, struct nfsd3_commitres *); -int nfs3svc_release_fhandle(struct svc_rqst *, u32 *, +int nfs3svc_release_fhandle(struct svc_rqst *, __be32 *, struct nfsd3_attrstat *); -int nfs3svc_release_fhandle2(struct svc_rqst *, u32 *, +int nfs3svc_release_fhandle2(struct svc_rqst *, __be32 *, struct nfsd3_fhandle_pair *); int nfs3svc_encode_entry(struct readdir_cd *, const char *name, int namlen, loff_t offset, ino_t ino, @@ -338,9 +338,9 @@ int nfs3svc_encode_entry_plus(struct readdir_cd *, const char *name, int namlen, loff_t offset, ino_t ino, unsigned int); /* Helper functions for NFSv3 ACL code */ -u32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, u32 *p, +__be32 *nfs3svc_encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp); -u32 *nfs3svc_decode_fh(u32 *p, struct svc_fh *fhp); +__be32 *nfs3svc_decode_fh(__be32 *p, struct svc_fh *fhp); #endif /* 
_LINUX_NFSD_XDR3_H */ -- cgit v1.2.3 From 2ebbc012a9433a252be7ab4ce54e94bf7b21e506 Mon Sep 17 00:00:00 2001 From: Al Viro <viro@ftp.linux.org.uk> Date: Thu, 19 Oct 2006 23:28:58 -0700 Subject: [PATCH] xdr annotations: NFSv4 server Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no> Acked-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfs4xdr.c | 66 +++++++++++++++++++++++------------------------ include/linux/nfsd/xdr4.h | 24 ++++++++--------- 3 files changed, 46 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 795ad6c5cb2c..ca6414248527 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -664,7 +664,7 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ static int nfsd4_verify(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_verify *verify) { - u32 *buf, *p; + __be32 *buf, *p; int count; int status; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 77be0c4785e6..3419d99aeb1a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -94,7 +94,7 @@ check_filename(char *str, int len, int err) * consistent with the style used in NFSv2/v3... */ #define DECODE_HEAD \ - u32 *p; \ + __be32 *p; \ int status #define DECODE_TAIL \ status = 0; \ @@ -144,13 +144,13 @@ xdr_error: \ } \ } while (0) -static u32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes) +static __be32 *read_buf(struct nfsd4_compoundargs *argp, int nbytes) { /* We want more bytes than seem to be available. * Maybe we need a new page, maybe we have just run out */ int avail = (char*)argp->end - (char*)argp->p; - u32 *p; + __be32 *p; if (avail + argp->pagelen < nbytes) return NULL; if (avail + PAGE_SIZE < nbytes) /* need more than a page !! */ @@ -197,7 +197,7 @@ defer_free(struct nfsd4_compoundargs *argp, return 0; } -static char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes) +static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) { void *new = NULL; if (p == argp->tmp) { @@ -951,8 +951,8 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) argp->pagelen -= len; } } - argp->end = (u32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len); - argp->p = (u32*) (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2)); + argp->end = (__be32*) (argp->rqstp->rq_vec[v].iov_base + argp->rqstp->rq_vec[v].iov_len); + argp->p = (__be32*) (argp->rqstp->rq_vec[v].iov_base + (XDR_QUADLEN(len) << 2)); argp->rqstp->rq_vec[v].iov_len = len; write->wr_vlen = v+1; @@ -1179,7 +1179,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) * task to translate them into Linux-specific versions which are more * consistent with the style used in NFSv2/v3... */ -#define ENCODE_HEAD u32 *p +#define ENCODE_HEAD __be32 *p #define WRITE32(n) *p++ = htonl(n) #define WRITE64(n) do { \ @@ -1209,8 +1209,8 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) * Header routine to setup seqid operation replay cache */ #define ENCODE_SEQID_OP_HEAD \ - u32 *p; \ - u32 *save; \ + __be32 *p; \ + __be32 *save; \ \ save = resp->p; @@ -1235,10 +1235,10 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) * seperated @sep. 
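 *
 * Editor's sketch -- how this encoder is used, taken from the
 * fs_locations path just below (not a new API): a colon-separated
 * host string is flattened into an XDR array of opaque components:
 *
 *     __be32 *p = *pp;
 *     status = nfsd4_encode_components(':', location->hosts,
 *                                      &p, buflen);
 *     if (status)
 *             return status;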
*/ static int nfsd4_encode_components(char sep, char *components, - u32 **pp, int *buflen) + __be32 **pp, int *buflen) { - u32 *p = *pp; - u32 *countp = p; + __be32 *p = *pp; + __be32 *countp = p; int strlen, count=0; char *str, *end; @@ -1272,10 +1272,10 @@ static int nfsd4_encode_components(char sep, char *components, * encode a location element of a fs_locations structure */ static int nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, - u32 **pp, int *buflen) + __be32 **pp, int *buflen) { int status; - u32 *p = *pp; + __be32 *p = *pp; status = nfsd4_encode_components(':', location->hosts, &p, buflen); if (status) @@ -1320,11 +1320,11 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, u32 *sta */ static int nfsd4_encode_fs_locations(struct svc_rqst *rqstp, struct svc_export *exp, - u32 **pp, int *buflen) + __be32 **pp, int *buflen) { u32 status; int i; - u32 *p = *pp; + __be32 *p = *pp; struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs; char *root = nfsd4_path(rqstp, exp, &status); @@ -1355,7 +1355,7 @@ static u32 nfs4_ftypes[16] = { static int nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group, - u32 **p, int *buflen) + __be32 **p, int *buflen) { int status; @@ -1376,20 +1376,20 @@ nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group, } static inline int -nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, u32 **p, int *buflen) +nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, __be32 **p, int *buflen) { return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, uid, 0, p, buflen); } static inline int -nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, u32 **p, int *buflen) +nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, __be32 **p, int *buflen) { return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, gid, 1, p, buflen); } static inline int nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group, - u32 **p, int *buflen) + __be32 **p, int *buflen) { return nfsd4_encode_name(rqstp, whotype, id, group, p, buflen); } @@ -1423,7 +1423,7 @@ static int fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) */ int nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, - struct dentry *dentry, u32 *buffer, int *countp, u32 *bmval, + struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval, struct svc_rqst *rqstp) { u32 bmval0 = bmval[0]; @@ -1432,11 +1432,11 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, struct svc_fh tempfh; struct kstatfs statfs; int buflen = *countp << 2; - u32 *attrlenp; + __be32 *attrlenp; u32 dummy; u64 dummy64; u32 rdattr_err = 0; - u32 *p = buffer; + __be32 *p = buffer; int status; int aclsupport = 0; struct nfs4_acl *acl = NULL; @@ -1831,7 +1831,7 @@ out_serverfault: static int nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, - const char *name, int namlen, u32 *p, int *buflen) + const char *name, int namlen, __be32 *p, int *buflen) { struct svc_export *exp = cd->rd_fhp->fh_export; struct dentry *dentry; @@ -1864,10 +1864,10 @@ out_put: return nfserr; } -static u32 * -nfsd4_encode_rdattr_error(u32 *p, int buflen, int nfserr) +static __be32 * +nfsd4_encode_rdattr_error(__be32 *p, int buflen, int nfserr) { - u32 *attrlenp; + __be32 *attrlenp; if (buflen < 6) return NULL; @@ -1887,7 +1887,7 @@ nfsd4_encode_dirent(struct readdir_cd *ccd, const char *name, int namlen, { struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); int buflen; - u32 *p = cd->buffer; + __be32 *p = cd->buffer; int nfserr = 
nfserr_toosmall; /* In nfsv4, "." and ".." never make it onto the wire.. */ @@ -2321,7 +2321,7 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_re { int maxcount; loff_t offset; - u32 *page, *savep, *tailbase; + __be32 *page, *savep, *tailbase; ENCODE_HEAD; if (nfserr) @@ -2479,7 +2479,7 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_writ void nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) { - u32 *statp; + __be32 *statp; ENCODE_HEAD; RESERVE_SPACE(8); @@ -2617,7 +2617,7 @@ nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) */ int -nfs4svc_encode_voidres(struct svc_rqst *rqstp, u32 *p, void *dummy) +nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p, void *dummy) { return xdr_ressize_check(rqstp, p); } @@ -2639,7 +2639,7 @@ void nfsd4_release_compoundargs(struct nfsd4_compoundargs *args) } int -nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, u32 *p, struct nfsd4_compoundargs *args) +nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args) { int status; @@ -2660,7 +2660,7 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, u32 *p, struct nfsd4_compoun } int -nfs4svc_encode_compoundres(struct svc_rqst *rqstp, u32 *p, struct nfsd4_compoundres *resp) +nfs4svc_encode_compoundres(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundres *resp) { /* * All that remains is to write the tag and operation count... diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 66e642762a07..003193fe6fc6 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -258,9 +258,9 @@ struct nfsd4_readdir { struct svc_fh * rd_fhp; /* response */ struct readdir_cd common; - u32 * buffer; + __be32 * buffer; int buflen; - u32 * offset; + __be32 * offset; }; struct nfsd4_release_lockowner { @@ -371,12 +371,12 @@ struct nfsd4_op { struct nfsd4_compoundargs { /* scratch variables for XDR decode */ - u32 * p; - u32 * end; + __be32 * p; + __be32 * end; struct page ** pagelist; int pagelen; - u32 tmp[8]; - u32 * tmpp; + __be32 tmp[8]; + __be32 * tmpp; struct tmpbuf { struct tmpbuf *next; void (*release)(const void *); @@ -395,15 +395,15 @@ struct nfsd4_compoundargs { struct nfsd4_compoundres { /* scratch variables for XDR encode */ - u32 * p; - u32 * end; + __be32 * p; + __be32 * end; struct xdr_buf * xbuf; struct svc_rqst * rqstp; u32 taglen; char * tag; u32 opcnt; - u32 * tagp; /* where to encode tag and opcount */ + __be32 * tagp; /* where to encode tag and opcount */ }; #define NFS4_SVC_XDRSIZE sizeof(struct nfsd4_compoundargs) @@ -419,10 +419,10 @@ set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp) cinfo->after_ctime_nsec = fhp->fh_post_ctime.tv_nsec; } -int nfs4svc_encode_voidres(struct svc_rqst *, u32 *, void *); -int nfs4svc_decode_compoundargs(struct svc_rqst *, u32 *, +int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *, void *); +int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *, struct nfsd4_compoundargs *); -int nfs4svc_encode_compoundres(struct svc_rqst *, u32 *, +int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, struct nfsd4_compoundres *); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); -- cgit v1.2.3 From 6264d69d7df654ca64f625e9409189a0e50734e9 Mon Sep 17 00:00:00 2001 From: Al Viro <viro@ftp.linux.org.uk> Date: Thu, 19 Oct 2006 23:28:58 -0700 Subject: [PATCH] 
nfsd: vfs.c endianness annotations don't use the same variable to store NFS and host error values Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no> Acked-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/nfsd/vfs.c | 299 ++++++++++++++++++++++++---------------------- include/linux/nfsd/nfsd.h | 38 +++--- 2 files changed, 176 insertions(+), 161 deletions(-) (limited to 'include') diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 1141bd29e4e3..f21e917bb8ed 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -110,7 +110,7 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, struct dentry *dentry = *dpp; struct vfsmount *mnt = mntget(exp->ex_mnt); struct dentry *mounts = dget(dentry); - int err = nfs_ok; + int err = 0; while (follow_down(&mnt,&mounts)&&d_mountpoint(mounts)); @@ -148,14 +148,15 @@ out: * clients and is explicitly disallowed for NFSv3 * NeilBrown <neilb@cse.unsw.edu.au> */ -int +__be32 nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, int len, struct svc_fh *resfh) { struct svc_export *exp; struct dentry *dparent; struct dentry *dentry; - int err; + __be32 err; + int host_err; dprintk("nfsd: nfsd_lookup(fh %s, %.*s)\n", SVCFH_fmt(fhp), len,name); @@ -193,7 +194,7 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, exp2 = exp_parent(exp->ex_client, mnt, dentry, &rqstp->rq_chandle); if (IS_ERR(exp2)) { - err = PTR_ERR(exp2); + host_err = PTR_ERR(exp2); dput(dentry); mntput(mnt); goto out_nfserr; @@ -210,14 +211,14 @@ nfsd_lookup(struct svc_rqst *rqstp, struct svc_fh *fhp, const char *name, } else { fh_lock(fhp); dentry = lookup_one_len(name, dparent, len); - err = PTR_ERR(dentry); + host_err = PTR_ERR(dentry); if (IS_ERR(dentry)) goto out_nfserr; /* * check if we have crossed a mount point ... */ if (d_mountpoint(dentry)) { - if ((err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { + if ((host_err = nfsd_cross_mnt(rqstp, &dentry, &exp))) { dput(dentry); goto out_nfserr; } @@ -236,7 +237,7 @@ out: return err; out_nfserr: - err = nfserrno(err); + err = nfserrno(host_err); goto out; } @@ -244,7 +245,7 @@ out_nfserr: * Set various file attributes. * N.B. After this call fhp needs an fh_put */ -int +__be32 nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, int check_guard, time_t guardtime) { @@ -253,7 +254,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, int accmode = MAY_SATTR; int ftype = 0; int imode; - int err; + __be32 err; + int host_err; int size_change = 0; if (iap->ia_valid & (ATTR_ATIME | ATTR_MTIME | ATTR_SIZE)) @@ -319,19 +321,19 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, * If we are changing the size of the file, then * we need to break all leases. 
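 *
 * Editor's note -- the hunk below shows the core pattern of this
 * patch: results from VFS calls stay in a host-endian int, and the
 * wire-endian __be32 status is produced by exactly one nfserrno()
 * call.  A minimal sketch, using the names from this function:
 *
 *     __be32 err;       wire-endian NFS status
 *     int host_err;     host-endian -errno from the VFS
 *
 *     host_err = break_lease(inode, FMODE_WRITE | O_NONBLOCK);
 *     if (host_err)
 *             goto out_nfserr;   where: err = nfserrno(host_err);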
*/ - err = break_lease(inode, FMODE_WRITE | O_NONBLOCK); - if (err == -EWOULDBLOCK) - err = -ETIMEDOUT; - if (err) /* ENOMEM or EWOULDBLOCK */ + host_err = break_lease(inode, FMODE_WRITE | O_NONBLOCK); + if (host_err == -EWOULDBLOCK) + host_err = -ETIMEDOUT; + if (host_err) /* ENOMEM or EWOULDBLOCK */ goto out_nfserr; - err = get_write_access(inode); - if (err) + host_err = get_write_access(inode); + if (host_err) goto out_nfserr; size_change = 1; - err = locks_verify_truncate(inode, NULL, iap->ia_size); - if (err) { + host_err = locks_verify_truncate(inode, NULL, iap->ia_size); + if (host_err) { put_write_access(inode); goto out_nfserr; } @@ -357,8 +359,8 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap, err = nfserr_notsync; if (!check_guard || guardtime == inode->i_ctime.tv_sec) { fh_lock(fhp); - err = notify_change(dentry, iap); - err = nfserrno(err); + host_err = notify_change(dentry, iap); + err = nfserrno(host_err); fh_unlock(fhp); } if (size_change) @@ -370,7 +372,7 @@ out: return err; out_nfserr: - err = nfserrno(err); + err = nfserrno(host_err); goto out; } @@ -420,11 +422,12 @@ out: return error; } -int +__be32 nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfs4_acl *acl) { - int error; + __be32 error; + int host_error; struct dentry *dentry; struct inode *inode; struct posix_acl *pacl = NULL, *dpacl = NULL; @@ -440,20 +443,20 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, if (S_ISDIR(inode->i_mode)) flags = NFS4_ACL_DIR; - error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); - if (error == -EINVAL) { + host_error = nfs4_acl_nfsv4_to_posix(acl, &pacl, &dpacl, flags); + if (host_error == -EINVAL) { error = nfserr_attrnotsupp; goto out; - } else if (error < 0) + } else if (host_error < 0) goto out_nfserr; - error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); - if (error < 0) + host_error = set_nfsv4_acl_one(dentry, pacl, POSIX_ACL_XATTR_ACCESS); + if (host_error < 0) goto out_nfserr; if (S_ISDIR(inode->i_mode)) { - error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); - if (error < 0) + host_error = set_nfsv4_acl_one(dentry, dpacl, POSIX_ACL_XATTR_DEFAULT); + if (host_error < 0) goto out_nfserr; } @@ -464,7 +467,7 @@ out: posix_acl_release(dpacl); return (error); out_nfserr: - error = nfserrno(error); + error = nfserrno(host_error); goto out; } @@ -571,14 +574,14 @@ static struct accessmap nfs3_anyaccess[] = { { 0, 0 } }; -int +__be32 nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *supported) { struct accessmap *map; struct svc_export *export; struct dentry *dentry; u32 query, result = 0, sresult = 0; - unsigned int error; + __be32 error; error = fh_verify(rqstp, fhp, 0, MAY_NOP); if (error) @@ -598,7 +601,7 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor query = *access; for (; map->access; map++) { if (map->access & query) { - unsigned int err2; + __be32 err2; sresult |= map->access; @@ -637,13 +640,15 @@ nfsd_access(struct svc_rqst *rqstp, struct svc_fh *fhp, u32 *access, u32 *suppor * The access argument indicates the type of open (read/write/lock) * N.B. 
After this call fhp needs an fh_put */ -int +__be32 nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, int access, struct file **filp) { struct dentry *dentry; struct inode *inode; - int flags = O_RDONLY|O_LARGEFILE, err; + int flags = O_RDONLY|O_LARGEFILE; + __be32 err; + int host_err; /* * If we get here, then the client has already done an "open", @@ -673,10 +678,10 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, * Check to see if there are any leases on this file. * This may block while leases are broken. */ - err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0)); - if (err == -EWOULDBLOCK) - err = -ETIMEDOUT; - if (err) /* NOMEM or WOULDBLOCK */ + host_err = break_lease(inode, O_NONBLOCK | ((access & MAY_WRITE) ? FMODE_WRITE : 0)); + if (host_err == -EWOULDBLOCK) + host_err = -ETIMEDOUT; + if (host_err) /* NOMEM or WOULDBLOCK */ goto out_nfserr; if (access & MAY_WRITE) { @@ -689,10 +694,9 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, } *filp = dentry_open(dget(dentry), mntget(fhp->fh_export->ex_mnt), flags); if (IS_ERR(*filp)) - err = PTR_ERR(*filp); + host_err = PTR_ERR(*filp); out_nfserr: - if (err) - err = nfserrno(err); + err = nfserrno(host_err); out: return err; } @@ -830,14 +834,15 @@ nfsd_read_actor(read_descriptor_t *desc, struct page *page, unsigned long offset return size; } -static int +static __be32 nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *count) { struct inode *inode; struct raparms *ra; mm_segment_t oldfs; - int err; + __be32 err; + int host_err; err = nfserr_perm; inode = file->f_dentry->d_inode; @@ -855,12 +860,12 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (file->f_op->sendfile && rqstp->rq_sendfile_ok) { rqstp->rq_resused = 1; - err = file->f_op->sendfile(file, &offset, *count, + host_err = file->f_op->sendfile(file, &offset, *count, nfsd_read_actor, rqstp); } else { oldfs = get_fs(); set_fs(KERNEL_DS); - err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); + host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset); set_fs(oldfs); } @@ -874,13 +879,13 @@ nfsd_vfs_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, spin_unlock(&rab->pb_lock); } - if (err >= 0) { - nfsdstats.io_read += err; - *count = err; + if (host_err >= 0) { + nfsdstats.io_read += host_err; + *count = host_err; err = 0; fsnotify_access(file->f_dentry); } else - err = nfserrno(err); + err = nfserrno(host_err); out: return err; } @@ -895,7 +900,7 @@ static void kill_suid(struct dentry *dentry) mutex_unlock(&dentry->d_inode->i_mutex); } -static int +static __be32 nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long cnt, int *stablep) @@ -904,7 +909,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, struct dentry *dentry; struct inode *inode; mm_segment_t oldfs; - int err = 0; + __be32 err = 0; + int host_err; int stable = *stablep; #ifdef MSNFS @@ -940,18 +946,18 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, /* Write the data. 
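 *
 * Editor's note -- vfs_writev() returns a byte count or a negative
 * host errno, so its result lands in host_err; err only ever holds
 * 0 or an nfserrno() conversion.  Sketch of the calls in this hunk:
 *
 *     mm_segment_t oldfs = get_fs();
 *     set_fs(KERNEL_DS);
 *     host_err = vfs_writev(file, (struct iovec __user *)vec,
 *                           vlen, &offset);
 *     set_fs(oldfs);
 *     err = (host_err >= 0) ? 0 : nfserrno(host_err);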
*/ oldfs = get_fs(); set_fs(KERNEL_DS); - err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); + host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &offset); set_fs(oldfs); - if (err >= 0) { + if (host_err >= 0) { nfsdstats.io_write += cnt; fsnotify_modify(file->f_dentry); } /* clear setuid/setgid flag after write */ - if (err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) + if (host_err >= 0 && (inode->i_mode & (S_ISUID | S_ISGID))) kill_suid(dentry); - if (err >= 0 && stable) { + if (host_err >= 0 && stable) { static ino_t last_ino; static dev_t last_dev; @@ -977,7 +983,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (inode->i_state & I_DIRTY) { dprintk("nfsd: write sync %d\n", current->pid); - err=nfsd_sync(file); + host_err=nfsd_sync(file); } #if 0 wake_up(&inode->i_wait); @@ -987,11 +993,11 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, last_dev = inode->i_sb->s_dev; } - dprintk("nfsd: write complete err=%d\n", err); - if (err >= 0) + dprintk("nfsd: write complete host_err=%d\n", host_err); + if (host_err >= 0) err = 0; else - err = nfserrno(err); + err = nfserrno(host_err); out: return err; } @@ -1001,12 +1007,12 @@ out: * on entry. On return, *count contains the number of bytes actually read. * N.B. After this call fhp needs an fh_put */ -int +__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long *count) { - int err; + __be32 err; if (file) { err = nfsd_permission(fhp->fh_export, fhp->fh_dentry, @@ -1030,12 +1036,12 @@ out: * The stable flag requests synchronous writes. * N.B. After this call fhp needs an fh_put */ -int +__be32 nfsd_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, loff_t offset, struct kvec *vec, int vlen, unsigned long cnt, int *stablep) { - int err = 0; + __be32 err = 0; if (file) { err = nfsd_permission(fhp->fh_export, fhp->fh_dentry, @@ -1067,12 +1073,12 @@ out: * Unfortunately we cannot lock the file to make sure we return full WCC * data to the client, as locking happens lower down in the filesystem. */ -int +__be32 nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t offset, unsigned long count) { struct file *file; - int err; + __be32 err; if ((u64)count > ~(u64)offset) return nfserr_inval; @@ -1100,14 +1106,15 @@ nfsd_commit(struct svc_rqst *rqstp, struct svc_fh *fhp, * * N.B. 
Every call to nfsd_create needs an fh_put for _both_ fhp and resfhp */ -int +__be32 nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, char *fname, int flen, struct iattr *iap, int type, dev_t rdev, struct svc_fh *resfhp) { struct dentry *dentry, *dchild = NULL; struct inode *dirp; - int err; + __be32 err; + int host_err; err = nfserr_perm; if (!flen) @@ -1134,7 +1141,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, /* called from nfsd_proc_mkdir, or possibly nfsd3_proc_create */ fh_lock_nested(fhp, I_MUTEX_PARENT); dchild = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(dchild); + host_err = PTR_ERR(dchild); if (IS_ERR(dchild)) goto out_nfserr; err = fh_compose(resfhp, fhp->fh_export, dchild, fhp); @@ -1173,22 +1180,22 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, err = nfserr_perm; switch (type) { case S_IFREG: - err = vfs_create(dirp, dchild, iap->ia_mode, NULL); + host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); break; case S_IFDIR: - err = vfs_mkdir(dirp, dchild, iap->ia_mode); + host_err = vfs_mkdir(dirp, dchild, iap->ia_mode); break; case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK: - err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); + host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev); break; default: printk("nfsd: bad file type %o in nfsd_create\n", type); - err = -EINVAL; + host_err = -EINVAL; } - if (err < 0) + if (host_err < 0) goto out_nfserr; if (EX_ISSYNC(fhp->fh_export)) { @@ -1203,7 +1210,7 @@ nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, * directories via NFS. */ if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID|ATTR_MODE)) != 0) { - int err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); + __be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); if (err2) err = err2; } @@ -1218,7 +1225,7 @@ out: return err; out_nfserr: - err = nfserrno(err); + err = nfserrno(host_err); goto out; } @@ -1226,7 +1233,7 @@ out_nfserr: /* * NFSv3 version of nfsd_create */ -int +__be32 nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, char *fname, int flen, struct iattr *iap, struct svc_fh *resfhp, int createmode, u32 *verifier, @@ -1234,7 +1241,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, { struct dentry *dentry, *dchild = NULL; struct inode *dirp; - int err; + __be32 err; + int host_err; __u32 v_mtime=0, v_atime=0; int v_mode=0; @@ -1264,7 +1272,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, * Compose the response file handle. */ dchild = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(dchild); + host_err = PTR_ERR(dchild); if (IS_ERR(dchild)) goto out_nfserr; @@ -1320,8 +1328,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, goto out; } - err = vfs_create(dirp, dchild, iap->ia_mode, NULL); - if (err < 0) + host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); + if (host_err < 0) goto out_nfserr; if (EX_ISSYNC(fhp->fh_export)) { @@ -1350,7 +1358,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, */ set_attr: if ((iap->ia_valid &= ~(ATTR_UID|ATTR_GID)) != 0) { - int err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); + __be32 err2 = nfsd_setattr(rqstp, resfhp, iap, 0, (time_t)0); if (err2) err = err2; } @@ -1368,7 +1376,7 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, return err; out_nfserr: - err = nfserrno(err); + err = nfserrno(host_err); goto out; } #endif /* CONFIG_NFSD_V3 */ @@ -1378,13 +1386,14 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, * fits into the buffer. On return, it contains the true length. * N.B. 
After this call fhp needs an fh_put */ -int +__be32 nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) { struct dentry *dentry; struct inode *inode; mm_segment_t oldfs; - int err; + __be32 err; + int host_err; err = fh_verify(rqstp, fhp, S_IFLNK, MAY_NOP); if (err) @@ -1403,18 +1412,18 @@ nfsd_readlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *buf, int *lenp) */ oldfs = get_fs(); set_fs(KERNEL_DS); - err = inode->i_op->readlink(dentry, buf, *lenp); + host_err = inode->i_op->readlink(dentry, buf, *lenp); set_fs(oldfs); - if (err < 0) + if (host_err < 0) goto out_nfserr; - *lenp = err; + *lenp = host_err; err = 0; out: return err; out_nfserr: - err = nfserrno(err); + err = nfserrno(host_err); goto out; } @@ -1422,7 +1431,7 @@ out_nfserr: * Create a symlink and look up its inode * N.B. After this call _both_ fhp and resfhp need an fh_put */ -int +__be32 nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, char *fname, int flen, char *path, int plen, @@ -1430,7 +1439,8 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap) { struct dentry *dentry, *dnew; - int err, cerr; + __be32 err, cerr; + int host_err; umode_t mode; err = nfserr_noent; @@ -1446,7 +1456,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, fh_lock(fhp); dentry = fhp->fh_dentry; dnew = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(dnew); + host_err = PTR_ERR(dnew); if (IS_ERR(dnew)) goto out_nfserr; @@ -1458,21 +1468,21 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp, if (unlikely(path[plen] != 0)) { char *path_alloced = kmalloc(plen+1, GFP_KERNEL); if (path_alloced == NULL) - err = -ENOMEM; + host_err = -ENOMEM; else { strncpy(path_alloced, path, plen); path_alloced[plen] = 0; - err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode); + host_err = vfs_symlink(dentry->d_inode, dnew, path_alloced, mode); kfree(path_alloced); } } else - err = vfs_symlink(dentry->d_inode, dnew, path, mode); + host_err = vfs_symlink(dentry->d_inode, dnew, path, mode); - if (!err) + if (!host_err) { if (EX_ISSYNC(fhp->fh_export)) - err = nfsd_sync_dir(dentry); - if (err) - err = nfserrno(err); + host_err = nfsd_sync_dir(dentry); + } + err = nfserrno(host_err); fh_unlock(fhp); cerr = fh_compose(resfhp, fhp->fh_export, dnew, fhp); @@ -1482,7 +1492,7 @@ out: return err; out_nfserr: - err = nfserrno(err); + err = nfserrno(host_err); goto out; } @@ -1490,13 +1500,14 @@ out_nfserr: * Create a hardlink * N.B. 
After this call _both_ ffhp and tfhp need an fh_put */ -int +__be32 nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *name, int len, struct svc_fh *tfhp) { struct dentry *ddir, *dnew, *dold; struct inode *dirp, *dest; - int err; + __be32 err; + int host_err; err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_CREATE); if (err) @@ -1517,24 +1528,25 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp, dirp = ddir->d_inode; dnew = lookup_one_len(name, ddir, len); - err = PTR_ERR(dnew); + host_err = PTR_ERR(dnew); if (IS_ERR(dnew)) goto out_nfserr; dold = tfhp->fh_dentry; dest = dold->d_inode; - err = vfs_link(dold, dirp, dnew); - if (!err) { + host_err = vfs_link(dold, dirp, dnew); + if (!host_err) { if (EX_ISSYNC(ffhp->fh_export)) { err = nfserrno(nfsd_sync_dir(ddir)); write_inode_now(dest, 1); } + err = 0; } else { - if (err == -EXDEV && rqstp->rq_vers == 2) + if (host_err == -EXDEV && rqstp->rq_vers == 2) err = nfserr_acces; else - err = nfserrno(err); + err = nfserrno(host_err); } dput(dnew); @@ -1544,7 +1556,7 @@ out: return err; out_nfserr: - err = nfserrno(err); + err = nfserrno(host_err); goto out_unlock; } @@ -1552,13 +1564,14 @@ out_nfserr: * Rename a file * N.B. After this call _both_ ffhp and tfhp need an fh_put */ -int +__be32 nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, struct svc_fh *tfhp, char *tname, int tlen) { struct dentry *fdentry, *tdentry, *odentry, *ndentry, *trap; struct inode *fdir, *tdir; - int err; + __be32 err; + int host_err; err = fh_verify(rqstp, ffhp, S_IFDIR, MAY_REMOVE); if (err) @@ -1589,22 +1602,22 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, fill_pre_wcc(tfhp); odentry = lookup_one_len(fname, fdentry, flen); - err = PTR_ERR(odentry); + host_err = PTR_ERR(odentry); if (IS_ERR(odentry)) goto out_nfserr; - err = -ENOENT; + host_err = -ENOENT; if (!odentry->d_inode) goto out_dput_old; - err = -EINVAL; + host_err = -EINVAL; if (odentry == trap) goto out_dput_old; ndentry = lookup_one_len(tname, tdentry, tlen); - err = PTR_ERR(ndentry); + host_err = PTR_ERR(ndentry); if (IS_ERR(ndentry)) goto out_dput_old; - err = -ENOTEMPTY; + host_err = -ENOTEMPTY; if (ndentry == trap) goto out_dput_new; @@ -1612,14 +1625,14 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, if ((ffhp->fh_export->ex_flags & NFSEXP_MSNFS) && ((atomic_read(&odentry->d_count) > 1) || (atomic_read(&ndentry->d_count) > 1))) { - err = -EPERM; + host_err = -EPERM; } else #endif - err = vfs_rename(fdir, odentry, tdir, ndentry); - if (!err && EX_ISSYNC(tfhp->fh_export)) { - err = nfsd_sync_dir(tdentry); - if (!err) - err = nfsd_sync_dir(fdentry); + host_err = vfs_rename(fdir, odentry, tdir, ndentry); + if (!host_err && EX_ISSYNC(tfhp->fh_export)) { + host_err = nfsd_sync_dir(tdentry); + if (!host_err) + host_err = nfsd_sync_dir(fdentry); } out_dput_new: @@ -1627,8 +1640,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen, out_dput_old: dput(odentry); out_nfserr: - if (err) - err = nfserrno(err); + err = nfserrno(host_err); /* we cannot reply on fh_unlock on the two filehandles, * as that would do the wrong thing if the two directories @@ -1647,13 +1659,14 @@ out: * Unlink a file or directory * N.B. 
After this call fhp needs an fh_put */ -int +__be32 nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, char *fname, int flen) { struct dentry *dentry, *rdentry; struct inode *dirp; - int err; + __be32 err; + int host_err; err = nfserr_acces; if (!flen || isdotent(fname, flen)) @@ -1667,7 +1680,7 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, dirp = dentry->d_inode; rdentry = lookup_one_len(fname, dentry, flen); - err = PTR_ERR(rdentry); + host_err = PTR_ERR(rdentry); if (IS_ERR(rdentry)) goto out_nfserr; @@ -1684,22 +1697,23 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type, #ifdef MSNFS if ((fhp->fh_export->ex_flags & NFSEXP_MSNFS) && (atomic_read(&rdentry->d_count) > 1)) { - err = -EPERM; + host_err = -EPERM; } else #endif - err = vfs_unlink(dirp, rdentry); + host_err = vfs_unlink(dirp, rdentry); } else { /* It's RMDIR */ - err = vfs_rmdir(dirp, rdentry); + host_err = vfs_rmdir(dirp, rdentry); } dput(rdentry); - if (err == 0 && - EX_ISSYNC(fhp->fh_export)) - err = nfsd_sync_dir(dentry); + if (host_err) + goto out_nfserr; + if (EX_ISSYNC(fhp->fh_export)) + host_err = nfsd_sync_dir(dentry); out_nfserr: - err = nfserrno(err); + err = nfserrno(host_err); out: return err; } @@ -1708,11 +1722,12 @@ out: * Read entries from a directory. * The NFSv3/4 verifier we ignore for now. */ -int +__be32 nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, struct readdir_cd *cdp, encode_dent_fn func) { - int err; + __be32 err; + int host_err; struct file *file; loff_t offset = *offsetp; @@ -1734,10 +1749,10 @@ nfsd_readdir(struct svc_rqst *rqstp, struct svc_fh *fhp, loff_t *offsetp, do { cdp->err = nfserr_eof; /* will be cleared on successful read */ - err = vfs_readdir(file, (filldir_t) func, cdp); - } while (err >=0 && cdp->err == nfs_ok); - if (err) - err = nfserrno(err); + host_err = vfs_readdir(file, (filldir_t) func, cdp); + } while (host_err >=0 && cdp->err == nfs_ok); + if (host_err) + err = nfserrno(host_err); else err = cdp->err; *offsetp = vfs_llseek(file, 0, 1); @@ -1754,10 +1769,10 @@ out: * Get file system stats * N.B. After this call fhp needs an fh_put */ -int +__be32 nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) { - int err = fh_verify(rqstp, fhp, 0, MAY_NOP); + __be32 err = fh_verify(rqstp, fhp, 0, MAY_NOP); if (!err && vfs_statfs(fhp->fh_dentry,stat)) err = nfserr_io; return err; @@ -1766,7 +1781,7 @@ nfsd_statfs(struct svc_rqst *rqstp, struct svc_fh *fhp, struct kstatfs *stat) /* * Check for a user's access permissions to this inode. 
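 *
 * Editor's note -- nfsd_permission() only ever returns nfs_ok or an
 * nfserr_* value, so the return type becomes __be32 here and in the
 * include/linux/nfsd/nfsd.h prototypes below.  Hypothetical call
 * site, for illustration only:
 *
 *     __be32 err = nfsd_permission(fhp->fh_export,
 *                                  fhp->fh_dentry, MAY_READ);
 *     if (err)
 *             return err;     already wire-endian, no conversion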
*/ -int +__be32 nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc) { struct inode *inode = dentry->d_inode; diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 2f75160a5824..19a3c83d496e 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -72,57 +72,57 @@ int nfsd_racache_init(int); void nfsd_racache_shutdown(void); int nfsd_cross_mnt(struct svc_rqst *rqstp, struct dentry **dpp, struct svc_export **expp); -int nfsd_lookup(struct svc_rqst *, struct svc_fh *, +__be32 nfsd_lookup(struct svc_rqst *, struct svc_fh *, const char *, int, struct svc_fh *); -int nfsd_setattr(struct svc_rqst *, struct svc_fh *, +__be32 nfsd_setattr(struct svc_rqst *, struct svc_fh *, struct iattr *, int, time_t); #ifdef CONFIG_NFSD_V4 -int nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, +__be32 nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, struct nfs4_acl *); int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **); #endif /* CONFIG_NFSD_V4 */ -int nfsd_create(struct svc_rqst *, struct svc_fh *, +__be32 nfsd_create(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, int type, dev_t rdev, struct svc_fh *res); #ifdef CONFIG_NFSD_V3 -int nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); -int nfsd_create_v3(struct svc_rqst *, struct svc_fh *, +__be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); +__be32 nfsd_create_v3(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, struct svc_fh *res, int createmode, u32 *verifier, int *truncp); -int nfsd_commit(struct svc_rqst *, struct svc_fh *, +__be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, loff_t, unsigned long); #endif /* CONFIG_NFSD_V3 */ -int nfsd_open(struct svc_rqst *, struct svc_fh *, int, +__be32 nfsd_open(struct svc_rqst *, struct svc_fh *, int, int, struct file **); void nfsd_close(struct file *); -int nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *, +__be32 nfsd_read(struct svc_rqst *, struct svc_fh *, struct file *, loff_t, struct kvec *, int, unsigned long *); -int nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, +__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *, loff_t, struct kvec *,int, unsigned long, int *); -int nfsd_readlink(struct svc_rqst *, struct svc_fh *, +__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *, char *, int *); -int nfsd_symlink(struct svc_rqst *, struct svc_fh *, +__be32 nfsd_symlink(struct svc_rqst *, struct svc_fh *, char *name, int len, char *path, int plen, struct svc_fh *res, struct iattr *); -int nfsd_link(struct svc_rqst *, struct svc_fh *, +__be32 nfsd_link(struct svc_rqst *, struct svc_fh *, char *, int, struct svc_fh *); -int nfsd_rename(struct svc_rqst *, +__be32 nfsd_rename(struct svc_rqst *, struct svc_fh *, char *, int, struct svc_fh *, char *, int); -int nfsd_remove(struct svc_rqst *, +__be32 nfsd_remove(struct svc_rqst *, struct svc_fh *, char *, int); -int nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, +__be32 nfsd_unlink(struct svc_rqst *, struct svc_fh *, int type, char *name, int len); int nfsd_truncate(struct svc_rqst *, struct svc_fh *, unsigned long size); -int nfsd_readdir(struct svc_rqst *, struct svc_fh *, +__be32 nfsd_readdir(struct svc_rqst *, struct svc_fh *, loff_t *, struct readdir_cd *, encode_dent_fn); -int nfsd_statfs(struct svc_rqst *, struct svc_fh *, +__be32 nfsd_statfs(struct svc_rqst *, struct svc_fh *, struct kstatfs *); int nfsd_notify_change(struct inode 
*, struct iattr *); -int nfsd_permission(struct svc_export *, struct dentry *, int); +__be32 nfsd_permission(struct svc_export *, struct dentry *, int); int nfsd_sync_dir(struct dentry *dp); #if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL) -- cgit v1.2.3 From b37ad28bcaa7c486a4ff0fb6c3bdaaacd67b86ce Mon Sep 17 00:00:00 2001 From: Al Viro <viro@ftp.linux.org.uk> Date: Thu, 19 Oct 2006 23:28:59 -0700 Subject: [PATCH] nfsd: nfs4 code returns error values in net-endian Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no> Acked-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/nfsd/nfs4proc.c | 82 ++++++++++++------------- fs/nfsd/nfs4recover.c | 14 ++--- fs/nfsd/nfs4state.c | 96 ++++++++++++++--------------- fs/nfsd/nfs4xdr.c | 150 ++++++++++++++++++++++----------------------- include/linux/nfsd/state.h | 10 +-- include/linux/nfsd/xdr4.h | 30 ++++----- 6 files changed, 191 insertions(+), 191 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ca6414248527..63823945f972 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -67,10 +67,10 @@ fh_dup2(struct svc_fh *dst, struct svc_fh *src) *dst = *src; } -static int +static __be32 do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, int accmode) { - int status; + __be32 status; if (open->op_truncate && !(open->op_share_access & NFS4_SHARE_ACCESS_WRITE)) @@ -88,11 +88,11 @@ do_open_permission(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfs return status; } -static int +static __be32 do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { struct svc_fh resfh; - int status; + __be32 status; fh_init(&resfh, NFS4_FHSIZE); open->op_truncate = 0; @@ -131,10 +131,10 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o return status; } -static int +static __be32 do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { - int status; + __be32 status; /* Only reclaims from previously confirmed clients are valid */ if ((status = nfs4_check_open_reclaim(&open->op_clientid))) @@ -161,10 +161,10 @@ do_open_fhandle(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ } -static inline int +static inline __be32 nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open, struct nfs4_stateowner **replay_owner) { - int status; + __be32 status; dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n", (int)open->op_fname.len, open->op_fname.data, open->op_stateowner); @@ -261,7 +261,7 @@ out: /* * filehandle-manipulating ops. 
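 *
 * Editor's note -- the ops below never touch a host errno; they
 * return nfs_ok or an nfserr_* constant directly, so the annotation
 * is a pure type change with no nfserrno() calls.  Illustrative
 * helper in the same shape (hypothetical, not from this patch):
 *
 *     static inline __be32
 *     require_fh(struct svc_fh *fh)
 *     {
 *             if (!fh->fh_dentry)
 *                     return nfserr_nofilehandle;
 *             return nfs_ok;
 *     }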
*/ -static inline int +static inline __be32 nfsd4_getfh(struct svc_fh *current_fh, struct svc_fh **getfh) { if (!current_fh->fh_dentry) @@ -271,7 +271,7 @@ nfsd4_getfh(struct svc_fh *current_fh, struct svc_fh **getfh) return nfs_ok; } -static inline int +static inline __be32 nfsd4_putfh(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_putfh *putfh) { fh_put(current_fh); @@ -280,10 +280,10 @@ nfsd4_putfh(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_putf return fh_verify(rqstp, current_fh, 0, MAY_NOP); } -static inline int +static inline __be32 nfsd4_putrootfh(struct svc_rqst *rqstp, struct svc_fh *current_fh) { - int status; + __be32 status; fh_put(current_fh); status = exp_pseudoroot(rqstp->rq_client, current_fh, @@ -291,7 +291,7 @@ nfsd4_putrootfh(struct svc_rqst *rqstp, struct svc_fh *current_fh) return status; } -static inline int +static inline __be32 nfsd4_restorefh(struct svc_fh *current_fh, struct svc_fh *save_fh) { if (!save_fh->fh_dentry) @@ -301,7 +301,7 @@ nfsd4_restorefh(struct svc_fh *current_fh, struct svc_fh *save_fh) return nfs_ok; } -static inline int +static inline __be32 nfsd4_savefh(struct svc_fh *current_fh, struct svc_fh *save_fh) { if (!current_fh->fh_dentry) @@ -314,7 +314,7 @@ nfsd4_savefh(struct svc_fh *current_fh, struct svc_fh *save_fh) /* * misc nfsv4 ops */ -static inline int +static inline __be32 nfsd4_access(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_access *access) { if (access->ac_req_access & ~NFS3_ACCESS_FULL) @@ -324,10 +324,10 @@ nfsd4_access(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_acc return nfsd_access(rqstp, current_fh, &access->ac_resp_access, &access->ac_supported); } -static inline int +static inline __be32 nfsd4_commit(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_commit *commit) { - int status; + __be32 status; u32 *p = (u32 *)commit->co_verf.data; *p++ = nfssvc_boot.tv_sec; @@ -339,11 +339,11 @@ nfsd4_commit(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_com return status; } -static int +static __be32 nfsd4_create(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_create *create) { struct svc_fh resfh; - int status; + __be32 status; dev_t rdev; fh_init(&resfh, NFS4_FHSIZE); @@ -423,10 +423,10 @@ nfsd4_create(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_cre return status; } -static inline int +static inline __be32 nfsd4_getattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_getattr *getattr) { - int status; + __be32 status; status = fh_verify(rqstp, current_fh, 0, MAY_NOP); if (status) @@ -442,11 +442,11 @@ nfsd4_getattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_ge return nfs_ok; } -static inline int +static inline __be32 nfsd4_link(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct svc_fh *save_fh, struct nfsd4_link *link) { - int status = nfserr_nofilehandle; + __be32 status = nfserr_nofilehandle; if (!save_fh->fh_dentry) return status; @@ -456,11 +456,11 @@ nfsd4_link(struct svc_rqst *rqstp, struct svc_fh *current_fh, return status; } -static int +static __be32 nfsd4_lookupp(struct svc_rqst *rqstp, struct svc_fh *current_fh) { struct svc_fh tmp_fh; - int ret; + __be32 ret; fh_init(&tmp_fh, NFS4_FHSIZE); if((ret = exp_pseudoroot(rqstp->rq_client, &tmp_fh, @@ -474,16 +474,16 @@ nfsd4_lookupp(struct svc_rqst *rqstp, struct svc_fh *current_fh) return nfsd_lookup(rqstp, current_fh, "..", 2, current_fh); } -static inline int +static inline __be32 
nfsd4_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lookup *lookup) { return nfsd_lookup(rqstp, current_fh, lookup->lo_name, lookup->lo_len, current_fh); } -static inline int +static inline __be32 nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read) { - int status; + __be32 status; /* no need to check permission - this will be done in nfsd_read() */ @@ -508,7 +508,7 @@ out: return status; } -static inline int +static inline __be32 nfsd4_readdir(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_readdir *readdir) { u64 cookie = readdir->rd_cookie; @@ -531,7 +531,7 @@ nfsd4_readdir(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_re return nfs_ok; } -static inline int +static inline __be32 nfsd4_readlink(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_readlink *readlink) { readlink->rl_rqstp = rqstp; @@ -539,10 +539,10 @@ nfsd4_readlink(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_r return nfs_ok; } -static inline int +static inline __be32 nfsd4_remove(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_remove *remove) { - int status; + __be32 status; if (nfs4_in_grace()) return nfserr_grace; @@ -556,11 +556,11 @@ nfsd4_remove(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_rem return status; } -static inline int +static inline __be32 nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct svc_fh *save_fh, struct nfsd4_rename *rename) { - int status = nfserr_nofilehandle; + __be32 status = nfserr_nofilehandle; if (!save_fh->fh_dentry) return status; @@ -589,10 +589,10 @@ nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh, return status; } -static inline int +static inline __be32 nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_setattr *setattr) { - int status = nfs_ok; + __be32 status = nfs_ok; if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { nfs4_lock_state(); @@ -614,13 +614,13 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_se return status; } -static inline int +static inline __be32 nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_write *write) { stateid_t *stateid = &write->wr_stateid; struct file *filp = NULL; u32 *p; - int status = nfs_ok; + __be32 status = nfs_ok; /* no need to check permission - this will be done in nfsd_write() */ @@ -661,12 +661,12 @@ nfsd4_write(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_writ * attributes matched. VERIFY is implemented by mapping NFSERR_SAME * to NFS_OK after the call; NVERIFY by mapping NFSERR_NOT_SAME to NFS_OK. */ -static int +static __be32 nfsd4_verify(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_verify *verify) { __be32 *buf, *p; int count; - int status; + __be32 status; status = fh_verify(rqstp, current_fh, 0, MAY_NOP); if (status) @@ -741,7 +741,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, struct svc_fh *save_fh = NULL; struct nfs4_stateowner *replay_owner = NULL; int slack_space; /* in words, not bytes! 
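 *
 * Editor's note -- only the NFS status changes type in this hunk;
 * host-side bookkeeping such as slack_space stays a plain int:
 *
 *     int slack_space;    reply space left, host-endian words
 *     __be32 status;      per-op NFS status, wire-endian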
*/ - int status; + __be32 status; status = nfserr_resource; current_fh = kmalloc(sizeof(*current_fh), GFP_KERNEL); diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 1cbd2e4ee122..e9d07704680e 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -83,13 +83,13 @@ md5_to_hex(char *out, char *md5) *out = '\0'; } -int +__be32 nfs4_make_rec_clidname(char *dname, struct xdr_netobj *clname) { struct xdr_netobj cksum; struct hash_desc desc; struct scatterlist sg[1]; - int status = nfserr_resource; + __be32 status = nfserr_resource; dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n", clname->len, clname->data); @@ -193,7 +193,7 @@ nfsd4_build_dentrylist(void *arg, const char *name, int namlen, struct dentry_list *child; if (name && isdotent(name, namlen)) - return nfs_ok; + return 0; dentry = lookup_one_len(name, parent, namlen); if (IS_ERR(dentry)) return PTR_ERR(dentry); @@ -333,14 +333,14 @@ purge_old(struct dentry *parent, struct dentry *child) int status; if (nfs4_has_reclaimed_state(child->d_name.name)) - return nfs_ok; + return 0; status = nfsd4_clear_clid_dir(parent, child); if (status) printk("failed to remove client recovery directory %s\n", child->d_name.name); /* Keep trying, success or failure: */ - return nfs_ok; + return 0; } void @@ -365,10 +365,10 @@ load_recdir(struct dentry *parent, struct dentry *child) printk("nfsd4: illegal name %s in recovery directory\n", child->d_name.name); /* Keep trying; maybe the others are OK: */ - return nfs_ok; + return 0; } nfs4_client_to_reclaim(child->d_name.name); - return nfs_ok; + return 0; } int diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ebcf226a9e4a..e5ca6d7028df 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -710,7 +710,7 @@ out_err: * as described above. 
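 *
 * Editor's sketch -- nfserr_* constants are already big-endian, so
 * a __be32 status can be assigned and returned with no conversion.
 * Hypothetical helper built from checks visible in this file:
 *
 *     static __be32 check_clid(clientid_t *clid)
 *     {
 *             if (STALE_CLIENTID(clid))
 *                     return nfserr_stale_clientid;
 *             return nfs_ok;
 *     }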
* */ -int +__be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid) { u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr; @@ -721,7 +721,7 @@ nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid) nfs4_verifier clverifier = setclid->se_verf; unsigned int strhashval; struct nfs4_client *conf, *unconf, *new; - int status; + __be32 status; char dname[HEXDIR_LEN]; if (!check_name(clname)) @@ -875,14 +875,14 @@ out: * * NOTE: callback information will be processed here in a future patch */ -int +__be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm) { u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr; struct nfs4_client *conf, *unconf; nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = &setclientid_confirm->sc_clientid; - int status; + __be32 status; if (STALE_CLIENTID(clid)) return nfserr_stale_clientid; @@ -1280,13 +1280,13 @@ test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) { * Called to check deny when READ with all zero stateid or * WRITE with all zero or all one stateid */ -static int +static __be32 nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) { struct inode *ino = current_fh->fh_dentry->d_inode; struct nfs4_file *fp; struct nfs4_stateid *stp; - int ret; + __be32 ret; dprintk("NFSD: nfs4_share_conflict\n"); @@ -1444,7 +1444,7 @@ static struct lock_manager_operations nfsd_lease_mng_ops = { }; -int +__be32 nfsd4_process_open1(struct nfsd4_open *open) { clientid_t *clientid = &open->op_clientid; @@ -1501,7 +1501,7 @@ renew: return nfs_ok; } -static inline int +static inline __be32 nfs4_check_delegmode(struct nfs4_delegation *dp, int flags) { if ((flags & WR_STATE) && (dp->dl_type == NFS4_OPEN_DELEGATE_READ)) @@ -1522,12 +1522,12 @@ find_delegation_file(struct nfs4_file *fp, stateid_t *stid) return NULL; } -static int +static __be32 nfs4_check_deleg(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_delegation **dp) { int flags; - int status = nfserr_bad_stateid; + __be32 status = nfserr_bad_stateid; *dp = find_delegation_file(fp, &open->op_delegate_stateid); if (*dp == NULL) @@ -1546,11 +1546,11 @@ out: return nfs_ok; } -static int +static __be32 nfs4_check_open(struct nfs4_file *fp, struct nfsd4_open *open, struct nfs4_stateid **stpp) { struct nfs4_stateid *local; - int status = nfserr_share_denied; + __be32 status = nfserr_share_denied; struct nfs4_stateowner *sop = open->op_stateowner; list_for_each_entry(local, &fp->fi_stateids, st_perfile) { @@ -1575,7 +1575,7 @@ nfs4_alloc_stateid(void) return kmem_cache_alloc(stateid_slab, GFP_KERNEL); } -static int +static __be32 nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp, struct nfs4_delegation *dp, struct svc_fh *cur_fh, int flags) @@ -1590,7 +1590,7 @@ nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp, get_file(dp->dl_vfs_file); stp->st_vfs_file = dp->dl_vfs_file; } else { - int status; + __be32 status; status = nfsd_open(rqstp, cur_fh, S_IFREG, flags, &stp->st_vfs_file); if (status) { @@ -1604,7 +1604,7 @@ nfs4_new_open(struct svc_rqst *rqstp, struct nfs4_stateid **stpp, return 0; } -static inline int +static inline __be32 nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, struct nfsd4_open *open) { @@ -1619,22 +1619,22 @@ nfsd4_truncate(struct svc_rqst *rqstp, struct svc_fh *fh, return nfsd_setattr(rqstp, fh, &iattr, 0, (time_t)0); } -static int +static __be32 nfs4_upgrade_open(struct svc_rqst *rqstp, struct svc_fh *cur_fh, struct 
nfs4_stateid *stp, struct nfsd4_open *open) { struct file *filp = stp->st_vfs_file; struct inode *inode = filp->f_dentry->d_inode; unsigned int share_access, new_writer; - int status; + __be32 status; set_access(&share_access, stp->st_access_bmap); new_writer = (~share_access) & open->op_share_access & NFS4_SHARE_ACCESS_WRITE; if (new_writer) { - status = get_write_access(inode); - if (status) - return nfserrno(status); + int err = get_write_access(inode); + if (err) + return nfserrno(err); } status = nfsd4_truncate(rqstp, cur_fh, open); if (status) { @@ -1738,14 +1738,14 @@ out: /* * called with nfs4_lock_state() held. */ -int +__be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) { struct nfs4_file *fp = NULL; struct inode *ino = current_fh->fh_dentry->d_inode; struct nfs4_stateid *stp = NULL; struct nfs4_delegation *dp = NULL; - int status; + __be32 status; status = nfserr_inval; if (!access_valid(open->op_share_access) @@ -1833,11 +1833,11 @@ static struct work_struct laundromat_work; static void laundromat_main(void *); static DECLARE_WORK(laundromat_work, laundromat_main, NULL); -int +__be32 nfsd4_renew(clientid_t *clid) { struct nfs4_client *clp; - int status; + __be32 status; nfs4_lock_state(); dprintk("process_renew(%08x/%08x): starting\n", @@ -1996,9 +1996,9 @@ access_permit_write(unsigned long access_bmap) } static -int nfs4_check_openmode(struct nfs4_stateid *stp, int flags) +__be32 nfs4_check_openmode(struct nfs4_stateid *stp, int flags) { - int status = nfserr_openmode; + __be32 status = nfserr_openmode; if ((flags & WR_STATE) && (!access_permit_write(stp->st_access_bmap))) goto out; @@ -2009,7 +2009,7 @@ out: return status; } -static inline int +static inline __be32 check_special_stateids(svc_fh *current_fh, stateid_t *stateid, int flags) { /* Trying to call delegreturn with a special stateid? Yuch: */ @@ -2043,14 +2043,14 @@ io_during_grace_disallowed(struct inode *inode, int flags) /* * Checks for stateid operations */ -int +__be32 nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct file **filpp) { struct nfs4_stateid *stp = NULL; struct nfs4_delegation *dp = NULL; stateid_t *stidp; struct inode *ino = current_fh->fh_dentry->d_inode; - int status; + __be32 status; dprintk("NFSD: preprocess_stateid_op: stateid = (%08x/%08x/%08x/%08x)\n", stateid->si_boot, stateid->si_stateownerid, @@ -2125,7 +2125,7 @@ setlkflg (int type) /* * Checks for sequence id mutating operations. 
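 *
 * Editor's note -- where a host errno does appear on these paths,
 * as in nfs4_upgrade_open() above, the fix is a narrowly scoped int
 * plus a single conversion:
 *
 *     if (new_writer) {
 *             int err = get_write_access(inode);
 *             if (err)
 *                     return nfserrno(err);
 *     }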
*/ -static int +static __be32 nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, struct nfsd4_lock *lock) { struct nfs4_stateid *stp; @@ -2169,7 +2169,7 @@ nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *statei clientid_t *lockclid = &lock->v.new.clientid; struct nfs4_client *clp = sop->so_client; int lkflg = 0; - int status; + __be32 status; lkflg = setlkflg(lock->lk_type); @@ -2241,10 +2241,10 @@ check_replay: return nfserr_bad_seqid; } -int +__be32 nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc, struct nfs4_stateowner **replay_owner) { - int status; + __be32 status; struct nfs4_stateowner *sop; struct nfs4_stateid *stp; @@ -2310,10 +2310,10 @@ reset_union_bmap_deny(unsigned long deny, unsigned long *bmap) } } -int +__be32 nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_downgrade *od, struct nfs4_stateowner **replay_owner) { - int status; + __be32 status; struct nfs4_stateid *stp; unsigned int share_access; @@ -2365,10 +2365,10 @@ out: /* * nfs4_unlock_state() called after encode */ -int +__be32 nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_close *close, struct nfs4_stateowner **replay_owner) { - int status; + __be32 status; struct nfs4_stateid *stp; dprintk("NFSD: nfsd4_close on file %.*s\n", @@ -2404,10 +2404,10 @@ out: return status; } -int +__be32 nfsd4_delegreturn(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_delegreturn *dr) { - int status; + __be32 status; if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0))) goto out; @@ -2635,7 +2635,7 @@ check_lock_length(u64 offset, u64 length) /* * LOCK operation */ -int +__be32 nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock, struct nfs4_stateowner **replay_owner) { struct nfs4_stateowner *open_sop = NULL; @@ -2644,7 +2644,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock struct file *filp; struct file_lock file_lock; struct file_lock conflock; - int status = 0; + __be32 status = 0; unsigned int strhashval; dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", @@ -2793,14 +2793,14 @@ out: /* * LOCKT operation */ -int +__be32 nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lockt *lockt) { struct inode *inode; struct file file; struct file_lock file_lock; struct file_lock conflock; - int status; + __be32 status; if (nfs4_in_grace()) return nfserr_grace; @@ -2873,13 +2873,13 @@ out: return status; } -int +__be32 nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_locku *locku, struct nfs4_stateowner **replay_owner) { struct nfs4_stateid *stp; struct file *filp = NULL; struct file_lock file_lock; - int status; + __be32 status; dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n", (long long) locku->lu_offset, @@ -2965,7 +2965,7 @@ out: return status; } -int +__be32 nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner) { clientid_t *clid = &rlockowner->rl_clientid; @@ -2974,7 +2974,7 @@ nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner * struct xdr_netobj *owner = &rlockowner->rl_owner; struct list_head matches; int i; - int status; + __be32 status; dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", clid->cl_boot, clid->cl_id); @@ -3111,7 +3111,7 @@ 
nfs4_find_reclaim_client(clientid_t *clid) /* * Called from OPEN. Look for clientid in reclaim list. */ -int +__be32 nfs4_check_open_reclaim(clientid_t *clid) { return nfs4_find_reclaim_client(clid) ? nfs_ok : nfserr_reclaim_bad; diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 3419d99aeb1a..d7b630f1a9ae 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -68,8 +68,8 @@ #define NFS4_REFERRAL_FSID_MAJOR 0x8000000ULL #define NFS4_REFERRAL_FSID_MINOR 0x8000000ULL -static int -check_filename(char *str, int len, int err) +static __be32 +check_filename(char *str, int len, __be32 err) { int i; @@ -95,7 +95,7 @@ check_filename(char *str, int len, int err) */ #define DECODE_HEAD \ __be32 *p; \ - int status + __be32 status #define DECODE_TAIL \ status = 0; \ out: \ @@ -217,7 +217,7 @@ static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes) } -static int +static __be32 nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) { u32 bmlen; @@ -240,7 +240,7 @@ nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval) DECODE_TAIL; } -static int +static __be32 nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr, struct nfs4_acl **acl) { @@ -418,7 +418,7 @@ out_nfserr: goto out; } -static int +static __be32 nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access) { DECODE_HEAD; @@ -429,7 +429,7 @@ nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access DECODE_TAIL; } -static int +static __be32 nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) { DECODE_HEAD; @@ -444,7 +444,7 @@ nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close) } -static int +static __be32 nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit) { DECODE_HEAD; @@ -456,7 +456,7 @@ nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit DECODE_TAIL; } -static int +static __be32 nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create) { DECODE_HEAD; @@ -496,7 +496,7 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create DECODE_TAIL; } -static inline int +static inline __be32 nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr) { DECODE_HEAD; @@ -508,13 +508,13 @@ nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegretu DECODE_TAIL; } -static inline int +static inline __be32 nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr) { return nfsd4_decode_bitmap(argp, getattr->ga_bmval); } -static int +static __be32 nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) { DECODE_HEAD; @@ -529,7 +529,7 @@ nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link) DECODE_TAIL; } -static int +static __be32 nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) { DECODE_HEAD; @@ -568,7 +568,7 @@ nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock) DECODE_TAIL; } -static int +static __be32 nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) { DECODE_HEAD; @@ -587,7 +587,7 @@ nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt) DECODE_TAIL; } -static int +static __be32 nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku) { DECODE_HEAD; @@ -606,7 +606,7 @@ nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct 
nfsd4_locku *locku) DECODE_TAIL; } -static int +static __be32 nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup) { DECODE_HEAD; @@ -621,7 +621,7 @@ nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup DECODE_TAIL; } -static int +static __be32 nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) { DECODE_HEAD; @@ -699,7 +699,7 @@ nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open) DECODE_TAIL; } -static int +static __be32 nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf) { DECODE_HEAD; @@ -713,7 +713,7 @@ nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_con DECODE_TAIL; } -static int +static __be32 nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down) { DECODE_HEAD; @@ -729,7 +729,7 @@ nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_d DECODE_TAIL; } -static int +static __be32 nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) { DECODE_HEAD; @@ -744,7 +744,7 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh) DECODE_TAIL; } -static int +static __be32 nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) { DECODE_HEAD; @@ -758,7 +758,7 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read) DECODE_TAIL; } -static int +static __be32 nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir) { DECODE_HEAD; @@ -774,7 +774,7 @@ nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *read DECODE_TAIL; } -static int +static __be32 nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove) { DECODE_HEAD; @@ -789,7 +789,7 @@ nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove DECODE_TAIL; } -static int +static __be32 nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename) { DECODE_HEAD; @@ -809,7 +809,7 @@ nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename DECODE_TAIL; } -static int +static __be32 nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) { DECODE_HEAD; @@ -820,7 +820,7 @@ nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid) DECODE_TAIL; } -static int +static __be32 nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *setattr) { DECODE_HEAD; @@ -834,7 +834,7 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta DECODE_TAIL; } -static int +static __be32 nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid) { DECODE_HEAD; @@ -859,7 +859,7 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient DECODE_TAIL; } -static int +static __be32 nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c) { DECODE_HEAD; @@ -872,7 +872,7 @@ nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_s } /* Also used for NVERIFY */ -static int +static __be32 nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify) { #if 0 @@ -908,7 +908,7 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify DECODE_TAIL; } -static int +static __be32 nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) { int 
avail; @@ -959,7 +959,7 @@ nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write) DECODE_TAIL; } -static int +static __be32 nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner) { DECODE_HEAD; @@ -973,7 +973,7 @@ nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_rel DECODE_TAIL; } -static int +static __be32 nfsd4_decode_compound(struct nfsd4_compoundargs *argp) { DECODE_HEAD; @@ -1234,7 +1234,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) /* Encode as an array of strings the string given with components * seperated @sep. */ -static int nfsd4_encode_components(char sep, char *components, +static __be32 nfsd4_encode_components(char sep, char *components, __be32 **pp, int *buflen) { __be32 *p = *pp; @@ -1271,10 +1271,10 @@ static int nfsd4_encode_components(char sep, char *components, /* * encode a location element of a fs_locations structure */ -static int nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, +static __be32 nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, __be32 **pp, int *buflen) { - int status; + __be32 status; __be32 *p = *pp; status = nfsd4_encode_components(':', location->hosts, &p, buflen); @@ -1292,7 +1292,7 @@ static int nfsd4_encode_fs_location4(struct nfsd4_fs_location *location, * Returned string is safe to use as long as the caller holds a reference * to @exp. */ -static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, u32 *stat) +static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, __be32 *stat) { struct svc_fh tmp_fh; char *path, *rootpath; @@ -1318,11 +1318,11 @@ static char *nfsd4_path(struct svc_rqst *rqstp, struct svc_export *exp, u32 *sta /* * encode a fs_locations structure */ -static int nfsd4_encode_fs_locations(struct svc_rqst *rqstp, +static __be32 nfsd4_encode_fs_locations(struct svc_rqst *rqstp, struct svc_export *exp, __be32 **pp, int *buflen) { - u32 status; + __be32 status; int i; __be32 *p = *pp; struct nfsd4_fs_locations *fslocs = &exp->ex_fslocs; @@ -1353,7 +1353,7 @@ static u32 nfs4_ftypes[16] = { NF4SOCK, NF4BAD, NF4LNK, NF4BAD, }; -static int +static __be32 nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group, __be32 **p, int *buflen) { @@ -1375,19 +1375,19 @@ nfsd4_encode_name(struct svc_rqst *rqstp, int whotype, uid_t id, int group, return 0; } -static inline int +static inline __be32 nfsd4_encode_user(struct svc_rqst *rqstp, uid_t uid, __be32 **p, int *buflen) { return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, uid, 0, p, buflen); } -static inline int +static inline __be32 nfsd4_encode_group(struct svc_rqst *rqstp, uid_t gid, __be32 **p, int *buflen) { return nfsd4_encode_name(rqstp, NFS4_ACL_WHO_NAMED, gid, 1, p, buflen); } -static inline int +static inline __be32 nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group, __be32 **p, int *buflen) { @@ -1398,7 +1398,7 @@ nfsd4_encode_aclname(struct svc_rqst *rqstp, int whotype, uid_t id, int group, FATTR4_WORD0_RDATTR_ERROR) #define WORD1_ABSENT_FS_ATTRS FATTR4_WORD1_MOUNTED_ON_FILEID -static int fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) +static __be32 fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) { /* As per referral draft: */ if (*bmval0 & ~WORD0_ABSENT_FS_ATTRS || @@ -1421,7 +1421,7 @@ static int fattr_handle_absent_fs(u32 *bmval0, u32 *bmval1, u32 *rdattr_err) * @countp is the buffer size in _words_; upon successful return 
this becomes * replaced with the number of words written. */ -int +__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval, struct svc_rqst *rqstp) @@ -1437,7 +1437,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, u64 dummy64; u32 rdattr_err = 0; __be32 *p = buffer; - int status; + __be32 status; int aclsupport = 0; struct nfs4_acl *acl = NULL; @@ -1829,13 +1829,13 @@ out_serverfault: goto out; } -static int +static __be32 nfsd4_encode_dirent_fattr(struct nfsd4_readdir *cd, const char *name, int namlen, __be32 *p, int *buflen) { struct svc_export *exp = cd->rd_fhp->fh_export; struct dentry *dentry; - int nfserr; + __be32 nfserr; dentry = lookup_one_len(name, cd->rd_fhp->fh_dentry, namlen); if (IS_ERR(dentry)) @@ -1865,7 +1865,7 @@ out_put: } static __be32 * -nfsd4_encode_rdattr_error(__be32 *p, int buflen, int nfserr) +nfsd4_encode_rdattr_error(__be32 *p, int buflen, __be32 nfserr) { __be32 *attrlenp; @@ -1888,7 +1888,7 @@ nfsd4_encode_dirent(struct readdir_cd *ccd, const char *name, int namlen, struct nfsd4_readdir *cd = container_of(ccd, struct nfsd4_readdir, common); int buflen; __be32 *p = cd->buffer; - int nfserr = nfserr_toosmall; + __be32 nfserr = nfserr_toosmall; /* In nfsv4, "." and ".." never make it onto the wire.. */ if (name && isdotent(name, namlen)) { @@ -1944,7 +1944,7 @@ fail: } static void -nfsd4_encode_access(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_access *access) +nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_access *access) { ENCODE_HEAD; @@ -1957,7 +1957,7 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_acc } static void -nfsd4_encode_close(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_close *close) +nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_close *close) { ENCODE_SEQID_OP_HEAD; @@ -1972,7 +1972,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_clos static void -nfsd4_encode_commit(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_commit *commit) +nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_commit *commit) { ENCODE_HEAD; @@ -1984,7 +1984,7 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_com } static void -nfsd4_encode_create(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_create *create) +nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_create *create) { ENCODE_HEAD; @@ -1998,8 +1998,8 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_cre } } -static int -nfsd4_encode_getattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_getattr *getattr) +static __be32 +nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_getattr *getattr) { struct svc_fh *fhp = getattr->ga_fhp; int buflen; @@ -2017,7 +2017,7 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_ge } static void -nfsd4_encode_getfh(struct nfsd4_compoundres *resp, int nfserr, struct svc_fh *fhp) +nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh *fhp) { unsigned int len; ENCODE_HEAD; @@ -2057,7 +2057,7 @@ nfsd4_encode_lock_denied(struct nfsd4_compoundres *resp, struct nfsd4_lock_denie } static void -nfsd4_encode_lock(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lock *lock) +nfsd4_encode_lock(struct nfsd4_compoundres 
*resp, __be32 nfserr, struct nfsd4_lock *lock) { ENCODE_SEQID_OP_HEAD; @@ -2073,14 +2073,14 @@ nfsd4_encode_lock(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lock } static void -nfsd4_encode_lockt(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lockt *lockt) +nfsd4_encode_lockt(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_lockt *lockt) { if (nfserr == nfserr_denied) nfsd4_encode_lock_denied(resp, &lockt->lt_denied); } static void -nfsd4_encode_locku(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_locku *locku) +nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_locku *locku) { ENCODE_SEQID_OP_HEAD; @@ -2096,7 +2096,7 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_lock static void -nfsd4_encode_link(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_link *link) +nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_link *link) { ENCODE_HEAD; @@ -2109,7 +2109,7 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_link static void -nfsd4_encode_open(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open *open) +nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open *open) { ENCODE_SEQID_OP_HEAD; @@ -2174,7 +2174,7 @@ out: } static void -nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open_confirm *oc) +nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_confirm *oc) { ENCODE_SEQID_OP_HEAD; @@ -2189,7 +2189,7 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, int nfserr, struct nfs } static void -nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_open_downgrade *od) +nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_open_downgrade *od) { ENCODE_SEQID_OP_HEAD; @@ -2203,8 +2203,8 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, int nfserr, struct n ENCODE_SEQID_OP_TAIL(od->od_stateowner); } -static int -nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, +static __be32 +nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_read *read) { u32 eof; @@ -2268,8 +2268,8 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, int nfserr, return 0; } -static int -nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_readlink *readlink) +static __be32 +nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readlink *readlink) { int maxcount; char *page; @@ -2316,8 +2316,8 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_r return 0; } -static int -nfsd4_encode_readdir(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_readdir *readdir) +static __be32 +nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_readdir *readdir) { int maxcount; loff_t offset; @@ -2396,7 +2396,7 @@ err_no_verf: } static void -nfsd4_encode_remove(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_remove *remove) +nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_remove *remove) { ENCODE_HEAD; @@ -2408,7 +2408,7 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_rem } static void -nfsd4_encode_rename(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_rename *rename) +nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_rename 
*rename) { ENCODE_HEAD; @@ -2425,7 +2425,7 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_ren * regardless of the error status. */ static void -nfsd4_encode_setattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_setattr *setattr) +nfsd4_encode_setattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setattr *setattr) { ENCODE_HEAD; @@ -2444,7 +2444,7 @@ nfsd4_encode_setattr(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_se } static void -nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_setclientid *scd) +nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_setclientid *scd) { ENCODE_HEAD; @@ -2463,7 +2463,7 @@ nfsd4_encode_setclientid(struct nfsd4_compoundres *resp, int nfserr, struct nfsd } static void -nfsd4_encode_write(struct nfsd4_compoundres *resp, int nfserr, struct nfsd4_write *write) +nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_write *write) { ENCODE_HEAD; @@ -2641,7 +2641,7 @@ void nfsd4_release_compoundargs(struct nfsd4_compoundargs *args) int nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p, struct nfsd4_compoundargs *args) { - int status; + __be32 status; args->p = p; args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len; diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 8bf23cf8b603..a597e2e72469 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -164,7 +164,7 @@ update_stateid(stateid_t *stateid) * is cached. */ struct nfs4_replay { - u32 rp_status; + __be32 rp_status; unsigned int rp_buflen; char *rp_buf; unsigned int rp_allocated; @@ -273,19 +273,19 @@ struct nfs4_stateid { ((err) != nfserr_stale_stateid) && \ ((err) != nfserr_bad_stateid)) -extern int nfsd4_renew(clientid_t *clid); -extern int nfs4_preprocess_stateid_op(struct svc_fh *current_fh, +extern __be32 nfsd4_renew(clientid_t *clid); +extern __be32 nfs4_preprocess_stateid_op(struct svc_fh *current_fh, stateid_t *stateid, int flags, struct file **filp); extern void nfs4_lock_state(void); extern void nfs4_unlock_state(void); extern int nfs4_in_grace(void); -extern int nfs4_check_open_reclaim(clientid_t *clid); +extern __be32 nfs4_check_open_reclaim(clientid_t *clid); extern void put_nfs4_client(struct nfs4_client *clp); extern void nfs4_free_stateowner(struct kref *kref); extern void nfsd4_probe_callback(struct nfs4_client *clp); extern void nfsd4_cb_recall(struct nfs4_delegation *dp); extern void nfs4_put_delegation(struct nfs4_delegation *dp); -extern int nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); +extern __be32 nfs4_make_rec_clidname(char *clidname, struct xdr_netobj *clname); extern void nfsd4_init_recdir(char *recdir_name); extern int nfsd4_recdir_load(void); extern void nfsd4_shutdown_recdir(void); diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 003193fe6fc6..45ca01b5f844 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -334,7 +334,7 @@ struct nfsd4_write { struct nfsd4_op { int opnum; - int status; + __be32 status; union { struct nfsd4_access access; struct nfsd4_close close; @@ -426,38 +426,38 @@ int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *, struct nfsd4_compoundres *); void nfsd4_encode_operation(struct nfsd4_compoundres *, struct nfsd4_op *); void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); -int nfsd4_encode_fattr(struct svc_fh *fhp, struct
svc_export *exp, - struct dentry *dentry, u32 *buffer, int *countp, +__be32 nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry, __be32 *buffer, int *countp, u32 *bmval, struct svc_rqst *); -extern int nfsd4_setclientid(struct svc_rqst *rqstp, +extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid); -extern int nfsd4_setclientid_confirm(struct svc_rqst *rqstp, +extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm); -extern int nfsd4_process_open1(struct nfsd4_open *open); -extern int nfsd4_process_open2(struct svc_rqst *rqstp, +extern __be32 nfsd4_process_open1(struct nfsd4_open *open); +extern __be32 nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open); -extern int nfsd4_open_confirm(struct svc_rqst *rqstp, +extern __be32 nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc, struct nfs4_stateowner **); -extern int nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, +extern __be32 nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_close *close, struct nfs4_stateowner **replay_owner); -extern int nfsd4_open_downgrade(struct svc_rqst *rqstp, +extern __be32 nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_downgrade *od, struct nfs4_stateowner **replay_owner); -extern int nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, +extern __be32 nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock, struct nfs4_stateowner **replay_owner); -extern int nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, +extern __be32 nfsd4_lockt(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lockt *lockt); -extern int nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, +extern __be32 nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_locku *locku, struct nfs4_stateowner **replay_owner); -extern int +extern __be32 nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner); extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *); -extern int nfsd4_delegreturn(struct svc_rqst *rqstp, +extern __be32 nfsd4_delegreturn(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_delegreturn *dr); #endif -- cgit v1.2.3 From c7afef1f963bec198b186cc34b9e8c9b9ce2e266 Mon Sep 17 00:00:00 2001 From: Al Viro <viro@ftp.linux.org.uk> Date: Thu, 19 Oct 2006 23:29:02 -0700 Subject: [PATCH] nfsd: misc endianness annotations Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no> Acked-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/nfsd/export.c | 4 ++-- fs/nfsd/lockd.c | 2 +- fs/nfsd/nfs4callback.c | 2 +- fs/nfsd/nfs4state.c | 4 ++-- fs/nfsd/nfscache.c | 8 ++++---- fs/nfsd/nfssvc.c | 2 +- include/linux/nfsd/cache.h | 6 +++--- include/linux/nfsd/export.h | 2 +- include/linux/nfsd/nfsd.h | 2 +- include/linux/nfsd/nfsfh.h | 2 +- include/linux/nfsd/state.h | 2 +- 11 files changed, 18 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c index e13fa23bd108..f37df46d2eaa 100644 --- a/fs/nfsd/export.c +++ b/fs/nfsd/export.c @@ -1148,12 +1148,12 @@ exp_find(struct auth_domain *clp, int fsid_type, u32 *fsidv, * for a 
given NFSv4 client. The root is defined to be the * export point with fsid==0 */ -int +__be32 exp_pseudoroot(struct auth_domain *clp, struct svc_fh *fhp, struct cache_req *creq) { struct svc_export *exp; - int rv; + __be32 rv; u32 fsidv[2]; mk_fsid_v1(fsidv, 0); diff --git a/fs/nfsd/lockd.c b/fs/nfsd/lockd.c index 9b9e7e127c03..11fdaf7721b4 100644 --- a/fs/nfsd/lockd.c +++ b/fs/nfsd/lockd.c @@ -25,7 +25,7 @@ static u32 nlm_fopen(struct svc_rqst *rqstp, struct nfs_fh *f, struct file **filp) { - u32 nfserr; + __be32 nfserr; struct svc_fh fh; /* must initialize before using! but maxsize doesn't matter */ diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 8497ed4862b2..f57655a7a2b6 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -461,7 +461,7 @@ nfs4_cb_null(struct rpc_task *task, void *dummy) { struct nfs4_client *clp = (struct nfs4_client *)task->tk_msg.rpc_argp; struct nfs4_callback *cb = &clp->cl_callback; - u32 addr = htonl(cb->cb_addr); + __be32 addr = htonl(cb->cb_addr); dprintk("NFSD: nfs4_cb_null task->tk_status %d\n", task->tk_status); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index ae1d47715b90..2e468c9e64d9 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -713,7 +713,7 @@ out_err: __be32 nfsd4_setclientid(struct svc_rqst *rqstp, struct nfsd4_setclientid *setclid) { - u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr; + __be32 ip_addr = rqstp->rq_addr.sin_addr.s_addr; struct xdr_netobj clname = { .len = setclid->se_namelen, .data = setclid->se_name, @@ -878,7 +878,7 @@ out: __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_setclientid_confirm *setclientid_confirm) { - u32 ip_addr = rqstp->rq_addr.sin_addr.s_addr; + __be32 ip_addr = rqstp->rq_addr.sin_addr.s_addr; struct nfs4_client *conf, *unconf; nfs4_verifier confirm = setclientid_confirm->sc_confirm; clientid_t * clid = &setclientid_confirm->sc_clientid; diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index fdf7cf3dfadc..6100bbe27432 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -29,7 +29,7 @@ */ #define CACHESIZE 1024 #define HASHSIZE 64 -#define REQHASH(xid) ((((xid) >> 24) ^ (xid)) & (HASHSIZE-1)) +#define REQHASH(xid) (((((__force __u32)xid) >> 24) ^ ((__force __u32)xid)) & (HASHSIZE-1)) static struct hlist_head * hash_list; static struct list_head lru_head; @@ -127,8 +127,8 @@ nfsd_cache_lookup(struct svc_rqst *rqstp, int type) struct hlist_node *hn; struct hlist_head *rh; struct svc_cacherep *rp; - u32 xid = rqstp->rq_xid, - proto = rqstp->rq_prot, + __be32 xid = rqstp->rq_xid; + u32 proto = rqstp->rq_prot, vers = rqstp->rq_vers, proc = rqstp->rq_proc; unsigned long age; @@ -258,7 +258,7 @@ found_entry: * In this case, nfsd_cache_update is called with statp == NULL. 
*/ void -nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, u32 *statp) +nfsd_cache_update(struct svc_rqst *rqstp, int cachetype, __be32 *statp) { struct svc_cacherep *rp; struct kvec *resv = &rqstp->rq_res.head[0], *cachv; diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 8067118b1c0c..0aaccb03bf76 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -491,7 +491,7 @@ out: } int -nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp) +nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) { struct svc_procedure *proc; kxdrproc_t xdr; diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h index c3a3557c2a5b..007480cd6a60 100644 --- a/include/linux/nfsd/cache.h +++ b/include/linux/nfsd/cache.h @@ -26,14 +26,14 @@ struct svc_cacherep { c_type, /* status, buffer */ c_secure : 1; /* req came from port < 1024 */ struct sockaddr_in c_addr; - u32 c_xid; + __be32 c_xid; u32 c_prot; u32 c_proc; u32 c_vers; unsigned long c_timestamp; union { struct kvec u_vec; - u32 u_status; + __be32 u_status; } c_u; }; @@ -75,7 +75,7 @@ enum { void nfsd_cache_init(void); void nfsd_cache_shutdown(void); int nfsd_cache_lookup(struct svc_rqst *, int); -void nfsd_cache_update(struct svc_rqst *, int, u32 *); +void nfsd_cache_update(struct svc_rqst *, int, __be32 *); #endif /* __KERNEL__ */ #endif /* NFSCACHE_H */ diff --git a/include/linux/nfsd/export.h b/include/linux/nfsd/export.h index 27666f5b8b53..045e38cdbe64 100644 --- a/include/linux/nfsd/export.h +++ b/include/linux/nfsd/export.h @@ -117,7 +117,7 @@ struct svc_export * exp_parent(struct auth_domain *clp, struct cache_req *reqp); int exp_rootfh(struct auth_domain *, char *path, struct knfsd_fh *, int maxsize); -int exp_pseudoroot(struct auth_domain *, struct svc_fh *fhp, struct cache_req *creq); +__be32 exp_pseudoroot(struct auth_domain *, struct svc_fh *fhp, struct cache_req *creq); __be32 nfserrno(int errno); extern struct cache_detail svc_export_cache; diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 19a3c83d496e..68d29b66c6e2 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -64,7 +64,7 @@ extern struct svc_serv *nfsd_serv; * Function prototypes. 
*/ int nfsd_svc(unsigned short port, int nrservs); -int nfsd_dispatch(struct svc_rqst *rqstp, u32 *statp); +int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp); /* nfsd/vfs.c */ int fh_lock_parent(struct svc_fh *, struct dentry *); diff --git a/include/linux/nfsd/nfsfh.h b/include/linux/nfsd/nfsfh.h index 749bad1ca16f..f3b51d62ec7d 100644 --- a/include/linux/nfsd/nfsfh.h +++ b/include/linux/nfsd/nfsfh.h @@ -157,7 +157,7 @@ typedef struct svc_fh { __u64 fh_post_size; /* i_size */ unsigned long fh_post_blocks; /* i_blocks */ unsigned long fh_post_blksize;/* i_blksize */ - __u32 fh_post_rdev[2];/* i_rdev */ + __be32 fh_post_rdev[2];/* i_rdev */ struct timespec fh_post_atime; /* i_atime */ struct timespec fh_post_mtime; /* i_mtime */ struct timespec fh_post_ctime; /* i_ctime */ diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index a597e2e72469..c3673f487e84 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -125,7 +125,7 @@ struct nfs4_client { char cl_recdir[HEXDIR_LEN]; /* recovery dir */ nfs4_verifier cl_verifier; /* generated by client */ time_t cl_time; /* time of last lease renewal */ - u32 cl_addr; /* client ipaddress */ + __be32 cl_addr; /* client ipaddress */ struct svc_cred cl_cred; /* setclientid principal */ clientid_t cl_clientid; /* generated by server */ nfs4_verifier cl_confirm; /* generated by server */ -- cgit v1.2.3 From a90b061c0bf712961cea40d9c916b300073d12e5 Mon Sep 17 00:00:00 2001 From: Al Viro <viro@ftp.linux.org.uk> Date: Thu, 19 Oct 2006 23:29:03 -0700 Subject: [PATCH] nfsd: nfs_replay_me We are using NFS_REPLAY_ME as a special error value that is never leaked to clients. That works fine; the only problem is mixing host- and network-endian values in the same objects. The network-endian equivalent works just as well; switch to it. Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Acked-by: Trond Myklebust <trond.myklebust@fys.uio.no> Acked-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/nfsd/nfs4proc.c | 6 +++--- fs/nfsd/nfs4state.c | 4 ++-- include/linux/nfsd/nfsd.h | 1 + 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 63823945f972..0a7bbdc4a10a 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -177,7 +177,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open /* check seqid for replay.
set nfs4_owner */ status = nfsd4_process_open1(open); - if (status == NFSERR_REPLAY_ME) { + if (status == nfserr_replay_me) { struct nfs4_replay *rp = &open->op_stateowner->so_replay; fh_put(current_fh); current_fh->fh_handle.fh_size = rp->rp_openfh_len; @@ -188,7 +188,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open dprintk("nfsd4_open: replay failed" " restoring previous filehandle\n"); else - status = NFSERR_REPLAY_ME; + status = nfserr_replay_me; } if (status) goto out; @@ -937,7 +937,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, } encode_op: - if (op->status == NFSERR_REPLAY_ME) { + if (op->status == nfserr_replay_me) { op->replay = &replay_owner->so_replay; nfsd4_encode_replay(resp, op); status = op->status = op->replay->rp_status; diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 2e468c9e64d9..293b6495829f 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -1477,7 +1477,7 @@ nfsd4_process_open1(struct nfsd4_open *open) } if (open->op_seqid == sop->so_seqid - 1) { if (sop->so_replay.rp_buflen) - return NFSERR_REPLAY_ME; + return nfserr_replay_me; /* The original OPEN failed so spectacularly * that we don't even have replay data saved! * Therefore, we have no choice but to continue @@ -2233,7 +2233,7 @@ check_replay: if (seqid == sop->so_seqid - 1) { dprintk("NFSD: preprocess_seqid_op: retransmission?\n"); /* indicate replay to calling function */ - return NFSERR_REPLAY_ME; + return nfserr_replay_me; } printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d)\n", sop->so_seqid, seqid); diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 68d29b66c6e2..eb231143d579 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -238,6 +238,7 @@ void nfsd_lockd_shutdown(void); #define nfserr_badname __constant_htonl(NFSERR_BADNAME) #define nfserr_cb_path_down __constant_htonl(NFSERR_CB_PATH_DOWN) #define nfserr_locked __constant_htonl(NFSERR_LOCKED) +#define nfserr_replay_me __constant_htonl(NFSERR_REPLAY_ME) /* error codes for internal use */ /* if a request fails due to kmalloc failure, it gets dropped. -- cgit v1.2.3 From 690a973f48b6ba2954465992c08e65059c8374fe Mon Sep 17 00:00:00 2001 From: Jan Beulich <jbeulich@novell.com> Date: Sat, 21 Oct 2006 18:37:01 +0200 Subject: [PATCH] x86-64: Speed up dwarf2 unwinder This changes the dwarf2 unwinder to do a binary search for CIEs instead of a linear walk. The linker is unfortunately not able to build a proper lookup table at link time, so one is created at runtime as soon as the bootmem allocator is usable (you'll therefore continue using the linear lookup for the first [hopefully] few calls). The code should be ready to utilize a build-time-created table once a fixed linker becomes available.
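[Editor's note: a minimal sketch of the lookup this patch adds, for readers who want the idea without the DWARF plumbing. All names here are illustrative, not the kernel's; the real code in unwind() below additionally validates the header encoding and re-checks that pc falls inside the chosen FDE's address range.]

	/* Table of (start address, FDE pointer) pairs, sorted by start. */
	struct hdr_entry { unsigned long start, fde; };

	/* Return the last entry with start <= pc, or NULL if pc precedes
	 * every entry; the caller still verifies pc against the FDE's end. */
	static const struct hdr_entry *
	lookup_fde(const struct hdr_entry *tab, unsigned int n, unsigned long pc)
	{
		unsigned int lo = 0, hi = n;

		while (lo < hi) {
			unsigned int mid = lo + (hi - lo) / 2;

			if (pc < tab[mid].start)
				hi = mid;	/* pc is below this entry */
			else
				lo = mid + 1;	/* candidate; look higher */
		}
		return lo ? &tab[lo - 1] : NULL;
	}

[This turns each frame lookup from a scan of every FDE into O(log n) probes, which is what makes the runtime-built, sort()ed table in setup_unwind_table() worthwhile.]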
Signed-off-by: Jan Beulich <jbeulich@novell.com> Signed-off-by: Andi Kleen <ak@suse.de> --- Makefile | 1 + include/asm-generic/vmlinux.lds.h | 16 ++ include/linux/unwind.h | 2 + init/main.c | 1 + kernel/unwind.c | 318 +++++++++++++++++++++++++++++++++----- 5 files changed, 299 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/Makefile b/Makefile index 62a1343cf327..389ff0cca9a7 100644 --- a/Makefile +++ b/Makefile @@ -499,6 +499,7 @@ endif ifdef CONFIG_UNWIND_INFO CFLAGS += -fasynchronous-unwind-tables +LDFLAGS_vmlinux += --eh-frame-hdr endif ifdef CONFIG_DEBUG_INFO diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 69240b52f8e1..9d0d11c180d9 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -125,6 +125,10 @@ *(__param) \ VMLINUX_SYMBOL(__stop___param) = .; \ } \ + \ + /* Unwind data binary search table */ \ + EH_FRAME_HDR \ + \ __end_rodata = .; \ . = ALIGN(4096); @@ -157,6 +161,18 @@ *(.kprobes.text) \ VMLINUX_SYMBOL(__kprobes_text_end) = .; +#ifdef CONFIG_STACK_UNWIND + /* Unwind data binary search table */ +#define EH_FRAME_HDR \ + .eh_frame_hdr : AT(ADDR(.eh_frame_hdr) - LOAD_OFFSET) { \ + VMLINUX_SYMBOL(__start_unwind_hdr) = .; \ + *(.eh_frame_hdr) \ + VMLINUX_SYMBOL(__end_unwind_hdr) = .; \ + } +#else +#define EH_FRAME_HDR +#endif + /* DWARF debug sections. Symbols in the DWARF debugging sections are relative to the beginning of the section so we begin them at 0. */ diff --git a/include/linux/unwind.h b/include/linux/unwind.h index 73e1751d03dd..749928c161fb 100644 --- a/include/linux/unwind.h +++ b/include/linux/unwind.h @@ -26,6 +26,7 @@ struct module; * Initialize unwind support. */ extern void unwind_init(void); +extern void unwind_setup(void); #ifdef CONFIG_MODULES @@ -73,6 +74,7 @@ extern int unwind_to_user(struct unwind_frame_info *); struct unwind_frame_info {}; static inline void unwind_init(void) {} +static inline void unwind_setup(void) {} #ifdef CONFIG_MODULES diff --git a/init/main.c b/init/main.c index ee123243fb53..36f608a7cfba 100644 --- a/init/main.c +++ b/init/main.c @@ -503,6 +503,7 @@ asmlinkage void __init start_kernel(void) printk(KERN_NOTICE); printk(linux_banner); setup_arch(&command_line); + unwind_setup(); setup_per_cpu_areas(); smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */ diff --git a/kernel/unwind.c b/kernel/unwind.c index 2e2368607aab..f7e50d16dbf6 100644 --- a/kernel/unwind.c +++ b/kernel/unwind.c @@ -11,13 +11,15 @@ #include <linux/unwind.h> #include <linux/module.h> -#include <linux/delay.h> +#include <linux/bootmem.h> +#include <linux/sort.h> #include <linux/stop_machine.h> #include <asm/sections.h> #include <asm/uaccess.h> #include <asm/unaligned.h> extern char __start_unwind[], __end_unwind[]; +extern const u8 __start_unwind_hdr[], __end_unwind_hdr[]; #define MAX_STACK_DEPTH 8 @@ -100,6 +102,8 @@ static struct unwind_table { } core, init; const void *address; unsigned long size; + const unsigned char *header; + unsigned long hdrsz; struct unwind_table *link; const char *name; } root_table; @@ -145,6 +149,10 @@ static struct unwind_table *find_table(unsigned long pc) return table; } +static unsigned long read_pointer(const u8 **pLoc, + const void *end, + signed ptrType); + static void init_unwind_table(struct unwind_table *table, const char *name, const void *core_start, @@ -152,14 +160,30 @@ static void init_unwind_table(struct unwind_table *table, const void *init_start, unsigned long init_size, const void *table_start, - unsigned long 
table_size) + unsigned long table_size, + const u8 *header_start, + unsigned long header_size) { + const u8 *ptr = header_start + 4; + const u8 *end = header_start + header_size; + table->core.pc = (unsigned long)core_start; table->core.range = core_size; table->init.pc = (unsigned long)init_start; table->init.range = init_size; table->address = table_start; table->size = table_size; + /* See if the linker provided table looks valid. */ + if (header_size <= 4 + || header_start[0] != 1 + || (void *)read_pointer(&ptr, end, header_start[1]) != table_start + || header_start[2] == DW_EH_PE_omit + || read_pointer(&ptr, end, header_start[2]) <= 0 + || header_start[3] == DW_EH_PE_omit) + header_start = NULL; + table->hdrsz = header_size; + smp_wmb(); + table->header = header_start; table->link = NULL; table->name = name; } @@ -169,7 +193,143 @@ void __init unwind_init(void) init_unwind_table(&root_table, "kernel", _text, _end - _text, NULL, 0, - __start_unwind, __end_unwind - __start_unwind); + __start_unwind, __end_unwind - __start_unwind, + __start_unwind_hdr, __end_unwind_hdr - __start_unwind_hdr); +} + +static const u32 bad_cie, not_fde; +static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *); +static signed fde_pointer_type(const u32 *cie); + +struct eh_frame_hdr_table_entry { + unsigned long start, fde; +}; + +static int cmp_eh_frame_hdr_table_entries(const void *p1, const void *p2) +{ + const struct eh_frame_hdr_table_entry *e1 = p1; + const struct eh_frame_hdr_table_entry *e2 = p2; + + return (e1->start > e2->start) - (e1->start < e2->start); +} + +static void swap_eh_frame_hdr_table_entries(void *p1, void *p2, int size) +{ + struct eh_frame_hdr_table_entry *e1 = p1; + struct eh_frame_hdr_table_entry *e2 = p2; + unsigned long v; + + v = e1->start; + e1->start = e2->start; + e2->start = v; + v = e1->fde; + e1->fde = e2->fde; + e2->fde = v; +} + +static void __init setup_unwind_table(struct unwind_table *table, + void *(*alloc)(unsigned long)) +{ + const u8 *ptr; + unsigned long tableSize = table->size, hdrSize; + unsigned n; + const u32 *fde; + struct { + u8 version; + u8 eh_frame_ptr_enc; + u8 fde_count_enc; + u8 table_enc; + unsigned long eh_frame_ptr; + unsigned int fde_count; + struct eh_frame_hdr_table_entry table[]; + } __attribute__((__packed__)) *header; + + if (table->header) + return; + + if (table->hdrsz) + printk(KERN_WARNING ".eh_frame_hdr for '%s' present but unusable\n", + table->name); + + if (tableSize & (sizeof(*fde) - 1)) + return; + + for (fde = table->address, n = 0; + tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde; + tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { + const u32 *cie = cie_for_fde(fde, table); + signed ptrType; + + if (cie == &not_fde) + continue; + if (cie == NULL + || cie == &bad_cie + || (ptrType = fde_pointer_type(cie)) < 0) + return; + ptr = (const u8 *)(fde + 2); + if (!read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType)) + return; + ++n; + } + + if (tableSize || !n) + return; + + hdrSize = 4 + sizeof(unsigned long) + sizeof(unsigned int) + + 2 * n * sizeof(unsigned long); + header = alloc(hdrSize); + if (!header) + return; + header->version = 1; + header->eh_frame_ptr_enc = DW_EH_PE_abs|DW_EH_PE_native; + header->fde_count_enc = DW_EH_PE_abs|DW_EH_PE_data4; + header->table_enc = DW_EH_PE_abs|DW_EH_PE_native; + put_unaligned((unsigned long)table->address, &header->eh_frame_ptr); + BUILD_BUG_ON(offsetof(typeof(*header), fde_count) + % __alignof(typeof(header->fde_count))); +
header->fde_count = n; + + BUILD_BUG_ON(offsetof(typeof(*header), table) + % __alignof(typeof(*header->table))); + for (fde = table->address, tableSize = table->size, n = 0; + tableSize; + tableSize -= sizeof(*fde) + *fde, fde += 1 + *fde / sizeof(*fde)) { + const u32 *cie = fde + 1 - fde[1] / sizeof(*fde); + + if (!fde[1]) + continue; /* this is a CIE */ + ptr = (const u8 *)(fde + 2); + header->table[n].start = read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + fde_pointer_type(cie)); + header->table[n].fde = (unsigned long)fde; + ++n; + } + WARN_ON(n != header->fde_count); + + sort(header->table, + n, + sizeof(*header->table), + cmp_eh_frame_hdr_table_entries, + swap_eh_frame_hdr_table_entries); + + table->hdrsz = hdrSize; + smp_wmb(); + table->header = (const void *)header; +} + +static void *__init balloc(unsigned long sz) +{ + return __alloc_bootmem_nopanic(sz, + sizeof(unsigned int), + __pa(MAX_DMA_ADDRESS)); +} + +void __init unwind_setup(void) +{ + setup_unwind_table(&root_table, balloc); } #ifdef CONFIG_MODULES @@ -193,7 +353,8 @@ void *unwind_add_table(struct module *module, init_unwind_table(table, module->name, module->module_core, module->core_size, module->module_init, module->init_size, - table_start, table_size); + table_start, table_size, + NULL, 0); if (last_table) last_table->link = table; @@ -303,6 +464,26 @@ static sleb128_t get_sleb128(const u8 **pcur, const u8 *end) return value; } +static const u32 *cie_for_fde(const u32 *fde, const struct unwind_table *table) +{ + const u32 *cie; + + if (!*fde || (*fde & (sizeof(*fde) - 1))) + return &bad_cie; + if (!fde[1]) + return &not_fde; /* this is a CIE */ + if ((fde[1] & (sizeof(*fde) - 1)) + || fde[1] > (unsigned long)(fde + 1) - (unsigned long)table->address) + return NULL; /* this is not a valid FDE */ + cie = fde + 1 - fde[1] / sizeof(*fde); + if (*cie <= sizeof(*cie) + 4 + || *cie >= fde[1] - sizeof(*fde) + || (*cie & (sizeof(*cie) - 1)) + || cie[1]) + return NULL; /* this is not a (valid) CIE */ + return cie; +} + static unsigned long read_pointer(const u8 **pLoc, const void *end, signed ptrType) @@ -610,49 +791,108 @@ int unwind(struct unwind_frame_info *frame) unsigned i; signed ptrType = -1; uleb128_t retAddrReg = 0; - struct unwind_table *table; + const struct unwind_table *table; struct unwind_state state; if (UNW_PC(frame) == 0) return -EINVAL; if ((table = find_table(pc)) != NULL && !(table->size & (sizeof(*fde) - 1))) { - unsigned long tableSize = table->size; - - for (fde = table->address; - tableSize > sizeof(*fde) && tableSize - sizeof(*fde) >= *fde; - tableSize -= sizeof(*fde) + *fde, - fde += 1 + *fde / sizeof(*fde)) { - if (!*fde || (*fde & (sizeof(*fde) - 1))) - break; - if (!fde[1]) - continue; /* this is a CIE */ - if ((fde[1] & (sizeof(*fde) - 1)) - || fde[1] > (unsigned long)(fde + 1) - - (unsigned long)table->address) - continue; /* this is not a valid FDE */ - cie = fde + 1 - fde[1] / sizeof(*fde); - if (*cie <= sizeof(*cie) + 4 - || *cie >= fde[1] - sizeof(*fde) - || (*cie & (sizeof(*cie) - 1)) - || cie[1] - || (ptrType = fde_pointer_type(cie)) < 0) { - cie = NULL; /* this is not a (valid) CIE */ - continue; + const u8 *hdr = table->header; + unsigned long tableSize; + + smp_rmb(); + if (hdr && hdr[0] == 1) { + switch(hdr[3] & DW_EH_PE_FORM) { + case DW_EH_PE_native: tableSize = sizeof(unsigned long); break; + case DW_EH_PE_data2: tableSize = 2; break; + case DW_EH_PE_data4: tableSize = 4; break; + case DW_EH_PE_data8: tableSize = 8; break; + default: tableSize = 0; break; + } + ptr = hdr + 4; +
end = hdr + table->hdrsz; + if (tableSize + && read_pointer(&ptr, end, hdr[1]) + == (unsigned long)table->address + && (i = read_pointer(&ptr, end, hdr[2])) > 0 + && i == (end - ptr) / (2 * tableSize) + && !((end - ptr) % (2 * tableSize))) { + do { + const u8 *cur = ptr + (i / 2) * (2 * tableSize); + + startLoc = read_pointer(&cur, + cur + tableSize, + hdr[3]); + if (pc < startLoc) + i /= 2; + else { + ptr = cur - tableSize; + i = (i + 1) / 2; + } + } while (startLoc && i > 1); + if (i == 1 + && (startLoc = read_pointer(&ptr, + ptr + tableSize, + hdr[3])) != 0 + && pc >= startLoc) + fde = (void *)read_pointer(&ptr, + ptr + tableSize, + hdr[3]); } + } + + if (fde != NULL) { + cie = cie_for_fde(fde, table); ptr = (const u8 *)(fde + 2); - startLoc = read_pointer(&ptr, - (const u8 *)(fde + 1) + *fde, - ptrType); - endLoc = startLoc - + read_pointer(&ptr, - (const u8 *)(fde + 1) + *fde, - ptrType & DW_EH_PE_indirect - ? ptrType - : ptrType & (DW_EH_PE_FORM|DW_EH_PE_signed)); - if (pc >= startLoc && pc < endLoc) - break; - cie = NULL; + if(cie != NULL + && cie != &bad_cie + && cie != &not_fde + && (ptrType = fde_pointer_type(cie)) >= 0 + && read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType) == startLoc) { + if (!(ptrType & DW_EH_PE_indirect)) + ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed; + endLoc = startLoc + + read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType); + if(pc >= endLoc) + fde = NULL; + } else + fde = NULL; + } + if (fde == NULL) { + for (fde = table->address, tableSize = table->size; + cie = NULL, tableSize > sizeof(*fde) + && tableSize - sizeof(*fde) >= *fde; + tableSize -= sizeof(*fde) + *fde, + fde += 1 + *fde / sizeof(*fde)) { + cie = cie_for_fde(fde, table); + if (cie == &bad_cie) { + cie = NULL; + break; + } + if (cie == NULL + || cie == &not_fde + || (ptrType = fde_pointer_type(cie)) < 0) + continue; + ptr = (const u8 *)(fde + 2); + startLoc = read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType); + if (!startLoc) + continue; + if (!(ptrType & DW_EH_PE_indirect)) + ptrType &= DW_EH_PE_FORM|DW_EH_PE_signed; + endLoc = startLoc + + read_pointer(&ptr, + (const u8 *)(fde + 1) + *fde, + ptrType); + if (pc >= startLoc && pc < endLoc) + break; + } } } if (cie != NULL) { -- cgit v1.2.3 From 7a71cef780404e8c90d23b1131142e158d94354b Mon Sep 17 00:00:00 2001 From: "bibo,mao" <bibo.mao@intel.com> Date: Sat, 21 Oct 2006 18:37:02 +0200 Subject: [PATCH] x86-64: x86_64 add NX mask for PTE entry When change_page_attr_addr calls revert_page to restore the original pte value, mk_pte_phys does not mask the NX bit. If the NX bit is set on an x86_64 machine whose hardware does not support NX, the result is an RSVD-type page fault and a system crash. This patch masks the NX bit for the PTE entry.
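[Editor's note: a small stand-alone sketch of the failure mode this one-line fix addresses. The mask variable and bit position below are illustrative stand-ins for the kernel's __supported_pte_mask and _PAGE_NX (on x86-64 the NX bit is PTE bit 63, and boot code clears it from the mask when the CPU lacks NX); the point is that a PTE must never carry a bit the CPU treats as reserved.]

	#include <stdint.h>

	#define NX_BIT (1ULL << 63)	/* assumed NX position */

	/* At boot: cleared of NX_BIT if the CPU does not support NX. */
	static uint64_t supported_pte_mask = ~0ULL;

	static uint64_t mk_pte_val(uint64_t physpage, uint64_t pgprot)
	{
		/* Without the masking, a pgprot carrying NX would set a
		 * reserved bit on non-NX hardware, and the next access
		 * would raise a reserved-bit (RSVD) page fault. */
		return (physpage | pgprot) & supported_pte_mask;
	}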
Signed-off-by: bibo,mao <bibo.mao@intel.com> Signed-off-by: Andi Kleen <ak@suse.de> --- include/asm-x86_64/pgtable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h index 6899e770b173..0555c1c4d8fa 100644 --- a/include/asm-x86_64/pgtable.h +++ b/include/asm-x86_64/pgtable.h @@ -366,6 +366,7 @@ static inline pte_t mk_pte_phys(unsigned long physpage, pgprot_t pgprot) { pte_t pte; pte_val(pte) = physpage | pgprot_val(pgprot); + pte_val(pte) &= __supported_pte_mask; return pte; } -- cgit v1.2.3 From a1bae67243512ca30ceda48e3e24e25b543f8ab7 Mon Sep 17 00:00:00 2001 From: Andi Kleen <ak@suse.de> Date: Sat, 21 Oct 2006 18:37:02 +0200 Subject: [PATCH] i386: Disable nmi watchdog on all ThinkPads Even newer ThinkPads have bugs in SMM code that cause hangs with the NMI watchdog. Signed-off-by: Andi Kleen <ak@suse.de> --- arch/i386/kernel/nmi.c | 10 +++++----- drivers/firmware/dmi_scan.c | 20 ++++++++++++++++++++ include/linux/dmi.h | 2 ++ 3 files changed, 27 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 3e8e3adb0489..eaafe233a5da 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c @@ -219,11 +219,11 @@ static int __init check_nmi_watchdog(void) int cpu; /* Enable NMI watchdog for newer systems. - Actually it should be safe for most systems before 2004 too except - for some IBM systems that corrupt registers when NMI happens - during SMM. Unfortunately we don't have more exact information - on these and use this coarse check. */ - if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004) + Probably safe on most older systems too, but let's be careful. + IBM ThinkPads use INT10 inside SMM and that allows early NMI inside SMM + which hangs the system. Disable watchdog for all thinkpads */ + if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004 && + !dmi_name_in_vendors("ThinkPad")) nmi_watchdog = NMI_LOCAL_APIC; if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT)) diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index b8b596d5778d..37deee6c0c1c 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -326,6 +326,26 @@ char *dmi_get_system_info(int field) } EXPORT_SYMBOL(dmi_get_system_info); + +/** + * dmi_name_in_vendors - Check if string is anywhere in the DMI vendor information.
+ * @str: Case sensitive Name + */ +int dmi_name_in_vendors(char *str) +{ + static int fields[] = { DMI_BIOS_VENDOR, DMI_BIOS_VERSION, DMI_SYS_VENDOR, + DMI_PRODUCT_NAME, DMI_PRODUCT_VERSION, DMI_BOARD_VENDOR, + DMI_BOARD_NAME, DMI_BOARD_VERSION, DMI_NONE }; + int i; + for (i = 0; fields[i] != DMI_NONE; i++) { + int f = fields[i]; + if (dmi_ident[f] && strstr(dmi_ident[f], str)) + return 1; + } + return 0; +} +EXPORT_SYMBOL(dmi_name_in_vendors); + /** * dmi_find_device - find onboard device by type/name * @type: device type or %DMI_DEV_TYPE_ANY to match all device types diff --git a/include/linux/dmi.h b/include/linux/dmi.h index 38dc403be70b..904bf3d2d90b 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -69,6 +69,7 @@ extern struct dmi_device * dmi_find_device(int type, const char *name, struct dmi_device *from); extern void dmi_scan_machine(void); extern int dmi_get_year(int field); +extern int dmi_name_in_vendors(char *str); #else @@ -77,6 +78,7 @@ static inline char * dmi_get_system_info(int field) { return NULL; } static inline struct dmi_device * dmi_find_device(int type, const char *name, struct dmi_device *from) { return NULL; } static inline int dmi_get_year(int year) { return 0; } +static inline int dmi_name_in_vendors(char *s) { return 0; } #endif -- cgit v1.2.3 From e70ea8c09db0e25ab58f84ba7f393e5c9125a8ee Mon Sep 17 00:00:00 2001 From: Andi Kleen <ak@suse.de> Date: Sat, 21 Oct 2006 18:37:03 +0200 Subject: [PATCH] x86-64: Revert timer routing behaviour back to 2.6.16 state By default route the 8254 over the 8259 and only disable it on ATI boards where this causes double timer interrupts. This should unbreak some Nvidia boards where the timer doesn't seem to tick if it isn't enabled in the 8259. At least one VIA board also seemed to have a little trouble with the disabled 8259. For 2.6.20 we'll try both dynamically without blacklisting, but I think for .19 this is the safer approach because it has already been well tested in earlier kernels. This also makes the x86-64 behaviour the same as i386. Command line options can change all this of course. Signed-off-by: Andi Kleen <ak@suse.de> --- arch/x86_64/kernel/early-quirks.c | 9 +++++---- arch/x86_64/kernel/io_apic.c | 2 +- include/asm-x86_64/proto.h | 2 ++ 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c index 208e38a372c1..2b1245d86258 100644 --- a/arch/x86_64/kernel/early-quirks.c +++ b/arch/x86_64/kernel/early-quirks.c @@ -61,10 +61,11 @@ static void nvidia_bugs(void) static void ati_bugs(void) { -#if 1 /* for testing */ - printk("ATI board detected\n"); -#endif - /* No bugs right now */ + if (timer_over_8254 == 1) { + timer_over_8254 = 0; + printk(KERN_INFO + "ATI board detected.
Disabling timer routing over 8254.\n"); + } } struct chipset { diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 8a9a357875b7..b000017e4b5d 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -57,7 +57,7 @@ static int no_timer_check; static int disable_timer_pin_1 __initdata; -int timer_over_8254 __initdata = 0; +int timer_over_8254 __initdata = 1; /* Where if anywhere is the i8259 connect in external int mode */ static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; diff --git a/include/asm-x86_64/proto.h b/include/asm-x86_64/proto.h index c181fef786e4..e72cfcdf5344 100644 --- a/include/asm-x86_64/proto.h +++ b/include/asm-x86_64/proto.h @@ -122,6 +122,8 @@ extern int fix_aperture; extern int reboot_force; extern int notsc_setup(char *); +extern int timer_over_8254; + extern int gsi_irq_sharing(int gsi); extern void smp_local_timer_interrupt(void); -- cgit v1.2.3 From 3343571d9f88a0de542d33aea9ab881f00ff866d Mon Sep 17 00:00:00 2001 From: Tejun Heo <htejun@gmail.com> Date: Thu, 19 Oct 2006 14:44:53 +0900 Subject: [PATCH] libata: typo fix Typo fix in comment. Signed-off-by: Tejun Heo <htejun@gmail.com> Signed-off-by: Jeff Garzik <jeff@garzik.org> --- include/linux/libata.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/libata.h b/include/linux/libata.h index d0a7ad5ed518..b03d5a340dc8 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -143,7 +143,7 @@ enum { ATA_DFLAG_CFG_MASK = (1 << 8) - 1, ATA_DFLAG_PIO = (1 << 8), /* device limited to PIO mode */ - ATA_DFLAG_NCQ_OFF = (1 << 9), /* devied limited to non-NCQ mode */ + ATA_DFLAG_NCQ_OFF = (1 << 9), /* device limited to non-NCQ mode */ ATA_DFLAG_SUSPENDED = (1 << 10), /* device suspended */ ATA_DFLAG_INIT_MASK = (1 << 16) - 1, -- cgit v1.2.3 From da3ed32fe568148ede256975d40825ffcdac767b Mon Sep 17 00:00:00 2001 From: NeilBrown <neilb@suse.de> Date: Sat, 21 Oct 2006 10:24:08 -0700 Subject: [PATCH] md: add another COMPAT_IOCTL for md .. so that you can use bitmaps with 32bit userspace on a 64 bit kernel. Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/compat_ioctl.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/compat_ioctl.h b/include/linux/compat_ioctl.h index cfdb4f6a89d4..c26c3adcfacf 100644 --- a/include/linux/compat_ioctl.h +++ b/include/linux/compat_ioctl.h @@ -131,6 +131,7 @@ COMPATIBLE_IOCTL(RUN_ARRAY) COMPATIBLE_IOCTL(STOP_ARRAY) COMPATIBLE_IOCTL(STOP_ARRAY_RO) COMPATIBLE_IOCTL(RESTART_ARRAY_RW) +COMPATIBLE_IOCTL(GET_BITMAP_FILE) ULONG_IOCTL(SET_BITMAP_FILE) /* DM */ COMPATIBLE_IOCTL(DM_VERSION_32) -- cgit v1.2.3 From 1c05b4bc22cd640d3a534bd2851a8413d5df3709 Mon Sep 17 00:00:00 2001 From: NeilBrown <neilb@suse.de> Date: Sat, 21 Oct 2006 10:24:08 -0700 Subject: [PATCH] md: endian annotation for v1 superblock access Includes a couple of bugfixes found by sparse.
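For background: sparse's endian checking works by making __le32 and friends distinct "bitwise" types when the checker runs, so any code that mixes them with plain CPU-order integers without going through helpers such as le32_to_cpu() draws a warning. The following is a minimal userspace sketch of that idea, not kernel code; the names le32_t, le32_to_cpu_demo and demo_sb are invented stand-ins, and the no-op conversion assumes a little-endian host.

#include <stdint.h>

#ifdef __CHECKER__ /* defined while sparse is analysing the file */
#define __bitwise __attribute__((bitwise))
#define __force __attribute__((force))
#else
#define __bitwise
#define __force
#endif

/* Stand-in for the kernel's __le32: a distinct type under sparse. */
typedef uint32_t __bitwise le32_t;

static inline uint32_t le32_to_cpu_demo(le32_t x)
{
	return (__force uint32_t)x; /* assumes a little-endian host */
}

struct demo_sb {
	le32_t magic; /* stored on disk in little-endian order */
};

int main(void)
{
	struct demo_sb sb = { .magic = (__force le32_t)0xa92b4efcU };
	uint32_t m = le32_to_cpu_demo(sb.magic); /* correct: converted first */
	/* Under `sparse demo.c`, `uint32_t bad = sb.magic;` would warn. */
	return m == 0xa92b4efcU ? 0 : 1;
}

Running sparse over a kernel tree (`make C=1`) reports exactly this kind of mixed-endianness use, which is how bugs like the ones folded into this patch get found.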
Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- drivers/md/md.c | 13 ++++++----- include/linux/raid/md_p.h | 56 +++++++++++++++++++++++------------------------ 2 files changed, 35 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/md/md.c b/drivers/md/md.c index f7f19088f3be..7daa7b1e145f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -974,12 +974,13 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) * version 1 superblock */ -static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb) +static __le32 calc_sb_1_csum(struct mdp_superblock_1 * sb) { - unsigned int disk_csum, csum; + __le32 disk_csum; + u32 csum; unsigned long long newcsum; int size = 256 + le32_to_cpu(sb->max_dev)*2; - unsigned int *isuper = (unsigned int*)sb; + __le32 *isuper = (__le32*)sb; int i; disk_csum = sb->sb_csum; @@ -989,7 +990,7 @@ static unsigned int calc_sb_1_csum(struct mdp_superblock_1 * sb) newcsum += le32_to_cpu(*isuper++); if (size == 2) - newcsum += le16_to_cpu(*(unsigned short*) isuper); + newcsum += le16_to_cpu(*(__le16*) isuper); csum = (newcsum & 0xffffffff) + (newcsum >> 32); sb->sb_csum = disk_csum; @@ -1106,7 +1107,7 @@ static int super_1_load(mdk_rdev_t *rdev, mdk_rdev_t *refdev, int minor_version) if (le32_to_cpu(sb->chunksize)) rdev->size &= ~((sector_t)le32_to_cpu(sb->chunksize)/2 - 1); - if (le32_to_cpu(sb->size) > rdev->size*2) + if (le64_to_cpu(sb->size) > rdev->size*2) return -EINVAL; return ret; } @@ -1228,7 +1229,7 @@ static void super_1_sync(mddev_t *mddev, mdk_rdev_t *rdev) else sb->resync_offset = cpu_to_le64(0); - sb->cnt_corrected_read = atomic_read(&rdev->corrected_errors); + sb->cnt_corrected_read = cpu_to_le32(atomic_read(&rdev->corrected_errors)); sb->raid_disks = cpu_to_le32(mddev->raid_disks); sb->size = cpu_to_le64(mddev->size<<1); diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index b6ebc69bae54..3f2cd98c508b 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -206,52 +206,52 @@ static inline __u64 md_event(mdp_super_t *sb) { */ struct mdp_superblock_1 { /* constant array information - 128 bytes */ - __u32 magic; /* MD_SB_MAGIC: 0xa92b4efc - little endian */ - __u32 major_version; /* 1 */ - __u32 feature_map; /* bit 0 set if 'bitmap_offset' is meaningful */ - __u32 pad0; /* always set to 0 when writing */ + __le32 magic; /* MD_SB_MAGIC: 0xa92b4efc - little endian */ + __le32 major_version; /* 1 */ + __le32 feature_map; /* bit 0 set if 'bitmap_offset' is meaningful */ + __le32 pad0; /* always set to 0 when writing */ __u8 set_uuid[16]; /* user-space generated. 
*/ char set_name[32]; /* set and interpreted by user-space */ - __u64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ - __u32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */ - __u32 layout; /* only for raid5 and raid10 currently */ - __u64 size; /* used size of component devices, in 512byte sectors */ + __le64 ctime; /* lo 40 bits are seconds, top 24 are microseconds or 0*/ + __le32 level; /* -4 (multipath), -1 (linear), 0,1,4,5 */ + __le32 layout; /* only for raid5 and raid10 currently */ + __le64 size; /* used size of component devices, in 512byte sectors */ - __u32 chunksize; /* in 512byte sectors */ - __u32 raid_disks; - __u32 bitmap_offset; /* sectors after start of superblock that bitmap starts + __le32 chunksize; /* in 512byte sectors */ + __le32 raid_disks; + __le32 bitmap_offset; /* sectors after start of superblock that bitmap starts * NOTE: signed, so bitmap can be before superblock * only meaningful of feature_map[0] is set. */ /* These are only valid with feature bit '4' */ - __u32 new_level; /* new level we are reshaping to */ - __u64 reshape_position; /* next address in array-space for reshape */ - __u32 delta_disks; /* change in number of raid_disks */ - __u32 new_layout; /* new layout */ - __u32 new_chunk; /* new chunk size (bytes) */ + __le32 new_level; /* new level we are reshaping to */ + __le64 reshape_position; /* next address in array-space for reshape */ + __le32 delta_disks; /* change in number of raid_disks */ + __le32 new_layout; /* new layout */ + __le32 new_chunk; /* new chunk size (bytes) */ __u8 pad1[128-124]; /* set to 0 when written */ /* constant this-device information - 64 bytes */ - __u64 data_offset; /* sector start of data, often 0 */ - __u64 data_size; /* sectors in this device that can be used for data */ - __u64 super_offset; /* sector start of this superblock */ - __u64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */ - __u32 dev_number; /* permanent identifier of this device - not role in raid */ - __u32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */ + __le64 data_offset; /* sector start of data, often 0 */ + __le64 data_size; /* sectors in this device that can be used for data */ + __le64 super_offset; /* sector start of this superblock */ + __le64 recovery_offset;/* sectors before this offset (from data_offset) have been recovered */ + __le32 dev_number; /* permanent identifier of this device - not role in raid */ + __le32 cnt_corrected_read; /* number of read errors that were corrected by re-writing */ __u8 device_uuid[16]; /* user-space setable, ignored by kernel */ __u8 devflags; /* per-device flags. 
Only one defined...*/ #define WriteMostly1 1 /* mask for writemostly flag in above */ __u8 pad2[64-57]; /* set to 0 when writing */ /* array state information - 64 bytes */ - __u64 utime; /* 40 bits second, 24 btes microseconds */ - __u64 events; /* incremented when superblock updated */ - __u64 resync_offset; /* data before this offset (from data_offset) known to be in sync */ - __u32 sb_csum; /* checksum upto devs[max_dev] */ - __u32 max_dev; /* size of devs[] array to consider */ + __le64 utime; /* 40 bits second, 24 btes microseconds */ + __le64 events; /* incremented when superblock updated */ + __le64 resync_offset; /* data before this offset (from data_offset) known to be in sync */ + __le32 sb_csum; /* checksum upto devs[max_dev] */ + __le32 max_dev; /* size of devs[] array to consider */ __u8 pad3[64-32]; /* set to 0 when writing */ /* device state information. Indexed by dev_number. @@ -260,7 +260,7 @@ struct mdp_superblock_1 { * into the 'roles' value. If a device is spare or faulty, then it doesn't * have a meaningful role. */ - __u16 dev_roles[0]; /* role in array, or 0xffff for a spare, or 0xfffe for faulty */ + __le16 dev_roles[0]; /* role in array, or 0xffff for a spare, or 0xfffe for faulty */ }; /* feature_map bits */ -- cgit v1.2.3 From 4f2e639af4bd5e152fc79256e333643d3dd6c10f Mon Sep 17 00:00:00 2001 From: NeilBrown <neilb@suse.de> Date: Sat, 21 Oct 2006 10:24:09 -0700 Subject: [PATCH] md: endian annotations for the bitmap superblock And a couple of bug fixes found by sparse. Signed-off-by: Neil Brown <neilb@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- drivers/md/bitmap.c | 10 +++++----- include/linux/raid/bitmap.h | 20 ++++++++++---------- 2 files changed, 15 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index d47d38ac71b1..d6f614738bbd 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -536,7 +536,7 @@ static int bitmap_read_sb(struct bitmap *bitmap) printk(KERN_INFO "%s: bitmap file is out of date (%llu < %llu) " "-- forcing full recovery\n", bmname(bitmap), events, (unsigned long long) bitmap->mddev->events); - sb->state |= BITMAP_STALE; + sb->state |= cpu_to_le32(BITMAP_STALE); } success: /* assign fields using values from superblock */ @@ -544,11 +544,11 @@ success: bitmap->daemon_sleep = daemon_sleep; bitmap->daemon_lastrun = jiffies; bitmap->max_write_behind = write_behind; - bitmap->flags |= sb->state; + bitmap->flags |= le32_to_cpu(sb->state); if (le32_to_cpu(sb->version) == BITMAP_MAJOR_HOSTENDIAN) bitmap->flags |= BITMAP_HOSTENDIAN; bitmap->events_cleared = le64_to_cpu(sb->events_cleared); - if (sb->state & BITMAP_STALE) + if (sb->state & cpu_to_le32(BITMAP_STALE)) bitmap->events_cleared = bitmap->mddev->events; err = 0; out: @@ -578,9 +578,9 @@ static void bitmap_mask_state(struct bitmap *bitmap, enum bitmap_state bits, spin_unlock_irqrestore(&bitmap->lock, flags); sb = (bitmap_super_t *)kmap_atomic(bitmap->sb_page, KM_USER0); switch (op) { - case MASK_SET: sb->state |= bits; + case MASK_SET: sb->state |= cpu_to_le32(bits); break; - case MASK_UNSET: sb->state &= ~bits; + case MASK_UNSET: sb->state &= cpu_to_le32(~bits); break; default: BUG(); } diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h index 84d887751855..ebd42a3710b4 100644 --- a/include/linux/raid/bitmap.h +++ b/include/linux/raid/bitmap.h @@ -146,16 +146,16 @@ enum bitmap_state { /* the superblock at the front of the bitmap file -- little 
endian */ typedef struct bitmap_super_s { - __u32 magic; /* 0 BITMAP_MAGIC */ - __u32 version; /* 4 the bitmap major for now, could change... */ - __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */ - __u64 events; /* 24 event counter for the bitmap (1)*/ - __u64 events_cleared;/*32 event counter when last bit cleared (2) */ - __u64 sync_size; /* 40 the size of the md device's sync range(3) */ - __u32 state; /* 48 bitmap state information */ - __u32 chunksize; /* 52 the bitmap chunk size in bytes */ - __u32 daemon_sleep; /* 56 seconds between disk flushes */ - __u32 write_behind; /* 60 number of outstanding write-behind writes */ + __le32 magic; /* 0 BITMAP_MAGIC */ + __le32 version; /* 4 the bitmap major for now, could change... */ + __u8 uuid[16]; /* 8 128 bit uuid - must match md device uuid */ + __le64 events; /* 24 event counter for the bitmap (1)*/ + __le64 events_cleared;/*32 event counter when last bit cleared (2) */ + __le64 sync_size; /* 40 the size of the md device's sync range(3) */ + __le32 state; /* 48 bitmap state information */ + __le32 chunksize; /* 52 the bitmap chunk size in bytes */ + __le32 daemon_sleep; /* 56 seconds between disk flushes */ + __le32 write_behind; /* 60 number of outstanding write-behind writes */ __u8 pad[256 - 64]; /* set to zero */ } bitmap_super_t; -- cgit v1.2.3 From d42552c3ace1fa1f16ae02ce642f4c733cec40ca Mon Sep 17 00:00:00 2001 From: Andrew Morton <akpm@osdl.org> Date: Sat, 21 Oct 2006 10:24:12 -0700 Subject: [PATCH] pci: declare pci_get_device_reverse() We seem to have lost the declaration of pci_get_device_reverse(), if we ever had one. Add a CONFIG_PCI=0 stub too. Acked-by: Alan Cox <alan@lxorguk.ukuu.org.uk> Cc: Greg KH <greg@kroah.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/pci.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 4689e2a699c0..09be0f81b27b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -455,7 +455,11 @@ int pci_find_next_capability (struct pci_dev *dev, u8 pos, int cap); int pci_find_ext_capability (struct pci_dev *dev, int cap); struct pci_bus *pci_find_next_bus(const struct pci_bus *from); -struct pci_dev *pci_get_device (unsigned int vendor, unsigned int device, struct pci_dev *from); +struct pci_dev *pci_get_device(unsigned int vendor, unsigned int device, + struct pci_dev *from); +struct pci_dev *pci_get_device_reverse(unsigned int vendor, unsigned int device, + struct pci_dev *from); + struct pci_dev *pci_get_subsys (unsigned int vendor, unsigned int device, unsigned int ss_vendor, unsigned int ss_device, struct pci_dev *from); @@ -660,7 +664,12 @@ static inline struct pci_dev *pci_find_device(unsigned int vendor, unsigned int static inline struct pci_dev *pci_find_slot(unsigned int bus, unsigned int devfn) { return NULL; } -static inline struct pci_dev *pci_get_device (unsigned int vendor, unsigned int device, struct pci_dev *from) +static inline struct pci_dev *pci_get_device(unsigned int vendor, + unsigned int device, struct pci_dev *from) +{ return NULL; } + +static inline struct pci_dev *pci_get_device_reverse(unsigned int vendor, + unsigned int device, struct pci_dev *from) { return NULL; } static inline struct pci_dev *pci_get_subsys (unsigned int vendor, unsigned int device, -- cgit v1.2.3 From 7516795739bd53175629b90fab0ad488d7a6a9f7 Mon Sep 17 00:00:00 2001 From: Andy Whitcroft <apw@shadowen.org> Date: Sat, 
21 Oct 2006 10:24:14 -0700 Subject: [PATCH] Reintroduce NODES_SPAN_OTHER_NODES for powerpc Reintroduce NODES_SPAN_OTHER_NODES for powerpc Revert "[PATCH] Remove SPAN_OTHER_NODES config definition" This reverts commit f62859bb6871c5e4a8e591c60befc8caaf54db8c. Revert "[PATCH] mm: remove arch independent NODES_SPAN_OTHER_NODES" This reverts commit a94b3ab7eab4edcc9b2cb474b188f774c331adf7. Also update the comments to indicate that this is still required and where it's used. Signed-off-by: Andy Whitcroft <apw@shadowen.org> Cc: Paul Mackerras <paulus@samba.org> Cc: Mike Kravetz <kravetz@us.ibm.com> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Acked-by: Mel Gorman <mel@csn.ul.ie> Acked-by: Will Schmidt <will_schmidt@vnet.ibm.com> Cc: Christoph Lameter <clameter@sgi.com> Cc: <stable@kernel.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- arch/powerpc/Kconfig | 9 +++++++++ arch/powerpc/configs/pseries_defconfig | 1 + include/linux/mmzone.h | 6 ++++++ mm/page_alloc.c | 2 ++ 4 files changed, 18 insertions(+) (limited to 'include') diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8b6910465578..2bd9b7fb0f6c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -751,6 +751,15 @@ config ARCH_MEMORY_PROBE def_bool y depends on MEMORY_HOTPLUG +# Some NUMA nodes have memory ranges that span +# other nodes. Even though a pfn is valid and +# between a node's start and end pfns, it may not +# reside on that node. See memmap_init_zone() +# for details. +config NODES_SPAN_OTHER_NODES + def_bool y + depends on NEED_MULTIPLE_NODES + config PPC_64K_PAGES bool "64k page size" depends on PPC64 diff --git a/arch/powerpc/configs/pseries_defconfig b/arch/powerpc/configs/pseries_defconfig index 9828663652e9..d2833c1a1f3d 100644 --- a/arch/powerpc/configs/pseries_defconfig +++ b/arch/powerpc/configs/pseries_defconfig @@ -184,6 +184,7 @@ CONFIG_SPLIT_PTLOCK_CPUS=4 CONFIG_MIGRATION=y CONFIG_RESOURCES_64BIT=y CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID=y +CONFIG_NODES_SPAN_OTHER_NODES=y # CONFIG_PPC_64K_PAGES is not set CONFIG_SCHED_SMT=y CONFIG_PROC_DEVICETREE=y diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 59855b8718a0..ed0762b283a9 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -674,6 +674,12 @@ void sparse_init(void); #define sparse_index_init(_sec, _nid) do {} while (0) #endif /* CONFIG_SPARSEMEM */ +#ifdef CONFIG_NODES_SPAN_OTHER_NODES +#define early_pfn_in_nid(pfn, nid) (early_pfn_to_nid(pfn) == (nid)) +#else +#define early_pfn_in_nid(pfn, nid) (1) +#endif + #ifndef early_pfn_valid #define early_pfn_valid(pfn) (1) #endif diff --git a/mm/page_alloc.c b/mm/page_alloc.c index ebd425c2e2a7..f5fc45472d5c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1689,6 +1689,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, for (pfn = start_pfn; pfn < end_pfn; pfn++) { if (!early_pfn_valid(pfn)) continue; + if (!early_pfn_in_nid(pfn, nid)) + continue; page = pfn_to_page(pfn); set_page_links(page, zone, nid, pfn); init_page_count(page); -- cgit v1.2.3 From faf6bbcf94caee10ba34adb86db4ecca96bfd3bf Mon Sep 17 00:00:00 2001 From: Paul Jackson <pj@sgi.com> Date: Sat, 21 Oct 2006 10:24:17 -0700 Subject: [PATCH] cpuset: mempolicy migration typo fix Mistyped an ifdef CONFIG_CPUSETS - fixed. I doubt that anyone ever noticed.
The impact of this typo was that if someone: 1) was using MPOL_BIND to force off-node allocations 2) while using cpusets to constrain memory placement 3) when that cpuset was migrating that job's memory 4) while the tasks in that job were actively forking then there was a rare chance that future allocations using that MPOL_BIND policy would be node-local, not off-node. Signed-off-by: Paul Jackson <pj@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/mempolicy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 09f0f575ddff..daabb3aa1ec6 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -150,7 +150,7 @@ extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); extern void mpol_fix_fork_child_flag(struct task_struct *p); #define set_cpuset_being_rebound(x) (cpuset_being_rebound = (x)) -#ifdef CONFIG_CPUSET +#ifdef CONFIG_CPUSETS #define current_cpuset_is_being_rebound() \ (cpuset_being_rebound == current->cpuset) #else -- cgit v1.2.3 From f8829caee311207afbc882794bdc5aa0db5caf33 Mon Sep 17 00:00:00 2001 From: Ralf Baechle <ralf@linux-mips.org> Date: Sat, 21 Oct 2006 23:17:35 +0100 Subject: [MIPS] Fix aliasing bug in copy_to_user_page / copy_from_user_page The current implementation uses a sequence of a cacheflush and a copy. This is racy in case of a multithreaded debuggee and renders GDB virtually unusable. Aside from this, it fixes a performance hog rendering access to /proc/cmdline very slow and resulting in enough cache stalls for the 34K AP/SP programming model to make the bare metal code on the non-Linux VPE miss RT deadlines. The main part of this patch was originally written by Ralf Baechle; Atsushi Nemoto did the debugging. Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp> Signed-off-by: Ralf Baechle <ralf@linux-mips.org> --- arch/mips/mm/init.c | 167 ++++++++++++++++++++++++++++++++++++++++-- arch/mips/mm/pgtable-32.c | 7 +- arch/mips/mm/pgtable-64.c | 11 +++ include/asm-mips/cacheflush.h | 19 +---- include/asm-mips/fixmap.h | 14 +++- 5 files changed, 190 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 88b72c9a8495..2de4d3c367a2 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -30,11 +30,34 @@ #include <asm/cachectl.h> #include <asm/cpu.h> #include <asm/dma.h> +#include <asm/kmap_types.h> #include <asm/mmu_context.h> #include <asm/sections.h> #include <asm/pgtable.h> #include <asm/pgalloc.h> #include <asm/tlb.h> +#include <asm/fixmap.h> + +/* Atomicity and interruptability */ +#ifdef CONFIG_MIPS_MT_SMTC + +#include <asm/mipsmtregs.h> + +#define ENTER_CRITICAL(flags) \ + { \ + unsigned int mvpflags; \ + local_irq_save(flags);\ + mvpflags = dvpe() +#define EXIT_CRITICAL(flags) \ + evpe(mvpflags); \ + local_irq_restore(flags); \ + } +#else + +#define ENTER_CRITICAL(flags) local_irq_save(flags) +#define EXIT_CRITICAL(flags) local_irq_restore(flags) + +#endif /* CONFIG_MIPS_MT_SMTC */ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); @@ -80,13 +103,142 @@ unsigned long setup_zero_pages(void) return 1UL << order; } -#ifdef CONFIG_HIGHMEM -pte_t *kmap_pte; -pgprot_t kmap_prot; +/* + * These are almost like kmap_atomic / kunmap_atmic except they take an + * additional address argument as the hint.
+ */ #define kmap_get_fixmap_pte(vaddr) \ pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)), (vaddr)) +#ifdef CONFIG_MIPS_MT_SMTC +static pte_t *kmap_coherent_pte; +static void __init kmap_coherent_init(void) +{ + unsigned long vaddr; + + /* cache the first coherent kmap pte */ + vaddr = __fix_to_virt(FIX_CMAP_BEGIN); + kmap_coherent_pte = kmap_get_fixmap_pte(vaddr); +} +#else +static inline void kmap_coherent_init(void) {} +#endif + +static inline void *kmap_coherent(struct page *page, unsigned long addr) +{ + enum fixed_addresses idx; + unsigned long vaddr, flags, entrylo; + unsigned long old_ctx; + pte_t pte; + int tlbidx; + + inc_preempt_count(); + idx = (addr >> PAGE_SHIFT) & (FIX_N_COLOURS - 1); +#ifdef CONFIG_MIPS_MT_SMTC + idx += FIX_N_COLOURS * smp_processor_id(); +#endif + vaddr = __fix_to_virt(FIX_CMAP_END - idx); + pte = mk_pte(page, PAGE_KERNEL); +#if defined(CONFIG_64BIT_PHYS_ADDR) && defined(CONFIG_CPU_MIPS32_R1) + entrylo = pte.pte_high; +#else + entrylo = pte_val(pte) >> 6; +#endif + + ENTER_CRITICAL(flags); + old_ctx = read_c0_entryhi(); + write_c0_entryhi(vaddr & (PAGE_MASK << 1)); + write_c0_entrylo0(entrylo); + write_c0_entrylo1(entrylo); +#ifdef CONFIG_MIPS_MT_SMTC + set_pte(kmap_coherent_pte - (FIX_CMAP_END - idx), pte); + /* preload TLB instead of local_flush_tlb_one() */ + mtc0_tlbw_hazard(); + tlb_probe(); + tlb_probe_hazard(); + tlbidx = read_c0_index(); + mtc0_tlbw_hazard(); + if (tlbidx < 0) + tlb_write_random(); + else + tlb_write_indexed(); +#else + tlbidx = read_c0_wired(); + write_c0_wired(tlbidx + 1); + write_c0_index(tlbidx); + mtc0_tlbw_hazard(); + tlb_write_indexed(); +#endif + tlbw_use_hazard(); + write_c0_entryhi(old_ctx); + EXIT_CRITICAL(flags); + + return (void*) vaddr; +} + +#define UNIQUE_ENTRYHI(idx) (CKSEG0 + ((idx) << (PAGE_SHIFT + 1))) + +static inline void kunmap_coherent(struct page *page) +{ +#ifndef CONFIG_MIPS_MT_SMTC + unsigned int wired; + unsigned long flags, old_ctx; + + ENTER_CRITICAL(flags); + old_ctx = read_c0_entryhi(); + wired = read_c0_wired() - 1; + write_c0_wired(wired); + write_c0_index(wired); + write_c0_entryhi(UNIQUE_ENTRYHI(wired)); + write_c0_entrylo0(0); + write_c0_entrylo1(0); + mtc0_tlbw_hazard(); + tlb_write_indexed(); + tlbw_use_hazard(); + write_c0_entryhi(old_ctx); + EXIT_CRITICAL(flags); +#endif + dec_preempt_count(); + preempt_check_resched(); +} + +void copy_to_user_page(struct vm_area_struct *vma, + struct page *page, unsigned long vaddr, void *dst, const void *src, + unsigned long len) +{ + if (cpu_has_dc_aliases) { + void *vto = kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); + memcpy(vto, src, len); + kunmap_coherent(page); + } else + memcpy(dst, src, len); + if ((vma->vm_flags & VM_EXEC) && !cpu_has_ic_fills_f_dc) + flush_cache_page(vma, vaddr, page_to_pfn(page)); +} + +EXPORT_SYMBOL(copy_to_user_page); + +void copy_from_user_page(struct vm_area_struct *vma, + struct page *page, unsigned long vaddr, void *dst, const void *src, + unsigned long len) +{ + if (cpu_has_dc_aliases) { + void *vfrom = + kmap_coherent(page, vaddr) + (vaddr & ~PAGE_MASK); + memcpy(dst, vfrom, len); + kunmap_coherent(page); + } else + memcpy(dst, src, len); +} + +EXPORT_SYMBOL(copy_from_user_page); + + +#ifdef CONFIG_HIGHMEM +pte_t *kmap_pte; +pgprot_t kmap_prot; + static void __init kmap_init(void) { unsigned long kmap_vstart; @@ -97,11 +249,12 @@ static void __init kmap_init(void) kmap_prot = PAGE_KERNEL; } +#endif /* CONFIG_HIGHMEM */ -#ifdef CONFIG_32BIT void __init fixrange_init(unsigned long 
start, unsigned long end, pgd_t *pgd_base) { +#if defined(CONFIG_HIGHMEM) || defined(CONFIG_MIPS_MT_SMTC) pgd_t *pgd; pud_t *pud; pmd_t *pmd; @@ -122,7 +275,7 @@ void __init fixrange_init(unsigned long start, unsigned long end, for (; (k < PTRS_PER_PMD) && (vaddr != end); pmd++, k++) { if (pmd_none(*pmd)) { pte = (pte_t *) alloc_bootmem_low_pages(PAGE_SIZE); - set_pmd(pmd, __pmd(pte)); + set_pmd(pmd, __pmd((unsigned long)pte)); if (pte != pte_offset_kernel(pmd, 0)) BUG(); } @@ -132,9 +285,8 @@ void __init fixrange_init(unsigned long start, unsigned long end, } j = 0; } +#endif } -#endif /* CONFIG_32BIT */ -#endif /* CONFIG_HIGHMEM */ #ifndef CONFIG_NEED_MULTIPLE_NODES extern void pagetable_init(void); @@ -175,6 +327,7 @@ void __init paging_init(void) #ifdef CONFIG_HIGHMEM kmap_init(); #endif + kmap_coherent_init(); max_dma = virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT; low = max_low_pfn; diff --git a/arch/mips/mm/pgtable-32.c b/arch/mips/mm/pgtable-32.c index 4bdaa05f485b..4a61e624b0ec 100644 --- a/arch/mips/mm/pgtable-32.c +++ b/arch/mips/mm/pgtable-32.c @@ -31,9 +31,10 @@ void pgd_init(unsigned long page) void __init pagetable_init(void) { -#ifdef CONFIG_HIGHMEM unsigned long vaddr; - pgd_t *pgd, *pgd_base; + pgd_t *pgd_base; +#ifdef CONFIG_HIGHMEM + pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -44,7 +45,6 @@ void __init pagetable_init(void) pgd_init((unsigned long)swapper_pg_dir + sizeof(pgd_t) * USER_PTRS_PER_PGD); -#ifdef CONFIG_HIGHMEM pgd_base = swapper_pg_dir; /* @@ -53,6 +53,7 @@ void __init pagetable_init(void) vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; fixrange_init(vaddr, 0, pgd_base); +#ifdef CONFIG_HIGHMEM /* * Permanent kmaps: */ diff --git a/arch/mips/mm/pgtable-64.c b/arch/mips/mm/pgtable-64.c index 44b5e97fff65..8d600d307d5d 100644 --- a/arch/mips/mm/pgtable-64.c +++ b/arch/mips/mm/pgtable-64.c @@ -8,6 +8,7 @@ */ #include <linux/init.h> #include <linux/mm.h> +#include <asm/fixmap.h> #include <asm/pgtable.h> void pgd_init(unsigned long page) @@ -52,7 +53,17 @@ void pmd_init(unsigned long addr, unsigned long pagetable) void __init pagetable_init(void) { + unsigned long vaddr; + pgd_t *pgd_base; + /* Initialize the entire pgd. 
*/ pgd_init((unsigned long)swapper_pg_dir); pmd_init((unsigned long)invalid_pmd_table, (unsigned long)invalid_pte_table); + + pgd_base = swapper_pg_dir; + /* + * Fixed mappings: + */ + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; + fixrange_init(vaddr, 0, pgd_base); } diff --git a/include/asm-mips/cacheflush.h b/include/asm-mips/cacheflush.h index 9ab59e2bb233..e3c9925876a3 100644 --- a/include/asm-mips/cacheflush.h +++ b/include/asm-mips/cacheflush.h @@ -55,24 +55,13 @@ extern void (*flush_icache_range)(unsigned long start, unsigned long end); #define flush_cache_vmap(start, end) flush_cache_all() #define flush_cache_vunmap(start, end) flush_cache_all() -static inline void copy_to_user_page(struct vm_area_struct *vma, +extern void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, - unsigned long len) -{ - if (cpu_has_dc_aliases) - flush_cache_page(vma, vaddr, page_to_pfn(page)); - memcpy(dst, src, len); - __flush_icache_page(vma, page); -} + unsigned long len); -static inline void copy_from_user_page(struct vm_area_struct *vma, +extern void copy_from_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, const void *src, - unsigned long len) -{ - if (cpu_has_dc_aliases) - flush_cache_page(vma, vaddr, page_to_pfn(page)); - memcpy(dst, src, len); -} + unsigned long len); extern void (*flush_cache_sigtramp)(unsigned long addr); extern void (*flush_icache_all)(void); diff --git a/include/asm-mips/fixmap.h b/include/asm-mips/fixmap.h index 6959bdb59310..02c8a13fc894 100644 --- a/include/asm-mips/fixmap.h +++ b/include/asm-mips/fixmap.h @@ -45,8 +45,16 @@ * fix-mapped? */ enum fixed_addresses { +#define FIX_N_COLOURS 8 + FIX_CMAP_BEGIN, +#ifdef CONFIG_MIPS_MT_SMTC + FIX_CMAP_END = FIX_CMAP_BEGIN + (FIX_N_COLOURS * NR_CPUS), +#else + FIX_CMAP_END = FIX_CMAP_BEGIN + FIX_N_COLOURS, +#endif #ifdef CONFIG_HIGHMEM - FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_BEGIN = FIX_CMAP_END + 1, FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, #endif __end_of_fixed_addresses @@ -70,9 +78,9 @@ extern void __set_fixmap (enum fixed_addresses idx, * at the top of mem.. */ #if defined(CONFIG_CPU_TX39XX) || defined(CONFIG_CPU_TX49XX) -#define FIXADDR_TOP (0xff000000UL - 0x2000) +#define FIXADDR_TOP ((unsigned long)(long)(int)(0xff000000 - 0x20000)) #else -#define FIXADDR_TOP (0xffffe000UL) +#define FIXADDR_TOP ((unsigned long)(long)(int)0xfffe0000) #endif #define FIXADDR_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) -- cgit v1.2.3 From 185b1aa122f87052d9154bb74990bc785372a750 Mon Sep 17 00:00:00 2001 From: Eric Dumazet <dada1@cosmosbay.com> Date: Sat, 21 Oct 2006 20:24:01 -0700 Subject: [NET]: Reduce sizeof(struct flowi) by 20 bytes. As suggested by David, just kill off some unused fields in dnports to reduce sizeof(struct flowi). If they come back, they should be moved to nl_u.dn_u in order not to enlarge struct flowi again. [ Modified to really delete this stuff instead of using #if 0. -DaveM ] Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Signed-off-by: David S.
Miller <davem@davemloft.net> --- include/net/dn.h | 5 ----- include/net/flow.h | 3 --- 2 files changed, 8 deletions(-) (limited to 'include') diff --git a/include/net/dn.h b/include/net/dn.h index 465b78302782..ac4ce9091747 100644 --- a/include/net/dn.h +++ b/include/net/dn.h @@ -199,11 +199,6 @@ static inline void dn_sk_ports_copy(struct flowi *fl, struct dn_scp *scp) { fl->uli_u.dnports.sport = scp->addrloc; fl->uli_u.dnports.dport = scp->addrrem; - fl->uli_u.dnports.objnum = scp->addr.sdn_objnum; - if (fl->uli_u.dnports.objnum == 0) { - fl->uli_u.dnports.objnamel = (__u8)dn_ntohs(scp->addr.sdn_objnamel); - memcpy(fl->uli_u.dnports.objname, scp->addr.sdn_objname, 16); - } } extern unsigned dn_mss_from_pmtu(struct net_device *dev, int mtu); diff --git a/include/net/flow.h b/include/net/flow.h index 3b44d72b27d3..5cda27cd9deb 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -68,9 +68,6 @@ struct flowi { struct { __le16 sport; __le16 dport; - __u8 objnum; - __u8 objnamel; /* Not 16 bits since max val is 16 */ - __u8 objname[16]; /* Not zero terminated */ } dnports; __be32 spi; -- cgit v1.2.3 From a5c81b648476f5b0594daeefb38bb98409da5340 Mon Sep 17 00:00:00 2001 From: Jaroslav Kysela <perex@suse.cz> Date: Sun, 22 Oct 2006 10:56:24 +0200 Subject: [ALSA] version 1.0.13 Signed-off-by: Jaroslav Kysela <perex@suse.cz> --- include/sound/version.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/sound/version.h b/include/sound/version.h index 4ad86eb6440b..52fd6879b86e 100644 --- a/include/sound/version.h +++ b/include/sound/version.h @@ -1,3 +1,3 @@ /* include/version.h. Generated by alsa/ksync script. */ #define CONFIG_SND_VERSION "1.0.13" -#define CONFIG_SND_DATE " (Fri Oct 06 18:28:19 2006 UTC)" +#define CONFIG_SND_DATE " (Sun Oct 22 08:56:16 2006 UTC)" -- cgit v1.2.3 From 6a43487f43fbd4e03c606dcb62b98374a3af88fc Mon Sep 17 00:00:00 2001 From: Randy Dunlap <rdunlap@xenotime.net> Date: Sun, 22 Oct 2006 20:38:00 -0700 Subject: [NET]: kernel-doc fix for sock.h Fix kernel-doc warning in include/net/sock.h: Warning(/var/linsrc/linux-2619-rc1-pv//include/net/sock.h:894): No description found for parameter 'rcu' Signed-off-by: Randy Dunlap <rdunlap@xenotime.net> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/sock.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 40bb90ebb2d1..ac286a353032 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -884,8 +884,7 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb) /** * sk_filter_release: Release a socket filter - * @sk: socket - * @fp: filter to remove + * @rcu: rcu_head that contains the sk_filter info to remove * * Remove a filter from a socket and release its resources. */ -- cgit v1.2.3 From a94b1d1fd7ca3129e1d38d38167779fa6ee69780 Mon Sep 17 00:00:00 2001 From: "David S. Miller" <davem@sunset.davemloft.net> Date: Sun, 22 Oct 2006 21:53:30 -0700 Subject: [SPARC64]: 8-byte align return value from compat_alloc_user_space() Otherwise we get a ton of unaligned exceptions, for cases such as compat_sys_msgrcv() which go: p = compat_alloc_user_space(second + sizeof(struct msgbuf)); and here 'second' can for example be an arbitrary odd value. Based upon a bug report from Jurij Smakov. Signed-off-by: David S. 
Miller <davem@davemloft.net> --- include/asm-sparc64/compat.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-sparc64/compat.h b/include/asm-sparc64/compat.h index c73935dc7ba1..36511ca51416 100644 --- a/include/asm-sparc64/compat.h +++ b/include/asm-sparc64/compat.h @@ -164,7 +164,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } -static __inline__ void __user *compat_alloc_user_space(long len) +static inline void __user *compat_alloc_user_space(long len) { struct pt_regs *regs = current_thread_info()->kregs; unsigned long usp = regs->u_regs[UREG_I6]; @@ -174,7 +174,10 @@ static __inline__ void __user *compat_alloc_user_space(long len) else usp &= 0xffffffffUL; - return (void __user *) (usp - len); + usp -= len; + usp &= ~0x7UL; + + return (void __user *) usp; } struct compat_ipc64_perm { -- cgit v1.2.3 From 362ff7b2ac0234152b4a334dd006b77f4fa2ab23 Mon Sep 17 00:00:00 2001 From: Jake Moilanen <moilanen@austin.ibm.com> Date: Wed, 18 Oct 2006 10:47:22 -0500 Subject: [POWERPC] Add 970GX cputable entry 970GX cputable entry from Steve Winiecki. Signed-off-by: Jake Moilanen <moilanen@austin.ibm.com> arch/powerpc/kernel/cputable.c | 15 +++++++++++++++ arch/powerpc/oprofile/op_model_power4.c | 2 +- include/asm-powerpc/reg.h | 1 + 3 files changed, 17 insertions(+), 1 deletion(-) Signed-off-by: Paul Mackerras <paulus@samba.org> --- arch/powerpc/kernel/cputable.c | 15 +++++++++++++++ arch/powerpc/oprofile/op_model_power4.c | 2 +- include/asm-powerpc/reg.h | 1 + 3 files changed, 17 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 95382f994404..f23aad66a79e 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -227,6 +227,21 @@ struct cpu_spec cpu_specs[] = { .oprofile_type = PPC_OPROFILE_POWER4, .platform = "ppc970", }, + { /* PPC970GX */ + .pvr_mask = 0xffff0000, + .pvr_value = 0x00450000, + .cpu_name = "PPC970GX", + .cpu_features = CPU_FTRS_PPC970, + .cpu_user_features = COMMON_USER_POWER4 | + PPC_FEATURE_HAS_ALTIVEC_COMP, + .icache_bsize = 128, + .dcache_bsize = 128, + .num_pmcs = 8, + .cpu_setup = __setup_cpu_ppc970, + .oprofile_cpu_type = "ppc64/970", + .oprofile_type = PPC_OPROFILE_POWER4, + .platform = "ppc970", + }, { /* Power5 GR */ .pvr_mask = 0xffff0000, .pvr_value = 0x003a0000, diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c index 506f6b79f893..6a927effcc77 100644 --- a/arch/powerpc/oprofile/op_model_power4.c +++ b/arch/powerpc/oprofile/op_model_power4.c @@ -76,7 +76,7 @@ static inline int mmcra_must_set_sample(void) { if (__is_processor(PV_POWER4) || __is_processor(PV_POWER4p) || __is_processor(PV_970) || __is_processor(PV_970FX) || - __is_processor(PV_970MP)) + __is_processor(PV_970MP) || __is_processor(PV_970GX)) return 1; return 0; diff --git a/include/asm-powerpc/reg.h b/include/asm-powerpc/reg.h index 8fb96811b55d..fde5c804eccb 100644 --- a/include/asm-powerpc/reg.h +++ b/include/asm-powerpc/reg.h @@ -591,6 +591,7 @@ #define PV_630 0x0040 #define PV_630p 0x0041 #define PV_970MP 0x0044 +#define PV_970GX 0x0045 #define PV_BE 0x0070 #define PV_PA6T 0x0090 -- cgit v1.2.3 From 04fed361dadb7921507a470947ac23d2f26352cf Mon Sep 17 00:00:00 2001 From: Russell King <rmk+lkml@arm.linux.org.uk> Date: Sun, 22 Oct 2006 15:57:18 +0100 Subject: [PATCH] Remove __must_check for device_for_each_child() Eliminate more __must_check 
madness. The return code from device_for_each_child() depends on the values which the helper function returns. If the helper function always returns zero, it's utterly pointless to check the return code from device_for_each_child(). The only code which knows if the return value should be checked is the caller itself, so forcing the return code to always be checked is silly. Hence, remove the __must_check annotation. Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/device.h b/include/linux/device.h index 662e6a10144e..9d4f6a963936 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -393,7 +393,7 @@ extern void device_unregister(struct device * dev); extern void device_initialize(struct device * dev); extern int __must_check device_add(struct device * dev); extern void device_del(struct device * dev); -extern int __must_check device_for_each_child(struct device *, void *, +extern int device_for_each_child(struct device *, void *, int (*fn)(struct device *, void *)); extern int device_rename(struct device *dev, char *new_name); -- cgit v1.2.3 From 64271c4d909a15bc588f053a739db2e6df336d7d Mon Sep 17 00:00:00 2001 From: Craig Hughes <craig@com.rmk.(none)> Date: Tue, 24 Oct 2006 00:47:35 +0100 Subject: [ARM] 3902/1: Enable GPIO81-84 on PXA255 The PXA255 has 84 GPIO lines available. This patch allows access to 81-84 Signed-off-by: Craig Hughes <craig@gumstix.com> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> --- include/asm-arm/arch-pxa/irqs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-arm/arch-pxa/irqs.h b/include/asm-arm/arch-pxa/irqs.h index f3bc70eee35b..67ed43674c63 100644 --- a/include/asm-arm/arch-pxa/irqs.h +++ b/include/asm-arm/arch-pxa/irqs.h @@ -73,7 +73,7 @@ #define IRQ_TO_GPIO(i) (((i) < IRQ_GPIO(2)) ? ((i) - IRQ_GPIO0) : IRQ_TO_GPIO_2_x(i)) #if defined(CONFIG_PXA25x) -#define PXA_LAST_GPIO 80 +#define PXA_LAST_GPIO 84 #elif defined(CONFIG_PXA27x) #define PXA_LAST_GPIO 127 #endif -- cgit v1.2.3 From 70a0a5357db20c291d46c04011d646d5d84c868c Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" <ebiederm@xmission.com> Date: Wed, 25 Oct 2006 01:00:23 +0200 Subject: [PATCH] x86-64: Only look at per_cpu data for online cpus. When I generalized __assign_irq_vector I failed to pay attention to what happens when you access a per cpu data structure for a cpu that is not online. It is an undefined case making any code that does it have undefined behavior as well. The code still needs to be able to allocate a vector across cpus that are not online to properly handle combinations like lowest priority interrupt delivery and cpu_hotplug. Not that we can do that today but the infrastructure shouldn't prevent it. So this patch updates the places where we touch per cpu data to only touch online cpus, it makes cpu vector allocation an atomic operation with respect to cpu hotplug, and it updates the cpu start code to properly initialize vector_irq so we don't have inconsistencies. Signed-off-by: Eric W. 
Biederman <ebiederm@xmission.com> Signed-off-by: Andi Kleen <ak@suse.de> --- arch/x86_64/kernel/io_apic.c | 42 +++++++++++++++++++++++++++++++++++++----- arch/x86_64/kernel/smpboot.c | 7 ++++++- include/asm-x86_64/hw_irq.h | 2 ++ 3 files changed, 45 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 0e89ae7e7b22..fe429e5d6b29 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -63,7 +63,7 @@ int timer_over_8254 __initdata = 1; static struct { int pin, apic; } ioapic_i8259 = { -1, -1 }; static DEFINE_SPINLOCK(ioapic_lock); -static DEFINE_SPINLOCK(vector_lock); +DEFINE_SPINLOCK(vector_lock); /* * # of IRQ routing registers @@ -618,6 +618,9 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) BUG_ON((unsigned)irq >= NR_IRQ_VECTORS); + /* Only try and allocate irqs on cpus that are present */ + cpus_and(mask, mask, cpu_online_map); + if (irq_vector[irq] > 0) old_vector = irq_vector[irq]; if (old_vector > 0) { @@ -627,11 +630,12 @@ static int __assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) } for_each_cpu_mask(cpu, mask) { - cpumask_t domain; + cpumask_t domain, new_mask; int new_cpu; int vector, offset; domain = vector_allocation_domain(cpu); + cpus_and(new_mask, domain, cpu_online_map); vector = current_vector; offset = current_offset; @@ -646,18 +650,20 @@ next: continue; if (vector == IA32_SYSCALL_VECTOR) goto next; - for_each_cpu_mask(new_cpu, domain) + for_each_cpu_mask(new_cpu, new_mask) if (per_cpu(vector_irq, new_cpu)[vector] != -1) goto next; /* Found one! */ current_vector = vector; current_offset = offset; if (old_vector >= 0) { + cpumask_t old_mask; int old_cpu; - for_each_cpu_mask(old_cpu, irq_domain[irq]) + cpus_and(old_mask, irq_domain[irq], cpu_online_map); + for_each_cpu_mask(old_cpu, old_mask) per_cpu(vector_irq, old_cpu)[old_vector] = -1; } - for_each_cpu_mask(new_cpu, domain) + for_each_cpu_mask(new_cpu, new_mask) per_cpu(vector_irq, new_cpu)[vector] = irq; irq_vector[irq] = vector; irq_domain[irq] = domain; @@ -678,6 +684,32 @@ static int assign_irq_vector(int irq, cpumask_t mask, cpumask_t *result) return vector; } +void __setup_vector_irq(int cpu) +{ + /* Initialize vector_irq on a new cpu */ + /* This function must be called with vector_lock held */ + unsigned long flags; + int irq, vector; + + + /* Mark the inuse vectors */ + for (irq = 0; irq < NR_IRQ_VECTORS; ++irq) { + if (!cpu_isset(cpu, irq_domain[irq])) + continue; + vector = irq_vector[irq]; + per_cpu(vector_irq, cpu)[vector] = irq; + } + /* Mark the free vectors */ + for (vector = 0; vector < NR_VECTORS; ++vector) { + irq = per_cpu(vector_irq, cpu)[vector]; + if (irq < 0) + continue; + if (!cpu_isset(cpu, irq_domain[irq])) + per_cpu(vector_irq, cpu)[vector] = -1; + } +} + + extern void (*interrupt[NR_IRQS])(void); static struct irq_chip ioapic_chip; diff --git a/arch/x86_64/kernel/smpboot.c b/arch/x86_64/kernel/smpboot.c index 7b7a6870288a..62c2e747af58 100644 --- a/arch/x86_64/kernel/smpboot.c +++ b/arch/x86_64/kernel/smpboot.c @@ -581,12 +581,16 @@ void __cpuinit start_secondary(void) * smp_call_function(). */ lock_ipi_call_lock(); + spin_lock(&vector_lock); + /* Setup the per cpu irq handling data structures */ + __setup_vector_irq(smp_processor_id()); /* * Allow the master to continue. 
*/ cpu_set(smp_processor_id(), cpu_online_map); per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; + spin_unlock(&vector_lock); unlock_ipi_call_lock(); cpu_idle(); @@ -799,7 +803,6 @@ static int __cpuinit do_boot_cpu(int cpu, int apicid) cpu, node); } - alternatives_smp_switch(1); c_idle.idle = get_idle_for_cpu(cpu); @@ -1246,8 +1249,10 @@ int __cpu_disable(void) local_irq_disable(); remove_siblinginfo(cpu); + spin_lock(&vector_lock); /* It's now safe to remove this processor from the online map */ cpu_clear(cpu, cpu_online_map); + spin_unlock(&vector_lock); remove_cpu_from_maps(); fixup_irqs(cpu_online_map); return 0; diff --git a/include/asm-x86_64/hw_irq.h b/include/asm-x86_64/hw_irq.h index 792dd52fcd70..179cce755aa7 100644 --- a/include/asm-x86_64/hw_irq.h +++ b/include/asm-x86_64/hw_irq.h @@ -76,6 +76,8 @@ #ifndef __ASSEMBLY__ typedef int vector_irq_t[NR_VECTORS]; DECLARE_PER_CPU(vector_irq_t, vector_irq); +extern void __setup_vector_irq(int cpu); +extern spinlock_t vector_lock; /* * Various low-level irq details needed by irq.c, process.c, -- cgit v1.2.3 From 42c4aaadb737e0e672b3fb86b2c41ff59f0fb8bc Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt <benh@kernel.crashing.org> Date: Tue, 24 Oct 2006 16:42:40 +1000 Subject: [POWERPC] Consolidate feature fixup code There are currently two versions of the functions for applying the feature fixups, one for CPU features and one for firmware features. In addition, they are both in assembly and with separate implementations for 32 and 64 bits. identify_cpu() is also implemented in assembly and separately for 32 and 64 bits. This patch replaces them with a pair of C functions. The call sites are slightly moved on ppc64 as well to be called from C instead of from assembly, though it's a very small change, and thus shouldn't cause any problem. 
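To make the consolidated mechanism concrete: the C replacement boils down to walking a table of {mask, value, code range} entries and overwriting with nops every range whose feature bits the running CPU lacks. Below is a hedged userspace sketch of that idea only; apply_fixups_demo and the fake instruction array are invented names, and the real do_feature_fixups() additionally flushes the d-cache and invalidates the i-cache over each patched range.

#include <stdio.h>

#define PPC_NOP 0x60000000u /* "ori r0,r0,0", the nop the kernel writes */

struct fixup_entry {
	unsigned long mask, value; /* feature bits this section requires */
	unsigned int *start, *end; /* instruction range to nop out if absent */
};

static void apply_fixups_demo(unsigned long features,
			      struct fixup_entry *fcur, struct fixup_entry *fend)
{
	for (; fcur < fend; fcur++) {
		if ((features & fcur->mask) == fcur->value)
			continue; /* feature present: leave the code alone */
		for (unsigned int *p = fcur->start; p < fcur->end; p++)
			*p = PPC_NOP; /* kernel also does dcbst/icbi/sync here */
	}
}

int main(void)
{
	unsigned int text[4] = { 0x11111111, 0x22222222, 0x33333333, 0x44444444 };
	struct fixup_entry table[] = {
		{ 0x4, 0x4, &text[1], &text[3] }, /* middle two insns need bit 0x4 */
	};

	apply_fixups_demo(0, table, table + 1); /* pretend the CPU lacks 0x4 */
	for (int i = 0; i < 4; i++)
		printf("%08x\n", text[i]); /* prints 11111111, nop, nop, 44444444 */
	return 0;
}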
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Acked-by: Olof Johansson <olof@lixom.net> Signed-off-by: Paul Mackerras <paulus@samba.org> --- arch/powerpc/kernel/cputable.c | 72 ++++++++++++++++++- arch/powerpc/kernel/head_64.S | 19 ----- arch/powerpc/kernel/misc_32.S | 74 -------------------- arch/powerpc/kernel/misc_64.S | 124 --------------------------------- arch/powerpc/kernel/setup_32.c | 8 ++- arch/powerpc/kernel/setup_64.c | 11 +++ arch/powerpc/platforms/iseries/setup.c | 5 ++ arch/ppc/kernel/misc.S | 74 -------------------- arch/ppc/kernel/setup.c | 10 +-- include/asm-powerpc/cputable.h | 7 +- include/asm-powerpc/firmware.h | 2 + 11 files changed, 106 insertions(+), 300 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index f23aad66a79e..6fdfaa4a82b8 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -18,6 +18,7 @@ #include <asm/oprofile_impl.h> #include <asm/cputable.h> +#include <asm/prom.h> /* for PTRRELOC on ARCH=ppc */ struct cpu_spec* cur_cpu_spec = NULL; EXPORT_SYMBOL(cur_cpu_spec); @@ -73,7 +74,7 @@ extern void __restore_cpu_ppc970(void); #define PPC_FEATURE_SPE_COMP 0 #endif -struct cpu_spec cpu_specs[] = { +static struct cpu_spec cpu_specs[] = { #ifdef CONFIG_PPC64 { /* Power3 */ .pvr_mask = 0xffff0000, @@ -1167,3 +1168,72 @@ struct cpu_spec cpu_specs[] = { #endif /* !CLASSIC_PPC */ #endif /* CONFIG_PPC32 */ }; + +struct cpu_spec *identify_cpu(unsigned long offset) +{ + struct cpu_spec *s = cpu_specs; + struct cpu_spec **cur = &cur_cpu_spec; + unsigned int pvr = mfspr(SPRN_PVR); + int i; + + s = PTRRELOC(s); + cur = PTRRELOC(cur); + + if (*cur != NULL) + return PTRRELOC(*cur); + + for (i = 0; i < ARRAY_SIZE(cpu_specs); i++,s++) + if ((pvr & s->pvr_mask) == s->pvr_value) { + *cur = cpu_specs + i; +#ifdef CONFIG_PPC64 + /* ppc64 expects identify_cpu to also call setup_cpu + * for that processor. I will consolidate that at a + * later time, for now, just use our friend #ifdef. + * we also don't need to PTRRELOC the function pointer + * on ppc64 as we are running at 0 in real mode. 
+ */ + if (s->cpu_setup) { + s->cpu_setup(offset, s); + } +#endif /* CONFIG_PPC64 */ + return s; + } + BUG(); + return NULL; +} + +void do_feature_fixups(unsigned long offset, unsigned long value, + void *fixup_start, void *fixup_end) +{ + struct fixup_entry { + unsigned long mask; + unsigned long value; + unsigned int *start; + unsigned int *end; + } *fcur, *fend; + + fcur = fixup_start; + fend = fixup_end; + + for (; fcur < fend; fcur++) { + unsigned int *pstart, *pend, *p; + + if ((value & fcur->mask) == fcur->value) + continue; + + /* These PTRRELOCs will disappear once the new scheme for + * modules and vdso is implemented + */ + pstart = PTRRELOC(fcur->start); + pend = PTRRELOC(fcur->end); + + for (p = pstart; p < pend; p++) { + *p = 0x60000000u; + asm volatile ("dcbst 0, %0" : : "r" (p)); + } + asm volatile ("sync" : : : "memory"); + for (p = pstart; p < pend; p++) + asm volatile ("icbi 0,%0" : : "r" (p)); + asm volatile ("sync; isync" : : : "memory"); + } +} diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S index 645c7f10fb28..f12e3c55520d 100644 --- a/arch/powerpc/kernel/head_64.S +++ b/arch/powerpc/kernel/head_64.S @@ -1580,11 +1580,6 @@ _STATIC(__start_initialization_iSeries) li r0,0 stdu r0,-STACK_FRAME_OVERHEAD(r1) - LOAD_REG_IMMEDIATE(r3,cpu_specs) - LOAD_REG_IMMEDIATE(r4,cur_cpu_spec) - li r5,0 - bl .identify_cpu - LOAD_REG_IMMEDIATE(r2,__toc_start) addi r2,r2,0x4000 addi r2,r2,0x4000 @@ -1964,13 +1959,6 @@ _STATIC(start_here_multiplatform) addi r2,r2,0x4000 add r2,r2,r26 - LOAD_REG_IMMEDIATE(r3, cpu_specs) - add r3,r3,r26 - LOAD_REG_IMMEDIATE(r4,cur_cpu_spec) - add r4,r4,r26 - mr r5,r26 - bl .identify_cpu - /* Do very early kernel initializations, including initial hash table, * stab and slb setup before we turn on relocation. */ @@ -2000,13 +1988,6 @@ _STATIC(start_here_common) li r0,0 stdu r0,-STACK_FRAME_OVERHEAD(r1) - /* Apply the CPUs-specific fixups (nop out sections not relevant - * to this CPU - */ - li r3,0 - bl .do_cpu_ftr_fixups - bl .do_fw_ftr_fixups - /* ptr to current */ LOAD_REG_IMMEDIATE(r4, init_task) std r4,PACACURRENT(r13) diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S index 88fd73fdf048..412bea3cf813 100644 --- a/arch/powerpc/kernel/misc_32.S +++ b/arch/powerpc/kernel/misc_32.S @@ -101,80 +101,6 @@ _GLOBAL(reloc_got2) mtlr r11 blr -/* - * identify_cpu, - * called with r3 = data offset and r4 = CPU number - * doesn't change r3 - */ -_GLOBAL(identify_cpu) - addis r8,r3,cpu_specs@ha - addi r8,r8,cpu_specs@l - mfpvr r7 -1: - lwz r5,CPU_SPEC_PVR_MASK(r8) - and r5,r5,r7 - lwz r6,CPU_SPEC_PVR_VALUE(r8) - cmplw 0,r6,r5 - beq 1f - addi r8,r8,CPU_SPEC_ENTRY_SIZE - b 1b -1: - addis r6,r3,cur_cpu_spec@ha - addi r6,r6,cur_cpu_spec@l - sub r8,r8,r3 - stw r8,0(r6) - blr - -/* - * do_cpu_ftr_fixups - goes through the list of CPU feature fixups - * and writes nop's over sections of code that don't apply for this cpu. 
- * r3 = data offset (not changed) - */ -_GLOBAL(do_cpu_ftr_fixups) - /* Get CPU 0 features */ - addis r6,r3,cur_cpu_spec@ha - addi r6,r6,cur_cpu_spec@l - lwz r4,0(r6) - add r4,r4,r3 - lwz r4,CPU_SPEC_FEATURES(r4) - - /* Get the fixup table */ - addis r6,r3,__start___ftr_fixup@ha - addi r6,r6,__start___ftr_fixup@l - addis r7,r3,__stop___ftr_fixup@ha - addi r7,r7,__stop___ftr_fixup@l - - /* Do the fixup */ -1: cmplw 0,r6,r7 - bgelr - addi r6,r6,16 - lwz r8,-16(r6) /* mask */ - and r8,r8,r4 - lwz r9,-12(r6) /* value */ - cmplw 0,r8,r9 - beq 1b - lwz r8,-8(r6) /* section begin */ - lwz r9,-4(r6) /* section end */ - subf. r9,r8,r9 - beq 1b - /* write nops over the section of code */ - /* todo: if large section, add a branch at the start of it */ - srwi r9,r9,2 - mtctr r9 - add r8,r8,r3 - lis r0,0x60000000@h /* nop */ -3: stw r0,0(r8) - andi. r10,r4,CPU_FTR_SPLIT_ID_CACHE@l - beq 2f - dcbst 0,r8 /* suboptimal, but simpler */ - sync - icbi 0,r8 -2: addi r8,r8,4 - bdnz 3b - sync /* additional sync needed on g4 */ - isync - b 1b - /* * call_setup_cpu - call the setup_cpu function for this cpu * r3 = data offset, r24 = cpu number diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index c70e20708a1f..21fd2c662a99 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -246,130 +246,6 @@ _GLOBAL(__flush_dcache_icache) isync blr -/* - * identify_cpu and calls setup_cpu - * In: r3 = base of the cpu_specs array - * r4 = address of cur_cpu_spec - * r5 = relocation offset - */ -_GLOBAL(identify_cpu) - mfpvr r7 -1: - lwz r8,CPU_SPEC_PVR_MASK(r3) - and r8,r8,r7 - lwz r9,CPU_SPEC_PVR_VALUE(r3) - cmplw 0,r9,r8 - beq 1f - addi r3,r3,CPU_SPEC_ENTRY_SIZE - b 1b -1: - sub r0,r3,r5 - std r0,0(r4) - ld r4,CPU_SPEC_SETUP(r3) - cmpdi 0,r4,0 - add r4,r4,r5 - beqlr - ld r4,0(r4) - add r4,r4,r5 - mtctr r4 - /* Calling convention for cpu setup is r3=offset, r4=cur_cpu_spec */ - mr r4,r3 - mr r3,r5 - bctr - -/* - * do_cpu_ftr_fixups - goes through the list of CPU feature fixups - * and writes nop's over sections of code that don't apply for this cpu. - * r3 = data offset (not changed) - */ -_GLOBAL(do_cpu_ftr_fixups) - /* Get CPU 0 features */ - LOAD_REG_IMMEDIATE(r6,cur_cpu_spec) - sub r6,r6,r3 - ld r4,0(r6) - sub r4,r4,r3 - ld r4,CPU_SPEC_FEATURES(r4) - /* Get the fixup table */ - LOAD_REG_IMMEDIATE(r6,__start___ftr_fixup) - sub r6,r6,r3 - LOAD_REG_IMMEDIATE(r7,__stop___ftr_fixup) - sub r7,r7,r3 - /* Do the fixup */ -1: cmpld r6,r7 - bgelr - addi r6,r6,32 - ld r8,-32(r6) /* mask */ - and r8,r8,r4 - ld r9,-24(r6) /* value */ - cmpld r8,r9 - beq 1b - ld r8,-16(r6) /* section begin */ - ld r9,-8(r6) /* section end */ - subf. r9,r8,r9 - beq 1b - /* write nops over the section of code */ - /* todo: if large section, add a branch at the start of it */ - srwi r9,r9,2 - mtctr r9 - sub r8,r8,r3 - lis r0,0x60000000@h /* nop */ -3: stw r0,0(r8) - andi. r10,r4,CPU_FTR_SPLIT_ID_CACHE@l - beq 2f - dcbst 0,r8 /* suboptimal, but simpler */ - sync - icbi 0,r8 -2: addi r8,r8,4 - bdnz 3b - sync /* additional sync needed on g4 */ - isync - b 1b - -/* - * do_fw_ftr_fixups - goes through the list of firmware feature fixups - * and writes nop's over sections of code that don't apply for this firmware. 
- * r3 = data offset (not changed) - */ -_GLOBAL(do_fw_ftr_fixups) - /* Get firmware features */ - LOAD_REG_IMMEDIATE(r6,powerpc_firmware_features) - sub r6,r6,r3 - ld r4,0(r6) - /* Get the fixup table */ - LOAD_REG_IMMEDIATE(r6,__start___fw_ftr_fixup) - sub r6,r6,r3 - LOAD_REG_IMMEDIATE(r7,__stop___fw_ftr_fixup) - sub r7,r7,r3 - /* Do the fixup */ -1: cmpld r6,r7 - bgelr - addi r6,r6,32 - ld r8,-32(r6) /* mask */ - and r8,r8,r4 - ld r9,-24(r6) /* value */ - cmpld r8,r9 - beq 1b - ld r8,-16(r6) /* section begin */ - ld r9,-8(r6) /* section end */ - subf. r9,r8,r9 - beq 1b - /* write nops over the section of code */ - /* todo: if large section, add a branch at the start of it */ - srwi r9,r9,2 - mtctr r9 - sub r8,r8,r3 - lis r0,0x60000000@h /* nop */ -3: stw r0,0(r8) -BEGIN_FTR_SECTION - dcbst 0,r8 /* suboptimal, but simpler */ - sync - icbi 0,r8 -END_FTR_SECTION_IFSET(CPU_FTR_SPLIT_ID_CACHE) - addi r8,r8,4 - bdnz 3b - sync /* additional sync needed on g4 */ - isync - b 1b #if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) /* diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 191d0ab09222..769e511783b0 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -91,6 +91,7 @@ int ucache_bsize; unsigned long __init early_init(unsigned long dt_ptr) { unsigned long offset = reloc_offset(); + struct cpu_spec *spec; /* First zero the BSS -- use memset_io, some platforms don't have * caches on yet */ @@ -100,8 +101,11 @@ unsigned long __init early_init(unsigned long dt_ptr) * Identify the CPU type and fix up code sections * that depend on which cpu we have. */ - identify_cpu(offset, 0); - do_cpu_ftr_fixups(offset); + spec = identify_cpu(offset); + + do_feature_fixups(offset, spec->cpu_features, + PTRRELOC(&__start___ftr_fixup), + PTRRELOC(&__stop___ftr_fixup)); return KERNELBASE + offset; } diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 4b2e32eab9dc..1969b5686eee 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -170,6 +170,9 @@ void __init setup_paca(int cpu) void __init early_setup(unsigned long dt_ptr) { + /* Identify CPU type */ + identify_cpu(0); + /* Assume we're on cpu 0 for now. Don't write to the paca yet! */ setup_paca(0); @@ -348,6 +351,14 @@ void __init setup_system(void) { DBG(" -> setup_system()\n"); + /* Apply the CPUs-specific and firmware specific fixups to kernel + * text (nop out sections not relevant to this CPU or this firmware) + */ + do_feature_fixups(0, cur_cpu_spec->cpu_features, + &__start___ftr_fixup, &__stop___ftr_fixup); + do_feature_fixups(0, powerpc_firmware_features, + &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); + /* * Unflatten the device-tree passed by prom_init or kexec */ diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c index a0ff7ba7d666..6f73469fd3b0 100644 --- a/arch/powerpc/platforms/iseries/setup.c +++ b/arch/powerpc/platforms/iseries/setup.c @@ -694,6 +694,11 @@ void * __init iSeries_early_setup(void) { unsigned long phys_mem_size; + /* Identify CPU type. This is done again by the common code later + * on but calling this function multiple times is fine. 
+ */ + identify_cpu(0); + powerpc_firmware_features |= FW_FEATURE_ISERIES; powerpc_firmware_features |= FW_FEATURE_LPAR; diff --git a/arch/ppc/kernel/misc.S b/arch/ppc/kernel/misc.S index 5f6684012ded..d319f9ba2379 100644 --- a/arch/ppc/kernel/misc.S +++ b/arch/ppc/kernel/misc.S @@ -109,80 +109,6 @@ _GLOBAL(reloc_got2) mtlr r11 blr -/* - * identify_cpu, - * called with r3 = data offset and r4 = CPU number - * doesn't change r3 - */ -_GLOBAL(identify_cpu) - addis r8,r3,cpu_specs@ha - addi r8,r8,cpu_specs@l - mfpvr r7 -1: - lwz r5,CPU_SPEC_PVR_MASK(r8) - and r5,r5,r7 - lwz r6,CPU_SPEC_PVR_VALUE(r8) - cmplw 0,r6,r5 - beq 1f - addi r8,r8,CPU_SPEC_ENTRY_SIZE - b 1b -1: - addis r6,r3,cur_cpu_spec@ha - addi r6,r6,cur_cpu_spec@l - sub r8,r8,r3 - stw r8,0(r6) - blr - -/* - * do_cpu_ftr_fixups - goes through the list of CPU feature fixups - * and writes nop's over sections of code that don't apply for this cpu. - * r3 = data offset (not changed) - */ -_GLOBAL(do_cpu_ftr_fixups) - /* Get CPU 0 features */ - addis r6,r3,cur_cpu_spec@ha - addi r6,r6,cur_cpu_spec@l - lwz r4,0(r6) - add r4,r4,r3 - lwz r4,CPU_SPEC_FEATURES(r4) - - /* Get the fixup table */ - addis r6,r3,__start___ftr_fixup@ha - addi r6,r6,__start___ftr_fixup@l - addis r7,r3,__stop___ftr_fixup@ha - addi r7,r7,__stop___ftr_fixup@l - - /* Do the fixup */ -1: cmplw 0,r6,r7 - bgelr - addi r6,r6,16 - lwz r8,-16(r6) /* mask */ - and r8,r8,r4 - lwz r9,-12(r6) /* value */ - cmplw 0,r8,r9 - beq 1b - lwz r8,-8(r6) /* section begin */ - lwz r9,-4(r6) /* section end */ - subf. r9,r8,r9 - beq 1b - /* write nops over the section of code */ - /* todo: if large section, add a branch at the start of it */ - srwi r9,r9,2 - mtctr r9 - add r8,r8,r3 - lis r0,0x60000000@h /* nop */ -3: stw r0,0(r8) - andi. r10,r4,CPU_FTR_SPLIT_ID_CACHE@l - beq 2f - dcbst 0,r8 /* suboptimal, but simpler */ - sync - icbi 0,r8 -2: addi r8,r8,4 - bdnz 3b - sync /* additional sync needed on g4 */ - isync - b 1b - /* * call_setup_cpu - call the setup_cpu function for this cpu * r3 = data offset, r24 = cpu number diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c index 75fe13815be2..41a640f16bdd 100644 --- a/arch/ppc/kernel/setup.c +++ b/arch/ppc/kernel/setup.c @@ -38,6 +38,7 @@ #include <asm/nvram.h> #include <asm/xmon.h> #include <asm/ocp.h> +#include <asm/prom.h> #define USES_PPC_SYS (defined(CONFIG_85xx) || defined(CONFIG_83xx) || \ defined(CONFIG_MPC10X_BRIDGE) || defined(CONFIG_8260) || \ @@ -53,8 +54,6 @@ extern void platform_init(unsigned long r3, unsigned long r4, unsigned long r5, unsigned long r6, unsigned long r7); -extern void identify_cpu(unsigned long offset, unsigned long cpu); -extern void do_cpu_ftr_fixups(unsigned long offset); extern void reloc_got2(unsigned long offset); extern void ppc6xx_idle(void); @@ -301,6 +300,7 @@ early_init(int r3, int r4, int r5) { unsigned long phys; unsigned long offset = reloc_offset(); + struct cpu_spec *spec; /* Default */ phys = offset + KERNELBASE; @@ -313,8 +313,10 @@ early_init(int r3, int r4, int r5) * Identify the CPU type and fix up code sections * that depend on which cpu we have. 
*/ - identify_cpu(offset, 0); - do_cpu_ftr_fixups(offset); + spec = identify_cpu(offset); + do_feature_fixups(offset, spec->cpu_features, + PTRRELOC(&__start___ftr_fixup), + PTRRELOC(&__stop___ftr_fixup)); return phys; } diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h index 12707ab9dc98..4d22218739e0 100644 --- a/include/asm-powerpc/cputable.h +++ b/include/asm-powerpc/cputable.h @@ -89,8 +89,11 @@ struct cpu_spec { extern struct cpu_spec *cur_cpu_spec; -extern void identify_cpu(unsigned long offset, unsigned long cpu); -extern void do_cpu_ftr_fixups(unsigned long offset); +extern unsigned int __start___ftr_fixup, __stop___ftr_fixup; + +extern struct cpu_spec *identify_cpu(unsigned long offset); +extern void do_feature_fixups(unsigned long offset, unsigned long value, + void *fixup_start, void *fixup_end); #endif /* __ASSEMBLY__ */ diff --git a/include/asm-powerpc/firmware.h b/include/asm-powerpc/firmware.h index 1022737f4f34..c16e0a6b9dab 100644 --- a/include/asm-powerpc/firmware.h +++ b/include/asm-powerpc/firmware.h @@ -96,6 +96,8 @@ extern void machine_check_fwnmi(void); /* This is true if we are using the firmware NMI handler (typically LPAR) */ extern int fwnmi_active; +extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup; + #else /* __ASSEMBLY__ */ #define BEGIN_FW_FTR_SECTION 96: -- cgit v1.2.3 From 7aeb732428fc8e2ecae6d432873770c12f04a979 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt <benh@kernel.crashing.org> Date: Fri, 20 Oct 2006 11:47:16 +1000 Subject: [POWERPC] Support nested cpu feature sections This patch adds some macros that can be used with an explicit label in order to nest cpu features. This should be used very carefully but is necessary for the upcoming cell TB fixup. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Acked-by: Olof Johansson <olof@lixom.net> Signed-off-by: Paul Mackerras <paulus@samba.org> --- include/asm-powerpc/cputable.h | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h index 4d22218739e0..65faf322ace0 100644 --- a/include/asm-powerpc/cputable.h +++ b/include/asm-powerpc/cputable.h @@ -434,30 +434,34 @@ static inline int cpu_has_feature(unsigned long feature) #ifdef __ASSEMBLY__ -#define BEGIN_FTR_SECTION 98: +#define BEGIN_FTR_SECTION_NESTED(label) label: +#define BEGIN_FTR_SECTION BEGIN_FTR_SECTION_NESTED(98) #ifndef __powerpc64__ -#define END_FTR_SECTION(msk, val) \ +#define END_FTR_SECTION_NESTED(msk, val, label) \ 99: \ .section __ftr_fixup,"a"; \ .align 2; \ .long msk; \ .long val; \ - .long 98b; \ + .long label##b; \ .long 99b; \ .previous #else /* __powerpc64__ */ -#define END_FTR_SECTION(msk, val) \ +#define END_FTR_SECTION_NESTED(msk, val, label) \ 99: \ .section __ftr_fixup,"a"; \ .align 3; \ .llong msk; \ .llong val; \ - .llong 98b; \ + .llong label##b; \ .llong 99b; \ .previous #endif /* __powerpc64__ */ +#define END_FTR_SECTION(msk, val) \ + END_FTR_SECTION_NESTED(msk, val, 98) + #define END_FTR_SECTION_IFSET(msk) END_FTR_SECTION((msk), (msk)) #define END_FTR_SECTION_IFCLR(msk) END_FTR_SECTION((msk), 0) #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 0909c8c2d547e45ca50e2492b08ec93a37b35237 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt <benh@kernel.crashing.org> Date: Fri, 20 Oct 2006 11:47:18 +1000 Subject: [POWERPC] Support feature fixups in vdso's This patch reworks the feature fixup mechanism so vdso's can be fixed up.
The main issue was that the construct: .long label (or .llong on 64 bits) will not work in the case of a shared library like the vdso. It will generate an empty placeholder in the fixup table along with a reloc, which is not something we can deal with in the vdso. The idea here (thanks Alan Modra !) is to instead use something like: 1: .long label - 1b That is, the feature fixup tables no longer contain addresses of bits of code to patch, but offsets of such code from the fixup table entry itself. That is properly resolved by ld when building the .so's. I've modified the fixup mechanism generically to use that method for the rest of the kernel as well. Another trick is that the 32 bits vDSO included in the 64 bits kernel needs to have a table in the 64 bits format. However, gas does not support 32 bits code with a statement of the form: .llong label - 1b (Or even just .llong label) That is, it cannot emit the right fixup/relocation for the linker to use to assign a 32 bits address to an .llong field. Thus, in the specific case of the 32 bits vdso built as part of the 64 bits kernel, we are using a modified macro that generates: .long 0xffffffff .llong label - 1b Note that this assumes that the value is negative, which is enforced by the .lds (those offsets are always negative as the .text is always before the fixup table and gas doesn't support emitting the reloc the other way around). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org> --- arch/powerpc/kernel/cputable.c | 11 ++++--- arch/powerpc/kernel/setup_32.c | 2 +- arch/powerpc/kernel/setup_64.c | 4 +-- arch/powerpc/kernel/vdso.c | 43 +++++++++++++++++++++++++++ arch/powerpc/kernel/vdso32/vdso32.lds.S | 12 ++++++++ arch/powerpc/kernel/vdso64/vdso64.lds.S | 10 +++++++ arch/ppc/kernel/setup.c | 2 +- include/asm-powerpc/asm-compat.h | 52 +++++++++++++++++++++++++++++++++ include/asm-powerpc/cputable.h | 31 ++++----------------- include/asm-powerpc/firmware.h | 15 ++++------ include/asm-powerpc/timex.h | 8 +++-- 11 files changed, 141 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 6fdfaa4a82b8..bfd499ee3753 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -1202,14 +1202,13 @@ struct cpu_spec *identify_cpu(unsigned long offset) return NULL; } -void do_feature_fixups(unsigned long offset, unsigned long value, - void *fixup_start, void *fixup_end) +void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) { struct fixup_entry { unsigned long mask; unsigned long value; - unsigned int *start; - unsigned int *end; + long start_off; + long end_off; } *fcur, *fend; fcur = fixup_start; @@ -1224,8 +1223,8 @@ void do_feature_fixups(unsigned long offset, unsigned long value, /* These PTRRELOCs will disappear once the new scheme for * modules and vdso is implemented */ - pstart = PTRRELOC(fcur->start); - pend = PTRRELOC(fcur->end); + pstart = ((unsigned int *)fcur) + (fcur->start_off / 4); + pend = ((unsigned int *)fcur) + (fcur->end_off / 4); for (p = pstart; p < pend; p++) { *p = 0x60000000u; diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 769e511783b0..a4c2964a3ca6 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -103,7 +103,7 @@ unsigned long __init early_init(unsigned long dt_ptr) */ spec = identify_cpu(offset); - do_feature_fixups(offset, spec->cpu_features, +
do_feature_fixups(spec->cpu_features, PTRRELOC(&__start___ftr_fixup), PTRRELOC(&__stop___ftr_fixup)); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 1969b5686eee..16278968dab6 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -354,9 +354,9 @@ void __init setup_system(void) /* Apply the CPUs-specific and firmware specific fixups to kernel * text (nop out sections not relevant to this CPU or this firmware) */ - do_feature_fixups(0, cur_cpu_spec->cpu_features, + do_feature_fixups(cur_cpu_spec->cpu_features, &__start___ftr_fixup, &__stop___ftr_fixup); - do_feature_fixups(0, powerpc_firmware_features, + do_feature_fixups(powerpc_firmware_features, &__start___fw_ftr_fixup, &__stop___fw_ftr_fixup); /* diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 1a7e19cdab39..c913ad5cad29 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -36,6 +36,8 @@ #include <asm/vdso.h> #include <asm/vdso_datapage.h> +#include "setup.h" + #undef DEBUG #ifdef DEBUG @@ -586,6 +588,43 @@ static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32, return 0; } + +static __init int vdso_fixup_features(struct lib32_elfinfo *v32, + struct lib64_elfinfo *v64) +{ + void *start32; + unsigned long size32; + +#ifdef CONFIG_PPC64 + void *start64; + unsigned long size64; + + start64 = find_section64(v64->hdr, "__ftr_fixup", &size64); + if (start64) + do_feature_fixups(cur_cpu_spec->cpu_features, + start64, start64 + size64); + + start64 = find_section64(v64->hdr, "__fw_ftr_fixup", &size64); + if (start64) + do_feature_fixups(powerpc_firmware_features, + start64, start64 + size64); +#endif /* CONFIG_PPC64 */ + + start32 = find_section32(v32->hdr, "__ftr_fixup", &size32); + if (start32) + do_feature_fixups(cur_cpu_spec->cpu_features, + start32, start32 + size32); + +#ifdef CONFIG_PPC64 + start32 = find_section32(v32->hdr, "__fw_ftr_fixup", &size32); + if (start32) + do_feature_fixups(powerpc_firmware_features, + start32, start32 + size32); +#endif /* CONFIG_PPC64 */ + + return 0; +} + static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32, struct lib64_elfinfo *v64) { @@ -634,6 +673,9 @@ static __init int vdso_setup(void) if (vdso_fixup_datapage(&v32, &v64)) return -1; + if (vdso_fixup_features(&v32, &v64)) + return -1; + if (vdso_fixup_alt_funcs(&v32, &v64)) return -1; @@ -714,6 +756,7 @@ void __init vdso_init(void) * Setup the syscall map in the vDOS */ vdso_setup_syscall_map(); + /* * Initialize the vDSO images in memory, that is do necessary * fixups of vDSO symbols, locate trampolines, etc... diff --git a/arch/powerpc/kernel/vdso32/vdso32.lds.S b/arch/powerpc/kernel/vdso32/vdso32.lds.S index 6187af2d54c3..26e138c4ce17 100644 --- a/arch/powerpc/kernel/vdso32/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso32/vdso32.lds.S @@ -32,6 +32,18 @@ SECTIONS PROVIDE (_etext = .); PROVIDE (etext = .); + . = ALIGN(8); + __ftr_fixup : { + *(__ftr_fixup) + } + +#ifdef CONFIG_PPC64 + . = ALIGN(8); + __fw_ftr_fixup : { + *(__fw_ftr_fixup) + } +#endif + /* Other stuff is appended to the text segment: */ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } .rodata1 : { *(.rodata1) } diff --git a/arch/powerpc/kernel/vdso64/vdso64.lds.S b/arch/powerpc/kernel/vdso64/vdso64.lds.S index 4a2b6dc0960c..2d70f35d50b5 100644 --- a/arch/powerpc/kernel/vdso64/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso64/vdso64.lds.S @@ -31,6 +31,16 @@ SECTIONS PROVIDE (_etext = .); PROVIDE (etext = .); + . 
= ALIGN(8); + __ftr_fixup : { + *(__ftr_fixup) + } + + . = ALIGN(8); + __fw_ftr_fixup : { + *(__fw_ftr_fixup) + } + /* Other stuff is appended to the text segment: */ .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } .rodata1 : { *(.rodata1) } diff --git a/arch/ppc/kernel/setup.c b/arch/ppc/kernel/setup.c index 41a640f16bdd..27faeca2c7a2 100644 --- a/arch/ppc/kernel/setup.c +++ b/arch/ppc/kernel/setup.c @@ -314,7 +314,7 @@ early_init(int r3, int r4, int r5) * that depend on which cpu we have. */ spec = identify_cpu(offset); - do_feature_fixups(offset, spec->cpu_features, + do_feature_fixups(spec->cpu_features, PTRRELOC(&__start___ftr_fixup), PTRRELOC(&__stop___ftr_fixup)); diff --git a/include/asm-powerpc/asm-compat.h b/include/asm-powerpc/asm-compat.h index 8e64be0cc47d..c89bd58ee283 100644 --- a/include/asm-powerpc/asm-compat.h +++ b/include/asm-powerpc/asm-compat.h @@ -14,6 +14,58 @@ # define ASM_CONST(x) __ASM_CONST(x) #endif + +/* + * Feature section common macros + * + * Note that the entries now contain offsets between the table entry + * and the code rather than absolute code pointers in order to be + * useable with the vdso shared library. There is also an assumption + * that values will be negative, that is, the fixup table has to be + * located after the code it fixes up. + */ +#ifdef CONFIG_PPC64 +#ifdef __powerpc64__ +/* 64 bits kernel, 64 bits code */ +#define MAKE_FTR_SECTION_ENTRY(msk, val, label, sect) \ +99: \ + .section sect,"a"; \ + .align 3; \ +98: \ + .llong msk; \ + .llong val; \ + .llong label##b-98b; \ + .llong 99b-98b; \ + .previous +#else /* __powerpc64__ */ +/* 64 bits kernel, 32 bits code (ie. vdso32) */ +#define MAKE_FTR_SECTION_ENTRY(msk, val, label, sect) \ +99: \ + .section sect,"a"; \ + .align 3; \ +98: \ + .llong msk; \ + .llong val; \ + .long 0xffffffff; \ + .long label##b-98b; \ + .long 0xffffffff; \ + .long 99b-98b; \ + .previous +#endif /* !__powerpc64__ */ +#else /* CONFIG_PPC64 */ +/* 32 bits kernel, 32 bits code */ +#define MAKE_FTR_SECTION_ENTRY(msk, val, label, sect) \ +99: \ + .section sect,"a"; \ + .align 2; \ +98: \ + .long msk; \ + .long val; \ + .long label##b-98b; \ + .long 99b-98b; \ + .previous +#endif /* !CONFIG_PPC64 */ + #ifdef __powerpc64__ /* operations for longs and pointers */ diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h index 65faf322ace0..02e52d68cbbe 100644 --- a/include/asm-powerpc/cputable.h +++ b/include/asm-powerpc/cputable.h @@ -92,8 +92,8 @@ extern struct cpu_spec *cur_cpu_spec; extern unsigned int __start___ftr_fixup, __stop___ftr_fixup; extern struct cpu_spec *identify_cpu(unsigned long offset); -extern void do_feature_fixups(unsigned long offset, unsigned long value, - void *fixup_start, void *fixup_end); +extern void do_feature_fixups(unsigned long value, void *fixup_start, + void *fixup_end); #endif /* __ASSEMBLY__ */ @@ -435,32 +435,11 @@ static inline int cpu_has_feature(unsigned long feature) #ifdef __ASSEMBLY__ #define BEGIN_FTR_SECTION_NESTED(label) label: -#define BEGIN_FTR_SECTION BEGIN_FTR_SECTION_NESTED(98) - -#ifndef __powerpc64__ -#define END_FTR_SECTION_NESTED(msk, val, label) \ -99: \ - .section __ftr_fixup,"a"; \ - .align 2; \ - .long msk; \ - .long val; \ - .long label##b; \ - .long 99b; \ - .previous -#else /* __powerpc64__ */ +#define BEGIN_FTR_SECTION BEGIN_FTR_SECTION_NESTED(97) #define END_FTR_SECTION_NESTED(msk, val, label) \ -99: \ - .section __ftr_fixup,"a"; \ - .align 3; \ - .llong msk; \ - .llong val; \ - .llong label##b; \ - .llong 99b; \ - .previous -#endif 
/* __powerpc64__ */ - + MAKE_FTR_SECTION_ENTRY(msk, val, label, __ftr_fixup) #define END_FTR_SECTION(msk, val) \ - END_FTR_SECTION_NESTED(msk, val, 98) + END_FTR_SECTION_NESTED(msk, val, 97) #define END_FTR_SECTION_IFSET(msk) END_FTR_SECTION((msk), (msk)) #define END_FTR_SECTION_IFCLR(msk) END_FTR_SECTION((msk), 0) diff --git a/include/asm-powerpc/firmware.h b/include/asm-powerpc/firmware.h index c16e0a6b9dab..fdf9aff71150 100644 --- a/include/asm-powerpc/firmware.h +++ b/include/asm-powerpc/firmware.h @@ -100,17 +100,12 @@ extern unsigned int __start___fw_ftr_fixup, __stop___fw_ftr_fixup; #else /* __ASSEMBLY__ */ -#define BEGIN_FW_FTR_SECTION 96: - +#define BEGIN_FW_FTR_SECTION_NESTED(label) label: +#define BEGIN_FW_FTR_SECTION BEGIN_FW_FTR_SECTION_NESTED(97) +#define END_FW_FTR_SECTION_NESTED(msk, val, label) \ + MAKE_FTR_SECTION_ENTRY(msk, val, label, __fw_ftr_fixup) #define END_FW_FTR_SECTION(msk, val) \ -97: \ - .section __fw_ftr_fixup,"a"; \ - .align 3; \ - .llong msk; \ - .llong val; \ - .llong 96b; \ - .llong 97b; \ - .previous + END_FW_FTR_SECTION_NESTED(msk, val, 97) #define END_FW_FTR_SECTION_IFSET(msk) END_FW_FTR_SECTION((msk), (msk)) #define END_FW_FTR_SECTION_IFCLR(msk) END_FW_FTR_SECTION((msk), 0) diff --git a/include/asm-powerpc/timex.h b/include/asm-powerpc/timex.h index 3b9a8e786806..e3f08cf91486 100644 --- a/include/asm-powerpc/timex.h +++ b/include/asm-powerpc/timex.h @@ -30,13 +30,15 @@ static inline cycles_t get_cycles(void) ret = 0; __asm__ __volatile__( - "98: mftb %0\n" + "97: mftb %0\n" "99:\n" ".section __ftr_fixup,\"a\"\n" + ".align 2\n" + "98:\n" " .long %1\n" " .long 0\n" - " .long 98b\n" - " .long 99b\n" + " .long 97b-98b\n" + " .long 99b-98b\n" ".previous" : "=r" (ret) : "i" (CPU_FTR_601)); #endif -- cgit v1.2.3 From 859deea949c382d9ccb6397fe33df3703ecef45d Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt <benh@kernel.crashing.org> Date: Fri, 20 Oct 2006 14:37:05 +1000 Subject: [POWERPC] Cell timebase bug workaround The Cell CPU timebase has an erratum. When reading the entire 64 bits of the timebase with one mftb instruction, there is a window of a handful of cycles during which one might read a value with the low order 32 bits already reset to 0x00000000 but the high order bits not yet incremented by one. This fixes it by reading the timebase again until the low order 32 bits are no longer 0. That might introduce occasional latencies if hitting mftb just at the wrong time, but no more than 70ns on a cell blade, and that was considered acceptable. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Acked-by: Olof Johansson <olof@lixom.net> Signed-off-by: Paul Mackerras <paulus@samba.org> --- arch/powerpc/kernel/vdso64/gettimeofday.S | 6 ++++-- include/asm-powerpc/cputable.h | 3 ++- include/asm-powerpc/ppc_asm.h | 18 ++++++++++++++---- include/asm-powerpc/reg.h | 25 +++++++++++++++++++++++++ include/asm-powerpc/time.h | 27 ++++++++++++++++----------- include/asm-powerpc/timex.h | 12 +++++------- 6 files changed, 66 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/vdso64/gettimeofday.S b/arch/powerpc/kernel/vdso64/gettimeofday.S index 56e76ff5498f..40ffd9b6cef7 100644 --- a/arch/powerpc/kernel/vdso64/gettimeofday.S +++ b/arch/powerpc/kernel/vdso64/gettimeofday.S @@ -229,8 +229,10 @@ V_FUNCTION_BEGIN(__do_get_xsec) xor r0,r8,r8 /* create dependency */ add r3,r3,r0 - /* Get TB & offset it */ - mftb r7 + /* Get TB & offset it. We use the MFTB macro which will generate + * workaround code for Cell.
+ */ + MFTB(r7) ld r9,CFG_TB_ORIG_STAMP(r3) subf r7,r9,r7 diff --git a/include/asm-powerpc/cputable.h b/include/asm-powerpc/cputable.h index 02e52d68cbbe..a9a40149a7c0 100644 --- a/include/asm-powerpc/cputable.h +++ b/include/asm-powerpc/cputable.h @@ -147,6 +147,7 @@ extern void do_feature_fixups(unsigned long value, void *fixup_start, #define CPU_FTR_CI_LARGE_PAGE LONG_ASM_CONST(0x0000100000000000) #define CPU_FTR_PAUSE_ZERO LONG_ASM_CONST(0x0000200000000000) #define CPU_FTR_PURR LONG_ASM_CONST(0x0000400000000000) +#define CPU_FTR_CELL_TB_BUG LONG_ASM_CONST(0x0000800000000000) #ifndef __ASSEMBLY__ @@ -335,7 +336,7 @@ extern void do_feature_fixups(unsigned long value, void *fixup_start, #define CPU_FTRS_CELL (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ - CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE) + CPU_FTR_PAUSE_ZERO | CPU_FTR_CI_LARGE_PAGE | CPU_FTR_CELL_TB_BUG) #define CPU_FTRS_PA6T (CPU_FTR_SPLIT_ID_CACHE | CPU_FTR_USE_TB | \ CPU_FTR_HPTE_TABLE | CPU_FTR_PPCAS_ARCH_V2 | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_CI_LARGE_PAGE | \ diff --git a/include/asm-powerpc/ppc_asm.h b/include/asm-powerpc/ppc_asm.h index a940cfe040da..fa083d8e4663 100644 --- a/include/asm-powerpc/ppc_asm.h +++ b/include/asm-powerpc/ppc_asm.h @@ -30,9 +30,9 @@ BEGIN_FTR_SECTION; \ mfspr ra,SPRN_PURR; /* get processor util. reg */ \ END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ BEGIN_FTR_SECTION; \ - mftb ra; /* or get TB if no PURR */ \ + MFTB(ra); /* or get TB if no PURR */ \ END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ - ld rb,PACA_STARTPURR(r13); \ + ld rb,PACA_STARTPURR(r13); \ std ra,PACA_STARTPURR(r13); \ subf rb,rb,ra; /* subtract start value */ \ ld ra,PACA_USER_TIME(r13); \ @@ -45,9 +45,9 @@ BEGIN_FTR_SECTION; \ mfspr ra,SPRN_PURR; /* get processor util. 
reg */ \ END_FTR_SECTION_IFSET(CPU_FTR_PURR); \ BEGIN_FTR_SECTION; \ - mftb ra; /* or get TB if no PURR */ \ + MFTB(ra); /* or get TB if no PURR */ \ END_FTR_SECTION_IFCLR(CPU_FTR_PURR); \ - ld rb,PACA_STARTPURR(r13); \ + ld rb,PACA_STARTPURR(r13); \ std ra,PACA_STARTPURR(r13); \ subf rb,rb,ra; /* subtract start value */ \ ld ra,PACA_SYSTEM_TIME(r13); \ @@ -274,6 +274,16 @@ END_FTR_SECTION_IFSET(CPU_FTR_601) #define ISYNC_601 #endif +#ifdef CONFIG_PPC_CELL +#define MFTB(dest) \ +90: mftb dest; \ +BEGIN_FTR_SECTION_NESTED(96); \ + cmpwi dest,0; \ + beq- 90b; \ +END_FTR_SECTION_NESTED(CPU_FTR_CELL_TB_BUG, CPU_FTR_CELL_TB_BUG, 96) +#else +#define MFTB(dest) mftb dest +#endif #ifndef CONFIG_SMP #define TLBSYNC diff --git a/include/asm-powerpc/reg.h b/include/asm-powerpc/reg.h index fde5c804eccb..6faae7b14d55 100644 --- a/include/asm-powerpc/reg.h +++ b/include/asm-powerpc/reg.h @@ -619,10 +619,35 @@ : "=r" (rval)); rval;}) #define mtspr(rn, v) asm volatile("mtspr " __stringify(rn) ",%0" : : "r" (v)) +#ifdef __powerpc64__ +#ifdef CONFIG_PPC_CELL +#define mftb() ({unsigned long rval; \ + asm volatile( \ + "90: mftb %0;\n" \ + "97: cmpwi %0,0;\n" \ + " beq- 90b;\n" \ + "99:\n" \ + ".section __ftr_fixup,\"a\"\n" \ + ".align 3\n" \ + "98:\n" \ + " .llong %1\n" \ + " .llong %1\n" \ + " .llong 97b-98b\n" \ + " .llong 99b-98b\n" \ + ".previous" \ + : "=r" (rval) : "i" (CPU_FTR_CELL_TB_BUG)); rval;}) +#else #define mftb() ({unsigned long rval; \ asm volatile("mftb %0" : "=r" (rval)); rval;}) +#endif /* !CONFIG_PPC_CELL */ + +#else /* __powerpc64__ */ + #define mftbl() ({unsigned long rval; \ asm volatile("mftbl %0" : "=r" (rval)); rval;}) +#define mftbu() ({unsigned long rval; \ + asm volatile("mftbu %0" : "=r" (rval)); rval;}) +#endif /* !__powerpc64__ */ #define mttbl(v) asm volatile("mttbl %0":: "r"(v)) #define mttbu(v) asm volatile("mttbu %0":: "r"(v)) diff --git a/include/asm-powerpc/time.h b/include/asm-powerpc/time.h index b051d4c88c3b..a78285010d62 100644 --- a/include/asm-powerpc/time.h +++ b/include/asm-powerpc/time.h @@ -82,30 +82,35 @@ struct div_result { #define __USE_RTC() 0 #endif -/* On ppc64 this gets us the whole timebase; on ppc32 just the lower half */ +#ifdef CONFIG_PPC64 + +/* For compatibility, get_tbl() is defined as get_tb() on ppc64 */ +#define get_tbl get_tb + +#else + static inline unsigned long get_tbl(void) { - unsigned long tbl; - #if defined(CONFIG_403GCX) + unsigned long tbl; asm volatile("mfspr %0, 0x3dd" : "=r" (tbl)); + return tbl; #else - asm volatile("mftb %0" : "=r" (tbl)); + return mftbl(); #endif - return tbl; } static inline unsigned int get_tbu(void) { +#ifdef CONFIG_403GCX unsigned int tbu; - -#if defined(CONFIG_403GCX) asm volatile("mfspr %0, 0x3dc" : "=r" (tbu)); + return tbu; #else - asm volatile("mftbu %0" : "=r" (tbu)); + return mftbu(); #endif - return tbu; } +#endif /* !CONFIG_PPC64 */ static inline unsigned int get_rtcl(void) { @@ -131,7 +136,7 @@ static inline u64 get_tb(void) { return mftb(); } -#else +#else /* CONFIG_PPC64 */ static inline u64 get_tb(void) { unsigned int tbhi, tblo, tbhi2; @@ -144,7 +149,7 @@ static inline u64 get_tb(void) return ((u64)tbhi << 32) | tblo; } -#endif +#endif /* !CONFIG_PPC64 */ static inline void set_tb(unsigned int upper, unsigned int lower) { diff --git a/include/asm-powerpc/timex.h b/include/asm-powerpc/timex.h index e3f08cf91486..92dedde761d1 100644 --- a/include/asm-powerpc/timex.h +++ b/include/asm-powerpc/timex.h @@ -8,6 +8,7 @@ */ #include <asm/cputable.h> +#include <asm/reg.h> #define CLOCK_TICK_RATE 1024000 
/* Underlying HZ */ @@ -15,13 +16,11 @@ typedef unsigned long cycles_t; static inline cycles_t get_cycles(void) { - cycles_t ret; - #ifdef __powerpc64__ - - __asm__ __volatile__("mftb %0" : "=r" (ret) : ); - + return mftb(); #else + cycles_t ret; + /* * For the "cycle" counter we use the timebase lower half. * Currently only used on SMP. @@ -41,9 +40,8 @@ static inline cycles_t get_cycles(void) " .long 99b-98b\n" ".previous" : "=r" (ret) : "i" (CPU_FTR_601)); -#endif - return ret; +#endif } #endif /* __KERNEL__ */ -- cgit v1.2.3 From e2100efb266c9335925191afe79f81f8d0a5807e Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt <benh@kernel.crashing.org> Date: Fri, 20 Oct 2006 11:49:54 +1000 Subject: [POWERPC] Fix device_is_compatible() const warning Fix a const'ification warning with device_is_compatible() and friends, caused by get_property() not properly having const on its input device node argument. Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org> --- arch/powerpc/kernel/prom.c | 8 +++++--- include/asm-powerpc/prom.h | 10 ++++++---- 2 files changed, 11 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 865b9648d0d5..bdb412d4b748 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -1014,7 +1014,7 @@ EXPORT_SYMBOL(find_all_nodes); /** Checks if the given "compat" string matches one of the strings in * the device's "compatible" property */ -int device_is_compatible(struct device_node *device, const char *compat) +int device_is_compatible(const struct device_node *device, const char *compat) { const char* cp; int cplen, l; @@ -1491,7 +1491,8 @@ static int __init prom_reconfig_setup(void) __initcall(prom_reconfig_setup); #endif -struct property *of_find_property(struct device_node *np, const char *name, +struct property *of_find_property(const struct device_node *np, + const char *name, int *lenp) { struct property *pp; @@ -1512,7 +1513,8 @@ struct property *of_find_property(struct device_node *np, const char *name, * Find a property with a given name for a given node * and return the value. */ -const void *get_property(struct device_node *np, const char *name, int *lenp) +const void *get_property(const struct device_node *np, const char *name, + int *lenp) { struct property *pp = of_find_property(np,name,lenp); return pp ?
pp->value : NULL; diff --git a/include/asm-powerpc/prom.h b/include/asm-powerpc/prom.h index 524629769336..ec11d44eaeb5 100644 --- a/include/asm-powerpc/prom.h +++ b/include/asm-powerpc/prom.h @@ -134,7 +134,7 @@ extern struct device_node *of_find_all_nodes(struct device_node *prev); extern struct device_node *of_get_parent(const struct device_node *node); extern struct device_node *of_get_next_child(const struct device_node *node, struct device_node *prev); -extern struct property *of_find_property(struct device_node *np, +extern struct property *of_find_property(const struct device_node *np, const char *name, int *lenp); extern struct device_node *of_node_get(struct device_node *node); @@ -158,10 +158,12 @@ extern void of_detach_node(const struct device_node *); extern void finish_device_tree(void); extern void unflatten_device_tree(void); extern void early_init_devtree(void *); -extern int device_is_compatible(struct device_node *device, const char *); +extern int device_is_compatible(const struct device_node *device, + const char *); extern int machine_is_compatible(const char *compat); -extern const void *get_property(struct device_node *node, const char *name, - int *lenp); +extern const void *get_property(const struct device_node *node, + const char *name, + int *lenp); extern void print_properties(struct device_node *node); extern int prom_n_addr_cells(struct device_node* np); extern int prom_n_size_cells(struct device_node* np); -- cgit v1.2.3 From f4d4c354bca18210296cc0a8f592c0cdb720bf20 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt <benh@kernel.crashing.org> Date: Wed, 25 Oct 2006 13:22:27 +1000 Subject: [POWERPC] Fix CHRP platforms with only 8259 On CHRP platforms with only an 8259 controller, we should set the default IRQ host to the 8259 driver's host for the IRQ probing fallbacks to work in case the IRQ tree is incorrect (as on Pegasos, for example). Without this fix, we get a bunch of WARN_ON's during boot.
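For illustration, here is a minimal sketch of the pattern this patch applies; it is not part of the commit. The function example_find_8259() and its parameters are hypothetical, while i8259_init(), i8259_irq(), irq_set_default_host() and the newly added i8259_get_host() are the interfaces the diff below actually touches:

    #include <asm/prom.h>     /* struct device_node */
    #include <asm/irq.h>      /* irq_set_default_host() */
    #include <asm/i8259.h>    /* i8259_init(), i8259_irq(), i8259_get_host() */
    #include <asm/machdep.h>  /* ppc_md */

    static void __init example_find_8259(struct device_node *pic,
                                         unsigned long int_ack)
    {
            i8259_init(pic, int_ack);

            /* Nothing else claimed ppc_md.get_irq, so the 8259 is the
             * only interrupt controller present.  Make its irq_host the
             * default, so that mappings requested without an explicit
             * host still fall back to it even when the interrupt tree
             * in the device tree is wrong. */
            if (ppc_md.get_irq == NULL) {
                    ppc_md.get_irq = i8259_irq;
                    irq_set_default_host(i8259_get_host());
            }
    }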
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org> --- arch/powerpc/platforms/chrp/setup.c | 4 +++- arch/powerpc/sysdev/i8259.c | 5 +++++ include/asm-powerpc/i8259.h | 1 + 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/powerpc/platforms/chrp/setup.c b/arch/powerpc/platforms/chrp/setup.c index cae3d13229b9..49b8dabcbc99 100644 --- a/arch/powerpc/platforms/chrp/setup.c +++ b/arch/powerpc/platforms/chrp/setup.c @@ -477,8 +477,10 @@ static void __init chrp_find_8259(void) " address, polling\n"); i8259_init(pic, chrp_int_ack); - if (ppc_md.get_irq == NULL) + if (ppc_md.get_irq == NULL) { ppc_md.get_irq = i8259_irq; + irq_set_default_host(i8259_get_host()); + } if (chrp_mpic != NULL) { cascade_irq = irq_of_parse_and_map(pic, 0); if (cascade_irq == NO_IRQ) diff --git a/arch/powerpc/sysdev/i8259.c b/arch/powerpc/sysdev/i8259.c index 0450265d73bb..ad87adc975bc 100644 --- a/arch/powerpc/sysdev/i8259.c +++ b/arch/powerpc/sysdev/i8259.c @@ -224,6 +224,11 @@ static struct irq_host_ops i8259_host_ops = { .xlate = i8259_host_xlate, }; +struct irq_host *i8259_get_host(void) +{ + return i8259_host; +} + /** * i8259_init - Initialize the legacy controller * @node: device node of the legacy PIC (can be NULL, but then, it will match diff --git a/include/asm-powerpc/i8259.h b/include/asm-powerpc/i8259.h index 78489fb8d140..db1362f8c603 100644 --- a/include/asm-powerpc/i8259.h +++ b/include/asm-powerpc/i8259.h @@ -7,6 +7,7 @@ #ifdef CONFIG_PPC_MERGE extern void i8259_init(struct device_node *node, unsigned long intack_addr); extern unsigned int i8259_irq(void); +extern struct irq_host *i8259_get_host(void); #else extern void i8259_init(unsigned long intack_addr, int offset); extern int i8259_irq(void); -- cgit v1.2.3 From 43a145a3440c5c5f24ff2888801e40e2242187e6 Mon Sep 17 00:00:00 2001 From: Mike Christie <michaelc@cs.wisc.edu> Date: Mon, 16 Oct 2006 18:09:38 -0400 Subject: [SCSI] iscsi class: fix slab corruption during restart The transport class recv mempools are causing slab corruption. We could hack around netlink's lack of mempool support like dm, but it is just too ugly (dm's hack is ugly enough :) when you need to support broadcast. This patch removes the recv pools. We have not used them even when we were allocating 20 MB per session and the system only had 64 MB. And we have no pools on the send side and have been ok there. When Peter's work gets merged we can use that since the network guys are in favor of that approach and are not going to add mempools everywhere. Signed-off-by: Mike Christie <michaelc@cs.wisc.edu> Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com> --- drivers/scsi/scsi_transport_iscsi.c | 246 +++--------------------------------- include/scsi/scsi_transport_iscsi.h | 4 - 2 files changed, 17 insertions(+), 233 deletions(-) (limited to 'include') diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 7b0019cccce3..2d3baa99ca25 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -21,7 +21,6 @@ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/ #include <linux/module.h> -#include <linux/mempool.h> #include <linux/mutex.h> #include <net/tcp.h> #include <scsi/scsi.h> @@ -149,30 +148,6 @@ static DECLARE_TRANSPORT_CLASS(iscsi_connection_class, static struct sock *nls; static DEFINE_MUTEX(rx_queue_mutex); -struct mempool_zone { - mempool_t *pool; - atomic_t allocated; - int size; - int hiwat; - struct list_head freequeue; - spinlock_t freelock; -}; - -static struct mempool_zone *z_reply; - -/* - * Z_MAX_* - actual mempool size allocated at the mempool_zone_init() time - * Z_HIWAT_* - zone's high watermark when if_error bit will be set to -ENOMEM - * so daemon will notice OOM on NETLINK tranposrt level and will - * be able to predict or change operational behavior - */ -#define Z_MAX_REPLY 8 -#define Z_HIWAT_REPLY 6 -#define Z_MAX_PDU 8 -#define Z_HIWAT_PDU 6 -#define Z_MAX_ERROR 16 -#define Z_HIWAT_ERROR 12 - static LIST_HEAD(sesslist); static DEFINE_SPINLOCK(sesslock); static LIST_HEAD(connlist); @@ -414,59 +389,11 @@ int iscsi_destroy_session(struct iscsi_cls_session *session) } EXPORT_SYMBOL_GPL(iscsi_destroy_session); -static void mempool_zone_destroy(struct mempool_zone *zp) -{ - mempool_destroy(zp->pool); - kfree(zp); -} - -static void* -mempool_zone_alloc_skb(gfp_t gfp_mask, void *pool_data) -{ - struct mempool_zone *zone = pool_data; - - return alloc_skb(zone->size, gfp_mask); -} - -static void -mempool_zone_free_skb(void *element, void *pool_data) -{ - kfree_skb(element); -} - -static struct mempool_zone * -mempool_zone_init(unsigned max, unsigned size, unsigned hiwat) -{ - struct mempool_zone *zp; - - zp = kzalloc(sizeof(*zp), GFP_KERNEL); - if (!zp) - return NULL; - - zp->size = size; - zp->hiwat = hiwat; - INIT_LIST_HEAD(&zp->freequeue); - spin_lock_init(&zp->freelock); - atomic_set(&zp->allocated, 0); - - zp->pool = mempool_create(max, mempool_zone_alloc_skb, - mempool_zone_free_skb, zp); - if (!zp->pool) { - kfree(zp); - return NULL; - } - - return zp; -} - static void iscsi_conn_release(struct device *dev) { struct iscsi_cls_conn *conn = iscsi_dev_to_conn(dev); struct device *parent = conn->dev.parent; - mempool_zone_destroy(conn->z_pdu); - mempool_zone_destroy(conn->z_error); - kfree(conn); put_device(parent); } @@ -476,31 +403,6 @@ static int iscsi_is_conn_dev(const struct device *dev) return dev->release == iscsi_conn_release; } -static int iscsi_create_event_pools(struct iscsi_cls_conn *conn) -{ - conn->z_pdu = mempool_zone_init(Z_MAX_PDU, - NLMSG_SPACE(sizeof(struct iscsi_uevent) + - sizeof(struct iscsi_hdr) + - DEFAULT_MAX_RECV_DATA_SEGMENT_LENGTH), - Z_HIWAT_PDU); - if (!conn->z_pdu) { - dev_printk(KERN_ERR, &conn->dev, "iscsi: can not allocate " - "pdu zone for new conn\n"); - return -ENOMEM; - } - - conn->z_error = mempool_zone_init(Z_MAX_ERROR, - NLMSG_SPACE(sizeof(struct iscsi_uevent)), - Z_HIWAT_ERROR); - if (!conn->z_error) { - dev_printk(KERN_ERR, &conn->dev, "iscsi: can not allocate " - "error zone for new conn\n"); - mempool_zone_destroy(conn->z_pdu); - return -ENOMEM; - } - return 0; -} - /** * iscsi_create_conn - create iscsi class connection * @session: iscsi cls session @@ -533,12 +435,9 @@ iscsi_create_conn(struct iscsi_cls_session *session, uint32_t cid) conn->transport = transport; conn->cid = cid; - if (iscsi_create_event_pools(conn)) - goto free_conn; - /* this is released in the dev's release function */ if (!get_device(&session->dev)) - goto free_conn_pools; + goto free_conn; snprintf(conn->dev.bus_id, BUS_ID_SIZE, "connection%d:%u", session->sid, cid); @@ -555,8 +454,6 @@ 
iscsi_create_conn(struct iscsi_cls_session *session, uint32_t cid) release_parent_ref: put_device(&session->dev); -free_conn_pools: - free_conn: kfree(conn); return NULL; @@ -599,81 +496,31 @@ iscsi_if_transport_lookup(struct iscsi_transport *tt) return NULL; } -static inline struct list_head *skb_to_lh(struct sk_buff *skb) -{ - return (struct list_head *)&skb->cb; -} - -static void -mempool_zone_complete(struct mempool_zone *zone) -{ - unsigned long flags; - struct list_head *lh, *n; - - spin_lock_irqsave(&zone->freelock, flags); - list_for_each_safe(lh, n, &zone->freequeue) { - struct sk_buff *skb = (struct sk_buff *)((char *)lh - - offsetof(struct sk_buff, cb)); - if (!skb_shared(skb)) { - list_del(skb_to_lh(skb)); - mempool_free(skb, zone->pool); - atomic_dec(&zone->allocated); - } - } - spin_unlock_irqrestore(&zone->freelock, flags); -} - -static struct sk_buff* -mempool_zone_get_skb(struct mempool_zone *zone) -{ - struct sk_buff *skb; - - skb = mempool_alloc(zone->pool, GFP_ATOMIC); - if (skb) - atomic_inc(&zone->allocated); - return skb; -} - static int -iscsi_broadcast_skb(struct mempool_zone *zone, struct sk_buff *skb, gfp_t gfp) +iscsi_broadcast_skb(struct sk_buff *skb, gfp_t gfp) { - unsigned long flags; int rc; - skb_get(skb); rc = netlink_broadcast(nls, skb, 0, 1, gfp); if (rc < 0) { - mempool_free(skb, zone->pool); printk(KERN_ERR "iscsi: can not broadcast skb (%d)\n", rc); return rc; } - spin_lock_irqsave(&zone->freelock, flags); - INIT_LIST_HEAD(skb_to_lh(skb)); - list_add(skb_to_lh(skb), &zone->freequeue); - spin_unlock_irqrestore(&zone->freelock, flags); return 0; } static int -iscsi_unicast_skb(struct mempool_zone *zone, struct sk_buff *skb, int pid) +iscsi_unicast_skb(struct sk_buff *skb, int pid) { - unsigned long flags; int rc; - skb_get(skb); rc = netlink_unicast(nls, skb, pid, MSG_DONTWAIT); if (rc < 0) { - mempool_free(skb, zone->pool); printk(KERN_ERR "iscsi: can not unicast skb (%d)\n", rc); return rc; } - spin_lock_irqsave(&zone->freelock, flags); - INIT_LIST_HEAD(skb_to_lh(skb)); - list_add(skb_to_lh(skb), &zone->freequeue); - spin_unlock_irqrestore(&zone->freelock, flags); - return 0; } @@ -692,9 +539,7 @@ int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, if (!priv) return -EINVAL; - mempool_zone_complete(conn->z_pdu); - - skb = mempool_zone_get_skb(conn->z_pdu); + skb = alloc_skb(len, GFP_ATOMIC); if (!skb) { iscsi_conn_error(conn, ISCSI_ERR_CONN_FAILED); dev_printk(KERN_ERR, &conn->dev, "iscsi: can not deliver " @@ -707,15 +552,13 @@ int iscsi_recv_pdu(struct iscsi_cls_conn *conn, struct iscsi_hdr *hdr, memset(ev, 0, sizeof(*ev)); ev->transport_handle = iscsi_handle(conn->transport); ev->type = ISCSI_KEVENT_RECV_PDU; - if (atomic_read(&conn->z_pdu->allocated) >= conn->z_pdu->hiwat) - ev->iferror = -ENOMEM; ev->r.recv_req.cid = conn->cid; ev->r.recv_req.sid = iscsi_conn_get_sid(conn); pdu = (char*)ev + sizeof(*ev); memcpy(pdu, hdr, sizeof(struct iscsi_hdr)); memcpy(pdu + sizeof(struct iscsi_hdr), data, data_size); - return iscsi_unicast_skb(conn->z_pdu, skb, priv->daemon_pid); + return iscsi_unicast_skb(skb, priv->daemon_pid); } EXPORT_SYMBOL_GPL(iscsi_recv_pdu); @@ -731,9 +574,7 @@ void iscsi_conn_error(struct iscsi_cls_conn *conn, enum iscsi_err error) if (!priv) return; - mempool_zone_complete(conn->z_error); - - skb = mempool_zone_get_skb(conn->z_error); + skb = alloc_skb(len, GFP_ATOMIC); if (!skb) { dev_printk(KERN_ERR, &conn->dev, "iscsi: gracefully ignored " "conn error (%d)\n", error); @@ -744,13 +585,11 @@ void 
iscsi_conn_error(struct iscsi_cls_conn *conn, enum iscsi_err error) ev = NLMSG_DATA(nlh); ev->transport_handle = iscsi_handle(conn->transport); ev->type = ISCSI_KEVENT_CONN_ERROR; - if (atomic_read(&conn->z_error->allocated) >= conn->z_error->hiwat) - ev->iferror = -ENOMEM; ev->r.connerror.error = error; ev->r.connerror.cid = conn->cid; ev->r.connerror.sid = iscsi_conn_get_sid(conn); - iscsi_broadcast_skb(conn->z_error, skb, GFP_ATOMIC); + iscsi_broadcast_skb(skb, GFP_ATOMIC); dev_printk(KERN_INFO, &conn->dev, "iscsi: detected conn error (%d)\n", error); @@ -767,9 +606,7 @@ iscsi_if_send_reply(int pid, int seq, int type, int done, int multi, int flags = multi ? NLM_F_MULTI : 0; int t = done ? NLMSG_DONE : type; - mempool_zone_complete(z_reply); - - skb = mempool_zone_get_skb(z_reply); + skb = alloc_skb(len, GFP_ATOMIC); /* * FIXME: * user is supposed to react on iferror == -ENOMEM; @@ -780,7 +617,7 @@ iscsi_if_send_reply(int pid, int seq, int type, int done, int multi, nlh = __nlmsg_put(skb, pid, seq, t, (len - sizeof(*nlh)), 0); nlh->nlmsg_flags = flags; memcpy(NLMSG_DATA(nlh), payload, size); - return iscsi_unicast_skb(z_reply, skb, pid); + return iscsi_unicast_skb(skb, pid); } static int @@ -810,9 +647,7 @@ iscsi_if_get_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh) do { int actual_size; - mempool_zone_complete(conn->z_pdu); - - skbstat = mempool_zone_get_skb(conn->z_pdu); + skbstat = alloc_skb(len, GFP_ATOMIC); if (!skbstat) { dev_printk(KERN_ERR, &conn->dev, "iscsi: can not " "deliver stats: OOM\n"); @@ -825,8 +660,6 @@ iscsi_if_get_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh) memset(evstat, 0, sizeof(*evstat)); evstat->transport_handle = iscsi_handle(conn->transport); evstat->type = nlh->nlmsg_type; - if (atomic_read(&conn->z_pdu->allocated) >= conn->z_pdu->hiwat) - evstat->iferror = -ENOMEM; evstat->u.get_stats.cid = ev->u.get_stats.cid; evstat->u.get_stats.sid = @@ -845,7 +678,7 @@ iscsi_if_get_stats(struct iscsi_transport *transport, struct nlmsghdr *nlh) skb_trim(skbstat, NLMSG_ALIGN(actual_size)); nlhstat->nlmsg_len = actual_size; - err = iscsi_unicast_skb(conn->z_pdu, skbstat, priv->daemon_pid); + err = iscsi_unicast_skb(skbstat, priv->daemon_pid); } while (err < 0 && err != -ECONNREFUSED); return err; @@ -876,9 +709,7 @@ int iscsi_if_destroy_session_done(struct iscsi_cls_conn *conn) session = iscsi_dev_to_session(conn->dev.parent); shost = iscsi_session_to_shost(session); - mempool_zone_complete(conn->z_pdu); - - skb = mempool_zone_get_skb(conn->z_pdu); + skb = alloc_skb(len, GFP_KERNEL); if (!skb) { dev_printk(KERN_ERR, &conn->dev, "Cannot notify userspace of " "session creation event\n"); @@ -896,7 +727,7 @@ int iscsi_if_destroy_session_done(struct iscsi_cls_conn *conn) * this will occur if the daemon is not up, so we just warn * the user and when the daemon is restarted it will handle it */ - rc = iscsi_broadcast_skb(conn->z_pdu, skb, GFP_KERNEL); + rc = iscsi_broadcast_skb(skb, GFP_KERNEL); if (rc < 0) dev_printk(KERN_ERR, &conn->dev, "Cannot notify userspace of " "session destruction event. 
Check iscsi daemon\n"); @@ -939,9 +770,7 @@ int iscsi_if_create_session_done(struct iscsi_cls_conn *conn) session = iscsi_dev_to_session(conn->dev.parent); shost = iscsi_session_to_shost(session); - mempool_zone_complete(conn->z_pdu); - - skb = mempool_zone_get_skb(conn->z_pdu); + skb = alloc_skb(len, GFP_KERNEL); if (!skb) { dev_printk(KERN_ERR, &conn->dev, "Cannot notify userspace of " "session creation event\n"); @@ -959,7 +788,7 @@ int iscsi_if_create_session_done(struct iscsi_cls_conn *conn) * this will occur if the daemon is not up, so we just warn * the user and when the daemon is restarted it will handle it */ - rc = iscsi_broadcast_skb(conn->z_pdu, skb, GFP_KERNEL); + rc = iscsi_broadcast_skb(skb, GFP_KERNEL); if (rc < 0) dev_printk(KERN_ERR, &conn->dev, "Cannot notify userspace of " "session creation event. Check iscsi daemon\n"); @@ -1278,9 +1107,6 @@ iscsi_if_rx(struct sock *sk, int len) err = iscsi_if_send_reply( NETLINK_CREDS(skb)->pid, nlh->nlmsg_seq, nlh->nlmsg_type, 0, 0, ev, sizeof(*ev)); - if (atomic_read(&z_reply->allocated) >= - z_reply->hiwat) - ev->iferror = -ENOMEM; } while (err < 0 && err != -ECONNREFUSED); skb_pull(skb, rlen); } @@ -1584,32 +1410,6 @@ int iscsi_unregister_transport(struct iscsi_transport *tt) } EXPORT_SYMBOL_GPL(iscsi_unregister_transport); -static int -iscsi_rcv_nl_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct netlink_notify *n = ptr; - - if (event == NETLINK_URELEASE && - n->protocol == NETLINK_ISCSI && n->pid) { - struct iscsi_cls_conn *conn; - unsigned long flags; - - mempool_zone_complete(z_reply); - spin_lock_irqsave(&connlock, flags); - list_for_each_entry(conn, &connlist, conn_list) { - mempool_zone_complete(conn->z_error); - mempool_zone_complete(conn->z_pdu); - } - spin_unlock_irqrestore(&connlock, flags); - } - - return NOTIFY_DONE; -} - -static struct notifier_block iscsi_nl_notifier = { - .notifier_call = iscsi_rcv_nl_event, -}; - static __init int iscsi_transport_init(void) { int err; @@ -1633,25 +1433,15 @@ static __init int iscsi_transport_init(void) if (err) goto unregister_conn_class; - err = netlink_register_notifier(&iscsi_nl_notifier); - if (err) - goto unregister_session_class; - nls = netlink_kernel_create(NETLINK_ISCSI, 1, iscsi_if_rx, THIS_MODULE); if (!nls) { err = -ENOBUFS; - goto unregister_notifier; + goto unregister_session_class; } - z_reply = mempool_zone_init(Z_MAX_REPLY, - NLMSG_SPACE(sizeof(struct iscsi_uevent)), Z_HIWAT_REPLY); - if (z_reply) - return 0; + return 0; - sock_release(nls->sk_socket); -unregister_notifier: - netlink_unregister_notifier(&iscsi_nl_notifier); unregister_session_class: transport_class_unregister(&iscsi_session_class); unregister_conn_class: @@ -1665,9 +1455,7 @@ unregister_transport_class: static void __exit iscsi_transport_exit(void) { - mempool_zone_destroy(z_reply); sock_release(nls->sk_socket); - netlink_unregister_notifier(&iscsi_nl_notifier); transport_class_unregister(&iscsi_connection_class); transport_class_unregister(&iscsi_session_class); transport_class_unregister(&iscsi_host_class); diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 39e833260bd0..4b95c89c95c9 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -29,7 +29,6 @@ struct scsi_transport_template; struct iscsi_transport; struct Scsi_Host; -struct mempool_zone; struct iscsi_cls_conn; struct iscsi_conn; struct iscsi_cmd_task; @@ -157,9 +156,6 @@ struct iscsi_cls_conn { int active; /* must be accessed 
with the connlock */ struct device dev; /* sysfs transport/container device */ - struct mempool_zone *z_error; - struct mempool_zone *z_pdu; - struct list_head freequeue; }; #define iscsi_dev_to_conn(_dev) \ -- cgit v1.2.3 From 98644047916c24258fb47c3dab2bed8a44f53b83 Mon Sep 17 00:00:00 2001 From: Mike Christie <michaelc@cs.wisc.edu> Date: Mon, 16 Oct 2006 18:09:39 -0400 Subject: [SCSI] libiscsi: fix oops in connection create failure path If connection creation fails we end up calling list_del on an invalid struct. This then causes an oops. We are not actually using the lists (old MCS code we thought might be useful elsewhere) so this patch just removes that code. Signed-off-by: Mike Christie <michaelc@cs.wisc.edu> Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com> --- drivers/scsi/libiscsi.c | 38 ++++++++------------------------------ include/scsi/libiscsi.h | 3 --- 2 files changed, 8 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index c542d0e95e68..1000fe936791 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -778,6 +778,10 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *)) } conn = session->leadconn; + if (!conn) { + reason = FAILURE_SESSION_FREED; + goto fault; + } if (!__kfifo_get(session->cmdpool.queue, (void*)&ctask, sizeof(void*))) { @@ -1377,7 +1381,6 @@ iscsi_session_setup(struct iscsi_transport *iscsit, } spin_lock_init(&session->lock); - INIT_LIST_HEAD(&session->connections); /* initialize immediate command pool */ if (iscsi_pool_init(&session->mgmtpool, session->mgmtpool_max, @@ -1580,16 +1583,11 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn) kfree(conn->persistent_address); __kfifo_put(session->mgmtpool.queue, (void*)&conn->login_mtask, sizeof(void*)); - list_del(&conn->item); - if (list_empty(&session->connections)) + if (session->leadconn == conn) { session->leadconn = NULL; - if (session->leadconn && session->leadconn == conn) - session->leadconn = container_of(session->connections.next, - struct iscsi_conn, item); - - if (session->leadconn == NULL) /* no connections exits.. reset sequencing */ session->cmdsn = session->max_cmdsn = session->exp_cmdsn = 1; + } spin_unlock_bh(&session->lock); kfifo_free(conn->immqueue); @@ -1777,32 +1775,12 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session, struct iscsi_cls_conn *cls_conn, int is_leading) { struct iscsi_session *session = class_to_transport_session(cls_session); - struct iscsi_conn *tmp = ERR_PTR(-EEXIST), *conn = cls_conn->dd_data; + struct iscsi_conn *conn = cls_conn->dd_data; - /* lookup for existing connection */ spin_lock_bh(&session->lock); - list_for_each_entry(tmp, &session->connections, item) { - if (tmp == conn) { - if (conn->c_stage != ISCSI_CONN_STOPPED || - conn->stop_stage == STOP_CONN_TERM) { - printk(KERN_ERR "iscsi: can't bind " - "non-stopped connection (%d:%d)\n", - conn->c_stage, conn->stop_stage); - spin_unlock_bh(&session->lock); - return -EIO; - } - break; - } - } - if (tmp != conn) { - /* bind new iSCSI connection to session */ - conn->session = session; - list_add(&conn->item, &session->connections); - } - spin_unlock_bh(&session->lock); - if (is_leading) session->leadconn = conn; + spin_unlock_bh(&session->lock); /* * Unblock xmitworker(), Login Phase will pass through.
diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 401192e56e50..61eebec00a7b 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -136,7 +136,6 @@ struct iscsi_conn { /* control data */ int id; /* CID */ - struct list_head item; /* maintains list of conns */ int c_stage; /* connection state */ /* * Preallocated buffer for pdus that have data but do not @@ -235,10 +234,8 @@ struct iscsi_session { * - mgmtpool, * * - r2tpool */ int state; /* session state */ - struct list_head item; int age; /* counts session re-opens */ - struct list_head connections; /* list of connections */ int cmds_max; /* size of cmds array */ struct iscsi_cmd_task **cmds; /* Original Cmds arr */ struct iscsi_queue cmdpool; /* PDU's pool */ -- cgit v1.2.3 From 5a09e39810ae0465016c380962e12dd115779b87 Mon Sep 17 00:00:00 2001 From: Hannes Reinecke <hare@suse.de> Date: Fri, 20 Oct 2006 09:58:47 +0200 Subject: [SCSI] scsi_debug: support REPORT TARGET PORT GROUPS This patch adds support for REPORT TARGET PORT GROUPS. This is used eg for the multipathing priority callout to determine the path priority. With this patch multipath-tools can use the existing mpath_prio_alua callout to exercise the path priority grouping. Signed-off-by: Hannes Reinecke <hare@suse.de> Signed-off-by: Douglas Gilbert <dougg@torque.net> Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com> --- drivers/scsi/scsi_debug.c | 141 +++++++++++++++++++++++++++++++++++++++++----- include/scsi/scsi.h | 3 + 2 files changed, 130 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/scsi/scsi_debug.c b/drivers/scsi/scsi_debug.c index 9c0f35820e3e..30ee3d72c021 100644 --- a/drivers/scsi/scsi_debug.c +++ b/drivers/scsi/scsi_debug.c @@ -52,7 +52,7 @@ #include "scsi_debug.h" #define SCSI_DEBUG_VERSION "1.80" -static const char * scsi_debug_version_date = "20060914"; +static const char * scsi_debug_version_date = "20061018"; /* Additional Sense Code (ASC) used */ #define NO_ADDITIONAL_SENSE 0x0 @@ -254,6 +254,8 @@ static int resp_requests(struct scsi_cmnd * SCpnt, struct sdebug_dev_info * devip); static int resp_start_stop(struct scsi_cmnd * scp, struct sdebug_dev_info * devip); +static int resp_report_tgtpgs(struct scsi_cmnd * scp, + struct sdebug_dev_info * devip); static int resp_readcap(struct scsi_cmnd * SCpnt, struct sdebug_dev_info * devip); static int resp_readcap16(struct scsi_cmnd * SCpnt, @@ -287,9 +289,9 @@ static void __init sdebug_build_parts(unsigned char * ramp); static void __init init_all_queued(void); static void stop_all_queued(void); static int stop_queued_cmnd(struct scsi_cmnd * cmnd); -static int inquiry_evpd_83(unsigned char * arr, int target_dev_id, - int dev_id_num, const char * dev_id_str, - int dev_id_str_len); +static int inquiry_evpd_83(unsigned char * arr, int port_group_id, + int target_dev_id, int dev_id_num, + const char * dev_id_str, int dev_id_str_len); static int inquiry_evpd_88(unsigned char * arr, int target_dev_id); static int do_create_driverfs_files(void); static void do_remove_driverfs_files(void); @@ -422,6 +424,15 @@ int scsi_debug_queuecommand(struct scsi_cmnd * SCpnt, done_funct_t done) } errsts = resp_readcap16(SCpnt, devip); break; + case MAINTENANCE_IN: + if (MI_REPORT_TARGET_PGS != cmd[1]) { + mk_sense_buffer(devip, ILLEGAL_REQUEST, + INVALID_OPCODE, 0); + errsts = check_condition_result; + break; + } + errsts = resp_report_tgtpgs(SCpnt, devip); + break; case READ_16: case READ_12: case READ_10: @@ -665,8 +676,9 @@ static const char * inq_vendor_id = 
"Linux "; static const char * inq_product_id = "scsi_debug "; static const char * inq_product_rev = "0004"; -static int inquiry_evpd_83(unsigned char * arr, int target_dev_id, - int dev_id_num, const char * dev_id_str, +static int inquiry_evpd_83(unsigned char * arr, int port_group_id, + int target_dev_id, int dev_id_num, + const char * dev_id_str, int dev_id_str_len) { int num, port_a; @@ -720,6 +732,15 @@ static int inquiry_evpd_83(unsigned char * arr, int target_dev_id, arr[num++] = (port_a >> 16) & 0xff; arr[num++] = (port_a >> 8) & 0xff; arr[num++] = port_a & 0xff; + /* NAA-5, Target port group identifier */ + arr[num++] = 0x61; /* proto=sas, binary */ + arr[num++] = 0x95; /* piv=1, target port group id */ + arr[num++] = 0x0; + arr[num++] = 0x4; + arr[num++] = 0; + arr[num++] = 0; + arr[num++] = (port_group_id >> 8) & 0xff; + arr[num++] = port_group_id & 0xff; /* NAA-5, Target device identifier */ arr[num++] = 0x61; /* proto=sas, binary */ arr[num++] = 0xa3; /* piv=1, target device, naa */ @@ -928,12 +949,12 @@ static int resp_inquiry(struct scsi_cmnd * scp, int target, struct sdebug_dev_info * devip) { unsigned char pq_pdt; - unsigned char arr[SDEBUG_MAX_INQ_ARR_SZ]; + unsigned char * arr; unsigned char *cmd = (unsigned char *)scp->cmnd; - int alloc_len, n; + int alloc_len, n, ret; alloc_len = (cmd[3] << 8) + cmd[4]; - memset(arr, 0, SDEBUG_MAX_INQ_ARR_SZ); + arr = kzalloc(SDEBUG_MAX_INQ_ARR_SZ, GFP_KERNEL); if (devip->wlun) pq_pdt = 0x1e; /* present, wlun */ else if (scsi_debug_no_lun_0 && (0 == devip->lun)) @@ -944,12 +965,15 @@ static int resp_inquiry(struct scsi_cmnd * scp, int target, if (0x2 & cmd[1]) { /* CMDDT bit set */ mk_sense_buffer(devip, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0); + kfree(arr); return check_condition_result; } else if (0x1 & cmd[1]) { /* EVPD bit set */ - int lu_id_num, target_dev_id, len; + int lu_id_num, port_group_id, target_dev_id, len; char lu_id_str[6]; int host_no = devip->sdbg_host->shost->host_no; + port_group_id = (((host_no + 1) & 0x7f) << 8) + + (devip->channel & 0x7f); if (0 == scsi_debug_vpd_use_hostno) host_no = 0; lu_id_num = devip->wlun ? -1 : (((host_no + 1) * 2000) + @@ -977,8 +1001,9 @@ static int resp_inquiry(struct scsi_cmnd * scp, int target, memcpy(&arr[4], lu_id_str, len); } else if (0x83 == cmd[2]) { /* device identification */ arr[1] = cmd[2]; /*sanity */ - arr[3] = inquiry_evpd_83(&arr[4], target_dev_id, - lu_id_num, lu_id_str, len); + arr[3] = inquiry_evpd_83(&arr[4], port_group_id, + target_dev_id, lu_id_num, + lu_id_str, len); } else if (0x84 == cmd[2]) { /* Software interface ident. */ arr[1] = cmd[2]; /*sanity */ arr[3] = inquiry_evpd_84(&arr[4]); @@ -1012,17 +1037,22 @@ static int resp_inquiry(struct scsi_cmnd * scp, int target, /* Illegal request, invalid field in cdb */ mk_sense_buffer(devip, ILLEGAL_REQUEST, INVALID_FIELD_IN_CDB, 0); + kfree(arr); return check_condition_result; } len = min(((arr[2] << 8) + arr[3]) + 4, alloc_len); - return fill_from_dev_buffer(scp, arr, + ret = fill_from_dev_buffer(scp, arr, min(len, SDEBUG_MAX_INQ_ARR_SZ)); + kfree(arr); + return ret; } /* drops through here for a standard inquiry */ arr[1] = DEV_REMOVEABLE(target) ? 0x80 : 0; /* Removable disk */ arr[2] = scsi_debug_scsi_level; arr[3] = 2; /* response_data_format==2 */ arr[4] = SDEBUG_LONG_INQ_SZ - 5; + if (0 == scsi_debug_vpd_use_hostno) + arr[5] = 0x10; /* claim: implicit TGPS */ arr[6] = 0x10; /* claim: MultiP */ /* arr[6] |= 0x40; ... 
claim: EncServ (enclosure services) */ arr[7] = 0xa; /* claim: LINKED + CMDQUE */ @@ -1039,8 +1069,10 @@ static int resp_inquiry(struct scsi_cmnd * scp, int target, arr[n++] = 0x3; arr[n++] = 0x60; /* SSC-2 no version */ } arr[n++] = 0xc; arr[n++] = 0xf; /* SAS-1.1 rev 10 */ - return fill_from_dev_buffer(scp, arr, + ret = fill_from_dev_buffer(scp, arr, min(alloc_len, SDEBUG_LONG_INQ_SZ)); + kfree(arr); + return ret; } static int resp_requests(struct scsi_cmnd * scp, @@ -1171,6 +1203,87 @@ static int resp_readcap16(struct scsi_cmnd * scp, min(alloc_len, SDEBUG_READCAP16_ARR_SZ)); } +#define SDEBUG_MAX_TGTPGS_ARR_SZ 1412 + +static int resp_report_tgtpgs(struct scsi_cmnd * scp, + struct sdebug_dev_info * devip) +{ + unsigned char *cmd = (unsigned char *)scp->cmnd; + unsigned char * arr; + int host_no = devip->sdbg_host->shost->host_no; + int n, ret, alen, rlen; + int port_group_a, port_group_b, port_a, port_b; + + alen = ((cmd[6] << 24) + (cmd[7] << 16) + (cmd[8] << 8) + + cmd[9]); + + arr = kzalloc(SDEBUG_MAX_TGTPGS_ARR_SZ, GFP_KERNEL); + /* + * EVPD page 0x88 states we have two ports, one + * real and a fake port with no device connected. + * So we create two port groups with one port each + * and set the group with port B to unavailable. + */ + port_a = 0x1; /* relative port A */ + port_b = 0x2; /* relative port B */ + port_group_a = (((host_no + 1) & 0x7f) << 8) + + (devip->channel & 0x7f); + port_group_b = (((host_no + 1) & 0x7f) << 8) + + (devip->channel & 0x7f) + 0x80; + + /* + * The asymmetric access state is cycled according to the host_id. + */ + n = 4; + if (0 == scsi_debug_vpd_use_hostno) { + arr[n++] = host_no % 3; /* Asymm access state */ + arr[n++] = 0x0F; /* claim: all states are supported */ + } else { + arr[n++] = 0x0; /* Active/Optimized path */ + arr[n++] = 0x01; /* claim: only support active/optimized paths */ + } + arr[n++] = (port_group_a >> 8) & 0xff; + arr[n++] = port_group_a & 0xff; + arr[n++] = 0; /* Reserved */ + arr[n++] = 0; /* Status code */ + arr[n++] = 0; /* Vendor unique */ + arr[n++] = 0x1; /* One port per group */ + arr[n++] = 0; /* Reserved */ + arr[n++] = 0; /* Reserved */ + arr[n++] = (port_a >> 8) & 0xff; + arr[n++] = port_a & 0xff; + arr[n++] = 3; /* Port unavailable */ + arr[n++] = 0x08; /* claim: only unavailalbe paths are supported */ + arr[n++] = (port_group_b >> 8) & 0xff; + arr[n++] = port_group_b & 0xff; + arr[n++] = 0; /* Reserved */ + arr[n++] = 0; /* Status code */ + arr[n++] = 0; /* Vendor unique */ + arr[n++] = 0x1; /* One port per group */ + arr[n++] = 0; /* Reserved */ + arr[n++] = 0; /* Reserved */ + arr[n++] = (port_b >> 8) & 0xff; + arr[n++] = port_b & 0xff; + + rlen = n - 4; + arr[0] = (rlen >> 24) & 0xff; + arr[1] = (rlen >> 16) & 0xff; + arr[2] = (rlen >> 8) & 0xff; + arr[3] = rlen & 0xff; + + /* + * Return the smallest value of either + * - The allocated length + * - The constructed command length + * - The maximum array size + */ + rlen = min(alen,n); + ret = fill_from_dev_buffer(scp, arr, + min(rlen, SDEBUG_MAX_TGTPGS_ARR_SZ)); + kfree(arr); + return ret; +} + /* <<Following mode page info copied from ST318451LW>> */ static int resp_err_recov_pg(unsigned char * p, int pcontrol, int target) diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index 84a6d5fe0920..8a3f0bd0d45a 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -97,6 +97,7 @@ extern const unsigned char scsi_command_size[8]; #define PERSISTENT_RESERVE_IN 0x5e #define PERSISTENT_RESERVE_OUT 0x5f #define REPORT_LUNS 0xa0 +#define MAINTENANCE_IN 0xa3 #define 
MOVE_MEDIUM 0xa5 #define EXCHANGE_MEDIUM 0xa6 #define READ_12 0xa8 @@ -114,6 +115,8 @@ extern const unsigned char scsi_command_size[8]; #define SERVICE_ACTION_IN 0x9e /* values for service action in */ #define SAI_READ_CAPACITY_16 0x10 +/* values for maintenance in */ +#define MI_REPORT_TARGET_PGS 0x0a /* Values for T10/04-262r7 */ #define ATA_16 0x85 /* 16-byte pass-thru */ -- cgit v1.2.3 From 4a531e8c79fe459e922347461ccc0f0c13de20d5 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Date: Fri, 20 Oct 2006 09:08:18 +0900 Subject: [SCSI] replace u8 and u32 with __u8 and __u32 in scsi.h for user space Signed-off-by: FUJITA Tomonori <fujita.tomonori@lab.ntt.co.jp> Signed-off-by: Mike Christie <michaelc@cs.wisc.edu> Signed-off-by: James Bottomley <James.Bottomley@SteelEye.com> --- include/scsi/scsi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/scsi/scsi.h b/include/scsi/scsi.h index 8a3f0bd0d45a..5c0e9791441c 100644 --- a/include/scsi/scsi.h +++ b/include/scsi/scsi.h @@ -433,7 +433,7 @@ struct scsi_lun { #define SCSI_IOCTL_GET_PCI 0x5387 /* Pull a u32 out of a SCSI message (using BE SCSI conventions) */ -static inline u32 scsi_to_u32(u8 *ptr) +static inline __u32 scsi_to_u32(__u8 *ptr) { return (ptr[0]<<24) + (ptr[1]<<16) + (ptr[2]<<8) + ptr[3]; } -- cgit v1.2.3 From fa3522407f01ead1ec14bdd6b785ea08d17d500d Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen <hskinnemoen@atmel.com> Date: Tue, 24 Oct 2006 10:12:42 +0200 Subject: [PATCH] AVR32: Fix oversize immediates in atomic.h When calling e.g. atomic_sub_return with a large constant, the compiler may output an immediate that is too large for the sub instruction in the middle of the loop. Fix this by explicitly specifying the number of bits allowed in the constraint. Also stop atomic_add_return() and friends from falling back to their respective "sub" variants if the constant is too large to fit in an immediate. 
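For reference, a minimal sketch (not from the patch; the helper name is made up) of why the bounds in the constant test below are asymmetric: the AVR32 "sub" instruction takes a 21-bit signed immediate, and add is implemented by subtracting the negated constant:

    /* 21-bit signed immediates span -1048576 .. 1048575 (-2^20 .. 2^20 - 1).
     * atomic_add_return(i) falls back to atomic_sub_return(-i), so the
     * fallback is safe only when -i fits, i.e. i in -1048575 .. 1048576. */
    static inline int fits_sub_imm21(int i)
    {
            return (i >= -1048575) && (i <= 1048576);
    }
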
Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/asm-avr32/atomic.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-avr32/atomic.h b/include/asm-avr32/atomic.h index e0b9c44c126c..c40b6032c480 100644 --- a/include/asm-avr32/atomic.h +++ b/include/asm-avr32/atomic.h @@ -41,7 +41,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) " stcond %1, %0\n" " brne 1b" : "=&r"(result), "=o"(v->counter) - : "m"(v->counter), "ir"(i) + : "m"(v->counter), "rKs21"(i) : "cc"); return result; @@ -58,7 +58,7 @@ static inline int atomic_add_return(int i, atomic_t *v) { int result; - if (__builtin_constant_p(i)) + if (__builtin_constant_p(i) && (i >= -1048575) && (i <= 1048576)) result = atomic_sub_return(-i, v); else asm volatile( @@ -101,7 +101,7 @@ static inline int atomic_sub_unless(atomic_t *v, int a, int u) " mov %1, 1\n" "1:" : "=&r"(tmp), "=&r"(result), "=o"(v->counter) - : "m"(v->counter), "ir"(a), "ir"(u) + : "m"(v->counter), "rKs21"(a), "rKs21"(u) : "cc", "memory"); return result; @@ -121,7 +121,7 @@ static inline int atomic_add_unless(atomic_t *v, int a, int u) { int tmp, result; - if (__builtin_constant_p(a)) + if (__builtin_constant_p(a) && (a >= -1048575) && (a <= 1048576)) result = atomic_sub_unless(v, -a, u); else { result = 0; -- cgit v1.2.3 From 065834ab3988fece5608088e83724891c8190a2f Mon Sep 17 00:00:00 2001 From: Ben Nizette <ben@mallochdigital.com> Date: Tue, 24 Oct 2006 10:12:43 +0200 Subject: [PATCH] AVR32: add io{read,write}{8,16,32}{be,} support A number of new drivers require io{read,write}{8,16,32}{be,} family of io operations. These are provided for the AVR32 by this patch in the form of a series of macros. Access to the (memory mapped) io space through these macros is defined to be little endian only as little endian devices (such as PCI) are the main consumer of IO access. If high speed access is required, io{read,write}{16,32}be macros are supplied to perform native big endian access to this io space. 
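As a usage sketch (hypothetical device; the helper, base address and register offsets are all made up): PCI-style little-endian registers go through the plain accessors, while natively big-endian registers use the "be" variants:

    #include <linux/types.h>
    #include <linux/io.h>           /* or <asm/io.h> on this tree */

    static u32 demo_read_status(unsigned long phys)
    {
            void __iomem *regs = ioremap(phys, 0x100);
            u32 stat;

            stat = ioread32(regs + 0x08);   /* little-endian (PCI-style) register */
            iowrite32(stat | 0x1, regs + 0x08);
            stat = ioread32be(regs + 0x0c); /* natively big-endian register */
            iounmap(regs);
            return stat;
    }
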
Signed-off-by: Ben Nizette <ben@mallochdigital.com> Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/asm-avr32/io.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include') diff --git a/include/asm-avr32/io.h b/include/asm-avr32/io.h index 2fc8f111dce9..eec47500fa66 100644 --- a/include/asm-avr32/io.h +++ b/include/asm-avr32/io.h @@ -76,6 +76,39 @@ static inline unsigned int readl(const volatile void __iomem *addr) #define readsw(p, d, l) __raw_readsw((unsigned int)p, d, l) #define readsl(p, d, l) __raw_readsl((unsigned int)p, d, l) + +/* + * io{read,write}{8,16,32} macros in both le (for PCI style consumers) and native be + */ +#ifndef ioread8 + +#define ioread8(p) ({ unsigned int __v = __raw_readb(p); __v; }) + +#define ioread16(p) ({ unsigned int __v = le16_to_cpu(__raw_readw(p)); __v; }) +#define ioread16be(p) ({ unsigned int __v = be16_to_cpu(__raw_readw(p)); __v; }) + +#define ioread32(p) ({ unsigned int __v = le32_to_cpu(__raw_readl(p)); __v; }) +#define ioread32be(p) ({ unsigned int __v = be32_to_cpu(__raw_readl(p)); __v; }) + +#define iowrite8(v,p) __raw_writeb(v, p) + +#define iowrite16(v,p) __raw_writew(cpu_to_le16(v), p) +#define iowrite16be(v,p) __raw_writew(cpu_to_be16(v), p) + +#define iowrite32(v,p) __raw_writel(cpu_to_le32(v), p) +#define iowrite32be(v,p) __raw_writel(cpu_to_be32(v), p) + +#define ioread8_rep(p,d,c) __raw_readsb(p,d,c) +#define ioread16_rep(p,d,c) __raw_readsw(p,d,c) +#define ioread32_rep(p,d,c) __raw_readsl(p,d,c) + +#define iowrite8_rep(p,s,c) __raw_writesb(p,s,c) +#define iowrite16_rep(p,s,c) __raw_writesw(p,s,c) +#define iowrite32_rep(p,s,c) __raw_writesl(p,s,c) + +#endif + + /* * These two are only here because ALSA _thinks_ it needs them... */ -- cgit v1.2.3 From f2454a1a4b2aca38d3b7887619f43291d773c1ee Mon Sep 17 00:00:00 2001 From: Jes Sorensen <jes@sgi.com> Date: Wed, 25 Oct 2006 05:49:53 -0400 Subject: [IA64] don't double >> PAGE_SHIFT pointer for /dev/kmem access Don't PAGE_SHIFT pointer before handing it to virt_to_page() in xlate_dev_kmem_ptr() as it results in a double shift. Spotted by Bob Montgomery. Signed-off-by: Jes Sorensen <jes@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com> --- include/asm-ia64/uaccess.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-ia64/uaccess.h b/include/asm-ia64/uaccess.h index 9adb51211c22..449c8c0fa2bd 100644 --- a/include/asm-ia64/uaccess.h +++ b/include/asm-ia64/uaccess.h @@ -389,7 +389,7 @@ xlate_dev_kmem_ptr (char * p) struct page *page; char * ptr; - page = virt_to_page((unsigned long)p >> PAGE_SHIFT); + page = virt_to_page((unsigned long)p); if (PageUncached(page)) ptr = (char *)__pa(p) + __IA64_UNCACHED_OFFSET; else -- cgit v1.2.3 From 61ce1efe6e40233663d27ab8ac9ba9710eebcaad Mon Sep 17 00:00:00 2001 From: Andrew Morton <akpm@osdl.org> Date: Fri, 27 Oct 2006 11:41:44 -0700 Subject: [PATCH] vmlinux.lds: consolidate initcall sections Add a vmlinux.lds.h helper macro for defining the eight-level initcall table, teach all the architectures to use it. This is a prerequisite for a patch which performs initcall synchronisation for multithreaded-probing. 
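For context, a sketch of how code lands in these sections (standard initcall usage, unchanged by this patch; the function name is made up):

    #include <linux/init.h>

    static int __init example_setup(void)
    {
            return 0;
    }
    /* Places a pointer to example_setup() in .initcall6.init, one of the
     * sections the INITCALLS macro now gathers in level order. */
    device_initcall(example_setup);
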
Cc: Greg KH <greg@kroah.com> Signed-off-by: Andrew Morton <akpm@osdl.org> [ Added AVR32 as well ] Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- arch/alpha/kernel/vmlinux.lds.S | 8 +------- arch/arm/kernel/vmlinux.lds.S | 8 +------- arch/avr32/kernel/vmlinux.lds.c | 8 +------- arch/frv/kernel/vmlinux.lds.S | 8 +------- arch/h8300/kernel/vmlinux.lds.S | 8 +------- arch/i386/kernel/vmlinux.lds.S | 8 +------- arch/ia64/kernel/vmlinux.lds.S | 8 +------- arch/m32r/kernel/vmlinux.lds.S | 8 +------- arch/m68knommu/kernel/vmlinux.lds.S | 8 +------- arch/mips/kernel/vmlinux.lds.S | 8 +------- arch/parisc/kernel/vmlinux.lds.S | 8 +------- arch/powerpc/kernel/vmlinux.lds.S | 8 +------- arch/ppc/kernel/vmlinux.lds.S | 8 +------- arch/s390/kernel/vmlinux.lds.S | 8 +------- arch/sh/kernel/vmlinux.lds.S | 8 +------- arch/sh64/kernel/vmlinux.lds.S | 8 +------- arch/sparc/kernel/vmlinux.lds.S | 8 +------- arch/sparc64/kernel/vmlinux.lds.S | 8 +------- arch/v850/kernel/vmlinux.lds.S | 8 +------- arch/x86_64/kernel/vmlinux.lds.S | 8 +------- arch/xtensa/kernel/vmlinux.lds.S | 8 +------- include/asm-generic/vmlinux.lds.h | 10 ++++++++++ 22 files changed, 31 insertions(+), 147 deletions(-) (limited to 'include') diff --git a/arch/alpha/kernel/vmlinux.lds.S b/arch/alpha/kernel/vmlinux.lds.S index 71470e9d93ba..76bf071e376c 100644 --- a/arch/alpha/kernel/vmlinux.lds.S +++ b/arch/alpha/kernel/vmlinux.lds.S @@ -48,13 +48,7 @@ SECTIONS . = ALIGN(8); __initcall_start = .; .initcall.init : { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS } __initcall_end = .; diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S index 3ca574ee2772..a8fa75ea07a9 100644 --- a/arch/arm/kernel/vmlinux.lds.S +++ b/arch/arm/kernel/vmlinux.lds.S @@ -45,13 +45,7 @@ SECTIONS *(.early_param.init) __early_end = .; __initcall_start = .; - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS __initcall_end = .; __con_initcall_start = .; *(.con_initcall.init) diff --git a/arch/avr32/kernel/vmlinux.lds.c b/arch/avr32/kernel/vmlinux.lds.c index cdd627c6b7dc..5c4424e362b5 100644 --- a/arch/avr32/kernel/vmlinux.lds.c +++ b/arch/avr32/kernel/vmlinux.lds.c @@ -38,13 +38,7 @@ SECTIONS __setup_end = .; . = ALIGN(4); __initcall_start = .; - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS __initcall_end = .; __con_initcall_start = .; *(.con_initcall.init) diff --git a/arch/frv/kernel/vmlinux.lds.S b/arch/frv/kernel/vmlinux.lds.S index f474534ba78a..9c1fb12367fa 100644 --- a/arch/frv/kernel/vmlinux.lds.S +++ b/arch/frv/kernel/vmlinux.lds.S @@ -44,13 +44,7 @@ SECTIONS __initcall_start = .; .initcall.init : { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS } __initcall_end = .; __con_initcall_start = .; diff --git a/arch/h8300/kernel/vmlinux.lds.S b/arch/h8300/kernel/vmlinux.lds.S index 6406c388f88a..756325dd480e 100644 --- a/arch/h8300/kernel/vmlinux.lds.S +++ b/arch/h8300/kernel/vmlinux.lds.S @@ -118,13 +118,7 @@ SECTIONS . 
= ALIGN(0x4) ; ___setup_end = .; ___initcall_start = .; - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS ___initcall_end = .; ___con_initcall_start = .; *(.con_initcall.init) diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 1e7ac1c44ddc..adc1f232afee 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S @@ -126,13 +126,7 @@ SECTIONS __setup_end = .; __initcall_start = .; .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS } __initcall_end = .; __con_initcall_start = .; diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S index b3b2e389d6b2..d6083a0936f4 100644 --- a/arch/ia64/kernel/vmlinux.lds.S +++ b/arch/ia64/kernel/vmlinux.lds.S @@ -128,13 +128,7 @@ SECTIONS .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { __initcall_start = .; - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS __initcall_end = .; } diff --git a/arch/m32r/kernel/vmlinux.lds.S b/arch/m32r/kernel/vmlinux.lds.S index 13c7bb698e37..358b9cee2c65 100644 --- a/arch/m32r/kernel/vmlinux.lds.S +++ b/arch/m32r/kernel/vmlinux.lds.S @@ -83,13 +83,7 @@ SECTIONS __setup_end = .; __initcall_start = .; .initcall.init : { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS } __initcall_end = .; __con_initcall_start = .; diff --git a/arch/m68knommu/kernel/vmlinux.lds.S b/arch/m68knommu/kernel/vmlinux.lds.S index ccd2ceb05cfb..58afa8be604e 100644 --- a/arch/m68knommu/kernel/vmlinux.lds.S +++ b/arch/m68knommu/kernel/vmlinux.lds.S @@ -140,13 +140,7 @@ SECTIONS { *(.init.setup) __setup_end = .; __initcall_start = .; - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS __initcall_end = .; __con_initcall_start = .; *(.con_initcall.init) diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 0bb9cd889456..25ed3337ce35 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -91,13 +91,7 @@ SECTIONS __initcall_start = .; .initcall.init : { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS } __initcall_end = .; diff --git a/arch/parisc/kernel/vmlinux.lds.S b/arch/parisc/kernel/vmlinux.lds.S index b3677fc8eef5..7b943b45f7cd 100644 --- a/arch/parisc/kernel/vmlinux.lds.S +++ b/arch/parisc/kernel/vmlinux.lds.S @@ -153,13 +153,7 @@ SECTIONS __setup_end = .; __initcall_start = .; .initcall.init : { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS } __initcall_end = .; __con_initcall_start = .; diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index cb0e8d46c3e8..e8342d867536 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -108,13 +108,7 @@ SECTIONS .initcall.init : { __initcall_start = .; - *(.initcall1.init) - *(.initcall2.init) - 
*(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS __initcall_end = .; } diff --git a/arch/ppc/kernel/vmlinux.lds.S b/arch/ppc/kernel/vmlinux.lds.S index 095fd3323323..16e8661e1fec 100644 --- a/arch/ppc/kernel/vmlinux.lds.S +++ b/arch/ppc/kernel/vmlinux.lds.S @@ -115,13 +115,7 @@ SECTIONS __setup_end = .; __initcall_start = .; .initcall.init : { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS } __initcall_end = .; diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index af9e69a03011..fe0f2e97ba7b 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -83,13 +83,7 @@ SECTIONS __setup_end = .; __initcall_start = .; .initcall.init : { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS } __initcall_end = .; __con_initcall_start = .; diff --git a/arch/sh/kernel/vmlinux.lds.S b/arch/sh/kernel/vmlinux.lds.S index 5eb930918186..77b4026d5688 100644 --- a/arch/sh/kernel/vmlinux.lds.S +++ b/arch/sh/kernel/vmlinux.lds.S @@ -76,13 +76,7 @@ SECTIONS __setup_end = .; __initcall_start = .; .initcall.init : { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS } __initcall_end = .; __con_initcall_start = .; diff --git a/arch/sh64/kernel/vmlinux.lds.S b/arch/sh64/kernel/vmlinux.lds.S index a8fcc3a71585..95c4d753e357 100644 --- a/arch/sh64/kernel/vmlinux.lds.S +++ b/arch/sh64/kernel/vmlinux.lds.S @@ -108,13 +108,7 @@ SECTIONS __setup_end = .; __initcall_start = .; .initcall.init : C_PHYS(.initcall.init) { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS } __initcall_end = .; __con_initcall_start = .; diff --git a/arch/sparc/kernel/vmlinux.lds.S b/arch/sparc/kernel/vmlinux.lds.S index 1dd78c84888a..5cc5ff7f8824 100644 --- a/arch/sparc/kernel/vmlinux.lds.S +++ b/arch/sparc/kernel/vmlinux.lds.S @@ -49,13 +49,7 @@ SECTIONS __setup_end = .; __initcall_start = .; .initcall.init : { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS } __initcall_end = .; __con_initcall_start = .; diff --git a/arch/sparc64/kernel/vmlinux.lds.S b/arch/sparc64/kernel/vmlinux.lds.S index b097379a49a8..bd9de8c2a2aa 100644 --- a/arch/sparc64/kernel/vmlinux.lds.S +++ b/arch/sparc64/kernel/vmlinux.lds.S @@ -57,13 +57,7 @@ SECTIONS __setup_end = .; __initcall_start = .; .initcall.init : { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS } __initcall_end = .; __con_initcall_start = .; diff --git a/arch/v850/kernel/vmlinux.lds.S b/arch/v850/kernel/vmlinux.lds.S index 63399219cd9f..88d087f527c9 100644 --- a/arch/v850/kernel/vmlinux.lds.S +++ b/arch/v850/kernel/vmlinux.lds.S @@ -140,13 +140,7 @@ ___setup_end = . ; \ ___initcall_start = . ; \ *(.initcall.init) \ - *(.initcall1.init) \ - *(.initcall2.init) \ - *(.initcall3.init) \ - *(.initcall4.init) \ - *(.initcall5.init) \ - *(.initcall6.init) \ - *(.initcall7.init) \ + INITCALLS \ . = ALIGN (4) ; \ ___initcall_end = . 
; \ ___con_initcall_start = .; \ diff --git a/arch/x86_64/kernel/vmlinux.lds.S b/arch/x86_64/kernel/vmlinux.lds.S index 1283614c9b24..edb24aa714b4 100644 --- a/arch/x86_64/kernel/vmlinux.lds.S +++ b/arch/x86_64/kernel/vmlinux.lds.S @@ -175,13 +175,7 @@ SECTIONS __setup_end = .; __initcall_start = .; .initcall.init : AT(ADDR(.initcall.init) - LOAD_OFFSET) { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS } __initcall_end = .; __con_initcall_start = .; diff --git a/arch/xtensa/kernel/vmlinux.lds.S b/arch/xtensa/kernel/vmlinux.lds.S index ab6cdbd5eb68..cfe75f528725 100644 --- a/arch/xtensa/kernel/vmlinux.lds.S +++ b/arch/xtensa/kernel/vmlinux.lds.S @@ -184,13 +184,7 @@ SECTIONS __initcall_start = .; .initcall.init : { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS } __initcall_end = .; diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 9d0d11c180d9..e3e83bcaf710 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -213,3 +213,13 @@ #define NOTES \ .notes : { *(.note.*) } :note + +#define INITCALLS \ + *(.initcall1.init) \ + *(.initcall2.init) \ + *(.initcall3.init) \ + *(.initcall4.init) \ + *(.initcall5.init) \ + *(.initcall6.init) \ + *(.initcall7.init) + -- cgit v1.2.3 From 735a7ffb739b6efeaeb1e720306ba308eaaeb20e Mon Sep 17 00:00:00 2001 From: Andrew Morton <akpm@osdl.org> Date: Fri, 27 Oct 2006 11:42:37 -0700 Subject: [PATCH] drivers: wait for threaded probes between initcall levels The multithreaded-probing code has a problem: after one initcall level (eg, core_initcall) has been processed, we will then start processing the next level (postcore_initcall) while the kernel threads which are handling core_initcall are still executing. This breaks the guarantees which the layered initcalls previously gave us. IOW, we want to be multithreaded _within_ an initcall level, but not between different levels. Fix that up by causing the probing code to wait for all outstanding probes at one level to complete before we start processing the next level. 
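The synchronisation used for this is the usual waitqueue drain over a counter; a generic sketch of the pattern (names made up, the real code is the drivers/base/dd.c hunk below):

    #include <linux/wait.h>
    #include <linux/sched.h>
    #include <asm/atomic.h>

    static atomic_t pending = ATOMIC_INIT(0);
    static DECLARE_WAIT_QUEUE_HEAD(pending_wq);

    static void drain_pending(void)
    {
            DEFINE_WAIT(wait);

            while (atomic_read(&pending)) {
                    prepare_to_wait(&pending_wq, &wait, TASK_UNINTERRUPTIBLE);
                    if (atomic_read(&pending))  /* re-check after queueing to avoid a lost wakeup */
                            schedule();
            }
            finish_wait(&pending_wq, &wait);
    }
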
Cc: Greg KH <greg@kroah.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- drivers/base/dd.c | 30 ++++++++++++++++++++++++++++++ include/asm-generic/vmlinux.lds.h | 9 ++++++++- include/linux/init.h | 28 +++++++++++++++++++--------- 3 files changed, 57 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/base/dd.c b/drivers/base/dd.c index db01b95a47a5..c5d6bb4290ad 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -18,6 +18,7 @@ #include <linux/device.h> #include <linux/module.h> #include <linux/kthread.h> +#include <linux/wait.h> #include "base.h" #include "power/power.h" @@ -70,6 +71,8 @@ struct stupid_thread_structure { }; static atomic_t probe_count = ATOMIC_INIT(0); +static DECLARE_WAIT_QUEUE_HEAD(probe_waitqueue); + static int really_probe(void *void_data) { struct stupid_thread_structure *data = void_data; @@ -121,6 +124,7 @@ probe_failed: done: kfree(data); atomic_dec(&probe_count); + wake_up(&probe_waitqueue); return ret; } @@ -337,6 +341,32 @@ void driver_detach(struct device_driver * drv) } } +#ifdef CONFIG_PCI_MULTITHREAD_PROBE +static int __init wait_for_probes(void) +{ + DEFINE_WAIT(wait); + + printk(KERN_INFO "%s: waiting for %d threads\n", __FUNCTION__, + atomic_read(&probe_count)); + if (!atomic_read(&probe_count)) + return 0; + while (atomic_read(&probe_count)) { + prepare_to_wait(&probe_waitqueue, &wait, TASK_UNINTERRUPTIBLE); + if (atomic_read(&probe_count)) + schedule(); + } + finish_wait(&probe_waitqueue, &wait); + return 0; +} + +core_initcall_sync(wait_for_probes); +postcore_initcall_sync(wait_for_probes); +arch_initcall_sync(wait_for_probes); +subsys_initcall_sync(wait_for_probes); +fs_initcall_sync(wait_for_probes); +device_initcall_sync(wait_for_probes); +late_initcall_sync(wait_for_probes); +#endif EXPORT_SYMBOL_GPL(device_bind_driver); EXPORT_SYMBOL_GPL(device_release_driver); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index e3e83bcaf710..9d873163a7ab 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -216,10 +216,17 @@ #define INITCALLS \ *(.initcall1.init) \ + *(.initcall1s.init) \ *(.initcall2.init) \ + *(.initcall2s.init) \ *(.initcall3.init) \ + *(.initcall3s.init) \ *(.initcall4.init) \ + *(.initcall4s.init) \ *(.initcall5.init) \ + *(.initcall5s.init) \ *(.initcall6.init) \ - *(.initcall7.init) + *(.initcall6s.init) \ + *(.initcall7.init) \ + *(.initcall7s.init) diff --git a/include/linux/init.h b/include/linux/init.h index e92b1455d7af..ff40ea118e3a 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -84,19 +84,29 @@ extern void setup_arch(char **); * by link order. * For backwards compatibility, initcall() puts the call in * the device init subsection. + * + * The `id' arg to __define_initcall() is needed so that multiple initcalls + * can point at the same handler without causing duplicate-symbol build errors. 
*/ -#define __define_initcall(level,fn) \ - static initcall_t __initcall_##fn __attribute_used__ \ +#define __define_initcall(level,fn,id) \ + static initcall_t __initcall_##fn##id __attribute_used__ \ __attribute__((__section__(".initcall" level ".init"))) = fn -#define core_initcall(fn) __define_initcall("1",fn) -#define postcore_initcall(fn) __define_initcall("2",fn) -#define arch_initcall(fn) __define_initcall("3",fn) -#define subsys_initcall(fn) __define_initcall("4",fn) -#define fs_initcall(fn) __define_initcall("5",fn) -#define device_initcall(fn) __define_initcall("6",fn) -#define late_initcall(fn) __define_initcall("7",fn) +#define core_initcall(fn) __define_initcall("1",fn,1) +#define core_initcall_sync(fn) __define_initcall("1s",fn,1s) +#define postcore_initcall(fn) __define_initcall("2",fn,2) +#define postcore_initcall_sync(fn) __define_initcall("2s",fn,2s) +#define arch_initcall(fn) __define_initcall("3",fn,3) +#define arch_initcall_sync(fn) __define_initcall("3s",fn,3s) +#define subsys_initcall(fn) __define_initcall("4",fn,4) +#define subsys_initcall_sync(fn) __define_initcall("4s",fn,4s) +#define fs_initcall(fn) __define_initcall("5",fn,5) +#define fs_initcall_sync(fn) __define_initcall("5s",fn,5s) +#define device_initcall(fn) __define_initcall("6",fn,6) +#define device_initcall_sync(fn) __define_initcall("6s",fn,6s) +#define late_initcall(fn) __define_initcall("7",fn,7) +#define late_initcall_sync(fn) __define_initcall("7s",fn,7s) #define __initcall(fn) device_initcall(fn) -- cgit v1.2.3 From 9957329800b8b554b1af669bcc6878282338c34e Mon Sep 17 00:00:00 2001 From: Russell King <rmk@dyn-67.arm.linux.org.uk> Date: Thu, 26 Oct 2006 10:27:42 +0100 Subject: [ARM] Add __must_check to uaccess functions Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> --- include/asm-arm/uaccess.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/asm-arm/uaccess.h b/include/asm-arm/uaccess.h index 09ad0cab9014..5f420a0149f1 100644 --- a/include/asm-arm/uaccess.h +++ b/include/asm-arm/uaccess.h @@ -383,19 +383,19 @@ do { \ #ifdef CONFIG_MMU -extern unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n); -extern unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n); -extern unsigned long __clear_user(void __user *addr, unsigned long n); +extern unsigned long __must_check __copy_from_user(void *to, const void __user *from, unsigned long n); +extern unsigned long __must_check __copy_to_user(void __user *to, const void *from, unsigned long n); +extern unsigned long __must_check __clear_user(void __user *addr, unsigned long n); #else #define __copy_from_user(to,from,n) (memcpy(to, (void __force *)from, n), 0) #define __copy_to_user(to,from,n) (memcpy((void __force *)to, from, n), 0) #define __clear_user(addr,n) (memset((void __force *)addr, 0, n), 0) #endif -extern unsigned long __strncpy_from_user(char *to, const char __user *from, unsigned long count); -extern unsigned long __strnlen_user(const char __user *s, long n); +extern unsigned long __must_check __strncpy_from_user(char *to, const char __user *from, unsigned long count); +extern unsigned long __must_check __strnlen_user(const char __user *s, long n); -static inline unsigned long copy_from_user(void *to, const void __user *from, unsigned long n) +static inline unsigned long __must_check copy_from_user(void *to, const void __user *from, unsigned long n) { if (access_ok(VERIFY_READ, from, n)) n = __copy_from_user(to, 
from, n); @@ -404,7 +404,7 @@ static inline unsigned long copy_from_user(void *to, const void __user *from, un return n; } -static inline unsigned long copy_to_user(void __user *to, const void *from, unsigned long n) +static inline unsigned long __must_check copy_to_user(void __user *to, const void *from, unsigned long n) { if (access_ok(VERIFY_WRITE, to, n)) n = __copy_to_user(to, from, n); @@ -414,14 +414,14 @@ static inline unsigned long copy_to_user(void __user *to, const void *from, unsi #define __copy_to_user_inatomic __copy_to_user #define __copy_from_user_inatomic __copy_from_user -static inline unsigned long clear_user(void __user *to, unsigned long n) +static inline unsigned long __must_check clear_user(void __user *to, unsigned long n) { if (access_ok(VERIFY_WRITE, to, n)) n = __clear_user(to, n); return n; } -static inline long strncpy_from_user(char *dst, const char __user *src, long count) +static inline long __must_check strncpy_from_user(char *dst, const char __user *src, long count) { long res = -EFAULT; if (access_ok(VERIFY_READ, src, 1)) @@ -431,7 +431,7 @@ static inline long strncpy_from_user(char *dst, const char __user *src, long cou #define strlen_user(s) strnlen_user(s, ~0UL >> 1) -static inline long strnlen_user(const char __user *s, long n) +static inline long __must_check strnlen_user(const char __user *s, long n) { unsigned long res = 0; -- cgit v1.2.3 From 2ae88149a27cadf2840e0ab8155bef13be285c03 Mon Sep 17 00:00:00 2001 From: Nick Piggin <npiggin@suse.de> Date: Sat, 28 Oct 2006 10:38:23 -0700 Subject: [PATCH] mm: clean up pagecache allocation - Consolidate page_cache_alloc - Fix splice: only the pagecache pages and filesystem data need to use mapping_gfp_mask. - Fix grab_cache_page_nowait: same as splice, also honour NUMA placement. 
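The rule this settles on, as a sketch (mirroring the grab_cache_page_nowait() hunk below): the pagecache page itself honours the mapping's gfp mask and NUMA spreading via __page_cache_alloc(), while the radix-tree insertion can use plain GFP_KERNEL:

    page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS);
    if (page && add_to_page_cache_lru(page, mapping, index, GFP_KERNEL)) {
            page_cache_release(page);
            page = NULL;
    }
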
Signed-off-by: Nick Piggin <npiggin@suse.de> Cc: Jens Axboe <jens.axboe@oracle.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/splice.c | 9 ++++----- include/linux/pagemap.h | 14 +++++++++----- mm/filemap.c | 24 ++++++------------------ 3 files changed, 19 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/fs/splice.c b/fs/splice.c index 49fb9f129938..8d705954d294 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -74,7 +74,7 @@ static int page_cache_pipe_buf_steal(struct pipe_inode_info *pipe, wait_on_page_writeback(page); if (PagePrivate(page)) - try_to_release_page(page, mapping_gfp_mask(mapping)); + try_to_release_page(page, GFP_KERNEL); /* * If we succeeded in removing the mapping, set LRU flag @@ -333,7 +333,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos, break; error = add_to_page_cache_lru(page, mapping, index, - mapping_gfp_mask(mapping)); + GFP_KERNEL); if (unlikely(error)) { page_cache_release(page); if (error == -EEXIST) @@ -557,7 +557,6 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, { struct file *file = sd->file; struct address_space *mapping = file->f_mapping; - gfp_t gfp_mask = mapping_gfp_mask(mapping); unsigned int offset, this_len; struct page *page; pgoff_t index; @@ -591,7 +590,7 @@ static int pipe_to_file(struct pipe_inode_info *pipe, struct pipe_buffer *buf, goto find_page; page = buf->page; - if (add_to_page_cache(page, mapping, index, gfp_mask)) { + if (add_to_page_cache(page, mapping, index, GFP_KERNEL)) { unlock_page(page); goto find_page; } @@ -613,7 +612,7 @@ find_page: * This will also lock the page */ ret = add_to_page_cache_lru(page, mapping, index, - gfp_mask); + GFP_KERNEL); if (unlikely(ret)) goto out; } diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 64f950925151..c3e255bf8594 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -52,19 +52,23 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) void release_pages(struct page **pages, int nr, int cold); #ifdef CONFIG_NUMA -extern struct page *page_cache_alloc(struct address_space *x); -extern struct page *page_cache_alloc_cold(struct address_space *x); +extern struct page *__page_cache_alloc(gfp_t gfp); #else +static inline struct page *__page_cache_alloc(gfp_t gfp) +{ + return alloc_pages(gfp, 0); +} +#endif + static inline struct page *page_cache_alloc(struct address_space *x) { - return alloc_pages(mapping_gfp_mask(x), 0); + return __page_cache_alloc(mapping_gfp_mask(x)); } static inline struct page *page_cache_alloc_cold(struct address_space *x) { - return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD, 0); + return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD); } -#endif typedef int filler_t(void *, struct page *); diff --git a/mm/filemap.c b/mm/filemap.c index cb26e33fd0ff..7b84dc814347 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -467,25 +467,15 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, } #ifdef CONFIG_NUMA -struct page *page_cache_alloc(struct address_space *x) +struct page *__page_cache_alloc(gfp_t gfp) { if (cpuset_do_page_mem_spread()) { int n = cpuset_mem_spread_node(); - return alloc_pages_node(n, mapping_gfp_mask(x), 0); + return alloc_pages_node(n, gfp, 0); } - return alloc_pages(mapping_gfp_mask(x), 0); + return alloc_pages(gfp, 0); } -EXPORT_SYMBOL(page_cache_alloc); - -struct page *page_cache_alloc_cold(struct address_space *x) -{ - if (cpuset_do_page_mem_spread()) 
{ - int n = cpuset_mem_spread_node(); - return alloc_pages_node(n, mapping_gfp_mask(x)|__GFP_COLD, 0); - } - return alloc_pages(mapping_gfp_mask(x)|__GFP_COLD, 0); -} -EXPORT_SYMBOL(page_cache_alloc_cold); +EXPORT_SYMBOL(__page_cache_alloc); #endif static int __sleep_on_page_lock(void *word) @@ -826,7 +816,6 @@ struct page * grab_cache_page_nowait(struct address_space *mapping, unsigned long index) { struct page *page = find_get_page(mapping, index); - gfp_t gfp_mask; if (page) { if (!TestSetPageLocked(page)) @@ -834,9 +823,8 @@ grab_cache_page_nowait(struct address_space *mapping, unsigned long index) page_cache_release(page); return NULL; } - gfp_mask = mapping_gfp_mask(mapping) & ~__GFP_FS; - page = alloc_pages(gfp_mask, 0); - if (page && add_to_page_cache_lru(page, mapping, index, gfp_mask)) { + page = __page_cache_alloc(mapping_gfp_mask(mapping) & ~__GFP_FS); + if (page && add_to_page_cache_lru(page, mapping, index, GFP_KERNEL)) { page_cache_release(page); page = NULL; } -- cgit v1.2.3 From 3bb1a852ab6c9cdf211a2f4a2f502340c8c38eca Mon Sep 17 00:00:00 2001 From: Martin Bligh <mbligh@mbligh.org> Date: Sat, 28 Oct 2006 10:38:24 -0700 Subject: [PATCH] vmscan: Fix temp_priority race The temp_priority field in zone is racy, as we can walk through a reclaim path, and just before we copy it into prev_priority, it can be overwritten (say with DEF_PRIORITY) by another reclaimer. The same bug is contained in both try_to_free_pages and balance_pgdat, but it is fixed slightly differently. In balance_pgdat, we keep a separate priority record per zone in a local array. In try_to_free_pages there is no need to do this, as the priority level is the same for all zones that we reclaim from. Impact of this bug is that temp_priority is copied into prev_priority, and setting this artificially high causes reclaimers to set distress artificially low. They then fail to reclaim mapped pages, when they are, in fact, under severe memory pressure (their priority may be as low as 0). This causes the OOM killer to fire incorrectly. From: Andrew Morton <akpm@osdl.org> __zone_reclaim() isn't modifying zone->prev_priority. But zone->prev_priority is used in the decision whether or not to bring mapped pages onto the inactive list. Hence there's a risk here that __zone_reclaim() will fail because zone->prev_priority is large (ie: low urgency) and lots of mapped pages end up stuck on the active list. Fix that up by decreasing (ie making more urgent) zone->prev_priority as __zone_reclaim() scans the zone's pages. This bug perhaps explains why ZONE_RECLAIM_PRIORITY was created. It should be possible to remove that now, and to just start out at DEF_PRIORITY? Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Christoph Lameter <clameter@engr.sgi.com> Cc: <stable@kernel.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/mmzone.h | 6 +----- mm/page_alloc.c | 2 +- mm/vmscan.c | 55 +++++++++++++++++++++++++++++++++------------- mm/vmstat.c | 2 -- 4 files changed, 43 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index ed0762b283a9..e06683e2bea3 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -218,13 +218,9 @@ struct zone { * under - it drives the swappiness decision: whether to unmap mapped * pages. * - * temp_priority is used to remember the scanning priority at which - * this zone was successfully refilled to free_pages == pages_high.
- * - * Access to both these fields is quite racy even on uniprocessor. But + * Access to both this field is quite racy even on uniprocessor. But * it is expected to average out OK. */ - int temp_priority; int prev_priority; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index f5fc45472d5c..ecf853b5e30e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2407,7 +2407,7 @@ static void __meminit free_area_init_core(struct pglist_data *pgdat, zone->zone_pgdat = pgdat; zone->free_pages = 0; - zone->temp_priority = zone->prev_priority = DEF_PRIORITY; + zone->prev_priority = DEF_PRIORITY; zone_pcp_init(zone); INIT_LIST_HEAD(&zone->active_list); diff --git a/mm/vmscan.c b/mm/vmscan.c index f05527bf792b..b32560ead5c0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -723,6 +723,20 @@ done: return nr_reclaimed; } +/* + * We are about to scan this zone at a certain priority level. If that priority + * level is smaller (ie: more urgent) than the previous priority, then note + * that priority level within the zone. This is done so that when the next + * process comes in to scan this zone, it will immediately start out at this + * priority level rather than having to build up its own scanning priority. + * Here, this priority affects only the reclaim-mapped threshold. + */ +static inline void note_zone_scanning_priority(struct zone *zone, int priority) +{ + if (priority < zone->prev_priority) + zone->prev_priority = priority; +} + static inline int zone_is_near_oom(struct zone *zone) { return zone->pages_scanned >= (zone->nr_active + zone->nr_inactive)*3; @@ -972,9 +986,7 @@ static unsigned long shrink_zones(int priority, struct zone **zones, if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) continue; - zone->temp_priority = priority; - if (zone->prev_priority > priority) - zone->prev_priority = priority; + note_zone_scanning_priority(zone, priority); if (zone->all_unreclaimable && priority != DEF_PRIORITY) continue; /* Let kswapd poll it */ @@ -1024,7 +1036,6 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) continue; - zone->temp_priority = DEF_PRIORITY; lru_pages += zone->nr_active + zone->nr_inactive; } @@ -1065,13 +1076,22 @@ unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask) if (!sc.all_unreclaimable) ret = 1; out: + /* + * Now that we've scanned all the zones at this priority level, note + * that level within the zone so that the next thread which performs + * scanning of this zone will immediately start out at this priority + * level. This affects only the decision whether or not to bring + * mapped pages onto the inactive list. + */ + if (priority < 0) + priority = 0; for (i = 0; zones[i] != 0; i++) { struct zone *zone = zones[i]; if (!cpuset_zone_allowed(zone, __GFP_HARDWALL)) continue; - zone->prev_priority = zone->temp_priority; + zone->prev_priority = priority; } return ret; } @@ -1111,6 +1131,11 @@ static unsigned long balance_pgdat(pg_data_t *pgdat, int order) .swap_cluster_max = SWAP_CLUSTER_MAX, .swappiness = vm_swappiness, }; + /* + * temp_priority is used to remember the scanning priority at which + * this zone was successfully refilled to free_pages == pages_high. 
+ */ + int temp_priority[MAX_NR_ZONES]; loop_again: total_scanned = 0; @@ -1118,11 +1143,8 @@ loop_again: sc.may_writepage = !laptop_mode; count_vm_event(PAGEOUTRUN); - for (i = 0; i < pgdat->nr_zones; i++) { - struct zone *zone = pgdat->node_zones + i; - - zone->temp_priority = DEF_PRIORITY; - } + for (i = 0; i < pgdat->nr_zones; i++) + temp_priority[i] = DEF_PRIORITY; for (priority = DEF_PRIORITY; priority >= 0; priority--) { int end_zone = 0; /* Inclusive. 0 = ZONE_DMA */ @@ -1183,10 +1205,9 @@ scan: if (!zone_watermark_ok(zone, order, zone->pages_high, end_zone, 0)) all_zones_ok = 0; - zone->temp_priority = priority; - if (zone->prev_priority > priority) - zone->prev_priority = priority; + temp_priority[i] = priority; sc.nr_scanned = 0; + note_zone_scanning_priority(zone, priority); nr_reclaimed += shrink_zone(priority, zone, &sc); reclaim_state->reclaimed_slab = 0; nr_slab = shrink_slab(sc.nr_scanned, GFP_KERNEL, @@ -1226,10 +1247,15 @@ scan: break; } out: + /* + * Note within each zone the priority level at which this zone was + * brought into a happy state. So that the next thread which scans this + * zone will start out at that priority level. + */ for (i = 0; i < pgdat->nr_zones; i++) { struct zone *zone = pgdat->node_zones + i; - zone->prev_priority = zone->temp_priority; + zone->prev_priority = temp_priority[i]; } if (!all_zones_ok) { cond_resched(); @@ -1614,6 +1640,7 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order) */ priority = ZONE_RECLAIM_PRIORITY; do { + note_zone_scanning_priority(zone, priority); nr_reclaimed += shrink_zone(priority, zone, &sc); priority--; } while (priority >= 0 && nr_reclaimed < nr_pages); diff --git a/mm/vmstat.c b/mm/vmstat.c index 45b124e012f5..8614e8f6743b 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -587,11 +587,9 @@ static int zoneinfo_show(struct seq_file *m, void *arg) seq_printf(m, "\n all_unreclaimable: %u" "\n prev_priority: %i" - "\n temp_priority: %i" "\n start_pfn: %lu", zone->all_unreclaimable, zone->prev_priority, - zone->temp_priority, zone->zone_start_pfn); spin_unlock_irqrestore(&zone->lock, flags); seq_putc(m, '\n'); -- cgit v1.2.3 From 52fd24ca1db3a741f144bbc229beefe044202cac Mon Sep 17 00:00:00 2001 From: Giridhar Pemmasani <pgiri@yahoo.com> Date: Sat, 28 Oct 2006 10:38:34 -0700 Subject: [PATCH] __vmalloc with GFP_ATOMIC causes 'sleeping from invalid context' If __vmalloc is called to allocate memory with GFP_ATOMIC in atomic context, the chain of calls results in __get_vm_area_node allocating memory for vm_struct with GFP_KERNEL, causing the 'sleeping from invalid context' warning. This patch fixes it by passing the gfp flags along so __get_vm_area_node allocates memory for vm_struct with the same flags. 
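A minimal reproducer of the warning being fixed (illustrative; the lock and helper are made up):

    #include <linux/vmalloc.h>
    #include <linux/gfp.h>
    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(demo_lock);

    static void *demo_atomic_alloc(unsigned long size)
    {
            void *buf;

            spin_lock(&demo_lock);          /* atomic context */
            /* Before this patch the vm_struct inside __vmalloc() was
             * allocated with GFP_KERNEL regardless of the flags passed
             * here, triggering "sleeping from invalid context". */
            buf = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL);
            spin_unlock(&demo_lock);
            return buf;
    }
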
Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/vmalloc.h | 3 ++- mm/vmalloc.c | 18 +++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index ce5f1482e6be..dc9a29d84abc 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -60,7 +60,8 @@ extern struct vm_struct *get_vm_area(unsigned long size, unsigned long flags); extern struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, unsigned long start, unsigned long end); extern struct vm_struct *get_vm_area_node(unsigned long size, - unsigned long flags, int node); + unsigned long flags, int node, + gfp_t gfp_mask); extern struct vm_struct *remove_vm_area(void *addr); extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages); diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 1133dd3aafcf..6d381df7c9b3 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -160,13 +160,15 @@ int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page ***pages) return err; } -struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags, - unsigned long start, unsigned long end, int node) +static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags, + unsigned long start, unsigned long end, + int node, gfp_t gfp_mask) { struct vm_struct **p, *tmp, *area; unsigned long align = 1; unsigned long addr; + BUG_ON(in_interrupt()); if (flags & VM_IOREMAP) { int bit = fls(size); @@ -180,7 +182,7 @@ struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long flags, addr = ALIGN(start, align); size = PAGE_ALIGN(size); - area = kmalloc_node(sizeof(*area), GFP_KERNEL, node); + area = kmalloc_node(sizeof(*area), gfp_mask, node); if (unlikely(!area)) return NULL; @@ -236,7 +238,7 @@ out: struct vm_struct *__get_vm_area(unsigned long size, unsigned long flags, unsigned long start, unsigned long end) { - return __get_vm_area_node(size, flags, start, end, -1); + return __get_vm_area_node(size, flags, start, end, -1, GFP_KERNEL); } /** @@ -253,9 +255,11 @@ struct vm_struct *get_vm_area(unsigned long size, unsigned long flags) return __get_vm_area(size, flags, VMALLOC_START, VMALLOC_END); } -struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, int node) +struct vm_struct *get_vm_area_node(unsigned long size, unsigned long flags, + int node, gfp_t gfp_mask) { - return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node); + return __get_vm_area_node(size, flags, VMALLOC_START, VMALLOC_END, node, + gfp_mask); } /* Caller must hold vmlist_lock */ @@ -487,7 +491,7 @@ static void *__vmalloc_node(unsigned long size, gfp_t gfp_mask, pgprot_t prot, if (!size || (size >> PAGE_SHIFT) > num_physpages) return NULL; - area = get_vm_area_node(size, VM_ALLOC, node); + area = get_vm_area_node(size, VM_ALLOC, node, gfp_mask); if (!area) return NULL; -- cgit v1.2.3 From 08d892f11aae7125fe078cf93ec5cf6af288c5e7 Mon Sep 17 00:00:00 2001 From: Andrey Panin <pazke@donpac.ru> Date: Sat, 28 Oct 2006 10:38:35 -0700 Subject: [PATCH] visws build fix Fix this: > Subject : CONFIG_X86_VISWS=3Dy, CONFIG_SMP=3Dn compile error > References : http://lkml.org/lkml/2006/10/7/51 > Submitter : Jesper Juhl <jesper.juhl@gmail.com> > Caused-By : David Howells <dhowells@redhat.com> > commit 7d12e780e003f93433d49ce78cfedf4b4c52adc5 > Status : unknown Via undescribed means. 
Signed-off-by: Andrey Panin <pazke@donpac.ru> Cc: Adrian Bunk <bunk@stusta.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- arch/i386/mach-visws/visws_apic.c | 7 ++--- include/asm-i386/mach-visws/do_timer.h | 53 --------------------------------- include/asm-i386/mach-visws/mach_apic.h | 5 ++++ 3 files changed, 8 insertions(+), 57 deletions(-) delete mode 100644 include/asm-i386/mach-visws/do_timer.h (limited to 'include') diff --git a/arch/i386/mach-visws/visws_apic.c b/arch/i386/mach-visws/visws_apic.c index 07097ed48890..38c2b13124d9 100644 --- a/arch/i386/mach-visws/visws_apic.c +++ b/arch/i386/mach-visws/visws_apic.c @@ -122,7 +122,7 @@ static void end_cobalt_irq(unsigned int irq) spin_unlock_irqrestore(&cobalt_lock, flags); } -static struct hw_interrupt_type cobalt_irq_type = { +static struct irq_chip cobalt_irq_type = { .typename = "Cobalt-APIC", .startup = startup_cobalt_irq, .shutdown = disable_cobalt_irq, @@ -159,7 +159,7 @@ static void end_piix4_master_irq(unsigned int irq) spin_unlock_irqrestore(&cobalt_lock, flags); } -static struct hw_interrupt_type piix4_master_irq_type = { +static struct irq_chip piix4_master_irq_type = { .typename = "PIIX4-master", .startup = startup_piix4_master_irq, .ack = ack_cobalt_irq, @@ -167,9 +167,8 @@ static struct hw_interrupt_type piix4_master_irq_type = { }; -static struct hw_interrupt_type piix4_virtual_irq_type = { +static struct irq_chip piix4_virtual_irq_type = { .typename = "PIIX4-virtual", - .startup = startup_8259A_irq, .shutdown = disable_8259A_irq, .enable = enable_8259A_irq, .disable = disable_8259A_irq, diff --git a/include/asm-i386/mach-visws/do_timer.h b/include/asm-i386/mach-visws/do_timer.h deleted file mode 100644 index 21cd696d4d0f..000000000000 --- a/include/asm-i386/mach-visws/do_timer.h +++ /dev/null @@ -1,53 +0,0 @@ -/* defines for inline arch setup functions */ - -#include <asm/fixmap.h> -#include <asm/i8259.h> -#include "cobalt.h" - -static inline void do_timer_interrupt_hook(void) -{ - /* Clear the interrupt */ - co_cpu_write(CO_CPU_STAT,co_cpu_read(CO_CPU_STAT) & ~CO_STAT_TIMEINTR); - - do_timer(1); -#ifndef CONFIG_SMP - update_process_times(user_mode_vm(irq_regs)); -#endif -/* - * In the SMP case we use the local APIC timer interrupt to do the - * profiling, except when we simulate SMP mode on a uniprocessor - * system, in that case we have to call the local interrupt handler. - */ -#ifndef CONFIG_X86_LOCAL_APIC - profile_tick(CPU_PROFILING); -#else - if (!using_apic_timer) - smp_local_timer_interrupt(); -#endif -} - -static inline int do_timer_overflow(int count) -{ - int i; - - spin_lock(&i8259A_lock); - /* - * This is tricky when I/O APICs are used; - * see do_timer_interrupt(). - */ - i = inb(0x20); - spin_unlock(&i8259A_lock); - - /* assumption about timer being IRQ0 */ - if (i & 0x01) { - /* - * We cannot detect lost timer interrupts ... - * well, that's why we call them lost, don't we? :) - * [hmm, on the Pentium and Alpha we can ... 
sort of] - */ - count -= LATCH; - } else { - printk("do_slow_gettimeoffset(): hardware timer problem?\n"); - } - return count; -} diff --git a/include/asm-i386/mach-visws/mach_apic.h b/include/asm-i386/mach-visws/mach_apic.h index de438c7147a8..18afe6b6fc4d 100644 --- a/include/asm-i386/mach-visws/mach_apic.h +++ b/include/asm-i386/mach-visws/mach_apic.h @@ -51,6 +51,11 @@ static inline void clustered_apic_check(void) { } +static inline int apicid_to_node(int logical_apicid) +{ + return 0; +} + /* Mapping from cpu number to logical apicid */ static inline int cpu_to_logical_apicid(int cpu) { -- cgit v1.2.3 From 5fa3839a64203b2ab727dcb37da9b2d7079fca28 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell <sfr@canb.auug.org.au> Date: Sat, 28 Oct 2006 10:38:46 -0700 Subject: [PATCH] Constify compat_get_bitmap argument This means we can call it when the bitmap we want to fetch is declared const. Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Christoph Lameter <clameter@engr.sgi.com> Cc: Paul Jackson <pj@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/compat.h | 2 +- kernel/compat.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index f4ebf96f5308..f1553196826f 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -196,7 +196,7 @@ asmlinkage long compat_sys_select(int n, compat_ulong_t __user *inp, #define BITS_TO_COMPAT_LONGS(bits) \ (((bits)+BITS_PER_COMPAT_LONG-1)/BITS_PER_COMPAT_LONG) -long compat_get_bitmap(unsigned long *mask, compat_ulong_t __user *umask, +long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask, unsigned long bitmap_size); long compat_put_bitmap(compat_ulong_t __user *umask, unsigned long *mask, unsigned long bitmap_size); diff --git a/kernel/compat.c b/kernel/compat.c index 75573e5d27b0..d4898aad6cfa 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -678,7 +678,7 @@ int get_compat_sigevent(struct sigevent *event, ? -EFAULT : 0; } -long compat_get_bitmap(unsigned long *mask, compat_ulong_t __user *umask, +long compat_get_bitmap(unsigned long *mask, const compat_ulong_t __user *umask, unsigned long bitmap_size) { int i, j; -- cgit v1.2.3 From 093a8e8aecd77b2799934996a55a6838e1e2b8f3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov <oleg@tv-sign.ru> Date: Sat, 28 Oct 2006 10:38:51 -0700 Subject: [PATCH] taskstats_tgid_free: fix usage taskstats_tgid_free() is called on copy_process's error path. This is wrong. IF (clone_flags & CLONE_THREAD) We should not clear ->signal->taskstats, current uses it, it probably has a valid accumulated info. ELSE taskstats_tgid_init() set ->signal->taskstats = NULL, there is nothing to free. Move the callsite to __exit_signal(). We don't need any locking, entire thread group is exiting, nobody should have a reference to soon to be released ->signal. 
Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru> Cc: Shailabh Nagar <nagar@watson.ibm.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Jay Lan <jlan@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/taskstats_kern.h | 13 ++----------- kernel/exit.c | 1 + kernel/fork.c | 1 - 3 files changed, 3 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index 16894b7edcc8..a437ca0d226b 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -49,17 +49,8 @@ static inline void taskstats_tgid_alloc(struct signal_struct *sig) static inline void taskstats_tgid_free(struct signal_struct *sig) { - struct taskstats *stats = NULL; - unsigned long flags; - - spin_lock_irqsave(&sig->stats_lock, flags); - if (sig->stats) { - stats = sig->stats; - sig->stats = NULL; - } - spin_unlock_irqrestore(&sig->stats_lock, flags); - if (stats) - kmem_cache_free(taskstats_cache, stats); + if (sig->stats) + kmem_cache_free(taskstats_cache, sig->stats); } extern void taskstats_exit_alloc(struct taskstats **, unsigned int *); diff --git a/kernel/exit.c b/kernel/exit.c index f250a5e3e281..06de6c4e8ca3 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -128,6 +128,7 @@ static void __exit_signal(struct task_struct *tsk) flush_sigqueue(&tsk->pending); if (sig) { flush_sigqueue(&sig->shared_pending); + taskstats_tgid_free(sig); __cleanup_signal(sig); } } diff --git a/kernel/fork.c b/kernel/fork.c index 29ebb30850ed..213326609bac 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -897,7 +897,6 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts void __cleanup_signal(struct signal_struct *sig) { exit_thread_group_keys(sig); - taskstats_tgid_free(sig); kmem_cache_free(signal_cachep, sig); } -- cgit v1.2.3 From 17b02695b254aa2ef0e53df9c8e6548f86e66a9d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov <oleg@tv-sign.ru> Date: Sat, 28 Oct 2006 10:38:52 -0700 Subject: [PATCH] taskstats_tgid_alloc: optimization Every subthread (except first) does unneeded kmem_cache_alloc/kmem_cache_free. Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru> Cc: Shailabh Nagar <nagar@watson.ibm.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Jay Lan <jlan@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/taskstats_kern.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index a437ca0d226b..664224008fb2 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -32,6 +32,9 @@ static inline void taskstats_tgid_alloc(struct signal_struct *sig) struct taskstats *stats; unsigned long flags; + if (sig->stats != NULL) + return; + stats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL); if (!stats) return; -- cgit v1.2.3 From b8534d7bd89df0cd41cd47bcd6733a05ea9a691a Mon Sep 17 00:00:00 2001 From: Oleg Nesterov <oleg@tv-sign.ru> Date: Sat, 28 Oct 2006 10:38:53 -0700 Subject: [PATCH] taskstats: kill ->taskstats_lock in favor of ->siglock signal_struct is (mostly) protected by ->sighand->siglock, I think we don't need ->taskstats_lock to protect ->stats. This also allows us to simplify the locking in fill_tgid(). 
Signed-off-by: Oleg Nesterov <oleg@tv-sign.ru> Cc: Shailabh Nagar <nagar@watson.ibm.com> Cc: Balbir Singh <balbir@in.ibm.com> Cc: Jay Lan <jlan@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/sched.h | 1 - include/linux/taskstats_kern.h | 15 ++++++--------- kernel/fork.c | 2 +- kernel/taskstats.c | 16 ++++++---------- 4 files changed, 13 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 6735c1cf334c..eafe4a7b8237 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -466,7 +466,6 @@ struct signal_struct { struct pacct_struct pacct; /* per-process accounting information */ #endif #ifdef CONFIG_TASKSTATS - spinlock_t stats_lock; struct taskstats *stats; #endif }; diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index 664224008fb2..6562a2050a25 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -23,28 +23,26 @@ static inline void taskstats_exit_free(struct taskstats *tidstats) static inline void taskstats_tgid_init(struct signal_struct *sig) { - spin_lock_init(&sig->stats_lock); sig->stats = NULL; } -static inline void taskstats_tgid_alloc(struct signal_struct *sig) +static inline void taskstats_tgid_alloc(struct task_struct *tsk) { + struct signal_struct *sig = tsk->signal; struct taskstats *stats; - unsigned long flags; if (sig->stats != NULL) return; + /* No problem if kmem_cache_zalloc() fails */ stats = kmem_cache_zalloc(taskstats_cache, SLAB_KERNEL); - if (!stats) - return; - spin_lock_irqsave(&sig->stats_lock, flags); + spin_lock_irq(&tsk->sighand->siglock); if (!sig->stats) { sig->stats = stats; stats = NULL; } - spin_unlock_irqrestore(&sig->stats_lock, flags); + spin_unlock_irq(&tsk->sighand->siglock); if (stats) kmem_cache_free(taskstats_cache, stats); @@ -59,7 +57,6 @@ static inline void taskstats_tgid_free(struct signal_struct *sig) extern void taskstats_exit_alloc(struct taskstats **, unsigned int *); extern void taskstats_exit_send(struct task_struct *, struct taskstats *, int, unsigned int); extern void taskstats_init_early(void); -extern void taskstats_tgid_alloc(struct signal_struct *); #else static inline void taskstats_exit_alloc(struct taskstats **ptidstats, unsigned int *mycpu) {} @@ -71,7 +68,7 @@ static inline void taskstats_exit_send(struct task_struct *tsk, {} static inline void taskstats_tgid_init(struct signal_struct *sig) {} -static inline void taskstats_tgid_alloc(struct signal_struct *sig) +static inline void taskstats_tgid_alloc(struct task_struct *tsk) {} static inline void taskstats_tgid_free(struct signal_struct *sig) {} diff --git a/kernel/fork.c b/kernel/fork.c index 213326609bac..3da978eec791 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -830,7 +830,7 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts if (clone_flags & CLONE_THREAD) { atomic_inc(¤t->signal->count); atomic_inc(¤t->signal->live); - taskstats_tgid_alloc(current->signal); + taskstats_tgid_alloc(current); return 0; } sig = kmem_cache_alloc(signal_cachep, GFP_KERNEL); diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 9aeee511a463..b2efda94615a 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -241,11 +241,11 @@ static int fill_tgid(pid_t tgid, struct task_struct *tgidtsk, tsk = first; read_lock(&tasklist_lock); /* Start with stats from dead tasks */ - if (first->signal) { - spin_lock_irqsave(&first->signal->stats_lock, flags); + 
if (first->sighand) { + spin_lock_irqsave(&first->sighand->siglock, flags); if (first->signal->stats) memcpy(stats, first->signal->stats, sizeof(*stats)); - spin_unlock_irqrestore(&first->signal->stats_lock, flags); + spin_unlock_irqrestore(&first->sighand->siglock, flags); } do { @@ -276,7 +276,7 @@ static void fill_tgid_exit(struct task_struct *tsk) { unsigned long flags; - spin_lock_irqsave(&tsk->signal->stats_lock, flags); + spin_lock_irqsave(&tsk->sighand->siglock, flags); if (!tsk->signal->stats) goto ret; @@ -288,7 +288,7 @@ static void fill_tgid_exit(struct task_struct *tsk) */ delayacct_add_tsk(tsk->signal->stats, tsk); ret: - spin_unlock_irqrestore(&tsk->signal->stats_lock, flags); + spin_unlock_irqrestore(&tsk->sighand->siglock, flags); return; } @@ -464,15 +464,10 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, size_t size; int is_thread_group; struct nlattr *na; - unsigned long flags; if (!family_registered || !tidstats) return; - spin_lock_irqsave(&tsk->signal->stats_lock, flags); - is_thread_group = tsk->signal->stats ? 1 : 0; - spin_unlock_irqrestore(&tsk->signal->stats_lock, flags); - rc = 0; /* * Size includes space for nested attributes @@ -480,6 +475,7 @@ void taskstats_exit_send(struct task_struct *tsk, struct taskstats *tidstats, size = nla_total_size(sizeof(u32)) + nla_total_size(sizeof(struct taskstats)) + nla_total_size(0); + is_thread_group = (tsk->signal->stats != NULL); if (is_thread_group) size = 2 * size; /* PID + STATS + TGID + STATS */ -- cgit v1.2.3 From 7259f0d05d595b73ef312a082e628627c6414969 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra <a.p.zijlstra@chello.nl> Date: Sun, 29 Oct 2006 22:46:36 -0800 Subject: [PATCH] lockdep: annotate DECLARE_WAIT_QUEUE_HEAD kernel: INFO: trying to register non-static key. kernel: the code is fine but needs lockdep annotation. kernel: turning off the locking correctness validator. kernel: [<c04051ed>] show_trace_log_lvl+0x58/0x16a kernel: [<c04057fa>] show_trace+0xd/0x10 kernel: [<c0405913>] dump_stack+0x19/0x1b kernel: [<c043b1e2>] __lock_acquire+0xf0/0x90d kernel: [<c043bf70>] lock_acquire+0x4b/0x6b kernel: [<c061472f>] _spin_lock_irqsave+0x22/0x32 kernel: [<c04363d3>] prepare_to_wait+0x17/0x4b kernel: [<f89a24b6>] lpfc_do_work+0xdd/0xcc2 [lpfc] kernel: [<c04361b9>] kthread+0xc3/0xf2 kernel: [<c0402005>] kernel_thread_helper+0x5/0xb Another case of non-static lockdep keys; duplicate the paradigm set by DECLARE_COMPLETION_ONSTACK and introduce DECLARE_WAIT_QUEUE_HEAD_ONSTACK. 
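
Usage is mechanical, sketched here against an invented my_dev driver (the lpfc
hunk below is the real-world case): a wait queue head on the stack cannot use
the static lockdep key baked into DECLARE_WAIT_QUEUE_HEAD, so the _ONSTACK
variant sets up its key at runtime via init_waitqueue_head().

	static void wait_for_work(struct my_dev *dev)	/* my_dev is made up */
	{
		DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);	/* on stack: runtime key */
		DEFINE_WAIT(wait);

		dev->work_wait = &wq;	/* IRQ side does wake_up(dev->work_wait) */
		prepare_to_wait(&wq, &wait, TASK_INTERRUPTIBLE);
		if (!dev->work_done)
			schedule();
		finish_wait(&wq, &wait);
		dev->work_wait = NULL;
	}
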
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: Greg KH <gregkh@suse.de> Cc: Markus Lidel <markus.lidel@shadowconnect.com> Acked-by: Ingo Molnar <mingo@elte.hu> Cc: Arjan van de Ven <arjan@infradead.org> Cc: James Bottomley <James.Bottomley@steeleye.com> Cc: Marcel Holtmann <marcel@holtmann.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- drivers/bluetooth/bluecard_cs.c | 2 +- drivers/message/i2o/exec-osm.c | 2 +- drivers/scsi/dpt/dpti_i2o.h | 2 +- drivers/scsi/imm.c | 2 +- drivers/scsi/lpfc/lpfc_hbadisc.c | 2 +- drivers/scsi/lpfc/lpfc_sli.c | 4 ++-- drivers/scsi/ppa.c | 2 +- drivers/usb/net/usbnet.c | 2 +- include/linux/wait.h | 9 +++++++++ 9 files changed, 18 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/bluetooth/bluecard_cs.c b/drivers/bluetooth/bluecard_cs.c index 845b8680032a..cbc07250b898 100644 --- a/drivers/bluetooth/bluecard_cs.c +++ b/drivers/bluetooth/bluecard_cs.c @@ -282,7 +282,7 @@ static void bluecard_write_wakeup(bluecard_info_t *info) clear_bit(ready_bit, &(info->tx_state)); if (bt_cb(skb)->pkt_type & 0x80) { - DECLARE_WAIT_QUEUE_HEAD(wq); + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); DEFINE_WAIT(wait); unsigned char baud_reg; diff --git a/drivers/message/i2o/exec-osm.c b/drivers/message/i2o/exec-osm.c index 01a5a702b037..a2350640384b 100644 --- a/drivers/message/i2o/exec-osm.c +++ b/drivers/message/i2o/exec-osm.c @@ -124,7 +124,7 @@ static void i2o_exec_wait_free(struct i2o_exec_wait *wait) int i2o_msg_post_wait_mem(struct i2o_controller *c, struct i2o_message *msg, unsigned long timeout, struct i2o_dma *dma) { - DECLARE_WAIT_QUEUE_HEAD(wq); + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq); struct i2o_exec_wait *wait; static u32 tcntxt = 0x80000000; unsigned long flags; diff --git a/drivers/scsi/dpt/dpti_i2o.h b/drivers/scsi/dpt/dpti_i2o.h index b3fa7ed71faf..5a49216fe4cf 100644 --- a/drivers/scsi/dpt/dpti_i2o.h +++ b/drivers/scsi/dpt/dpti_i2o.h @@ -49,7 +49,7 @@ #include <linux/wait.h> typedef wait_queue_head_t adpt_wait_queue_head_t; -#define ADPT_DECLARE_WAIT_QUEUE_HEAD(wait) DECLARE_WAIT_QUEUE_HEAD(wait) +#define ADPT_DECLARE_WAIT_QUEUE_HEAD(wait) DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wait) typedef wait_queue_t adpt_wait_queue_t; /* diff --git a/drivers/scsi/imm.c b/drivers/scsi/imm.c index 2d95ac9c32c1..e31f6122106f 100644 --- a/drivers/scsi/imm.c +++ b/drivers/scsi/imm.c @@ -1153,7 +1153,7 @@ static int __imm_attach(struct parport *pb) { struct Scsi_Host *host; imm_struct *dev; - DECLARE_WAIT_QUEUE_HEAD(waiting); + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waiting); DEFINE_WAIT(wait); int ports; int modes, ppb; diff --git a/drivers/scsi/lpfc/lpfc_hbadisc.c b/drivers/scsi/lpfc/lpfc_hbadisc.c index d586c3d3b0d0..19c79a0549a7 100644 --- a/drivers/scsi/lpfc/lpfc_hbadisc.c +++ b/drivers/scsi/lpfc/lpfc_hbadisc.c @@ -305,7 +305,7 @@ lpfc_do_work(void *p) { struct lpfc_hba *phba = p; int rc; - DECLARE_WAIT_QUEUE_HEAD(work_waitq); + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(work_waitq); set_user_nice(current, -20); phba->work_wait = &work_waitq; diff --git a/drivers/scsi/lpfc/lpfc_sli.c b/drivers/scsi/lpfc/lpfc_sli.c index 24a1779b9af4..582f5ea4e84e 100644 --- a/drivers/scsi/lpfc/lpfc_sli.c +++ b/drivers/scsi/lpfc/lpfc_sli.c @@ -2983,7 +2983,7 @@ lpfc_sli_issue_iocb_wait(struct lpfc_hba * phba, struct lpfc_iocbq * prspiocbq, uint32_t timeout) { - DECLARE_WAIT_QUEUE_HEAD(done_q); + DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_q); long timeleft, timeout_req = 0; int retval = IOCB_SUCCESS; uint32_t creg_val; @@ -3061,7 +3061,7 @@ int 
lpfc_sli_issue_mbox_wait(struct lpfc_hba * phba, LPFC_MBOXQ_t * pmboxq,
 uint32_t timeout)
 {
- DECLARE_WAIT_QUEUE_HEAD(done_q);
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(done_q);
 DECLARE_WAITQUEUE(wq_entry, current);
 uint32_t timeleft = 0;
 int retval;
diff --git a/drivers/scsi/ppa.c b/drivers/scsi/ppa.c
index b0eba39f208a..89a2a9f11e41 100644
--- a/drivers/scsi/ppa.c
+++ b/drivers/scsi/ppa.c
@@ -1012,7 +1012,7 @@ static LIST_HEAD(ppa_hosts);
 static int __ppa_attach(struct parport *pb)
 {
 struct Scsi_Host *host;
- DECLARE_WAIT_QUEUE_HEAD(waiting);
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK(waiting);
 DEFINE_WAIT(wait);
 ppa_struct *dev;
 int ports;
diff --git a/drivers/usb/net/usbnet.c b/drivers/usb/net/usbnet.c
index cf3d20eb781c..40873635d80e 100644
--- a/drivers/usb/net/usbnet.c
+++ b/drivers/usb/net/usbnet.c
@@ -554,7 +554,7 @@ static int usbnet_stop (struct net_device *net)
 {
 struct usbnet *dev = netdev_priv(net);
 int temp;
- DECLARE_WAIT_QUEUE_HEAD (unlink_wakeup);
+ DECLARE_WAIT_QUEUE_HEAD_ONSTACK (unlink_wakeup);
 DECLARE_WAITQUEUE (wait, current);

 netif_stop_queue (net);
diff --git a/include/linux/wait.h b/include/linux/wait.h
index b3b9048421d8..e820d00e1383 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -79,6 +79,15 @@ struct task_struct;

 extern void init_waitqueue_head(wait_queue_head_t *q);

+#ifdef CONFIG_LOCKDEP
+# define __WAIT_QUEUE_HEAD_INIT_ONSTACK(name) \
+ ({ init_waitqueue_head(&name); name; })
+# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) \
+ wait_queue_head_t name = __WAIT_QUEUE_HEAD_INIT_ONSTACK(name)
+#else
+# define DECLARE_WAIT_QUEUE_HEAD_ONSTACK(name) DECLARE_WAIT_QUEUE_HEAD(name)
+#endif
+
 static inline void init_waitqueue_entry(wait_queue_t *q, struct task_struct *p)
 {
 q->flags = 0;
--
cgit v1.2.3


From 351edd240d0ba8620789ca9e24f5a38b62157f23 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <randy.dunlap@oracle.com>
Date: Sun, 29 Oct 2006 22:46:40 -0800
Subject: [PATCH] MTD: fix last kernel-doc warning

Fix the last current kernel-doc warning:
Warning(/var/linsrc/linux-2619-rc3g5//include/linux/mtd/nand.h:416): No description found for parameter 'write_page'

Signed-off-by: Randy Dunlap <randy.dunlap@oracle.com>
Cc: David Woodhouse <dwmw2@infradead.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 include/linux/mtd/nand.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h
index 70420bbae82b..8b3ef4187219 100644
--- a/include/linux/mtd/nand.h
+++ b/include/linux/mtd/nand.h
@@ -355,7 +355,7 @@ struct nand_buffers {
 * @priv: [OPTIONAL] pointer to private chip date
 * @errstat: [OPTIONAL] hardware specific function to perform additional error status checks
 * (determine if errors are correctable)
- * @write_page [REPLACEABLE] High-level page write function
+ * @write_page: [REPLACEABLE] High-level page write function
 */

 struct nand_chip {
--
cgit v1.2.3


From 991ea26dcbc2524a054f37911ea375e631cb8891 Mon Sep 17 00:00:00 2001
From: Ralf Baechle <ralf@linux-mips.org>
Date: Sun, 29 Oct 2006 21:07:40 +0000
Subject: [MIPS] Wire up getcpu(2) and epoll_pwait(2) syscalls.
Signed-off-by: Ralf Baechle <ralf@linux-mips.org> --- arch/mips/kernel/scall32-o32.S | 2 ++ arch/mips/kernel/scall64-64.S | 2 ++ arch/mips/kernel/scall64-n32.S | 2 ++ arch/mips/kernel/scall64-o32.S | 2 ++ include/asm-mips/unistd.h | 18 ++++++++++++------ 5 files changed, 20 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/arch/mips/kernel/scall32-o32.S b/arch/mips/kernel/scall32-o32.S index 720fac3435d5..a95f37de080e 100644 --- a/arch/mips/kernel/scall32-o32.S +++ b/arch/mips/kernel/scall32-o32.S @@ -654,6 +654,8 @@ einval: li v0, -EINVAL sys sys_set_robust_list 2 sys sys_get_robust_list 3 /* 4310 */ sys sys_ni_syscall 0 + sys sys_getcpu 3 + sys sys_epoll_pwait 6 .endm /* We pre-compute the number of _instruction_ bytes needed to diff --git a/arch/mips/kernel/scall64-64.S b/arch/mips/kernel/scall64-64.S index 3a34f62c8b1b..8fb0f60f657b 100644 --- a/arch/mips/kernel/scall64-64.S +++ b/arch/mips/kernel/scall64-64.S @@ -469,3 +469,5 @@ sys_call_table: PTR sys_set_robust_list PTR sys_get_robust_list PTR sys_ni_syscall /* 5270 */ + PTR sys_getcpu + PTR sys_epoll_pwait diff --git a/arch/mips/kernel/scall64-n32.S b/arch/mips/kernel/scall64-n32.S index 67b92a1d6c72..0da5ca2040ff 100644 --- a/arch/mips/kernel/scall64-n32.S +++ b/arch/mips/kernel/scall64-n32.S @@ -395,3 +395,5 @@ EXPORT(sysn32_call_table) PTR compat_sys_set_robust_list PTR compat_sys_get_robust_list PTR sys_ni_syscall + PTR sys_getcpu + PTR sys_epoll_pwait diff --git a/arch/mips/kernel/scall64-o32.S b/arch/mips/kernel/scall64-o32.S index 2875c4a3fa58..b9d00cae8b5f 100644 --- a/arch/mips/kernel/scall64-o32.S +++ b/arch/mips/kernel/scall64-o32.S @@ -517,4 +517,6 @@ sys_call_table: PTR compat_sys_set_robust_list PTR compat_sys_get_robust_list /* 4310 */ PTR sys_ni_syscall + PTR sys_getcpu + PTR sys_epoll_pwait .size sys_call_table,.-sys_call_table diff --git a/include/asm-mips/unistd.h b/include/asm-mips/unistd.h index 30240a445dbb..f1ef98cc8699 100644 --- a/include/asm-mips/unistd.h +++ b/include/asm-mips/unistd.h @@ -332,16 +332,18 @@ #define __NR_set_robust_list (__NR_Linux + 309) #define __NR_get_robust_list (__NR_Linux + 310) #define __NR_kexec_load (__NR_Linux + 311) +#define __NR_getcpu (__NR_Linux + 312) +#define __NR_epoll_pwait (__NR_Linux + 313) /* * Offset of the last Linux o32 flavoured syscall */ -#define __NR_Linux_syscalls 311 +#define __NR_Linux_syscalls 313 #endif /* _MIPS_SIM == _MIPS_SIM_ABI32 */ #define __NR_O32_Linux 4000 -#define __NR_O32_Linux_syscalls 311 +#define __NR_O32_Linux_syscalls 313 #if _MIPS_SIM == _MIPS_SIM_ABI64 @@ -620,16 +622,18 @@ #define __NR_set_robust_list (__NR_Linux + 268) #define __NR_get_robust_list (__NR_Linux + 269) #define __NR_kexec_load (__NR_Linux + 270) +#define __NR_getcpu (__NR_Linux + 271) +#define __NR_epoll_pwait (__NR_Linux + 272) /* * Offset of the last Linux 64-bit flavoured syscall */ -#define __NR_Linux_syscalls 270 +#define __NR_Linux_syscalls 272 #endif /* _MIPS_SIM == _MIPS_SIM_ABI64 */ #define __NR_64_Linux 5000 -#define __NR_64_Linux_syscalls 270 +#define __NR_64_Linux_syscalls 272 #if _MIPS_SIM == _MIPS_SIM_NABI32 @@ -912,16 +916,18 @@ #define __NR_set_robust_list (__NR_Linux + 272) #define __NR_get_robust_list (__NR_Linux + 273) #define __NR_kexec_load (__NR_Linux + 274) +#define __NR_getcpu (__NR_Linux + 275) +#define __NR_epoll_pwait (__NR_Linux + 276) /* * Offset of the last N32 flavoured syscall */ -#define __NR_Linux_syscalls 274 +#define __NR_Linux_syscalls 276 #endif /* _MIPS_SIM == _MIPS_SIM_NABI32 */ #define __NR_N32_Linux 6000 -#define 
__NR_N32_Linux_syscalls 274 +#define __NR_N32_Linux_syscalls 276 #ifdef __KERNEL__ -- cgit v1.2.3 From 21e9ac7b2dd96dfca997313bae6d9a8f642635c7 Mon Sep 17 00:00:00 2001 From: Ralf Baechle <ralf@linux-mips.org> Date: Mon, 30 Oct 2006 21:38:22 +0000 Subject: [MIPS] MIPS doesn't need compat_sys_getdents. Signed-off-by: Ralf Baechle <ralf@linux-mips.org> --- include/asm-mips/unistd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/asm-mips/unistd.h b/include/asm-mips/unistd.h index f1ef98cc8699..ec56aa52f669 100644 --- a/include/asm-mips/unistd.h +++ b/include/asm-mips/unistd.h @@ -1195,6 +1195,7 @@ type name (atype a,btype b,ctype c,dtype d,etype e,ftype f) \ #endif /* (_MIPS_SIM == _MIPS_SIM_NABI32) || (_MIPS_SIM == _MIPS_SIM_ABI64) */ +#define __ARCH_OMIT_COMPAT_SYS_GETDENTS64 #define __ARCH_WANT_IPC_PARSE_VERSION #define __ARCH_WANT_OLD_READDIR #define __ARCH_WANT_SYS_ALARM -- cgit v1.2.3 From 6887d83d6a537b5002edff7efa1a7c600af0ce26 Mon Sep 17 00:00:00 2001 From: Paul Mundt <lethal@linux-sh.org> Date: Tue, 31 Oct 2006 11:44:25 +0900 Subject: sh: Wire up new syscalls. This wires up sys_move_pages, sys_getcpu, and sys_epoll_pwait. Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- arch/sh/kernel/syscalls.S | 3 +++ include/asm-sh/unistd.h | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/sh/kernel/syscalls.S b/arch/sh/kernel/syscalls.S index 768334e95075..ca81976e9e34 100644 --- a/arch/sh/kernel/syscalls.S +++ b/arch/sh/kernel/syscalls.S @@ -351,3 +351,6 @@ ENTRY(sys_call_table) .long sys_sync_file_range .long sys_tee /* 315 */ .long sys_vmsplice + .long sys_move_pages + .long sys_getcpu + .long sys_epoll_pwait diff --git a/include/asm-sh/unistd.h b/include/asm-sh/unistd.h index f1a0cbc966be..1c2abde122cd 100644 --- a/include/asm-sh/unistd.h +++ b/include/asm-sh/unistd.h @@ -324,8 +324,11 @@ #define __NR_sync_file_range 314 #define __NR_tee 315 #define __NR_vmsplice 316 +#define __NR_move_pages 317 +#define __NR_getcpu 318 +#define __NR_epoll_pwait 319 -#define NR_syscalls 317 +#define NR_syscalls 320 #ifdef __KERNEL__ -- cgit v1.2.3 From bd71ab88deab3358241f22ed6c035c427aacc4e7 Mon Sep 17 00:00:00 2001 From: Jamie Lenehan <lenehan@twibble.org> Date: Tue, 31 Oct 2006 12:35:02 +0900 Subject: sh: Fix IPR-IRQ's for IRQ-chip change breakage. The conversion from IPR-IRQ to IRQ-chip resulted in the ipr data being allocated in a local variable in make_ipr_irq - breaking anything using IPR interrupts. This changes all of the callers of make_ipr_irq to allocate a static structure containing the IPR data which is then passed to make_ipr_irq. This removes the need for make_ipr_irq to allocate any additional space for the IPR information. 
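
The shape of the bug and of the fix, in a reduced sketch (the BOARD_* values
are invented; the per-board diffs below are the real conversions):
set_irq_chip_data() keeps the pointer it is handed, so the ipr_data must sit
in static storage, not on make_ipr_irq()'s stack.

	/* Before: chip data pointed at a local that died on return. */
	struct ipr_data ipr_data;		/* stack - dangling pointer! */
	set_irq_chip_data(irq, &ipr_data);

	/* After: every board registers a static table in one call. */
	static struct ipr_data board_ipr_map[] = {
		{ BOARD_IRQ, BOARD_IPR_ADDR, BOARD_IPR_POS, BOARD_PRIORITY },
	};
	make_ipr_irq(board_ipr_map, ARRAY_SIZE(board_ipr_map));
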
Signed-off-by: Jamie Lenehan <lenehan@twibble.org> Signed-off-by: Paul Mundt <lethal@linux-sh.org> --- arch/sh/boards/renesas/hs7751rvoip/setup.c | 12 ++-- arch/sh/boards/renesas/sh7710voipgw/setup.c | 105 ++++++++++++--------------- arch/sh/boards/se/7300/irq.c | 20 +++--- arch/sh/boards/se/73180/irq.c | 47 +++++++----- arch/sh/boards/se/7343/irq.c | 90 ++++++++++++----------- arch/sh/boards/se/770x/irq.c | 80 +++++++++++---------- arch/sh/boards/se/7751/irq.c | 85 +++++++++++----------- arch/sh/boards/sh03/setup.c | 13 ++-- arch/sh/boards/snapgear/setup.c | 12 ++-- arch/sh/boards/titan/setup.c | 12 ++-- arch/sh/drivers/dma/dma-sh.c | 42 ++++++----- arch/sh/kernel/cpu/irq/ipr.c | 106 +++++++++++++--------------- arch/sh/kernel/cpu/irq/pint.c | 8 ++- include/asm-sh/irq.h | 10 ++- 14 files changed, 339 insertions(+), 303 deletions(-) (limited to 'include') diff --git a/arch/sh/boards/renesas/hs7751rvoip/setup.c b/arch/sh/boards/renesas/hs7751rvoip/setup.c index 1d997ffd7931..f7d0e304d899 100644 --- a/arch/sh/boards/renesas/hs7751rvoip/setup.c +++ b/arch/sh/boards/renesas/hs7751rvoip/setup.c @@ -15,12 +15,16 @@ #include <asm/io.h> #include <asm/machvec.h> -static void __init hs7751rvoip_init_irq(void) -{ +static struct ipr_data hs77501rvoip_ipr_map[] = { #if defined(CONFIG_HS7751RVOIP_CODEC) - make_ipr_irq(DMTE0_IRQ, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY); - make_ipr_irq(DMTE1_IRQ, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY); + { DMTE0_IRQ, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY }, + { DMTE1_IRQ, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY }, #endif +}; + +static void __init hs7751rvoip_init_irq(void) +{ + make_ipr_irq(hs77501rvoip_ipr_map, ARRAY_SIZE(hs77501rvoip_ipr_map)); init_hs7751rvoip_IRQ(); } diff --git a/arch/sh/boards/renesas/sh7710voipgw/setup.c b/arch/sh/boards/renesas/sh7710voipgw/setup.c index e57e7afab8c6..180810b12107 100644 --- a/arch/sh/boards/renesas/sh7710voipgw/setup.c +++ b/arch/sh/boards/renesas/sh7710voipgw/setup.c @@ -13,6 +13,51 @@ #include <asm/io.h> #include <asm/irq.h> +static struct ipr_data sh7710voipgw_ipr_map[] = { + { TIMER2_IRQ, TIMER2_IPR_ADDR, TIMER2_IPR_POS, TIMER2_PRIORITY }, + { WDT_IRQ, WDT_IPR_ADDR, WDT_IPR_POS, WDT_PRIORITY }, + + /* SCIF0 */ + { SCIF0_ERI_IRQ, SCIF0_IPR_ADDR, SCIF0_IPR_POS, SCIF0_PRIORITY }, + { SCIF0_RXI_IRQ, SCIF0_IPR_ADDR, SCIF0_IPR_POS, SCIF0_PRIORITY }, + { SCIF0_BRI_IRQ, SCIF0_IPR_ADDR, SCIF0_IPR_POS, SCIF0_PRIORITY }, + { SCIF0_TXI_IRQ, SCIF0_IPR_ADDR, SCIF0_IPR_POS, SCIF0_PRIORITY }, + + /* DMAC-1 */ + { DMTE0_IRQ, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY }, + { DMTE1_IRQ, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY }, + { DMTE2_IRQ, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY }, + { DMTE3_IRQ, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY }, + + /* DMAC-2 */ + { DMTE4_IRQ, DMA2_IPR_ADDR, DMA2_IPR_POS, DMA2_PRIORITY }, + { DMTE4_IRQ, DMA2_IPR_ADDR, DMA2_IPR_POS, DMA2_PRIORITY }, + + /* IPSEC */ + { IPSEC_IRQ, IPSEC_IPR_ADDR, IPSEC_IPR_POS, IPSEC_PRIORITY }, + + /* EDMAC */ + { EDMAC0_IRQ, EDMAC0_IPR_ADDR, EDMAC0_IPR_POS, EDMAC0_PRIORITY }, + { EDMAC1_IRQ, EDMAC1_IPR_ADDR, EDMAC1_IPR_POS, EDMAC1_PRIORITY }, + { EDMAC2_IRQ, EDMAC2_IPR_ADDR, EDMAC2_IPR_POS, EDMAC2_PRIORITY }, + + /* SIOF0 */ + { SIOF0_ERI_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, SIOF0_PRIORITY }, + { SIOF0_TXI_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, SIOF0_PRIORITY }, + { SIOF0_RXI_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, SIOF0_PRIORITY }, + { SIOF0_CCI_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, SIOF0_PRIORITY }, + + /* SIOF1 */ + { SIOF1_ERI_IRQ, SIOF1_IPR_ADDR, SIOF1_IPR_POS, SIOF1_PRIORITY }, + { 
SIOF1_TXI_IRQ, SIOF1_IPR_ADDR, SIOF1_IPR_POS, SIOF1_PRIORITY }, + { SIOF1_RXI_IRQ, SIOF1_IPR_ADDR, SIOF1_IPR_POS, SIOF1_PRIORITY }, + { SIOF1_CCI_IRQ, SIOF1_IPR_ADDR, SIOF1_IPR_POS, SIOF1_PRIORITY }, + + /* SLIC IRQ's */ + { IRQ1_IRQ, IRQ1_IPR_ADDR, IRQ1_IPR_POS, IRQ1_PRIORITY }, + { IRQ2_IRQ, IRQ2_IPR_ADDR, IRQ2_IPR_POS, IRQ2_PRIORITY }, +}; + /* * Initialize IRQ setting */ @@ -37,65 +82,7 @@ static void __init sh7710voipgw_init_irq(void) */ ctrl_outw(0x2aa, INTC_ICR1); - /* Now make IPR interrupts */ - make_ipr_irq(TIMER2_IRQ, TIMER2_IPR_ADDR, - TIMER2_IPR_POS, TIMER2_PRIORITY); - make_ipr_irq(WDT_IRQ, WDT_IPR_ADDR, WDT_IPR_POS, WDT_PRIORITY); - - /* SCIF0 */ - make_ipr_irq(SCIF0_ERI_IRQ, SCIF0_IPR_ADDR, SCIF0_IPR_POS, - SCIF0_PRIORITY); - make_ipr_irq(SCIF0_RXI_IRQ, SCIF0_IPR_ADDR, SCIF0_IPR_POS, - SCIF0_PRIORITY); - make_ipr_irq(SCIF0_BRI_IRQ, SCIF0_IPR_ADDR, SCIF0_IPR_POS, - SCIF0_PRIORITY); - make_ipr_irq(SCIF0_TXI_IRQ, SCIF0_IPR_ADDR, SCIF0_IPR_POS, - SCIF0_PRIORITY); - - /* DMAC-1 */ - make_ipr_irq(DMTE0_IRQ, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY); - make_ipr_irq(DMTE1_IRQ, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY); - make_ipr_irq(DMTE2_IRQ, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY); - make_ipr_irq(DMTE3_IRQ, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY); - - /* DMAC-2 */ - make_ipr_irq(DMTE4_IRQ, DMA2_IPR_ADDR, DMA2_IPR_POS, DMA2_PRIORITY); - make_ipr_irq(DMTE4_IRQ, DMA2_IPR_ADDR, DMA2_IPR_POS, DMA2_PRIORITY); - - /* IPSEC */ - make_ipr_irq(IPSEC_IRQ, IPSEC_IPR_ADDR, IPSEC_IPR_POS, IPSEC_PRIORITY); - - /* EDMAC */ - make_ipr_irq(EDMAC0_IRQ, EDMAC0_IPR_ADDR, EDMAC0_IPR_POS, - EDMAC0_PRIORITY); - make_ipr_irq(EDMAC1_IRQ, EDMAC1_IPR_ADDR, EDMAC1_IPR_POS, - EDMAC1_PRIORITY); - make_ipr_irq(EDMAC2_IRQ, EDMAC2_IPR_ADDR, EDMAC2_IPR_POS, - EDMAC2_PRIORITY); - - /* SIOF0 */ - make_ipr_irq(SIOF0_ERI_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, - SIOF0_PRIORITY); - make_ipr_irq(SIOF0_TXI_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, - SIOF0_PRIORITY); - make_ipr_irq(SIOF0_RXI_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, - SIOF0_PRIORITY); - make_ipr_irq(SIOF0_CCI_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, - SIOF0_PRIORITY); - - /* SIOF1 */ - make_ipr_irq(SIOF1_ERI_IRQ, SIOF1_IPR_ADDR, SIOF1_IPR_POS, - SIOF1_PRIORITY); - make_ipr_irq(SIOF1_TXI_IRQ, SIOF1_IPR_ADDR, SIOF1_IPR_POS, - SIOF1_PRIORITY); - make_ipr_irq(SIOF1_RXI_IRQ, SIOF1_IPR_ADDR, SIOF1_IPR_POS, - SIOF1_PRIORITY); - make_ipr_irq(SIOF1_CCI_IRQ, SIOF1_IPR_ADDR, SIOF1_IPR_POS, - SIOF1_PRIORITY); - - /* SLIC IRQ's */ - make_ipr_irq(IRQ1_IRQ, IRQ1_IPR_ADDR, IRQ1_IPR_POS, IRQ1_PRIORITY); - make_ipr_irq(IRQ2_IRQ, IRQ2_IPR_ADDR, IRQ2_IPR_POS, IRQ2_PRIORITY); + make_ipr_irq(sh7710voipgw_ipr_map, ARRAY_SIZE(sh7710voipgw_ipr_map)); } /* diff --git a/arch/sh/boards/se/7300/irq.c b/arch/sh/boards/se/7300/irq.c index ad1034f98a29..1279d776d60f 100644 --- a/arch/sh/boards/se/7300/irq.c +++ b/arch/sh/boards/se/7300/irq.c @@ -13,6 +13,17 @@ #include <asm/io.h> #include <asm/se7300.h> +static struct ipr_data se7300_ipr_map[] = { + /* PC_IRQ[0-3] -> IRQ0 (32) */ + { IRQ0_IRQ, IRQ0_IPR_ADDR, IRQ0_IPR_POS, 0x0f - IRQ0_IRQ }, + /* A_IRQ[0-3] -> IRQ1 (33) */ + { IRQ1_IRQ, IRQ1_IPR_ADDR, IRQ1_IPR_POS, 0x0f - IRQ1_IRQ }, + { SIOF0_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, SIOF0_PRIORITY }, + { DMTE2_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY }, + { DMTE3_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY }, + { VIO_IRQ, VIO_IPR_ADDR, VIO_IPR_POS, VIO_PRIORITY }, +}; + /* * Initialize IRQ setting */ @@ -23,14 +34,7 @@ init_7300se_IRQ(void) ctrl_outw(0xa000, INTC_ICR1); /* IRQ mode; IRQ0,1 enable. 
*/ ctrl_outw(0x0000, PORT_PFCR); /* use F for IRQ[3:0] and SIU. */ - /* PC_IRQ[0-3] -> IRQ0 (32) */ - make_ipr_irq(IRQ0_IRQ, IRQ0_IPR_ADDR, IRQ0_IPR_POS, 0x0f - IRQ0_IRQ); - /* A_IRQ[0-3] -> IRQ1 (33) */ - make_ipr_irq(IRQ1_IRQ, IRQ1_IPR_ADDR, IRQ1_IPR_POS, 0x0f - IRQ1_IRQ); - make_ipr_irq(SIOF0_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, SIOF0_PRIORITY); - make_ipr_irq(DMTE2_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY); - make_ipr_irq(DMTE3_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY); - make_ipr_irq(VIO_IRQ, VIO_IPR_ADDR, VIO_IPR_POS, VIO_PRIORITY); + make_ipr_irq(se7300_ipr_map, ARRAY_SIZE(se7300_ipr_map)); ctrl_outw(0x2000, PA_MRSHPC + 0x0c); /* mrshpc irq enable */ } diff --git a/arch/sh/boards/se/73180/irq.c b/arch/sh/boards/se/73180/irq.c index 2c62b8ea350e..e7200c56bb45 100644 --- a/arch/sh/boards/se/73180/irq.c +++ b/arch/sh/boards/se/73180/irq.c @@ -87,13 +87,38 @@ shmse_irq_demux(int irq) return irq; } +static struct ipr_data se73180_siof0_ipr_map[] = { + { SIOF0_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, SIOF0_PRIORITY }, +}; +static struct ipr_data se73180_vpu_ipr_map[] = { + { VPU_IRQ, VPU_IPR_ADDR, VPU_IPR_POS, 8 }, +}; +static struct ipr_data se73180_other_ipr_map[] = { + { DMTE2_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY }, + { DMTE3_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY }, + { DMTE4_IRQ, DMA2_IPR_ADDR, DMA2_IPR_POS, DMA2_PRIORITY }, + { IIC0_ALI_IRQ, IIC0_IPR_ADDR, IIC0_IPR_POS, IIC0_PRIORITY }, + { IIC0_TACKI_IRQ, IIC0_IPR_ADDR, IIC0_IPR_POS, IIC0_PRIORITY }, + { IIC0_WAITI_IRQ, IIC0_IPR_ADDR, IIC0_IPR_POS, IIC0_PRIORITY }, + { IIC0_DTEI_IRQ, IIC0_IPR_ADDR, IIC0_IPR_POS, IIC0_PRIORITY }, + { SIOF0_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, SIOF0_PRIORITY }, + { SIU_IRQ, SIU_IPR_ADDR, SIU_IPR_POS, SIU_PRIORITY }, + + /* VIO interrupt */ + { CEU_IRQ, VIO_IPR_ADDR, VIO_IPR_POS, VIO_PRIORITY }, + { BEU_IRQ, VIO_IPR_ADDR, VIO_IPR_POS, VIO_PRIORITY }, + { VEU_IRQ, VIO_IPR_ADDR, VIO_IPR_POS, VIO_PRIORITY }, + + { LCDC_IRQ, LCDC_IPR_ADDR, LCDC_IPR_POS, LCDC_PRIORITY }, +}; + /* * Initialize IRQ setting */ void __init init_73180se_IRQ(void) { - make_ipr_irq(SIOF0_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, SIOF0_PRIORITY); + make_ipr_irq(se73180_siof0_ipr_map, ARRAY_SIZE(se73180_siof0_ipr_map)); ctrl_outw(0x2000, 0xb03fffec); /* mrshpc irq enable */ ctrl_outw(0x2000, 0xb07fffec); /* mrshpc irq enable */ @@ -101,27 +126,11 @@ init_73180se_IRQ(void) ctrl_outw(2 << ((7 - 5) * 2), INTC_ICR1); /* low-level irq */ make_intreq_irq(10); - make_ipr_irq(VPU_IRQ, VPU_IPR_ADDR, VPU_IPR_POS, 8); + make_ipr_irq(se73180_vpu_ipr_map, ARRAY_SIZE(se73180_vpu_ipr_map)); ctrl_outb(0x0f, INTC_IMCR5); /* enable SCIF IRQ */ - make_ipr_irq(DMTE2_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY); - make_ipr_irq(DMTE3_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY); - make_ipr_irq(DMTE4_IRQ, DMA2_IPR_ADDR, DMA2_IPR_POS, DMA2_PRIORITY); - make_ipr_irq(IIC0_ALI_IRQ, IIC0_IPR_ADDR, IIC0_IPR_POS, IIC0_PRIORITY); - make_ipr_irq(IIC0_TACKI_IRQ, IIC0_IPR_ADDR, IIC0_IPR_POS, - IIC0_PRIORITY); - make_ipr_irq(IIC0_WAITI_IRQ, IIC0_IPR_ADDR, IIC0_IPR_POS, - IIC0_PRIORITY); - make_ipr_irq(IIC0_DTEI_IRQ, IIC0_IPR_ADDR, IIC0_IPR_POS, IIC0_PRIORITY); - make_ipr_irq(SIOF0_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, SIOF0_PRIORITY); - make_ipr_irq(SIU_IRQ, SIU_IPR_ADDR, SIU_IPR_POS, SIU_PRIORITY); - - /* VIO interrupt */ - make_ipr_irq(CEU_IRQ, VIO_IPR_ADDR, VIO_IPR_POS, VIO_PRIORITY); - make_ipr_irq(BEU_IRQ, VIO_IPR_ADDR, VIO_IPR_POS, VIO_PRIORITY); - make_ipr_irq(VEU_IRQ, VIO_IPR_ADDR, VIO_IPR_POS, VIO_PRIORITY); + 
make_ipr_irq(se73180_other_ipr_map, ARRAY_SIZE(se73180_other_ipr_map)); - make_ipr_irq(LCDC_IRQ, LCDC_IPR_ADDR, LCDC_IPR_POS, LCDC_PRIORITY); ctrl_outw(0x2000, PA_MRSHPC + 0x0c); /* mrshpc irq enable */ } diff --git a/arch/sh/boards/se/7343/irq.c b/arch/sh/boards/se/7343/irq.c index 288b62f59419..360153ecc55b 100644 --- a/arch/sh/boards/se/7343/irq.c +++ b/arch/sh/boards/se/7343/irq.c @@ -102,6 +102,51 @@ shmse_irq_demux(int irq) static struct irqaction irq5 = { no_action, 0, CPU_MASK_NONE, "IRQ5-cascade", NULL, NULL}; +static struct ipr_data se7343_irq5_ipr_map[] = { + { IRQ5_IRQ, IRQ5_IPR_ADDR+2, IRQ5_IPR_POS, IRQ5_PRIORITY }, +}; +static struct ipr_data se7343_siof0_vpu_ipr_map[] = { + { SIOF0_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, SIOF0_PRIORITY }, + { VPU_IRQ, VPU_IPR_ADDR, VPU_IPR_POS, 8 }, +}; +static struct ipr_data se7343_other_ipr_map[] = { + { DMTE0_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY }, + { DMTE1_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY }, + { DMTE2_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY }, + { DMTE3_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY }, + { DMTE4_IRQ, DMA2_IPR_ADDR, DMA2_IPR_POS, DMA2_PRIORITY }, + { DMTE5_IRQ, DMA2_IPR_ADDR, DMA2_IPR_POS, DMA2_PRIORITY }, + + /* I2C block */ + { IIC0_ALI_IRQ, IIC0_IPR_ADDR, IIC0_IPR_POS, IIC0_PRIORITY }, + { IIC0_TACKI_IRQ, IIC0_IPR_ADDR, IIC0_IPR_POS, IIC0_PRIORITY }, + { IIC0_WAITI_IRQ, IIC0_IPR_ADDR, IIC0_IPR_POS, IIC0_PRIORITY }, + { IIC0_DTEI_IRQ, IIC0_IPR_ADDR, IIC0_IPR_POS, IIC0_PRIORITY }, + + { IIC1_ALI_IRQ, IIC1_IPR_ADDR, IIC1_IPR_POS, IIC1_PRIORITY }, + { IIC1_TACKI_IRQ, IIC1_IPR_ADDR, IIC1_IPR_POS, IIC1_PRIORITY }, + { IIC1_WAITI_IRQ, IIC1_IPR_ADDR, IIC1_IPR_POS, IIC1_PRIORITY }, + { IIC1_DTEI_IRQ, IIC1_IPR_ADDR, IIC1_IPR_POS, IIC1_PRIORITY }, + + /* SIOF */ + { SIOF0_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, SIOF0_PRIORITY }, + + /* SIU */ + { SIU_IRQ, SIU_IPR_ADDR, SIU_IPR_POS, SIU_PRIORITY }, + + /* VIO interrupt */ + { CEU_IRQ, VIO_IPR_ADDR, VIO_IPR_POS, VIO_PRIORITY }, + { BEU_IRQ, VIO_IPR_ADDR, VIO_IPR_POS, VIO_PRIORITY }, + { VEU_IRQ, VIO_IPR_ADDR, VIO_IPR_POS, VIO_PRIORITY }, + + /*MFI interrupt*/ + + { MFI_IRQ, MFI_IPR_ADDR, MFI_IPR_POS, MFI_PRIORITY }, + + /* LCD controller */ + { LCDC_IRQ, LCDC_IPR_ADDR, LCDC_IPR_POS, LCDC_PRIORITY }, +}; + /* * Initialize IRQ setting */ @@ -138,54 +183,17 @@ init_7343se_IRQ(void) /* Setup all external interrupts to be active low */ ctrl_outw(0xaaaa, INTC_ICR1); - make_ipr_irq(IRQ5_IRQ, IRQ5_IPR_ADDR+2, IRQ5_IPR_POS, IRQ5_PRIORITY); + make_ipr_irq(se7343_irq5_ipr_map, ARRAY_SIZE(se7343_irq5_ipr_map)); + setup_irq(IRQ5_IRQ, &irq5); /* Set port control to use IRQ5 */ *(u16 *)0xA4050108 &= ~0xc; - make_ipr_irq(SIOF0_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, SIOF0_PRIORITY); - make_ipr_irq(VPU_IRQ, VPU_IPR_ADDR, VPU_IPR_POS, 8); + make_ipr_irq(se7343_siof0_vpu_ipr_map, ARRAY_SIZE(se7343_siof0_vpu_ipr_map)); ctrl_outb(0x0f, INTC_IMCR5); /* enable SCIF IRQ */ - make_ipr_irq(DMTE0_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY); - make_ipr_irq(DMTE1_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY); - make_ipr_irq(DMTE2_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY); - make_ipr_irq(DMTE3_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY); - make_ipr_irq(DMTE4_IRQ, DMA2_IPR_ADDR, DMA2_IPR_POS, DMA2_PRIORITY); - make_ipr_irq(DMTE5_IRQ, DMA2_IPR_ADDR, DMA2_IPR_POS, DMA2_PRIORITY); - - /* I2C block */ - make_ipr_irq(IIC0_ALI_IRQ, IIC0_IPR_ADDR, IIC0_IPR_POS, IIC0_PRIORITY); - make_ipr_irq(IIC0_TACKI_IRQ, IIC0_IPR_ADDR, IIC0_IPR_POS, - IIC0_PRIORITY); - 
make_ipr_irq(IIC0_WAITI_IRQ, IIC0_IPR_ADDR, IIC0_IPR_POS, - IIC0_PRIORITY); - make_ipr_irq(IIC0_DTEI_IRQ, IIC0_IPR_ADDR, IIC0_IPR_POS, IIC0_PRIORITY); - - make_ipr_irq(IIC1_ALI_IRQ, IIC1_IPR_ADDR, IIC1_IPR_POS, IIC1_PRIORITY); - make_ipr_irq(IIC1_TACKI_IRQ, IIC1_IPR_ADDR, IIC1_IPR_POS, - IIC1_PRIORITY); - make_ipr_irq(IIC1_WAITI_IRQ, IIC1_IPR_ADDR, IIC1_IPR_POS, - IIC1_PRIORITY); - make_ipr_irq(IIC1_DTEI_IRQ, IIC1_IPR_ADDR, IIC1_IPR_POS, IIC1_PRIORITY); - - /* SIOF */ - make_ipr_irq(SIOF0_IRQ, SIOF0_IPR_ADDR, SIOF0_IPR_POS, SIOF0_PRIORITY); + make_ipr_irq(se7343_other_ipr_map, ARRAY_SIZE(se7343_other_ipr_map)); - /* SIU */ - make_ipr_irq(SIU_IRQ, SIU_IPR_ADDR, SIU_IPR_POS, SIU_PRIORITY); - - /* VIO interrupt */ - make_ipr_irq(CEU_IRQ, VIO_IPR_ADDR, VIO_IPR_POS, VIO_PRIORITY); - make_ipr_irq(BEU_IRQ, VIO_IPR_ADDR, VIO_IPR_POS, VIO_PRIORITY); - make_ipr_irq(VEU_IRQ, VIO_IPR_ADDR, VIO_IPR_POS, VIO_PRIORITY); - - /*MFI interrupt*/ - - make_ipr_irq(MFI_IRQ, MFI_IPR_ADDR, MFI_IPR_POS, MFI_PRIORITY); - - /* LCD controller */ - make_ipr_irq(LCDC_IRQ, LCDC_IPR_ADDR, LCDC_IPR_POS, LCDC_PRIORITY); ctrl_outw(0x2000, PA_MRSHPC + 0x0c); /* mrshpc irq enable */ } diff --git a/arch/sh/boards/se/770x/irq.c b/arch/sh/boards/se/770x/irq.c index cff6700bbafd..fcd7cd7fa05f 100644 --- a/arch/sh/boards/se/770x/irq.c +++ b/arch/sh/boards/se/770x/irq.c @@ -13,6 +13,48 @@ #include <asm/io.h> #include <asm/se.h> +static struct ipr_data se770x_ipr_map[] = { +#if defined(CONFIG_CPU_SUBTYPE_SH7705) + /* This is default value */ + { 0xf-0x2, BCR_ILCRA, 2, 0x2 }, + { 0xf-0xa, BCR_ILCRA, 1, 0xa }, + { 0xf-0x5, BCR_ILCRB, 0, 0x5 }, + { 0xf-0x8, BCR_ILCRC, 1, 0x8 }, + { 0xf-0xc, BCR_ILCRC, 0, 0xc }, + { 0xf-0xe, BCR_ILCRD, 3, 0xe }, + { 0xf-0x3, BCR_ILCRD, 1, 0x3 }, /* LAN */ + { 0xf-0xd, BCR_ILCRE, 2, 0xd }, + { 0xf-0x9, BCR_ILCRE, 1, 0x9 }, + { 0xf-0x1, BCR_ILCRE, 0, 0x1 }, + { 0xf-0xf, BCR_ILCRF, 3, 0xf }, + { 0xf-0xb, BCR_ILCRF, 1, 0xb }, + { 0xf-0x7, BCR_ILCRG, 3, 0x7 }, + { 0xf-0x6, BCR_ILCRG, 2, 0x6 }, + { 0xf-0x4, BCR_ILCRG, 1, 0x4 }, +#else + { 14, BCR_ILCRA, 2, 0x0f-14 }, + { 12, BCR_ILCRA, 1, 0x0f-12 }, + { 8, BCR_ILCRB, 1, 0x0f- 8 }, + { 6, BCR_ILCRC, 3, 0x0f- 6 }, + { 5, BCR_ILCRC, 2, 0x0f- 5 }, + { 4, BCR_ILCRC, 1, 0x0f- 4 }, + { 3, BCR_ILCRC, 0, 0x0f- 3 }, + { 1, BCR_ILCRD, 3, 0x0f- 1 }, + + { 10, BCR_ILCRD, 1, 0x0f-10 }, /* LAN */ + + { 0, BCR_ILCRE, 3, 0x0f- 0 }, /* PCIRQ3 */ + { 11, BCR_ILCRE, 2, 0x0f-11 }, /* PCIRQ2 */ + { 9, BCR_ILCRE, 1, 0x0f- 9 }, /* PCIRQ1 */ + { 7, BCR_ILCRE, 0, 0x0f- 7 }, /* PCIRQ0 */ + + /* #2, #13 are allocated for SLOT IRQ #1 and #2 (for now) */ + /* NOTE: #2 and #13 are not used on PC */ + { 13, BCR_ILCRG, 1, 0x0f-13 }, /* SLOTIRQ2 */ + { 2, BCR_ILCRG, 0, 0x0f- 2 }, /* SLOTIRQ1 */ +#endif +}; + /* * Initialize IRQ setting */ @@ -38,42 +80,6 @@ void __init init_se_IRQ(void) ctrl_outw(0, BCR_ILCRE); ctrl_outw(0, BCR_ILCRF); ctrl_outw(0, BCR_ILCRG); - /* This is default value */ - make_ipr_irq(0xf-0x2, BCR_ILCRA, 2, 0x2); - make_ipr_irq(0xf-0xa, BCR_ILCRA, 1, 0xa); - make_ipr_irq(0xf-0x5, BCR_ILCRB, 0, 0x5); - make_ipr_irq(0xf-0x8, BCR_ILCRC, 1, 0x8); - make_ipr_irq(0xf-0xc, BCR_ILCRC, 0, 0xc); - make_ipr_irq(0xf-0xe, BCR_ILCRD, 3, 0xe); - make_ipr_irq(0xf-0x3, BCR_ILCRD, 1, 0x3); /* LAN */ - make_ipr_irq(0xf-0xd, BCR_ILCRE, 2, 0xd); - make_ipr_irq(0xf-0x9, BCR_ILCRE, 1, 0x9); - make_ipr_irq(0xf-0x1, BCR_ILCRE, 0, 0x1); - make_ipr_irq(0xf-0xf, BCR_ILCRF, 3, 0xf); - make_ipr_irq(0xf-0xb, BCR_ILCRF, 1, 0xb); - make_ipr_irq(0xf-0x7, BCR_ILCRG, 3, 0x7); - make_ipr_irq(0xf-0x6, 
BCR_ILCRG, 2, 0x6); - make_ipr_irq(0xf-0x4, BCR_ILCRG, 1, 0x4); -#else - make_ipr_irq(14, BCR_ILCRA, 2, 0x0f-14); - make_ipr_irq(12, BCR_ILCRA, 1, 0x0f-12); - make_ipr_irq( 8, BCR_ILCRB, 1, 0x0f- 8); - make_ipr_irq( 6, BCR_ILCRC, 3, 0x0f- 6); - make_ipr_irq( 5, BCR_ILCRC, 2, 0x0f- 5); - make_ipr_irq( 4, BCR_ILCRC, 1, 0x0f- 4); - make_ipr_irq( 3, BCR_ILCRC, 0, 0x0f- 3); - make_ipr_irq( 1, BCR_ILCRD, 3, 0x0f- 1); - - make_ipr_irq(10, BCR_ILCRD, 1, 0x0f-10); /* LAN */ - - make_ipr_irq( 0, BCR_ILCRE, 3, 0x0f- 0); /* PCIRQ3 */ - make_ipr_irq(11, BCR_ILCRE, 2, 0x0f-11); /* PCIRQ2 */ - make_ipr_irq( 9, BCR_ILCRE, 1, 0x0f- 9); /* PCIRQ1 */ - make_ipr_irq( 7, BCR_ILCRE, 0, 0x0f- 7); /* PCIRQ0 */ - - /* #2, #13 are allocated for SLOT IRQ #1 and #2 (for now) */ - /* NOTE: #2 and #13 are not used on PC */ - make_ipr_irq(13, BCR_ILCRG, 1, 0x0f-13); /* SLOTIRQ2 */ - make_ipr_irq( 2, BCR_ILCRG, 0, 0x0f- 2); /* SLOTIRQ1 */ #endif + make_ipr_irq(se770x_ipr_map, ARRAY_SIZE(se770x_ipr_map)); } diff --git a/arch/sh/boards/se/7751/irq.c b/arch/sh/boards/se/7751/irq.c index c607b0a48479..e4c63a48296c 100644 --- a/arch/sh/boards/se/7751/irq.c +++ b/arch/sh/boards/se/7751/irq.c @@ -14,53 +14,50 @@ #include <asm/irq.h> #include <asm/se7751.h> -/* - * Initialize IRQ setting - */ -void __init init_7751se_IRQ(void) -{ - +static struct ipr_data se7751_ipr_map[] = { /* Leave old Solution Engine code in for reference. */ #if defined(CONFIG_SH_SOLUTION_ENGINE) - /* - * Super I/O (Just mimic PC): - * 1: keyboard - * 3: serial 0 - * 4: serial 1 - * 5: printer - * 6: floppy - * 8: rtc - * 12: mouse - * 14: ide0 - */ - make_ipr_irq(14, BCR_ILCRA, 2, 0x0f-14); - make_ipr_irq(12, BCR_ILCRA, 1, 0x0f-12); - make_ipr_irq( 8, BCR_ILCRB, 1, 0x0f- 8); - make_ipr_irq( 6, BCR_ILCRC, 3, 0x0f- 6); - make_ipr_irq( 5, BCR_ILCRC, 2, 0x0f- 5); - make_ipr_irq( 4, BCR_ILCRC, 1, 0x0f- 4); - make_ipr_irq( 3, BCR_ILCRC, 0, 0x0f- 3); - make_ipr_irq( 1, BCR_ILCRD, 3, 0x0f- 1); - - make_ipr_irq(10, BCR_ILCRD, 1, 0x0f-10); /* LAN */ - - make_ipr_irq( 0, BCR_ILCRE, 3, 0x0f- 0); /* PCIRQ3 */ - make_ipr_irq(11, BCR_ILCRE, 2, 0x0f-11); /* PCIRQ2 */ - make_ipr_irq( 9, BCR_ILCRE, 1, 0x0f- 9); /* PCIRQ1 */ - make_ipr_irq( 7, BCR_ILCRE, 0, 0x0f- 7); /* PCIRQ0 */ - - /* #2, #13 are allocated for SLOT IRQ #1 and #2 (for now) */ - /* NOTE: #2 and #13 are not used on PC */ - make_ipr_irq(13, BCR_ILCRG, 1, 0x0f-13); /* SLOTIRQ2 */ - make_ipr_irq( 2, BCR_ILCRG, 0, 0x0f- 2); /* SLOTIRQ1 */ - + /* + * Super I/O (Just mimic PC): + * 1: keyboard + * 3: serial 0 + * 4: serial 1 + * 5: printer + * 6: floppy + * 8: rtc + * 12: mouse + * 14: ide0 + */ + { 14, BCR_ILCRA, 2, 0x0f-14 }, + { 12, BCR_ILCRA, 1, 0x0f-12 }, + { 8, BCR_ILCRB, 1, 0x0f- 8 }, + { 6, BCR_ILCRC, 3, 0x0f- 6 }, + { 5, BCR_ILCRC, 2, 0x0f- 5 }, + { 4, BCR_ILCRC, 1, 0x0f- 4 }, + { 3, BCR_ILCRC, 0, 0x0f- 3 }, + { 1, BCR_ILCRD, 3, 0x0f- 1 }, + + { 10, BCR_ILCRD, 1, 0x0f-10 }, /* LAN */ + + { 0, BCR_ILCRE, 3, 0x0f- 0 }, /* PCIRQ3 */ + { 11, BCR_ILCRE, 2, 0x0f-11 }, /* PCIRQ2 */ + { 9, BCR_ILCRE, 1, 0x0f- 9 }, /* PCIRQ1 */ + { 7, BCR_ILCRE, 0, 0x0f- 7 }, /* PCIRQ0 */ + + /* #2, #13 are allocated for SLOT IRQ #1 and #2 (for now) */ + /* NOTE: #2 and #13 are not used on PC */ + { 13, BCR_ILCRG, 1, 0x0f-13 }, /* SLOTIRQ2 */ + { 2, BCR_ILCRG, 0, 0x0f- 2 }, /* SLOTIRQ1 */ #elif defined(CONFIG_SH_7751_SOLUTION_ENGINE) - - make_ipr_irq(13, BCR_ILCRD, 3, 2); - - /* Add additional calls to make_ipr_irq() as drivers are added - * and tested. 
- */
+ { 13, BCR_ILCRD, 3, 2 },
+ /* Add additional entries here as drivers are added and tested. */
 #endif
+};

+/*
+ * Initialize IRQ setting
+ */
+void __init init_7751se_IRQ(void)
+{
+ make_ipr_irq(se7751_ipr_map, ARRAY_SIZE(se7751_ipr_map));
 }
diff --git a/arch/sh/boards/sh03/setup.c b/arch/sh/boards/sh03/setup.c
index 137e2ba9243e..5ad1e19771be 100644
--- a/arch/sh/boards/sh03/setup.c
+++ b/arch/sh/boards/sh03/setup.c
@@ -14,14 +14,17 @@
 #include <asm/sh03/sh03.h>
 #include <asm/addrspace.h>

+static struct ipr_data sh03_ipr_map[] = {
+ { IRL0_IRQ, IRL0_IPR_ADDR, IRL0_IPR_POS, IRL0_PRIORITY },
+ { IRL1_IRQ, IRL1_IPR_ADDR, IRL1_IPR_POS, IRL1_PRIORITY },
+ { IRL2_IRQ, IRL2_IPR_ADDR, IRL2_IPR_POS, IRL2_PRIORITY },
+ { IRL3_IRQ, IRL3_IPR_ADDR, IRL3_IPR_POS, IRL3_PRIORITY },
+};
+
 static void __init init_sh03_IRQ(void)
 {
 ctrl_outw(ctrl_inw(INTC_ICR) | INTC_ICR_IRLM, INTC_ICR);
-
- make_ipr_irq(IRL0_IRQ, IRL0_IPR_ADDR, IRL0_IPR_POS, IRL0_PRIORITY);
- make_ipr_irq(IRL1_IRQ, IRL1_IPR_ADDR, IRL1_IPR_POS, IRL1_PRIORITY);
- make_ipr_irq(IRL2_IRQ, IRL2_IPR_ADDR, IRL2_IPR_POS, IRL2_PRIORITY);
- make_ipr_irq(IRL3_IRQ, IRL3_IPR_ADDR, IRL3_IPR_POS, IRL3_PRIORITY);
+ make_ipr_irq(sh03_ipr_map, ARRAY_SIZE(sh03_ipr_map));
 }

 extern void *cf_io_base;
diff --git a/arch/sh/boards/snapgear/setup.c b/arch/sh/boards/snapgear/setup.c
index 540d0bf16446..650fb3645947 100644
--- a/arch/sh/boards/snapgear/setup.c
+++ b/arch/sh/boards/snapgear/setup.c
@@ -68,6 +68,13 @@ module_init(eraseconfig_init);
 * IRL3 = crypto
 */

+static struct ipr_data snapgear_ipr_map[] = {
+ { IRL0_IRQ, IRL0_IPR_ADDR, IRL0_IPR_POS, IRL0_PRIORITY },
+ { IRL1_IRQ, IRL1_IPR_ADDR, IRL1_IPR_POS, IRL1_PRIORITY },
+ { IRL2_IRQ, IRL2_IPR_ADDR, IRL2_IPR_POS, IRL2_PRIORITY },
+ { IRL3_IRQ, IRL3_IPR_ADDR, IRL3_IPR_POS, IRL3_PRIORITY },
+};
+
 static void __init init_snapgear_IRQ(void)
 {
 /* enable individual interrupt mode for externals */
@@ -75,10 +82,7 @@ static void __init init_snapgear_IRQ(void)

 printk("Setup SnapGear IRQ/IPR ...\n");

- make_ipr_irq(IRL0_IRQ, IRL0_IPR_ADDR, IRL0_IPR_POS, IRL0_PRIORITY);
- make_ipr_irq(IRL1_IRQ, IRL1_IPR_ADDR, IRL1_IPR_POS, IRL1_PRIORITY);
- make_ipr_irq(IRL2_IRQ, IRL2_IPR_ADDR, IRL2_IPR_POS, IRL2_PRIORITY);
- make_ipr_irq(IRL3_IRQ, IRL3_IPR_ADDR, IRL3_IPR_POS, IRL3_PRIORITY);
+ make_ipr_irq(snapgear_ipr_map, ARRAY_SIZE(snapgear_ipr_map));
 }

 /*
diff --git a/arch/sh/boards/titan/setup.c b/arch/sh/boards/titan/setup.c
index 52b66d8b8d2a..a6046d93758b 100644
--- a/arch/sh/boards/titan/setup.c
+++ b/arch/sh/boards/titan/setup.c
@@ -9,15 +9,19 @@
 extern void __init pcibios_init_platform(void);

+static struct ipr_data titan_ipr_map[] = {
+ { TITAN_IRQ_WAN, IRL0_IPR_ADDR, IRL0_IPR_POS, IRL0_PRIORITY },
+ { TITAN_IRQ_LAN, IRL1_IPR_ADDR, IRL1_IPR_POS, IRL1_PRIORITY },
+ { TITAN_IRQ_MPCIA, IRL2_IPR_ADDR, IRL2_IPR_POS, IRL2_PRIORITY },
+ { TITAN_IRQ_USB, IRL3_IPR_ADDR, IRL3_IPR_POS, IRL3_PRIORITY },
+};
+
 static void __init init_titan_irq(void)
 {
 /* enable individual interrupt mode for externals */
 ctrl_outw(ctrl_inw(INTC_ICR) | INTC_ICR_IRLM, INTC_ICR);

- make_ipr_irq( TITAN_IRQ_WAN, IRL0_IPR_ADDR, IRL0_IPR_POS, IRL0_PRIORITY); /* PCIRQ0 */
- make_ipr_irq( TITAN_IRQ_LAN, IRL1_IPR_ADDR, IRL1_IPR_POS, IRL1_PRIORITY); /* PCIRQ1 */
- make_ipr_irq( TITAN_IRQ_MPCIA, IRL2_IPR_ADDR, IRL2_IPR_POS, IRL2_PRIORITY); /* PCIRQ2 */
- make_ipr_irq( TITAN_IRQ_USB, IRL3_IPR_ADDR, IRL3_IPR_POS, IRL3_PRIORITY); /* PCIRQ3 */
+ make_ipr_irq(titan_ipr_map, ARRAY_SIZE(titan_ipr_map));
 }

 struct sh_machine_vector
mv_titan __initmv = { diff --git a/arch/sh/drivers/dma/dma-sh.c b/arch/sh/drivers/dma/dma-sh.c index d8ece20bb2cf..660786013350 100644 --- a/arch/sh/drivers/dma/dma-sh.c +++ b/arch/sh/drivers/dma/dma-sh.c @@ -19,23 +19,34 @@ #include <asm/io.h> #include "dma-sh.h" -static inline unsigned int get_dmte_irq(unsigned int chan) -{ - unsigned int irq = 0; + +#ifdef CONFIG_CPU_SH4 +static struct ipr_data dmae_ipr_map[] = { + { DMAE_IRQ, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY }, +}; +#endif +static struct ipr_data dmte_ipr_map[] = { /* * Normally we could just do DMTE0_IRQ + chan outright, though in the * case of the 7751R, the DMTE IRQs for channels > 4 start right above * the SCIF */ - if (chan < 4) { - irq = DMTE0_IRQ + chan; - } else { -#ifdef DMTE4_IRQ - irq = DMTE4_IRQ + chan - 4; -#endif - } + { DMTE0_IRQ + 0, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY }, + { DMTE0_IRQ + 1, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY }, + { DMTE0_IRQ + 2, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY }, + { DMTE0_IRQ + 3, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY }, + { DMTE4_IRQ + 0, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY }, + { DMTE4_IRQ + 1, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY }, + { DMTE4_IRQ + 2, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY }, + { DMTE4_IRQ + 3, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY }, +}; +static inline unsigned int get_dmte_irq(unsigned int chan) +{ + unsigned int irq = 0; + if (chan < ARRAY_SIZE(dmte_ipr_map)) + irq = dmte_ipr_map[chan].irq; return irq; } @@ -258,17 +269,16 @@ static int __init sh_dmac_init(void) int i; #ifdef CONFIG_CPU_SH4 - make_ipr_irq(DMAE_IRQ, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY); + make_ipr_irq(dmae_ipr_map, ARRAY_SIZE(dmae_ipr_map)); i = request_irq(DMAE_IRQ, dma_err, IRQF_DISABLED, "DMAC Address Error", 0); if (unlikely(i < 0)) return i; #endif - for (i = 0; i < info->nr_channels; i++) { - int irq = get_dmte_irq(i); - - make_ipr_irq(irq, DMA_IPR_ADDR, DMA_IPR_POS, DMA_PRIORITY); - } + i = info->nr_channels; + if (i > ARRAY_SIZE(dmte_ipr_map)) + i = ARRAY_SIZE(dmte_ipr_map); + make_ipr_irq(dmte_ipr_map, i); /* * Initialize DMAOR, and clean up any error flags that may have diff --git a/arch/sh/kernel/cpu/irq/ipr.c b/arch/sh/kernel/cpu/irq/ipr.c index f7997312ef98..a0089563cbfc 100644 --- a/arch/sh/kernel/cpu/irq/ipr.c +++ b/arch/sh/kernel/cpu/irq/ipr.c @@ -23,24 +23,21 @@ #include <asm/io.h> #include <asm/machvec.h> -struct ipr_data { - unsigned int addr; /* Address of Interrupt Priority Register */ - int shift; /* Shifts of the 16-bit data */ - int priority; /* The priority */ -}; static void disable_ipr_irq(unsigned int irq) { struct ipr_data *p = get_irq_chip_data(irq); + int shift = p->shift*4; /* Set the priority in IPR to 0 */ - ctrl_outw(ctrl_inw(p->addr) & (0xffff ^ (0xf << p->shift)), p->addr); + ctrl_outw(ctrl_inw(p->addr) & (0xffff ^ (0xf << shift)), p->addr); } static void enable_ipr_irq(unsigned int irq) { struct ipr_data *p = get_irq_chip_data(irq); + int shift = p->shift*4; /* Set priority in IPR back to original value */ - ctrl_outw(ctrl_inw(p->addr) | (p->priority << p->shift), p->addr); + ctrl_outw(ctrl_inw(p->addr) | (p->priority << shift), p->addr); } static struct irq_chip ipr_irq_chip = { @@ -50,67 +47,57 @@ static struct irq_chip ipr_irq_chip = { .mask_ack = disable_ipr_irq, }; -void make_ipr_irq(unsigned int irq, unsigned int addr, int pos, int priority) +void make_ipr_irq(struct ipr_data *table, unsigned int nr_irqs) { - struct ipr_data ipr_data; - - disable_irq_nosync(irq); - - ipr_data.addr = addr; - ipr_data.shift = pos*4; /* POSition 
(0-3) x 4 means shift */ - ipr_data.priority = priority; + int i; - set_irq_chip_and_handler_name(irq, &ipr_irq_chip, + for (i = 0; i < nr_irqs; i++) { + unsigned int irq = table[i].irq; + disable_irq_nosync(irq); + set_irq_chip_and_handler_name(irq, &ipr_irq_chip, handle_level_irq, "level"); - set_irq_chip_data(irq, &ipr_data); - - enable_ipr_irq(irq); + set_irq_chip_data(irq, &table[i]); + enable_ipr_irq(irq); + } } +EXPORT_SYMBOL(make_ipr_irq); -/* XXX: This needs to die a horrible death.. */ -void __init init_IRQ(void) -{ +static struct ipr_data sys_ipr_map[] = { #ifndef CONFIG_CPU_SUBTYPE_SH7780 - make_ipr_irq(TIMER_IRQ, TIMER_IPR_ADDR, TIMER_IPR_POS, TIMER_PRIORITY); - make_ipr_irq(TIMER1_IRQ, TIMER1_IPR_ADDR, TIMER1_IPR_POS, TIMER1_PRIORITY); + { TIMER_IRQ, TIMER_IPR_ADDR, TIMER_IPR_POS, TIMER_PRIORITY }, + { TIMER1_IRQ, TIMER1_IPR_ADDR, TIMER1_IPR_POS, TIMER1_PRIORITY }, #ifdef RTC_IRQ - make_ipr_irq(RTC_IRQ, RTC_IPR_ADDR, RTC_IPR_POS, RTC_PRIORITY); + { RTC_IRQ, RTC_IPR_ADDR, RTC_IPR_POS, RTC_PRIORITY }, #endif - #ifdef SCI_ERI_IRQ - make_ipr_irq(SCI_ERI_IRQ, SCI_IPR_ADDR, SCI_IPR_POS, SCI_PRIORITY); - make_ipr_irq(SCI_RXI_IRQ, SCI_IPR_ADDR, SCI_IPR_POS, SCI_PRIORITY); - make_ipr_irq(SCI_TXI_IRQ, SCI_IPR_ADDR, SCI_IPR_POS, SCI_PRIORITY); + { SCI_ERI_IRQ, SCI_IPR_ADDR, SCI_IPR_POS, SCI_PRIORITY }, + { SCI_RXI_IRQ, SCI_IPR_ADDR, SCI_IPR_POS, SCI_PRIORITY }, + { SCI_TXI_IRQ, SCI_IPR_ADDR, SCI_IPR_POS, SCI_PRIORITY }, #endif - #ifdef SCIF1_ERI_IRQ - make_ipr_irq(SCIF1_ERI_IRQ, SCIF1_IPR_ADDR, SCIF1_IPR_POS, SCIF1_PRIORITY); - make_ipr_irq(SCIF1_RXI_IRQ, SCIF1_IPR_ADDR, SCIF1_IPR_POS, SCIF1_PRIORITY); - make_ipr_irq(SCIF1_BRI_IRQ, SCIF1_IPR_ADDR, SCIF1_IPR_POS, SCIF1_PRIORITY); - make_ipr_irq(SCIF1_TXI_IRQ, SCIF1_IPR_ADDR, SCIF1_IPR_POS, SCIF1_PRIORITY); + { SCIF1_ERI_IRQ, SCIF1_IPR_ADDR, SCIF1_IPR_POS, SCIF1_PRIORITY }, + { SCIF1_RXI_IRQ, SCIF1_IPR_ADDR, SCIF1_IPR_POS, SCIF1_PRIORITY }, + { SCIF1_BRI_IRQ, SCIF1_IPR_ADDR, SCIF1_IPR_POS, SCIF1_PRIORITY }, + { SCIF1_TXI_IRQ, SCIF1_IPR_ADDR, SCIF1_IPR_POS, SCIF1_PRIORITY }, #endif - #if defined(CONFIG_CPU_SUBTYPE_SH7300) - make_ipr_irq(SCIF0_IRQ, SCIF0_IPR_ADDR, SCIF0_IPR_POS, SCIF0_PRIORITY); - make_ipr_irq(DMTE2_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY); - make_ipr_irq(DMTE3_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY); - make_ipr_irq(VIO_IRQ, VIO_IPR_ADDR, VIO_IPR_POS, VIO_PRIORITY); + { SCIF0_IRQ, SCIF0_IPR_ADDR, SCIF0_IPR_POS, SCIF0_PRIORITY }, + { DMTE2_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY }, + { DMTE3_IRQ, DMA1_IPR_ADDR, DMA1_IPR_POS, DMA1_PRIORITY }, + { VIO_IRQ, VIO_IPR_ADDR, VIO_IPR_POS, VIO_PRIORITY }, #endif - #ifdef SCIF_ERI_IRQ - make_ipr_irq(SCIF_ERI_IRQ, SCIF_IPR_ADDR, SCIF_IPR_POS, SCIF_PRIORITY); - make_ipr_irq(SCIF_RXI_IRQ, SCIF_IPR_ADDR, SCIF_IPR_POS, SCIF_PRIORITY); - make_ipr_irq(SCIF_BRI_IRQ, SCIF_IPR_ADDR, SCIF_IPR_POS, SCIF_PRIORITY); - make_ipr_irq(SCIF_TXI_IRQ, SCIF_IPR_ADDR, SCIF_IPR_POS, SCIF_PRIORITY); + { SCIF_ERI_IRQ, SCIF_IPR_ADDR, SCIF_IPR_POS, SCIF_PRIORITY }, + { SCIF_RXI_IRQ, SCIF_IPR_ADDR, SCIF_IPR_POS, SCIF_PRIORITY }, + { SCIF_BRI_IRQ, SCIF_IPR_ADDR, SCIF_IPR_POS, SCIF_PRIORITY }, + { SCIF_TXI_IRQ, SCIF_IPR_ADDR, SCIF_IPR_POS, SCIF_PRIORITY }, #endif - #ifdef IRDA_ERI_IRQ - make_ipr_irq(IRDA_ERI_IRQ, IRDA_IPR_ADDR, IRDA_IPR_POS, IRDA_PRIORITY); - make_ipr_irq(IRDA_RXI_IRQ, IRDA_IPR_ADDR, IRDA_IPR_POS, IRDA_PRIORITY); - make_ipr_irq(IRDA_BRI_IRQ, IRDA_IPR_ADDR, IRDA_IPR_POS, IRDA_PRIORITY); - make_ipr_irq(IRDA_TXI_IRQ, IRDA_IPR_ADDR, IRDA_IPR_POS, IRDA_PRIORITY); + { 
IRDA_ERI_IRQ, IRDA_IPR_ADDR, IRDA_IPR_POS, IRDA_PRIORITY },
+ { IRDA_RXI_IRQ, IRDA_IPR_ADDR, IRDA_IPR_POS, IRDA_PRIORITY },
+ { IRDA_BRI_IRQ, IRDA_IPR_ADDR, IRDA_IPR_POS, IRDA_PRIORITY },
+ { IRDA_TXI_IRQ, IRDA_IPR_ADDR, IRDA_IPR_POS, IRDA_PRIORITY },
 #endif
-
 #if defined(CONFIG_CPU_SUBTYPE_SH7707) || defined(CONFIG_CPU_SUBTYPE_SH7709) || \
 defined(CONFIG_CPU_SUBTYPE_SH7706) || \
 defined(CONFIG_CPU_SUBTYPE_SH7300) || defined(CONFIG_CPU_SUBTYPE_SH7705)
@@ -124,14 +111,19 @@ void __init init_IRQ(void)
 * You should set corresponding bits of PFC to "00"
 * to enable these interrupts.
 */
- make_ipr_irq(IRQ0_IRQ, IRQ0_IPR_ADDR, IRQ0_IPR_POS, IRQ0_PRIORITY);
- make_ipr_irq(IRQ1_IRQ, IRQ1_IPR_ADDR, IRQ1_IPR_POS, IRQ1_PRIORITY);
- make_ipr_irq(IRQ2_IRQ, IRQ2_IPR_ADDR, IRQ2_IPR_POS, IRQ2_PRIORITY);
- make_ipr_irq(IRQ3_IRQ, IRQ3_IPR_ADDR, IRQ3_IPR_POS, IRQ3_PRIORITY);
- make_ipr_irq(IRQ4_IRQ, IRQ4_IPR_ADDR, IRQ4_IPR_POS, IRQ4_PRIORITY);
- make_ipr_irq(IRQ5_IRQ, IRQ5_IPR_ADDR, IRQ5_IPR_POS, IRQ5_PRIORITY);
+ { IRQ0_IRQ, IRQ0_IPR_ADDR, IRQ0_IPR_POS, IRQ0_PRIORITY },
+ { IRQ1_IRQ, IRQ1_IPR_ADDR, IRQ1_IPR_POS, IRQ1_PRIORITY },
+ { IRQ2_IRQ, IRQ2_IPR_ADDR, IRQ2_IPR_POS, IRQ2_PRIORITY },
+ { IRQ3_IRQ, IRQ3_IPR_ADDR, IRQ3_IPR_POS, IRQ3_PRIORITY },
+ { IRQ4_IRQ, IRQ4_IPR_ADDR, IRQ4_IPR_POS, IRQ4_PRIORITY },
+ { IRQ5_IRQ, IRQ5_IPR_ADDR, IRQ5_IPR_POS, IRQ5_PRIORITY },
 #endif
 #endif
+};
+
+void __init init_IRQ(void)
+{
+ make_ipr_irq(sys_ipr_map, ARRAY_SIZE(sys_ipr_map));

 #ifdef CONFIG_CPU_HAS_PINT_IRQ
 init_IRQ_pint();
@@ -153,5 +145,3 @@ int ipr_irq_demux(int irq)
 return irq;
 }
 #endif
-
-EXPORT_SYMBOL(make_ipr_irq);
diff --git a/arch/sh/kernel/cpu/irq/pint.c b/arch/sh/kernel/cpu/irq/pint.c
index 17f47b373d6e..f60007783a21 100644
--- a/arch/sh/kernel/cpu/irq/pint.c
+++ b/arch/sh/kernel/cpu/irq/pint.c
@@ -84,12 +84,16 @@ void make_pint_irq(unsigned int irq)
 disable_pint_irq(irq);
 }

+static struct ipr_data pint_ipr_map[] = {
+ { PINT0_IRQ, PINT0_IPR_ADDR, PINT0_IPR_POS, PINT0_PRIORITY },
+ { PINT8_IRQ, PINT8_IPR_ADDR, PINT8_IPR_POS, PINT8_PRIORITY },
+};
+
 void __init init_IRQ_pint(void)
 {
 int i;

- make_ipr_irq(PINT0_IRQ, PINT0_IPR_ADDR, PINT0_IPR_POS, PINT0_PRIORITY);
- make_ipr_irq(PINT8_IRQ, PINT8_IPR_ADDR, PINT8_IPR_POS, PINT8_PRIORITY);
+ make_ipr_irq(pint_ipr_map, ARRAY_SIZE(pint_ipr_map));

 enable_irq(PINT0_IRQ);
 enable_irq(PINT8_IRQ);
diff --git a/include/asm-sh/irq.h b/include/asm-sh/irq.h
index 7596ab83e0d4..6cd3e9e2a76a 100644
--- a/include/asm-sh/irq.h
+++ b/include/asm-sh/irq.h
@@ -327,11 +327,17 @@ extern unsigned short *irq_mask_register;
 */
 void init_IRQ_pint(void);

+struct ipr_data {
+ unsigned int irq;
+ unsigned int addr; /* Address of Interrupt Priority Register */
+ int shift; /* Shifts of the 16-bit data */
+ int priority; /* The priority */
+};
+
 /*
 * Function for "on chip support modules".
 */
-extern void make_ipr_irq(unsigned int irq, unsigned int addr,
- int pos, int priority);
+extern void make_ipr_irq(struct ipr_data *table, unsigned int nr_irqs);
 extern void make_imask_irq(unsigned int irq);

 #if defined(CONFIG_CPU_SUBTYPE_SH7300)
--
cgit v1.2.3


From 0b26c88f29ad8bcf91a2ea8f25a36f2028ebabea Mon Sep 17 00:00:00 2001
From: Jack Morgenstein <jackm@dev.mellanox.co.il>
Date: Wed, 25 Oct 2006 12:54:20 +0200
Subject: IB/uverbs: Return sq_draining value in query_qp response

Return the sq_draining value back to user space for query_qp instead
of the en_sqd_async notify value, which is valid only for modify_qp.
For query_qp, the draining status should be returned.
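
Seen from user space (a hypothetical libibverbs fragment, error handling
elided), the draining status then comes back as:

	struct ibv_qp_attr attr;
	struct ibv_qp_init_attr init_attr;

	if (!ibv_query_qp(qp, &attr, IBV_QP_STATE, &init_attr) &&
	    attr.qp_state == IBV_QPS_SQD)
		printf("SQ still draining: %s\n", attr.sq_draining ? "yes" : "no");
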
Signed-off-by: Jack Morgenstein <jackm@mellanox.co.il> Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/core/uverbs_cmd.c | 2 +- include/rdma/ib_user_verbs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/uverbs_cmd.c b/drivers/infiniband/core/uverbs_cmd.c index b72c7f69ca90..743247ec065e 100644 --- a/drivers/infiniband/core/uverbs_cmd.c +++ b/drivers/infiniband/core/uverbs_cmd.c @@ -1214,7 +1214,7 @@ ssize_t ib_uverbs_query_qp(struct ib_uverbs_file *file, resp.qp_access_flags = attr->qp_access_flags; resp.pkey_index = attr->pkey_index; resp.alt_pkey_index = attr->alt_pkey_index; - resp.en_sqd_async_notify = attr->en_sqd_async_notify; + resp.sq_draining = attr->sq_draining; resp.max_rd_atomic = attr->max_rd_atomic; resp.max_dest_rd_atomic = attr->max_dest_rd_atomic; resp.min_rnr_timer = attr->min_rnr_timer; diff --git a/include/rdma/ib_user_verbs.h b/include/rdma/ib_user_verbs.h index db1b814b62cc..64a721fcbc1c 100644 --- a/include/rdma/ib_user_verbs.h +++ b/include/rdma/ib_user_verbs.h @@ -458,7 +458,7 @@ struct ib_uverbs_query_qp_resp { __u8 cur_qp_state; __u8 path_mtu; __u8 path_mig_state; - __u8 en_sqd_async_notify; + __u8 sq_draining; __u8 max_rd_atomic; __u8 max_dest_rd_atomic; __u8 min_rnr_timer; -- cgit v1.2.3 From 525fdb6cc929b515ad7e0be40fd023cff8660ed8 Mon Sep 17 00:00:00 2001 From: Jeff Dike <jdike@addtoit.com> Date: Mon, 30 Oct 2006 22:07:14 -0800 Subject: [PATCH] uml: add INITCALLS This is the UML piece of the INITCALLS tidying. Signed-off-by: Jeff Dike <jdike@addtoit.com> Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/asm-um/common.lds.S | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include') diff --git a/include/asm-um/common.lds.S b/include/asm-um/common.lds.S index 1010153faaf9..f0454516dd31 100644 --- a/include/asm-um/common.lds.S +++ b/include/asm-um/common.lds.S @@ -42,13 +42,7 @@ __initcall_start = .; .initcall.init : { - *(.initcall1.init) - *(.initcall2.init) - *(.initcall3.init) - *(.initcall4.init) - *(.initcall5.init) - *(.initcall6.init) - *(.initcall7.init) + INITCALLS } __initcall_end = .; -- cgit v1.2.3 From 70e46f48cb5933119712ee27945309a4bfc98282 Mon Sep 17 00:00:00 2001 From: Ralf Baechle <ralf@linux-mips.org> Date: Tue, 31 Oct 2006 18:33:09 +0000 Subject: [MIPS] VSMP: Synchronize cp0 counters on bootup. 
Signed-off-by: Ralf Baechle <ralf@linux-mips.org> --- arch/mips/kernel/smp-mt.c | 2 ++ arch/mips/mips-boards/generic/time.c | 5 +++-- include/asm-mips/mipsmtregs.h | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/mips/kernel/smp-mt.c b/arch/mips/kernel/smp-mt.c index 06b29fa73f56..2ac19a6cbf68 100644 --- a/arch/mips/kernel/smp-mt.c +++ b/arch/mips/kernel/smp-mt.c @@ -153,6 +153,8 @@ static void __init smp_copy_vpe_config(void) /* Propagate Config7 */ write_vpe_c0_config7(read_c0_config7()); + + write_vpe_c0_count(read_c0_count()); } static unsigned int __init smp_vpe_init(unsigned int tc, unsigned int mvpconf0, diff --git a/arch/mips/mips-boards/generic/time.c b/arch/mips/mips-boards/generic/time.c index 659706705005..d817c60c5ca5 100644 --- a/arch/mips/mips-boards/generic/time.c +++ b/arch/mips/mips-boards/generic/time.c @@ -209,6 +209,7 @@ static unsigned int __init estimate_cpu_frequency(void) #endif #if defined(CONFIG_MIPS_ATLAS) || defined(CONFIG_MIPS_MALTA) unsigned long flags; + unsigned int start; local_irq_save(flags); @@ -217,13 +218,13 @@ static unsigned int __init estimate_cpu_frequency(void) while (!(CMOS_READ(RTC_REG_A) & RTC_UIP)); /* Start r4k counter. */ - write_c0_count(0); + start = read_c0_count(); /* Read counter exactly on falling edge of update flag */ while (CMOS_READ(RTC_REG_A) & RTC_UIP); while (!(CMOS_READ(RTC_REG_A) & RTC_UIP)); - count = read_c0_count(); + count = read_c0_count() - start; /* restore interrupts */ local_irq_restore(flags); diff --git a/include/asm-mips/mipsmtregs.h b/include/asm-mips/mipsmtregs.h index f637ce70758f..3e9468f424f4 100644 --- a/include/asm-mips/mipsmtregs.h +++ b/include/asm-mips/mipsmtregs.h @@ -352,6 +352,8 @@ do { \ #define write_vpe_c0_vpecontrol(val) mttc0(1, 1, val) #define read_vpe_c0_vpeconf0() mftc0(1, 2) #define write_vpe_c0_vpeconf0(val) mttc0(1, 2, val) +#define read_vpe_c0_count() mftc0(9, 0) +#define write_vpe_c0_count(val) mttc0(9, 0, val) #define read_vpe_c0_status() mftc0(12, 0) #define write_vpe_c0_status(val) mttc0(12, 0, val) #define read_vpe_c0_cause() mftc0(13, 0) -- cgit v1.2.3 From 16b7b2ac0148e839da86af8747b6fa4aad43a9b7 Mon Sep 17 00:00:00 2001 From: Atsushi Nemoto <anemo@mba.ocn.ne.jp> Date: Tue, 24 Oct 2006 00:21:27 +0900 Subject: [MIPS] Fixup migration to GENERIC_TIME Since we already moved to GENERIC_TIME, we should implement alternatives to the old do_gettimeoffset routines to get sub-jiffy resolution from gettimeofday(). This patch includes: * MIPS clocksource support (based on work by Manish Lachwani). * remove unused gettimeoffset routines and related code. * remove unused 64-bit do_div64_32(). * simplify mips_hpt_init. (no argument needed, __init tag) * simplify c0_hpt_timer_init. (no need to write to c0_count) * remove some hpt_init routines. * mips_hpt_mask variable to specify bitmask of hpt value. * convert jmr3927_do_gettimeoffset to jmr3927_hpt_read. * convert ip27_do_gettimeoffset to ip27_hpt_read. * convert bcm1480_do_gettimeoffset to bcm1480_hpt_read. * simplify sb1250 hpt functions.
(no need to subtract and shift) Signed-off-by: Atsushi Nemoto <anemo@mba.ocn.ne.jp> Signed-off-by: Ralf Baechle <ralf@linux-mips.org> --- Documentation/mips/time.README | 39 +--- arch/mips/au1000/common/time.c | 98 ---------- arch/mips/dec/time.c | 9 +- arch/mips/jmr3927/rbhma3100/setup.c | 40 +--- arch/mips/kernel/time.c | 319 ++++++-------------------------- arch/mips/philips/pnx8550/common/time.c | 4 +- arch/mips/pmc-sierra/yosemite/smp.c | 6 +- arch/mips/sgi-ip27/ip27-timer.c | 16 +- arch/mips/sibyte/bcm1480/time.c | 33 ++-- arch/mips/sibyte/sb1250/time.c | 28 +-- include/asm-mips/div64.h | 21 --- include/asm-mips/sibyte/sb1250.h | 2 +- include/asm-mips/time.h | 10 +- 13 files changed, 103 insertions(+), 522 deletions(-) (limited to 'include') diff --git a/Documentation/mips/time.README b/Documentation/mips/time.README index e1304b6bc483..a4ce603ed3b3 100644 --- a/Documentation/mips/time.README +++ b/Documentation/mips/time.README @@ -38,19 +38,14 @@ The new time code provide the following services: a) Implements functions required by Linux common code: time_init - do_gettimeofday - do_settimeofday b) provides an abstraction of RTC and null RTC implementation as default. extern unsigned long (*rtc_get_time)(void); extern int (*rtc_set_time)(unsigned long); - c) a set of gettimeoffset functions for different CPUs and different - needs. - - d) high-level and low-level timer interrupt routines where the timer - interrupt source may or may not be the CPU timer. The high-level - routine is dispatched through do_IRQ() while the low-level is + c) high-level and low-level timer interrupt routines where the timer + interrupt source may or may not be the CPU timer. The high-level + routine is dispatched through do_IRQ() while the low-level is dispatched in assemably code (usually int-handler.S) @@ -73,8 +68,7 @@ the following functions or values: c) (optional) board-specific RTC routines. d) (optional) mips_hpt_frequency - It must be definied if the board - is using CPU counter for timer interrupt or it is using fixed rate - gettimeoffset(). + is using CPU counter for timer interrupt. PORTING GUIDE @@ -89,16 +83,6 @@ Step 1: decide how you like to implement the time services. If the answer is no, you need a timer to provide the timer interrupt at 100 HZ speed. - You cannot use the fast gettimeoffset functions, i.e., - - unsigned long fixed_rate_gettimeoffset(void); - unsigned long calibrate_div32_gettimeoffset(void); - unsigned long calibrate_div64_gettimeoffset(void); - - You can use null_gettimeoffset() will gives the same time resolution as - jiffy. Or you can implement your own gettimeoffset (probably based on - some ad hoc hardware on your machine.) - c) The following sub steps assume your CPU has counter register. Do you plan to use the CPU counter register as the timer interrupt or use an exnternal timer? @@ -123,8 +107,8 @@ Step 3: implement rtc routines, board_time_init() and plat_timer_setup() board_time_init() - a) (optional) set up RTC routines, b) (optional) calibrate and set the mips_hpt_frequency - (only needed if you intended to use fixed_rate_gettimeoffset - or use cpu counter as timer interrupt source) + (only needed if you intended to use cpu counter as timer interrupt + source) plat_timer_setup() - a) (optional) over-write any choices made above by time_init(). @@ -154,8 +138,8 @@ for some of the functions in time.c. For example, you may define your own timer interrupt routine, which does some of its own processing and then calls timer_interrupt(). 
-You can also over-ride any of the built-in functions (gettimeoffset, -RTC routines and/or timer interrupt routine). +You can also over-ride any of the built-in functions (RTC routines +and/or timer interrupt routine). PORTING NOTES FOR SMP @@ -187,10 +171,3 @@ You need to decide on your timer interrupt sources. You can also do the low-level version of those interrupt routines, following similar dispatching routes described above. - -Note about do_gettimeoffset(): - - It is very likely the CPU counter registers are not sync'ed up in a SMP box. - Therefore you cannot really use the many of the existing routines that - are based on CPU counter. You should wirte your own gettimeoffset rouinte - if you want intra-jiffy resolution. diff --git a/arch/mips/au1000/common/time.c b/arch/mips/au1000/common/time.c index 6768638883ea..fa1c62f05515 100644 --- a/arch/mips/au1000/common/time.c +++ b/arch/mips/au1000/common/time.c @@ -53,9 +53,6 @@ static unsigned long r4k_cur; /* What counter should be at next timer irq */ int no_au1xxx_32khz; extern int allow_au1k_wait; /* default off for CP0 Counter */ -/* Cycle counter value at the previous timer interrupt.. */ -static unsigned int timerhi = 0, timerlo = 0; - #ifdef CONFIG_PM #if HZ < 100 || HZ > 1000 #error "unsupported HZ value! Must be in [100,1000]" @@ -90,10 +87,6 @@ void mips_timer_interrupt(void) goto null; do { - count = read_c0_count(); - timerhi += (count < timerlo); /* Wrap around */ - timerlo = count; - kstat_this_cpu.irqs[irq]++; do_timer(1); #ifndef CONFIG_SMP @@ -297,88 +290,6 @@ unsigned long cal_r4koff(void) return (cpu_speed / HZ); } -/* This is for machines which generate the exact clock. */ -#define USECS_PER_JIFFY (1000000/HZ) -#define USECS_PER_JIFFY_FRAC (0x100000000LL*1000000/HZ&0xffffffff) - -static unsigned long -div64_32(unsigned long v1, unsigned long v2, unsigned long v3) -{ - unsigned long r0; - do_div64_32(r0, v1, v2, v3); - return r0; -} - -static unsigned long do_fast_cp0_gettimeoffset(void) -{ - u32 count; - unsigned long res, tmp; - unsigned long r0; - - /* Last jiffy when do_fast_gettimeoffset() was called. */ - static unsigned long last_jiffies=0; - unsigned long quotient; - - /* - * Cached "1/(clocks per usec)*2^32" value. - * It has to be recalculated once each jiffy. - */ - static unsigned long cached_quotient=0; - - tmp = jiffies; - - quotient = cached_quotient; - - if (tmp && last_jiffies != tmp) { - last_jiffies = tmp; - if (last_jiffies != 0) { - r0 = div64_32(timerhi, timerlo, tmp); - quotient = div64_32(USECS_PER_JIFFY, USECS_PER_JIFFY_FRAC, r0); - cached_quotient = quotient; - } - } - - /* Get last timer tick in absolute kernel time */ - count = read_c0_count(); - - /* .. relative to previous jiffy (32 bits is enough) */ - count -= timerlo; - - __asm__("multu\t%1,%2\n\t" - "mfhi\t%0" - : "=r" (res) - : "r" (count), "r" (quotient) - : "hi", "lo", GCC_REG_ACCUM); - - /* - * Due to possible jiffies inconsistencies, we need to check - * the result so that we'll get a timer that is monotonic. 
- */ - if (res >= USECS_PER_JIFFY) - res = USECS_PER_JIFFY-1; - - return res; -} - -#ifdef CONFIG_PM -static unsigned long do_fast_pm_gettimeoffset(void) -{ - unsigned long pc0; - unsigned long offset; - - pc0 = au_readl(SYS_TOYREAD); - au_sync(); - offset = pc0 - last_pc0; - if (offset > 2*MATCH20_INC) { - printk("huge offset %x, last_pc0 %x last_match20 %x pc0 %x\n", - (unsigned)offset, (unsigned)last_pc0, - (unsigned)last_match20, (unsigned)pc0); - } - offset = (unsigned long)((offset * 305) / 10); - return offset; -} -#endif - void __init plat_timer_setup(struct irqaction *irq) { unsigned int est_freq; @@ -416,7 +327,6 @@ void __init plat_timer_setup(struct irqaction *irq) unsigned int c0_status; printk("WARNING: no 32KHz clock found.\n"); - do_gettimeoffset = do_fast_cp0_gettimeoffset; /* Ensure we get CPO_COUNTER interrupts. */ @@ -441,19 +351,11 @@ void __init plat_timer_setup(struct irqaction *irq) while (au_readl(SYS_COUNTER_CNTRL) & SYS_CNTRL_M20); startup_match20_interrupt(counter0_irq); - do_gettimeoffset = do_fast_pm_gettimeoffset; - /* We can use the real 'wait' instruction. */ allow_au1k_wait = 1; } -#else - /* We have to do this here instead of in timer_init because - * the generic code in arch/mips/kernel/time.c will write - * over our function pointer. - */ - do_gettimeoffset = do_fast_cp0_gettimeoffset; #endif } diff --git a/arch/mips/dec/time.c b/arch/mips/dec/time.c index 4cf0c06e2414..69e424e9ab6f 100644 --- a/arch/mips/dec/time.c +++ b/arch/mips/dec/time.c @@ -160,11 +160,6 @@ static unsigned int dec_ioasic_hpt_read(void) return ioasic_read(IO_REG_FCTR); } -static void dec_ioasic_hpt_init(unsigned int count) -{ - ioasic_write(IO_REG_FCTR, ioasic_read(IO_REG_FCTR) - count); -} - void __init dec_time_init(void) { @@ -174,11 +169,9 @@ void __init dec_time_init(void) mips_timer_state = dec_timer_state; mips_timer_ack = dec_timer_ack; - if (!cpu_has_counter && IOASIC) { + if (!cpu_has_counter && IOASIC) /* For pre-R4k systems we use the I/O ASIC's counter. */ mips_hpt_read = dec_ioasic_hpt_read; - mips_hpt_init = dec_ioasic_hpt_init; - } /* Set up the rate of periodic DS1287 interrupts. */ CMOS_WRITE(RTC_REF_CLCK_32KHZ | (16 - __ffs(HZ)), RTC_REG_A); diff --git a/arch/mips/jmr3927/rbhma3100/setup.c b/arch/mips/jmr3927/rbhma3100/setup.c index 025434054ed0..16e5dfe7aa8a 100644 --- a/arch/mips/jmr3927/rbhma3100/setup.c +++ b/arch/mips/jmr3927/rbhma3100/setup.c @@ -170,12 +170,20 @@ static void jmr3927_machine_power_off(void) while (1); } +static unsigned int jmr3927_hpt_read(void) +{ + /* We assume this function is called xtime_lock held. 
*/ + return jiffies * (JMR3927_TIMER_CLK / HZ) + jmr3927_tmrptr->trr; +} + #define USE_RTC_DS1742 #ifdef USE_RTC_DS1742 extern void rtc_ds1742_init(unsigned long base); #endif static void __init jmr3927_time_init(void) { + mips_hpt_read = jmr3927_hpt_read; + mips_hpt_frequency = JMR3927_TIMER_CLK; #ifdef USE_RTC_DS1742 if (jmr3927_have_nvram()) { rtc_ds1742_init(JMR3927_IOC_NVRAMB_ADDR); @@ -183,12 +191,8 @@ static void __init jmr3927_time_init(void) #endif } -unsigned long jmr3927_do_gettimeoffset(void); - void __init plat_timer_setup(struct irqaction *irq) { - do_gettimeoffset = jmr3927_do_gettimeoffset; - jmr3927_tmrptr->cpra = JMR3927_TIMER_CLK / HZ; jmr3927_tmrptr->itmr = TXx927_TMTITMR_TIIE | TXx927_TMTITMR_TZCE; jmr3927_tmrptr->ccdr = JMR3927_TIMER_CCD; @@ -200,34 +204,6 @@ void __init plat_timer_setup(struct irqaction *irq) #define USECS_PER_JIFFY (1000000/HZ) -unsigned long jmr3927_do_gettimeoffset(void) -{ - unsigned long count; - unsigned long res = 0; - - /* MUST read TRR before TISR. */ - count = jmr3927_tmrptr->trr; - - if (jmr3927_tmrptr->tisr & TXx927_TMTISR_TIIS) { - /* timer interrupt is pending. use Max value. */ - res = USECS_PER_JIFFY - 1; - } else { - /* convert to usec */ - /* res = count / (JMR3927_TIMER_CLK / 1000000); */ - res = (count << 7) / ((JMR3927_TIMER_CLK << 7) / 1000000); - - /* - * Due to possible jiffies inconsistencies, we need to check - * the result so that we'll get a timer that is monotonic. - */ - if (res >= USECS_PER_JIFFY) - res = USECS_PER_JIFFY-1; - } - - return res; -} - - //#undef DO_WRITE_THROUGH #define DO_WRITE_THROUGH #define DO_ENABLE_CACHE diff --git a/arch/mips/kernel/time.c b/arch/mips/kernel/time.c index debe86c2f691..e535f86efa2f 100644 --- a/arch/mips/kernel/time.c +++ b/arch/mips/kernel/time.c @@ -11,6 +11,7 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. */ +#include <linux/clocksource.h> #include <linux/types.h> #include <linux/kernel.h> #include <linux/init.h> @@ -67,15 +68,9 @@ int (*rtc_mips_set_time)(unsigned long) = null_rtc_set_time; int (*rtc_mips_set_mmss)(unsigned long); -/* usecs per counter cycle, shifted to left by 32 bits */ -static unsigned int sll32_usecs_per_cycle; - /* how many counter cycles in a jiffy */ static unsigned long cycles_per_jiffy __read_mostly; -/* Cycle counter value at the previous timer interrupt.. */ -static unsigned int timerhi, timerlo; - /* expirelo is the count value for next CPU timer interrupt */ static unsigned int expirelo; @@ -93,7 +88,7 @@ static unsigned int null_hpt_read(void) return 0; } -static void null_hpt_init(unsigned int count) +static void __init null_hpt_init(void) { /* nothing */ } @@ -128,186 +123,18 @@ static unsigned int c0_hpt_read(void) return read_c0_count(); } -/* For use solely as a high precision timer. */ -static void c0_hpt_init(unsigned int count) -{ - write_c0_count(read_c0_count() - count); -} - /* For use both as a high precision timer and an interrupt source. */ -static void c0_hpt_timer_init(unsigned int count) +static void __init c0_hpt_timer_init(void) { - count = read_c0_count() - count; - expirelo = (count / cycles_per_jiffy + 1) * cycles_per_jiffy; - write_c0_count(expirelo - cycles_per_jiffy); + expirelo = read_c0_count() + cycles_per_jiffy; write_c0_compare(expirelo); - write_c0_count(count); } int (*mips_timer_state)(void); void (*mips_timer_ack)(void); unsigned int (*mips_hpt_read)(void); -void (*mips_hpt_init)(unsigned int); - -/* - * Gettimeoffset routines. 
These routines returns the time duration - * since last timer interrupt in usecs. - * - * If the exact CPU counter frequency is known, use fixed_rate_gettimeoffset. - * Otherwise use calibrate_gettimeoffset() - * - * If the CPU does not have the counter register, you can either supply - * your own gettimeoffset() routine, or use null_gettimeoffset(), which - * gives the same resolution as HZ. - */ - -static unsigned long null_gettimeoffset(void) -{ - return 0; -} - - -/* The function pointer to one of the gettimeoffset funcs. */ -unsigned long (*do_gettimeoffset)(void) = null_gettimeoffset; - - -static unsigned long fixed_rate_gettimeoffset(void) -{ - u32 count; - unsigned long res; - - /* Get last timer tick in absolute kernel time */ - count = mips_hpt_read(); - - /* .. relative to previous jiffy (32 bits is enough) */ - count -= timerlo; - - __asm__("multu %1,%2" - : "=h" (res) - : "r" (count), "r" (sll32_usecs_per_cycle) - : "lo", GCC_REG_ACCUM); - - /* - * Due to possible jiffies inconsistencies, we need to check - * the result so that we'll get a timer that is monotonic. - */ - if (res >= USECS_PER_JIFFY) - res = USECS_PER_JIFFY - 1; - - return res; -} - - -/* - * Cached "1/(clocks per usec) * 2^32" value. - * It has to be recalculated once each jiffy. - */ -static unsigned long cached_quotient; - -/* Last jiffy when calibrate_divXX_gettimeoffset() was called. */ -static unsigned long last_jiffies; - -/* - * This is moved from dec/time.c:do_ioasic_gettimeoffset() by Maciej. - */ -static unsigned long calibrate_div32_gettimeoffset(void) -{ - u32 count; - unsigned long res, tmp; - unsigned long quotient; - - tmp = jiffies; - - quotient = cached_quotient; - - if (last_jiffies != tmp) { - last_jiffies = tmp; - if (last_jiffies != 0) { - unsigned long r0; - do_div64_32(r0, timerhi, timerlo, tmp); - do_div64_32(quotient, USECS_PER_JIFFY, - USECS_PER_JIFFY_FRAC, r0); - cached_quotient = quotient; - } - } - - /* Get last timer tick in absolute kernel time */ - count = mips_hpt_read(); - - /* .. relative to previous jiffy (32 bits is enough) */ - count -= timerlo; - - __asm__("multu %1,%2" - : "=h" (res) - : "r" (count), "r" (quotient) - : "lo", GCC_REG_ACCUM); - - /* - * Due to possible jiffies inconsistencies, we need to check - * the result so that we'll get a timer that is monotonic. - */ - if (res >= USECS_PER_JIFFY) - res = USECS_PER_JIFFY - 1; - - return res; -} - -static unsigned long calibrate_div64_gettimeoffset(void) -{ - u32 count; - unsigned long res, tmp; - unsigned long quotient; - - tmp = jiffies; - - quotient = cached_quotient; - - if (last_jiffies != tmp) { - last_jiffies = tmp; - if (last_jiffies) { - unsigned long r0; - __asm__(".set push\n\t" - ".set mips3\n\t" - "lwu %0,%3\n\t" - "dsll32 %1,%2,0\n\t" - "or %1,%1,%0\n\t" - "ddivu $0,%1,%4\n\t" - "mflo %1\n\t" - "dsll32 %0,%5,0\n\t" - "or %0,%0,%6\n\t" - "ddivu $0,%0,%1\n\t" - "mflo %0\n\t" - ".set pop" - : "=&r" (quotient), "=&r" (r0) - : "r" (timerhi), "m" (timerlo), - "r" (tmp), "r" (USECS_PER_JIFFY), - "r" (USECS_PER_JIFFY_FRAC) - : "hi", "lo", GCC_REG_ACCUM); - cached_quotient = quotient; - } - } - - /* Get last timer tick in absolute kernel time */ - count = mips_hpt_read(); - - /* .. relative to previous jiffy (32 bits is enough) */ - count -= timerlo; - - __asm__("multu %1,%2" - : "=h" (res) - : "r" (count), "r" (quotient) - : "lo", GCC_REG_ACCUM); - - /* - * Due to possible jiffies inconsistencies, we need to check - * the result so that we'll get a timer that is monotonic. 
- */ - if (res >= USECS_PER_JIFFY) - res = USECS_PER_JIFFY - 1; - - return res; -} - +void (*mips_hpt_init)(void) __initdata = null_hpt_init; +unsigned int mips_hpt_mask = 0xffffffff; /* last time when xtime and rtc are sync'ed up */ static long last_rtc_update; @@ -334,18 +161,10 @@ void local_timer_interrupt(int irq, void *dev_id) */ irqreturn_t timer_interrupt(int irq, void *dev_id) { - unsigned long j; - unsigned int count; - write_seqlock(&xtime_lock); - count = mips_hpt_read(); mips_timer_ack(); - /* Update timerhi/timerlo for intra-jiffy calibration. */ - timerhi += count < timerlo; /* Wrap around */ - timerlo = count; - /* * call the generic timer interrupt handling */ @@ -368,47 +187,6 @@ irqreturn_t timer_interrupt(int irq, void *dev_id) } } - /* - * If jiffies has overflown in this timer_interrupt, we must - * update the timer[hi]/[lo] to make fast gettimeoffset funcs - * quotient calc still valid. -arca - * - * The first timer interrupt comes late as interrupts are - * enabled long after timers are initialized. Therefore the - * high precision timer is fast, leading to wrong gettimeoffset() - * calculations. We deal with it by setting it based on the - * number of its ticks between the second and the third interrupt. - * That is still somewhat imprecise, but it's a good estimate. - * --macro - */ - j = jiffies; - if (j < 4) { - static unsigned int prev_count; - static int hpt_initialized; - - switch (j) { - case 0: - timerhi = timerlo = 0; - mips_hpt_init(count); - break; - case 2: - prev_count = count; - break; - case 3: - if (!hpt_initialized) { - unsigned int c3 = 3 * (count - prev_count); - - timerhi = 0; - timerlo = c3; - mips_hpt_init(count - c3); - hpt_initialized = 1; - } - break; - default: - break; - } - } - write_sequnlock(&xtime_lock); /* @@ -476,12 +254,11 @@ asmlinkage void ll_local_timer_interrupt(int irq) * 1) board_time_init() - * a) (optional) set up RTC routines, * b) (optional) calibrate and set the mips_hpt_frequency - * (only needed if you intended to use fixed_rate_gettimeoffset - * or use cpu counter as timer interrupt source) + * (only needed if you intended to use cpu counter as timer interrupt + * source) * 2) setup xtime based on rtc_mips_get_time(). - * 3) choose a appropriate gettimeoffset routine. - * 4) calculate a couple of cached variables for later usage - * 5) plat_timer_setup() - + * 3) calculate a couple of cached variables for later usage + * 4) plat_timer_setup() - * a) (optional) over-write any choices made above by time_init(). * b) machine specific code should setup the timer irqaction. 
* c) enable the timer interrupt @@ -533,13 +310,48 @@ static unsigned int __init calibrate_hpt(void) } while (--i); hpt_end = mips_hpt_read(); - hpt_count = hpt_end - hpt_start; + hpt_count = (hpt_end - hpt_start) & mips_hpt_mask; hz = HZ; frequency = (u64)hpt_count * (u64)hz; return frequency >> log_2_loops; } +static cycle_t read_mips_hpt(void) +{ + return (cycle_t)mips_hpt_read(); +} + +static struct clocksource clocksource_mips = { + .name = "MIPS", + .read = read_mips_hpt, + .is_continuous = 1, +}; + +static void __init init_mips_clocksource(void) +{ + u64 temp; + u32 shift; + + if (!mips_hpt_frequency || mips_hpt_read == null_hpt_read) + return; + + /* Calclate a somewhat reasonable rating value */ + clocksource_mips.rating = 200 + mips_hpt_frequency / 10000000; + /* Find a shift value */ + for (shift = 32; shift > 0; shift--) { + temp = (u64) NSEC_PER_SEC << shift; + do_div(temp, mips_hpt_frequency); + if ((temp >> 32) == 0) + break; + } + clocksource_mips.shift = shift; + clocksource_mips.mult = (u32)temp; + clocksource_mips.mask = mips_hpt_mask; + + clocksource_register(&clocksource_mips); +} + void __init time_init(void) { if (board_time_init) @@ -555,41 +367,21 @@ void __init time_init(void) -xtime.tv_sec, -xtime.tv_nsec); /* Choose appropriate high precision timer routines. */ - if (!cpu_has_counter && !mips_hpt_read) { + if (!cpu_has_counter && !mips_hpt_read) /* No high precision timer -- sorry. */ mips_hpt_read = null_hpt_read; - mips_hpt_init = null_hpt_init; - } else if (!mips_hpt_frequency && !mips_timer_state) { + else if (!mips_hpt_frequency && !mips_timer_state) { /* A high precision timer of unknown frequency. */ - if (!mips_hpt_read) { + if (!mips_hpt_read) /* No external high precision timer -- use R4k. */ mips_hpt_read = c0_hpt_read; - mips_hpt_init = c0_hpt_init; - } - - if (cpu_has_mips32r1 || cpu_has_mips32r2 || - (current_cpu_data.isa_level == MIPS_CPU_ISA_I) || - (current_cpu_data.isa_level == MIPS_CPU_ISA_II)) - /* - * We need to calibrate the counter but we don't have - * 64-bit division. - */ - do_gettimeoffset = calibrate_div32_gettimeoffset; - else - /* - * We need to calibrate the counter but we *do* have - * 64-bit division. - */ - do_gettimeoffset = calibrate_div64_gettimeoffset; } else { /* We know counter frequency. Or we can get it. */ if (!mips_hpt_read) { /* No external high precision timer -- use R4k. */ mips_hpt_read = c0_hpt_read; - if (mips_timer_state) - mips_hpt_init = c0_hpt_init; - else { + if (!mips_timer_state) { /* No external timer interrupt -- use R4k. */ mips_hpt_init = c0_hpt_timer_init; mips_timer_ack = c0_timer_ack; @@ -598,16 +390,9 @@ void __init time_init(void) if (!mips_hpt_frequency) mips_hpt_frequency = calibrate_hpt(); - do_gettimeoffset = fixed_rate_gettimeoffset; - /* Calculate cache parameters. */ cycles_per_jiffy = (mips_hpt_frequency + HZ / 2) / HZ; - /* sll32_usecs_per_cycle = 10^6 * 2^32 / mips_counter_freq */ - do_div64_32(sll32_usecs_per_cycle, - 1000000, mips_hpt_frequency / 2, - mips_hpt_frequency); - /* Report the high precision timer rate for a reference. */ printk("Using %u.%03u MHz high precision timer.\n", ((mips_hpt_frequency + 500) / 1000) / 1000, @@ -619,7 +404,7 @@ void __init time_init(void) mips_timer_ack = null_timer_ack; /* This sets up the high precision timer for the first interrupt. */ - mips_hpt_init(mips_hpt_read()); + mips_hpt_init(); /* * Call board specific timer interrupt setup. @@ -633,6 +418,8 @@ void __init time_init(void) * is not invoked accidentally. 
*/ plat_timer_setup(&timer_irqaction); + + init_mips_clocksource(); } #define FEBRUARY 2 diff --git a/arch/mips/philips/pnx8550/common/time.c b/arch/mips/philips/pnx8550/common/time.c index 0af655b1f330..65c440e8480b 100644 --- a/arch/mips/philips/pnx8550/common/time.c +++ b/arch/mips/philips/pnx8550/common/time.c @@ -41,8 +41,8 @@ extern unsigned int mips_hpt_frequency; * 1) board_time_init() - * a) (optional) set up RTC routines, * b) (optional) calibrate and set the mips_hpt_frequency - * (only needed if you intended to use fixed_rate_gettimeoffset - * or use cpu counter as timer interrupt source) + * (only needed if you intended to use cpu counter as timer interrupt + * source) */ void pnx8550_time_init(void) diff --git a/arch/mips/pmc-sierra/yosemite/smp.c b/arch/mips/pmc-sierra/yosemite/smp.c index 65fa3a23ea5e..3cc0436db6cf 100644 --- a/arch/mips/pmc-sierra/yosemite/smp.c +++ b/arch/mips/pmc-sierra/yosemite/smp.c @@ -3,9 +3,7 @@ #include <asm/pmon.h> #include <asm/titan_dep.h> - -extern unsigned int (*mips_hpt_read)(void); -extern void (*mips_hpt_init)(unsigned int); +#include <asm/time.h> #define LAUNCHSTACK_SIZE 256 @@ -101,7 +99,7 @@ void prom_cpus_done(void) */ void prom_init_secondary(void) { - mips_hpt_init(mips_hpt_read()); + mips_hpt_init(); set_c0_status(ST0_CO | ST0_IE | ST0_IM); } diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index 4e870fc4469b..c965705f3427 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -134,13 +134,6 @@ again: irq_exit(); } -unsigned long ip27_do_gettimeoffset(void) -{ - unsigned long ct_cur1; - ct_cur1 = REMOTE_HUB_L(cputonasid(0), PI_RT_COUNT) + CYCLES_PER_JIFFY; - return (ct_cur1 - ct_cur[0]) * NSEC_PER_CYCLE / 1000; -} - /* Includes for ioc3_init(). */ #include <asm/sn/types.h> #include <asm/sn/sn0/addrs.h> @@ -248,12 +241,17 @@ void __init plat_timer_setup(struct irqaction *irq) setup_irq(irqno, &rt_irqaction); } +static unsigned int ip27_hpt_read(void) +{ + return REMOTE_HUB_L(cputonasid(0), PI_RT_COUNT); +} + void __init ip27_time_init(void) { + mips_hpt_read = ip27_hpt_read; + mips_hpt_frequency = CYCLES_PER_SEC; xtime.tv_sec = get_m48t35_time(); xtime.tv_nsec = 0; - - do_gettimeoffset = ip27_do_gettimeoffset; } void __init cpu_time_init(void) diff --git a/arch/mips/sibyte/bcm1480/time.c b/arch/mips/sibyte/bcm1480/time.c index bf12af46132e..e136bde5248e 100644 --- a/arch/mips/sibyte/bcm1480/time.c +++ b/arch/mips/sibyte/bcm1480/time.c @@ -47,6 +47,12 @@ #define IMR_IP3_VAL K_BCM1480_INT_MAP_I1 #define IMR_IP4_VAL K_BCM1480_INT_MAP_I2 +#ifdef CONFIG_SIMULATION +#define BCM1480_HPT_VALUE 50000 +#else +#define BCM1480_HPT_VALUE 1000000 +#endif + extern int bcm1480_steal_irq(int irq); void bcm1480_time_init(void) @@ -59,11 +65,6 @@ void bcm1480_time_init(void) BUG(); } - if (!cpu) { - /* Use our own gettimeoffset() routine */ - do_gettimeoffset = bcm1480_gettimeoffset; - } - bcm1480_mask_irq(cpu, irq); /* Map the timer interrupt to ip[4] of this cpu */ @@ -74,11 +75,7 @@ void bcm1480_time_init(void) /* Disable the timer and set up the count */ __raw_writeq(0, IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_CFG))); __raw_writeq( -#ifndef CONFIG_SIMULATION - 1000000/HZ -#else - 50000/HZ -#endif + BCM1480_HPT_VALUE/HZ , IOADDR(A_SCD_TIMER_REGISTER(cpu, R_SCD_TIMER_INIT))); /* Set the timer running */ @@ -122,16 +119,16 @@ void bcm1480_timer_interrupt(void) } } -/* - * We use our own do_gettimeoffset() instead of the generic one, - * because the generic one does not work for SMP case. 
- * In addition, since we use general timer 0 for system time, - * we can get accurate intra-jiffy offset without calibration. - */ -unsigned long bcm1480_gettimeoffset(void) +static unsigned int bcm1480_hpt_read(void) { + /* We assume this function is called xtime_lock held. */ unsigned long count = __raw_readq(IOADDR(A_SCD_TIMER_REGISTER(0, R_SCD_TIMER_CNT))); + return (jiffies + 1) * (BCM1480_HPT_VALUE / HZ) - count; +} - return 1000000/HZ - count; +void __init bcm1480_hpt_setup(void) +{ + mips_hpt_read = bcm1480_hpt_read; + mips_hpt_frequency = BCM1480_HPT_VALUE; } diff --git a/arch/mips/sibyte/sb1250/time.c b/arch/mips/sibyte/sb1250/time.c index 0ccf1796dd78..bcb74f2c1948 100644 --- a/arch/mips/sibyte/sb1250/time.c +++ b/arch/mips/sibyte/sb1250/time.c @@ -47,15 +47,11 @@ #define SB1250_HPT_NUM 3 #define SB1250_HPT_VALUE M_SCD_TIMER_CNT /* max value */ -#define SB1250_HPT_SHIFT ((sizeof(unsigned int)*8)-V_SCD_TIMER_WIDTH) extern int sb1250_steal_irq(int irq); static unsigned int sb1250_hpt_read(void); -static void sb1250_hpt_init(unsigned int); - -static unsigned int hpt_offset; void __init sb1250_hpt_setup(void) { @@ -69,13 +65,9 @@ void __init sb1250_hpt_setup(void) __raw_writeq(M_SCD_TIMER_ENABLE | M_SCD_TIMER_MODE_CONTINUOUS, IOADDR(A_SCD_TIMER_REGISTER(SB1250_HPT_NUM, R_SCD_TIMER_CFG))); - /* - * we need to fill 32 bits, so just use the upper 23 bits and pretend - * the timer is going 512Mhz instead of 1Mhz - */ - mips_hpt_frequency = V_SCD_TIMER_FREQ << SB1250_HPT_SHIFT; - mips_hpt_init = sb1250_hpt_init; + mips_hpt_frequency = V_SCD_TIMER_FREQ; mips_hpt_read = sb1250_hpt_read; + mips_hpt_mask = M_SCD_TIMER_INIT; } } @@ -149,11 +141,7 @@ void sb1250_timer_interrupt(void) /* * The HPT is free running from SB1250_HPT_VALUE down to 0 then starts over - * again. There's no easy way to set to a specific value so store init value - * in hpt_offset and subtract each time. - * - * Note: Timer isn't full 32bits so shift it into the upper part making - * it appear to run at a higher frequency. + * again. */ static unsigned int sb1250_hpt_read(void) { @@ -161,13 +149,5 @@ static unsigned int sb1250_hpt_read(void) count = G_SCD_TIMER_CNT(__raw_readq(IOADDR(A_SCD_TIMER_REGISTER(SB1250_HPT_NUM, R_SCD_TIMER_CNT)))); - count = (SB1250_HPT_VALUE - count) << SB1250_HPT_SHIFT; - - return count - hpt_offset; -} - -static void sb1250_hpt_init(unsigned int count) -{ - hpt_offset = count; - return; + return SB1250_HPT_VALUE - count; } diff --git a/include/asm-mips/div64.h b/include/asm-mips/div64.h index 5f7dcf5452e7..d107832de1b6 100644 --- a/include/asm-mips/div64.h +++ b/include/asm-mips/div64.h @@ -82,27 +82,6 @@ #if (_MIPS_SZLONG == 64) -/* - * Don't use this one in new code - */ -#define do_div64_32(res, high, low, base) ({ \ - unsigned int __quot, __mod; \ - unsigned long __div; \ - unsigned int __low, __high, __base; \ - \ - __high = (high); \ - __low = (low); \ - __div = __high; \ - __div = __div << 32 | __low; \ - __base = (base); \ - \ - __mod = __div % __base; \ - __div = __div / __base; \ - \ - __quot = __div; \ - (res) = __quot; \ - __mod; }) - /* * Hey, we're already 64-bit, no * need to play games.. 
diff --git a/include/asm-mips/sibyte/sb1250.h b/include/asm-mips/sibyte/sb1250.h index b09e16c93ca0..2ba6988ddc8e 100644 --- a/include/asm-mips/sibyte/sb1250.h +++ b/include/asm-mips/sibyte/sb1250.h @@ -51,8 +51,8 @@ extern void sb1250_mask_irq(int cpu, int irq); extern void sb1250_unmask_irq(int cpu, int irq); extern void sb1250_smp_finish(void); +extern void bcm1480_hpt_setup(void); extern void bcm1480_time_init(void); -extern unsigned long bcm1480_gettimeoffset(void); extern void bcm1480_mask_irq(int cpu, int irq); extern void bcm1480_unmask_irq(int cpu, int irq); extern void bcm1480_smp_finish(void); diff --git a/include/asm-mips/time.h b/include/asm-mips/time.h index 28512ba2266e..625acd337bc3 100644 --- a/include/asm-mips/time.h +++ b/include/asm-mips/time.h @@ -48,7 +48,8 @@ extern void (*mips_timer_ack)(void); * If mips_hpt_read is NULL, an R4k-compatible timer setup is attempted. */ extern unsigned int (*mips_hpt_read)(void); -extern void (*mips_hpt_init)(unsigned int); +extern void (*mips_hpt_init)(void); +extern unsigned int mips_hpt_mask; /* * to_tm() converts system time back to (year, mon, day, hour, min, sec). @@ -57,13 +58,6 @@ extern void to_tm(unsigned long tim, struct rtc_time *tm); -/* - * do_gettimeoffset(). By default, this func pointer points to - * do_null_gettimeoffset(), which leads to the same resolution as HZ. - * Higher resolution versions are available, which give ~1us resolution. - */ -extern unsigned long (*do_gettimeoffset)(void); - /* * high-level timer interrupt routines. */ -- cgit v1.2.3 From fa1d19e5d9a94120f31e5783ab44758f46892d94 Mon Sep 17 00:00:00 2001 From: Troy Heber <troy.heber@hp.com> Date: Wed, 25 Oct 2006 14:46:15 -0600 Subject: [IA64] move SAL_CACHE_FLUSH check later in boot The check to see if the firmware drops interrupts during a SAL_CACHE_FLUSH is done too early in boot. SAL_CACHE_FLUSH expects to be able to make PAL calls in virtual mode; on some cell-based machines a fault occurs, causing an MCA. This patch moves the check after mmu_context_init so the TLB and VHPT are properly set up.
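To make the failure mode concrete, here is a minimal, self-contained model of the probe's idea, with stubs standing in for the real SAL call and timer machinery (nothing below is kernel code): arm a timer interrupt, issue the firmware cache flush, and check whether the pending interrupt survived.

#include <stdio.h>

static int timer_irq_pending;	/* models the IRR bit for the timer vector */

static void arm_timer(void)
{
	timer_irq_pending = 1;	/* a timer tick is now pending */
}

static void firmware_cache_flush(void)
{
	timer_irq_pending = 0;	/* models buggy firmware eating the interrupt */
}

int main(void)
{
	arm_timer();
	firmware_cache_flush();
	if (!timer_irq_pending)
		printf("firmware drops interrupts across cache flush\n");
	return 0;
}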
Signed-off-by: Troy Heber <troy.heber@hp.com> Signed-off-by: Tony Luck <tony.luck@intel.com> --- arch/ia64/kernel/sal.c | 11 +++++++---- arch/ia64/kernel/setup.c | 2 ++ include/asm-ia64/sal.h | 1 + 3 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/arch/ia64/kernel/sal.c b/arch/ia64/kernel/sal.c index 642fdc7b969d..20bad78b5073 100644 --- a/arch/ia64/kernel/sal.c +++ b/arch/ia64/kernel/sal.c @@ -223,12 +223,13 @@ static void __init sal_desc_ap_wakeup(void *p) { } */ static int sal_cache_flush_drops_interrupts; -static void __init +void __init check_sal_cache_flush (void) { unsigned long flags; int cpu; - u64 vector; + u64 vector, cache_type = 3; + struct ia64_sal_retval isrv; cpu = get_cpu(); local_irq_save(flags); @@ -243,7 +244,10 @@ check_sal_cache_flush (void) while (!ia64_get_irr(IA64_TIMER_VECTOR)) cpu_relax(); - ia64_sal_cache_flush(3); + SAL_CALL(isrv, SAL_CACHE_FLUSH, cache_type, 0, 0, 0, 0, 0, 0); + + if (isrv.status) + printk(KERN_ERR "SAL_CAL_FLUSH failed with %ld\n", isrv.status); if (ia64_get_irr(IA64_TIMER_VECTOR)) { vector = ia64_get_ivr(); @@ -331,7 +335,6 @@ ia64_sal_init (struct ia64_sal_systab *systab) p += SAL_DESC_SIZE(*p); } - check_sal_cache_flush(); } int diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index c4caa8003492..d10404a41756 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -457,6 +457,8 @@ setup_arch (char **cmdline_p) cpu_init(); /* initialize the bootstrap CPU */ mmu_context_init(); /* initialize context_id bitmap */ + check_sal_cache_flush(); + #ifdef CONFIG_ACPI acpi_boot_init(); #endif diff --git a/include/asm-ia64/sal.h b/include/asm-ia64/sal.h index 0b210abbe003..d000689d9142 100644 --- a/include/asm-ia64/sal.h +++ b/include/asm-ia64/sal.h @@ -659,6 +659,7 @@ ia64_sal_freq_base (unsigned long which, unsigned long *ticks_per_second, } extern s64 ia64_sal_cache_flush (u64 cache_type); +extern void __init check_sal_cache_flush (void); /* Initialize all the processor and platform level instruction and data caches */ static inline s64 -- cgit v1.2.3 From 6e42acc4115bc376b8523acbcba2b2b7cc27d016 Mon Sep 17 00:00:00 2001 From: Tejun Heo <htejun@gmail.com> Date: Fri, 27 Oct 2006 19:08:42 -0700 Subject: [PATCH] libata: unexport ata_dev_revalidate() ata_dev_revalidate() isn't used outside of libata core. Unexport it.
Signed-off-by: Tejun Heo <htejun@gmail.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Jeff Garzik <jeff@garzik.org> --- drivers/ata/libata-core.c | 1 - drivers/ata/libata.h | 1 + include/linux/libata.h | 1 - 3 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 83728a9457ad..a8fd0c3e59b3 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -6122,7 +6122,6 @@ EXPORT_SYMBOL_GPL(ata_std_prereset); EXPORT_SYMBOL_GPL(ata_std_softreset); EXPORT_SYMBOL_GPL(sata_std_hardreset); EXPORT_SYMBOL_GPL(ata_std_postreset); -EXPORT_SYMBOL_GPL(ata_dev_revalidate); EXPORT_SYMBOL_GPL(ata_dev_classify); EXPORT_SYMBOL_GPL(ata_dev_pair); EXPORT_SYMBOL_GPL(ata_port_disable); diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h index a5ecb71390a9..0ed263be652a 100644 --- a/drivers/ata/libata.h +++ b/drivers/ata/libata.h @@ -53,6 +53,7 @@ extern unsigned ata_exec_internal(struct ata_device *dev, extern unsigned int ata_do_simple_cmd(struct ata_device *dev, u8 cmd); extern int ata_dev_read_id(struct ata_device *dev, unsigned int *p_class, int post_reset, u16 *id); +extern int ata_dev_revalidate(struct ata_device *dev, int post_reset); extern int ata_dev_configure(struct ata_device *dev, int print_info); extern int sata_down_spd_limit(struct ata_port *ap); extern int sata_set_spd_needed(struct ata_port *ap); diff --git a/include/linux/libata.h b/include/linux/libata.h index b03d5a340dc8..abd2debebca2 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -702,7 +702,6 @@ extern int ata_std_prereset(struct ata_port *ap); extern int ata_std_softreset(struct ata_port *ap, unsigned int *classes); extern int sata_std_hardreset(struct ata_port *ap, unsigned int *class); extern void ata_std_postreset(struct ata_port *ap, unsigned int *classes); -extern int ata_dev_revalidate(struct ata_device *dev, int post_reset); extern void ata_port_disable(struct ata_port *); extern void ata_std_ports(struct ata_ioports *ioaddr); #ifdef CONFIG_PCI -- cgit v1.2.3 From e0da0daee14862e0a5c49f2059641a8deb27eca2 Mon Sep 17 00:00:00 2001 From: Andy Fleming <afleming@freescale.com> Date: Fri, 27 Oct 2006 14:31:07 -0500 Subject: [POWERPC] Fix rmb() for e500-based machines The e500 core generates an illegal instruction exception when it tries to execute the lwsync instruction, which we currently use for rmb(). This fixes it by using the LWSYNC macro, which turns into a plain sync on 32-bit machines. Signed-off-by: Andrew Fleming <afleming@freescale.com> Signed-off-by: Paul Mackerras <paulus@samba.org> --- include/asm-powerpc/system.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/asm-powerpc/system.h b/include/asm-powerpc/system.h index 43627596003b..f7b1227d6454 100644 --- a/include/asm-powerpc/system.h +++ b/include/asm-powerpc/system.h @@ -25,8 +25,8 @@ * * We have to use the sync instructions for mb(), since lwsync doesn't * order loads with respect to previous stores. Lwsync is fine for - * rmb(), though. Note that lwsync is interpreted as sync by - * 32-bit and older 64-bit CPUs. + * rmb(), though. Note that rmb() actually uses a sync on 32-bit + * architectures. * * For wmb(), we use sync since wmb is used in drivers to order * stores to system memory with respect to writes to the device. @@ -34,7 +34,7 @@ * SMP since it is only used to order updates to system memory.
*/ #define mb() __asm__ __volatile__ ("sync" : : : "memory") -#define rmb() __asm__ __volatile__ ("lwsync" : : : "memory") +#define rmb() __asm__ __volatile__ (__stringify(LWSYNC) : : : "memory") #define wmb() __asm__ __volatile__ ("sync" : : : "memory") #define read_barrier_depends() do { } while(0) -- cgit v1.2.3 From dd6c89f686bdb2a5de72fab636fc839e5a0add6d Mon Sep 17 00:00:00 2001 From: Andy Fleming <afleming@freescale.com> Date: Fri, 27 Oct 2006 15:06:32 -0500 Subject: [POWERPC] Fix oprofile support for e500 in arch/powerpc Fixed a compile error in building the 85xx support with oprofile, and in the process cleaned up some issues with the fsl_booke performance monitor code. * Reorganized FSL Book-E performance monitoring code so that the 7450 wouldn't be built if the e500 was, and cleaned it up so it was more self-contained. * Added a cpu_setup function for FSL Book-E. The original cpu_setup function prototype had no arguments, assuming that the reg_setup function would copy the required information into variables which represented the registers. This was silly for e500, since it has 1 register per counter (rather than 3 for all counters), so the code has been restructured to have cpu_setup take the current counter config array as an argument, with op_powerpc_setup() invoking op_powerpc_cpu_setup() through on_each_cpu(), and op_powerpc_cpu_setup() invoking the model-specific cpu_setup function with an argument. The argument is ignored on all other platforms at present. * Fixed a confusing line where a ternary operator only had two arguments. Signed-off-by: Andrew Fleming <afleming@freescale.com> Signed-off-by: Paul Mackerras <paulus@samba.org> --- arch/powerpc/kernel/Makefile | 1 - arch/powerpc/kernel/perfmon_fsl_booke.c | 221 ----------------------------- arch/powerpc/kernel/pmc.c | 2 +- arch/powerpc/oprofile/Makefile | 2 +- arch/powerpc/oprofile/common.c | 10 +- arch/powerpc/oprofile/op_model_7450.c | 2 +- arch/powerpc/oprofile/op_model_fsl_booke.c | 170 +++++++++++++++++----- arch/powerpc/oprofile/op_model_power4.c | 2 +- arch/powerpc/oprofile/op_model_rs64.c | 2 +- include/asm-powerpc/oprofile_impl.h | 87 +++++++++++- include/asm-powerpc/pmc.h | 13 -- 11 files changed, 234 insertions(+), 278 deletions(-) delete mode 100644 arch/powerpc/kernel/perfmon_fsl_booke.c (limited to 'include') diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 8b133afbdc20..7af23c43fd4b 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -38,7 +38,6 @@ obj-$(CONFIG_6xx) += idle_6xx.o l2cr_6xx.o cpu_setup_6xx.o obj-$(CONFIG_TAU) += tau_6xx.o obj32-$(CONFIG_SOFTWARE_SUSPEND) += swsusp_32.o obj32-$(CONFIG_MODULES) += module_32.o -obj-$(CONFIG_E500) += perfmon_fsl_booke.o ifeq ($(CONFIG_PPC_MERGE),y) diff --git a/arch/powerpc/kernel/perfmon_fsl_booke.c b/arch/powerpc/kernel/perfmon_fsl_booke.c deleted file mode 100644 index e0dcf2b41fbe..000000000000 --- a/arch/powerpc/kernel/perfmon_fsl_booke.c +++ /dev/null @@ -1,221 +0,0 @@ -/* arch/powerpc/kernel/perfmon_fsl_booke.c - * Freescale Book-E Performance Monitor code - * - * Author: Andy Fleming - * Copyright (c) 2004 Freescale Semiconductor, Inc - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version.
- */ - -#include <linux/errno.h> -#include <linux/sched.h> -#include <linux/kernel.h> -#include <linux/mm.h> -#include <linux/stddef.h> -#include <linux/unistd.h> -#include <linux/ptrace.h> -#include <linux/slab.h> -#include <linux/user.h> -#include <linux/a.out.h> -#include <linux/interrupt.h> -#include <linux/init.h> -#include <linux/module.h> -#include <linux/prctl.h> - -#include <asm/pgtable.h> -#include <asm/uaccess.h> -#include <asm/system.h> -#include <asm/io.h> -#include <asm/reg.h> -#include <asm/xmon.h> -#include <asm/pmc.h> - -static inline u32 get_pmlca(int ctr); -static inline void set_pmlca(int ctr, u32 pmlca); - -static inline u32 get_pmlca(int ctr) -{ - u32 pmlca; - - switch (ctr) { - case 0: - pmlca = mfpmr(PMRN_PMLCA0); - break; - case 1: - pmlca = mfpmr(PMRN_PMLCA1); - break; - case 2: - pmlca = mfpmr(PMRN_PMLCA2); - break; - case 3: - pmlca = mfpmr(PMRN_PMLCA3); - break; - default: - panic("Bad ctr number\n"); - } - - return pmlca; -} - -static inline void set_pmlca(int ctr, u32 pmlca) -{ - switch (ctr) { - case 0: - mtpmr(PMRN_PMLCA0, pmlca); - break; - case 1: - mtpmr(PMRN_PMLCA1, pmlca); - break; - case 2: - mtpmr(PMRN_PMLCA2, pmlca); - break; - case 3: - mtpmr(PMRN_PMLCA3, pmlca); - break; - default: - panic("Bad ctr number\n"); - } -} - -void init_pmc_stop(int ctr) -{ - u32 pmlca = (PMLCA_FC | PMLCA_FCS | PMLCA_FCU | - PMLCA_FCM1 | PMLCA_FCM0); - u32 pmlcb = 0; - - switch (ctr) { - case 0: - mtpmr(PMRN_PMLCA0, pmlca); - mtpmr(PMRN_PMLCB0, pmlcb); - break; - case 1: - mtpmr(PMRN_PMLCA1, pmlca); - mtpmr(PMRN_PMLCB1, pmlcb); - break; - case 2: - mtpmr(PMRN_PMLCA2, pmlca); - mtpmr(PMRN_PMLCB2, pmlcb); - break; - case 3: - mtpmr(PMRN_PMLCA3, pmlca); - mtpmr(PMRN_PMLCB3, pmlcb); - break; - default: - panic("Bad ctr number!\n"); - } -} - -void set_pmc_event(int ctr, int event) -{ - u32 pmlca; - - pmlca = get_pmlca(ctr); - - pmlca = (pmlca & ~PMLCA_EVENT_MASK) | - ((event << PMLCA_EVENT_SHIFT) & - PMLCA_EVENT_MASK); - - set_pmlca(ctr, pmlca); -} - -void set_pmc_user_kernel(int ctr, int user, int kernel) -{ - u32 pmlca; - - pmlca = get_pmlca(ctr); - - if(user) - pmlca &= ~PMLCA_FCU; - else - pmlca |= PMLCA_FCU; - - if(kernel) - pmlca &= ~PMLCA_FCS; - else - pmlca |= PMLCA_FCS; - - set_pmlca(ctr, pmlca); -} - -void set_pmc_marked(int ctr, int mark0, int mark1) -{ - u32 pmlca = get_pmlca(ctr); - - if(mark0) - pmlca &= ~PMLCA_FCM0; - else - pmlca |= PMLCA_FCM0; - - if(mark1) - pmlca &= ~PMLCA_FCM1; - else - pmlca |= PMLCA_FCM1; - - set_pmlca(ctr, pmlca); -} - -void pmc_start_ctr(int ctr, int enable) -{ - u32 pmlca = get_pmlca(ctr); - - pmlca &= ~PMLCA_FC; - - if (enable) - pmlca |= PMLCA_CE; - else - pmlca &= ~PMLCA_CE; - - set_pmlca(ctr, pmlca); -} - -void pmc_start_ctrs(int enable) -{ - u32 pmgc0 = mfpmr(PMRN_PMGC0); - - pmgc0 &= ~PMGC0_FAC; - pmgc0 |= PMGC0_FCECE; - - if (enable) - pmgc0 |= PMGC0_PMIE; - else - pmgc0 &= ~PMGC0_PMIE; - - mtpmr(PMRN_PMGC0, pmgc0); -} - -void pmc_stop_ctrs(void) -{ - u32 pmgc0 = mfpmr(PMRN_PMGC0); - - pmgc0 |= PMGC0_FAC; - - pmgc0 &= ~(PMGC0_PMIE | PMGC0_FCECE); - - mtpmr(PMRN_PMGC0, pmgc0); -} - -void dump_pmcs(void) -{ - printk("pmgc0: %x\n", mfpmr(PMRN_PMGC0)); - printk("pmc\t\tpmlca\t\tpmlcb\n"); - printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC0), - mfpmr(PMRN_PMLCA0), mfpmr(PMRN_PMLCB0)); - printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC1), - mfpmr(PMRN_PMLCA1), mfpmr(PMRN_PMLCB1)); - printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC2), - mfpmr(PMRN_PMLCA2), mfpmr(PMRN_PMLCB2)); - printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC3), - mfpmr(PMRN_PMLCA3), 
mfpmr(PMRN_PMLCB3)); -} - -EXPORT_SYMBOL(init_pmc_stop); -EXPORT_SYMBOL(set_pmc_event); -EXPORT_SYMBOL(set_pmc_user_kernel); -EXPORT_SYMBOL(set_pmc_marked); -EXPORT_SYMBOL(pmc_start_ctr); -EXPORT_SYMBOL(pmc_start_ctrs); -EXPORT_SYMBOL(pmc_stop_ctrs); -EXPORT_SYMBOL(dump_pmcs); diff --git a/arch/powerpc/kernel/pmc.c b/arch/powerpc/kernel/pmc.c index a0a2efadeabf..3d8f6f44641e 100644 --- a/arch/powerpc/kernel/pmc.c +++ b/arch/powerpc/kernel/pmc.c @@ -71,7 +71,7 @@ int reserve_pmc_hardware(perf_irq_t new_perf_irq) } pmc_owner_caller = __builtin_return_address(0); - perf_irq = new_perf_irq ? : dummy_perf; + perf_irq = new_perf_irq ? new_perf_irq : dummy_perf; out: spin_unlock(&pmc_owner_lock); diff --git a/arch/powerpc/oprofile/Makefile b/arch/powerpc/oprofile/Makefile index 3145d610b5b0..0b5df9c96ae0 100644 --- a/arch/powerpc/oprofile/Makefile +++ b/arch/powerpc/oprofile/Makefile @@ -13,4 +13,4 @@ DRIVER_OBJS := $(addprefix ../../../drivers/oprofile/, \ oprofile-y := $(DRIVER_OBJS) common.o backtrace.o oprofile-$(CONFIG_PPC64) += op_model_rs64.o op_model_power4.o oprofile-$(CONFIG_FSL_BOOKE) += op_model_fsl_booke.o -oprofile-$(CONFIG_PPC32) += op_model_7450.o +oprofile-$(CONFIG_6xx) += op_model_7450.o diff --git a/arch/powerpc/oprofile/common.c b/arch/powerpc/oprofile/common.c index fd0bbbe7a4de..63bbef3b63f1 100644 --- a/arch/powerpc/oprofile/common.c +++ b/arch/powerpc/oprofile/common.c @@ -34,6 +34,11 @@ static void op_handle_interrupt(struct pt_regs *regs) model->handle_interrupt(regs, ctr); } +static void op_powerpc_cpu_setup(void *dummy) +{ + model->cpu_setup(ctr); +} + static int op_powerpc_setup(void) { int err; @@ -47,7 +52,7 @@ static int op_powerpc_setup(void) model->reg_setup(ctr, &sys, model->num_counters); /* Configure the registers on all cpus. 
*/ - on_each_cpu(model->cpu_setup, NULL, 0, 1); + on_each_cpu(op_powerpc_cpu_setup, NULL, 0, 1); return 0; } @@ -142,7 +147,8 @@ int __init oprofile_arch_init(struct oprofile_operations *ops) case PPC_OPROFILE_POWER4: model = &op_model_power4; break; -#else +#endif +#ifdef CONFIG_6xx case PPC_OPROFILE_G4: model = &op_model_7450; break; diff --git a/arch/powerpc/oprofile/op_model_7450.c b/arch/powerpc/oprofile/op_model_7450.c index d8ee3aea83f8..f481c0ed5e67 100644 --- a/arch/powerpc/oprofile/op_model_7450.c +++ b/arch/powerpc/oprofile/op_model_7450.c @@ -81,7 +81,7 @@ static void pmc_stop_ctrs(void) /* Configures the counters on this CPU based on the global * settings */ -static void fsl7450_cpu_setup(void *unused) +static void fsl7450_cpu_setup(struct op_counter_config *ctr) { /* freeze all counters */ pmc_stop_ctrs(); diff --git a/arch/powerpc/oprofile/op_model_fsl_booke.c b/arch/powerpc/oprofile/op_model_fsl_booke.c index e29dede31423..0b3c31f5209e 100644 --- a/arch/powerpc/oprofile/op_model_fsl_booke.c +++ b/arch/powerpc/oprofile/op_model_fsl_booke.c @@ -32,42 +32,152 @@ static unsigned long reset_value[OP_MAX_COUNTER]; static int num_counters; static int oprofile_running; -static inline unsigned int ctr_read(unsigned int i) +static void init_pmc_stop(int ctr) { - switch(i) { - case 0: - return mfpmr(PMRN_PMC0); - case 1: - return mfpmr(PMRN_PMC1); - case 2: - return mfpmr(PMRN_PMC2); - case 3: - return mfpmr(PMRN_PMC3); - default: - return 0; - } -} + u32 pmlca = (PMLCA_FC | PMLCA_FCS | PMLCA_FCU | + PMLCA_FCM1 | PMLCA_FCM0); + u32 pmlcb = 0; -static inline void ctr_write(unsigned int i, unsigned int val) -{ - switch(i) { + switch (ctr) { case 0: - mtpmr(PMRN_PMC0, val); + mtpmr(PMRN_PMLCA0, pmlca); + mtpmr(PMRN_PMLCB0, pmlcb); break; case 1: - mtpmr(PMRN_PMC1, val); + mtpmr(PMRN_PMLCA1, pmlca); + mtpmr(PMRN_PMLCB1, pmlcb); break; case 2: - mtpmr(PMRN_PMC2, val); + mtpmr(PMRN_PMLCA2, pmlca); + mtpmr(PMRN_PMLCB2, pmlcb); break; case 3: - mtpmr(PMRN_PMC3, val); + mtpmr(PMRN_PMLCA3, pmlca); + mtpmr(PMRN_PMLCB3, pmlcb); break; default: - break; + panic("Bad ctr number!\n"); } } +static void set_pmc_event(int ctr, int event) +{ + u32 pmlca; + + pmlca = get_pmlca(ctr); + + pmlca = (pmlca & ~PMLCA_EVENT_MASK) | + ((event << PMLCA_EVENT_SHIFT) & + PMLCA_EVENT_MASK); + + set_pmlca(ctr, pmlca); +} + +static void set_pmc_user_kernel(int ctr, int user, int kernel) +{ + u32 pmlca; + + pmlca = get_pmlca(ctr); + + if(user) + pmlca &= ~PMLCA_FCU; + else + pmlca |= PMLCA_FCU; + + if(kernel) + pmlca &= ~PMLCA_FCS; + else + pmlca |= PMLCA_FCS; + + set_pmlca(ctr, pmlca); +} + +static void set_pmc_marked(int ctr, int mark0, int mark1) +{ + u32 pmlca = get_pmlca(ctr); + + if(mark0) + pmlca &= ~PMLCA_FCM0; + else + pmlca |= PMLCA_FCM0; + + if(mark1) + pmlca &= ~PMLCA_FCM1; + else + pmlca |= PMLCA_FCM1; + + set_pmlca(ctr, pmlca); +} + +static void pmc_start_ctr(int ctr, int enable) +{ + u32 pmlca = get_pmlca(ctr); + + pmlca &= ~PMLCA_FC; + + if (enable) + pmlca |= PMLCA_CE; + else + pmlca &= ~PMLCA_CE; + + set_pmlca(ctr, pmlca); +} + +static void pmc_start_ctrs(int enable) +{ + u32 pmgc0 = mfpmr(PMRN_PMGC0); + + pmgc0 &= ~PMGC0_FAC; + pmgc0 |= PMGC0_FCECE; + + if (enable) + pmgc0 |= PMGC0_PMIE; + else + pmgc0 &= ~PMGC0_PMIE; + + mtpmr(PMRN_PMGC0, pmgc0); +} + +static void pmc_stop_ctrs(void) +{ + u32 pmgc0 = mfpmr(PMRN_PMGC0); + + pmgc0 |= PMGC0_FAC; + + pmgc0 &= ~(PMGC0_PMIE | PMGC0_FCECE); + + mtpmr(PMRN_PMGC0, pmgc0); +} + +static void dump_pmcs(void) +{ + printk("pmgc0: %x\n", mfpmr(PMRN_PMGC0)); + 
printk("pmc\t\tpmlca\t\tpmlcb\n"); + printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC0), + mfpmr(PMRN_PMLCA0), mfpmr(PMRN_PMLCB0)); + printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC1), + mfpmr(PMRN_PMLCA1), mfpmr(PMRN_PMLCB1)); + printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC2), + mfpmr(PMRN_PMLCA2), mfpmr(PMRN_PMLCB2)); + printk("%8x\t%8x\t%8x\n", mfpmr(PMRN_PMC3), + mfpmr(PMRN_PMLCA3), mfpmr(PMRN_PMLCB3)); +} + +static void fsl_booke_cpu_setup(struct op_counter_config *ctr) +{ + int i; + + /* freeze all counters */ + pmc_stop_ctrs(); + + for (i = 0;i < num_counters;i++) { + init_pmc_stop(i); + + set_pmc_event(i, ctr[i].event); + + set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel); + } +} static void fsl_booke_reg_setup(struct op_counter_config *ctr, struct op_system_config *sys, @@ -77,23 +187,14 @@ static void fsl_booke_reg_setup(struct op_counter_config *ctr, num_counters = num_ctrs; - /* freeze all counters */ - pmc_stop_ctrs(); - /* Our counters count up, and "count" refers to * how much before the next interrupt, and we interrupt * on overflow. So we calculate the starting value * which will give us "count" until overflow. * Then we set the events on the enabled counters */ - for (i = 0; i < num_counters; ++i) { + for (i = 0; i < num_counters; ++i) reset_value[i] = 0x80000000UL - ctr[i].count; - init_pmc_stop(i); - - set_pmc_event(i, ctr[i].event); - - set_pmc_user_kernel(i, ctr[i].user, ctr[i].kernel); - } } static void fsl_booke_start(struct op_counter_config *ctr) @@ -105,8 +206,8 @@ static void fsl_booke_start(struct op_counter_config *ctr) for (i = 0; i < num_counters; ++i) { if (ctr[i].enabled) { ctr_write(i, reset_value[i]); - /* Set Each enabled counterd to only - * count when the Mark bit is not set */ + /* Set each enabled counter to only + * count when the Mark bit is *not* set */ set_pmc_marked(i, 1, 0); pmc_start_ctr(i, 1); } else { @@ -177,6 +278,7 @@ static void fsl_booke_handle_interrupt(struct pt_regs *regs, struct op_powerpc_model op_model_fsl_booke = { .reg_setup = fsl_booke_reg_setup, + .cpu_setup = fsl_booke_cpu_setup, .start = fsl_booke_start, .stop = fsl_booke_stop, .handle_interrupt = fsl_booke_handle_interrupt, diff --git a/arch/powerpc/oprofile/op_model_power4.c b/arch/powerpc/oprofile/op_model_power4.c index 6a927effcc77..356709d515b9 100644 --- a/arch/powerpc/oprofile/op_model_power4.c +++ b/arch/powerpc/oprofile/op_model_power4.c @@ -82,7 +82,7 @@ static inline int mmcra_must_set_sample(void) return 0; } -static void power4_cpu_setup(void *unused) +static void power4_cpu_setup(struct op_counter_config *ctr) { unsigned int mmcr0 = mmcr0_val; unsigned long mmcra = mmcra_val; diff --git a/arch/powerpc/oprofile/op_model_rs64.c b/arch/powerpc/oprofile/op_model_rs64.c index 042f8f4867ad..19c5ee089bc9 100644 --- a/arch/powerpc/oprofile/op_model_rs64.c +++ b/arch/powerpc/oprofile/op_model_rs64.c @@ -102,7 +102,7 @@ static void rs64_reg_setup(struct op_counter_config *ctr, /* XXX setup user and kernel profiling */ } -static void rs64_cpu_setup(void *unused) +static void rs64_cpu_setup(struct op_counter_config *ctr) { unsigned int mmcr0; diff --git a/include/asm-powerpc/oprofile_impl.h b/include/asm-powerpc/oprofile_impl.h index 5b33994cd488..07a10e590c1d 100644 --- a/include/asm-powerpc/oprofile_impl.h +++ b/include/asm-powerpc/oprofile_impl.h @@ -42,7 +42,7 @@ struct op_powerpc_model { void (*reg_setup) (struct op_counter_config *, struct op_system_config *, int num_counters); - void (*cpu_setup) (void *); + void (*cpu_setup) (struct op_counter_config *); void (*start) (struct 
op_counter_config *); void (*stop) (void); void (*handle_interrupt) (struct pt_regs *, @@ -121,7 +121,90 @@ static inline void ctr_write(unsigned int i, unsigned int val) break; } } -#endif /* !CONFIG_FSL_BOOKE */ +#else /* CONFIG_FSL_BOOKE */ +static inline u32 get_pmlca(int ctr) +{ + u32 pmlca; + + switch (ctr) { + case 0: + pmlca = mfpmr(PMRN_PMLCA0); + break; + case 1: + pmlca = mfpmr(PMRN_PMLCA1); + break; + case 2: + pmlca = mfpmr(PMRN_PMLCA2); + break; + case 3: + pmlca = mfpmr(PMRN_PMLCA3); + break; + default: + panic("Bad ctr number\n"); + } + + return pmlca; +} + +static inline void set_pmlca(int ctr, u32 pmlca) +{ + switch (ctr) { + case 0: + mtpmr(PMRN_PMLCA0, pmlca); + break; + case 1: + mtpmr(PMRN_PMLCA1, pmlca); + break; + case 2: + mtpmr(PMRN_PMLCA2, pmlca); + break; + case 3: + mtpmr(PMRN_PMLCA3, pmlca); + break; + default: + panic("Bad ctr number\n"); + } +} + +static inline unsigned int ctr_read(unsigned int i) +{ + switch(i) { + case 0: + return mfpmr(PMRN_PMC0); + case 1: + return mfpmr(PMRN_PMC1); + case 2: + return mfpmr(PMRN_PMC2); + case 3: + return mfpmr(PMRN_PMC3); + default: + return 0; + } +} + +static inline void ctr_write(unsigned int i, unsigned int val) +{ + switch(i) { + case 0: + mtpmr(PMRN_PMC0, val); + break; + case 1: + mtpmr(PMRN_PMC1, val); + break; + case 2: + mtpmr(PMRN_PMC2, val); + break; + case 3: + mtpmr(PMRN_PMC3, val); + break; + default: + break; + } +} + + +#endif /* CONFIG_FSL_BOOKE */ + extern void op_powerpc_backtrace(struct pt_regs * const regs, unsigned int depth); diff --git a/include/asm-powerpc/pmc.h b/include/asm-powerpc/pmc.h index 07d6a4279319..8588be68e0ad 100644 --- a/include/asm-powerpc/pmc.h +++ b/include/asm-powerpc/pmc.h @@ -32,18 +32,5 @@ void release_pmc_hardware(void); void power4_enable_pmcs(void); #endif -#ifdef CONFIG_FSL_BOOKE -void init_pmc_stop(int ctr); -void set_pmc_event(int ctr, int event); -void set_pmc_user_kernel(int ctr, int user, int kernel); -void set_pmc_marked(int ctr, int mark0, int mark1); -void pmc_start_ctr(int ctr, int enable); -void pmc_start_ctrs(int enable); -void pmc_stop_ctrs(void); -void dump_pmcs(void); - -extern struct op_powerpc_model op_model_fsl_booke; -#endif - #endif /* __KERNEL__ */ #endif /* _POWERPC_PMC_H */ -- cgit v1.2.3 From 5d2efba64b231a1733c4048d1708d77e07f26426 Mon Sep 17 00:00:00 2001 From: Linas Vepstas <linas@austin.ibm.com> Date: Mon, 30 Oct 2006 16:15:59 +1100 Subject: [POWERPC] Use 4kB iommu pages even on 64kB-page systems The 10Gigabit ethernet device drivers appear to be able to chew up all 256MB of TCE mappings on pSeries systems, as evidenced by numerous error messages: iommu_alloc failed, tbl c0000000010d5c48 vaddr c0000000d875eff0 npages 1 Some experimentation indicates that this is essentially because one 1500-byte ethernet MTU gets mapped as a 64K DMA region when the large 64K pages are enabled. Thus, it doesn't take much to exhaust all of the available DMA mappings for a high-speed card. This patch changes the iommu allocator to work with its own distinct page size. Although the patch is long, it's actually quite simple: it just #defines a distinct IOMMU_PAGE_SIZE and then uses this in all the places that matter. As a side effect, it also dramatically improves network performance on platforms with H-calls on iommu translation inserts/removes (since we no longer call it 16 times for a 1500-byte packet when the iommu HW is still 4k).
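As a stand-alone illustration of the arithmetic (a user-space sketch, not kernel code; the constants mirror the patch's IOMMU_PAGE_* definitions and the helper repeats the iommu_num_pages() computation the patch adds), a 1500-byte buffer now costs at most two 4 KiB TCE entries instead of one 64 KiB mapping:

#include <stdio.h>

#define IOMMU_PAGE_SHIFT	12
#define IOMMU_PAGE_SIZE		(1UL << IOMMU_PAGE_SHIFT)
#define IOMMU_PAGE_MASK		(~(IOMMU_PAGE_SIZE - 1))
#define IOMMU_PAGE_ALIGN(a)	(((a) + IOMMU_PAGE_SIZE - 1) & IOMMU_PAGE_MASK)

/* Number of IOMMU pages spanned by [vaddr, vaddr + slen). */
static unsigned long iommu_num_pages(unsigned long vaddr, unsigned long slen)
{
	return (IOMMU_PAGE_ALIGN(vaddr + slen) - (vaddr & IOMMU_PAGE_MASK))
			>> IOMMU_PAGE_SHIFT;
}

int main(void)
{
	/* The 1500-byte MTU buffer from the error message above. */
	printf("%lu IOMMU page(s)\n", iommu_num_pages(0xd875eff0UL, 1500));
	return 0;
}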
In the future, we might want to make the IOMMU_PAGE_SIZE a variable in the iommu_table instance, thus allowing support for different HW page sizes in the iommu itself. Signed-off-by: Linas Vepstas <linas@austin.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Acked-by: Olof Johansson <olof@lixom.net> Acked-by: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Paul Mackerras <paulus@samba.org> --- arch/powerpc/kernel/iommu.c | 77 ++++++++++++++++++++-------------- arch/powerpc/kernel/vio.c | 4 +- arch/powerpc/platforms/iseries/iommu.c | 11 +---- arch/powerpc/platforms/pseries/iommu.c | 35 ++++------------ arch/powerpc/sysdev/dart.h | 1 - arch/powerpc/sysdev/dart_iommu.c | 8 +--- include/asm-powerpc/iommu.h | 22 +++++++++- include/asm-powerpc/tce.h | 3 +- 8 files changed, 80 insertions(+), 81 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/iommu.c b/arch/powerpc/kernel/iommu.c index f88a2a675d90..ba6b7256084b 100644 --- a/arch/powerpc/kernel/iommu.c +++ b/arch/powerpc/kernel/iommu.c @@ -47,6 +47,17 @@ static int novmerge = 0; static int novmerge = 1; #endif +static inline unsigned long iommu_num_pages(unsigned long vaddr, + unsigned long slen) +{ + unsigned long npages; + + npages = IOMMU_PAGE_ALIGN(vaddr + slen) - (vaddr & IOMMU_PAGE_MASK); + npages >>= IOMMU_PAGE_SHIFT; + + return npages; +} + static int __init setup_iommu(char *str) { if (!strcmp(str, "novmerge")) @@ -178,10 +189,10 @@ static dma_addr_t iommu_alloc(struct iommu_table *tbl, void *page, } entry += tbl->it_offset; /* Offset into real TCE table */ - ret = entry << PAGE_SHIFT; /* Set the return dma address */ + ret = entry << IOMMU_PAGE_SHIFT; /* Set the return dma address */ /* Put the TCEs in the HW table */ - ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & PAGE_MASK, + ppc_md.tce_build(tbl, entry, npages, (unsigned long)page & IOMMU_PAGE_MASK, direction); @@ -203,7 +214,7 @@ static void __iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, unsigned long entry, free_entry; unsigned long i; - entry = dma_addr >> PAGE_SHIFT; + entry = dma_addr >> IOMMU_PAGE_SHIFT; free_entry = entry - tbl->it_offset; if (((free_entry + npages) > tbl->it_size) || @@ -270,7 +281,7 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, /* Init first segment length for backout at failure */ outs->dma_length = 0; - DBG("mapping %d elements:\n", nelems); + DBG("sg mapping %d elements:\n", nelems); spin_lock_irqsave(&(tbl->it_lock), flags); @@ -285,9 +296,8 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, } /* Allocate iommu entries for that segment */ vaddr = (unsigned long)page_address(s->page) + s->offset; - npages = PAGE_ALIGN(vaddr + slen) - (vaddr & PAGE_MASK); - npages >>= PAGE_SHIFT; - entry = iommu_range_alloc(tbl, npages, &handle, mask >> PAGE_SHIFT, 0); + npages = iommu_num_pages(vaddr, slen); + entry = iommu_range_alloc(tbl, npages, &handle, mask >> IOMMU_PAGE_SHIFT, 0); DBG(" - vaddr: %lx, size: %lx\n", vaddr, slen); @@ -301,14 +311,14 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, /* Convert entry to a dma_addr_t */ entry += tbl->it_offset; - dma_addr = entry << PAGE_SHIFT; - dma_addr |= s->offset; + dma_addr = entry << IOMMU_PAGE_SHIFT; + dma_addr |= (s->offset & ~IOMMU_PAGE_MASK); - DBG(" - %lx pages, entry: %lx, dma_addr: %lx\n", + DBG(" - %lu pages, entry: %lx, dma_addr: %lx\n", npages, entry, dma_addr); /* Insert into HW table */ - ppc_md.tce_build(tbl, entry, npages, vaddr & PAGE_MASK, direction); + ppc_md.tce_build(tbl, 
entry, npages, vaddr & IOMMU_PAGE_MASK, direction); /* If we are in an open segment, try merging */ if (segstart != s) { @@ -323,7 +333,7 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, DBG(" can't merge, new segment.\n"); } else { outs->dma_length += s->length; - DBG(" merged, new len: %lx\n", outs->dma_length); + DBG(" merged, new len: %ux\n", outs->dma_length); } } @@ -367,9 +377,8 @@ int iommu_map_sg(struct device *dev, struct iommu_table *tbl, if (s->dma_length != 0) { unsigned long vaddr, npages; - vaddr = s->dma_address & PAGE_MASK; - npages = (PAGE_ALIGN(s->dma_address + s->dma_length) - vaddr) - >> PAGE_SHIFT; + vaddr = s->dma_address & IOMMU_PAGE_MASK; + npages = iommu_num_pages(s->dma_address, s->dma_length); __iommu_free(tbl, vaddr, npages); s->dma_address = DMA_ERROR_CODE; s->dma_length = 0; @@ -398,8 +407,7 @@ void iommu_unmap_sg(struct iommu_table *tbl, struct scatterlist *sglist, if (sglist->dma_length == 0) break; - npages = (PAGE_ALIGN(dma_handle + sglist->dma_length) - - (dma_handle & PAGE_MASK)) >> PAGE_SHIFT; + npages = iommu_num_pages(dma_handle,sglist->dma_length); __iommu_free(tbl, dma_handle, npages); sglist++; } @@ -532,12 +540,11 @@ dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr, BUG_ON(direction == DMA_NONE); uaddr = (unsigned long)vaddr; - npages = PAGE_ALIGN(uaddr + size) - (uaddr & PAGE_MASK); - npages >>= PAGE_SHIFT; + npages = iommu_num_pages(uaddr, size); if (tbl) { dma_handle = iommu_alloc(tbl, vaddr, npages, direction, - mask >> PAGE_SHIFT, 0); + mask >> IOMMU_PAGE_SHIFT, 0); if (dma_handle == DMA_ERROR_CODE) { if (printk_ratelimit()) { printk(KERN_INFO "iommu_alloc failed, " @@ -545,7 +552,7 @@ dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr, tbl, vaddr, npages); } } else - dma_handle |= (uaddr & ~PAGE_MASK); + dma_handle |= (uaddr & ~IOMMU_PAGE_MASK); } return dma_handle; @@ -554,11 +561,14 @@ dma_addr_t iommu_map_single(struct iommu_table *tbl, void *vaddr, void iommu_unmap_single(struct iommu_table *tbl, dma_addr_t dma_handle, size_t size, enum dma_data_direction direction) { + unsigned int npages; + BUG_ON(direction == DMA_NONE); - if (tbl) - iommu_free(tbl, dma_handle, (PAGE_ALIGN(dma_handle + size) - - (dma_handle & PAGE_MASK)) >> PAGE_SHIFT); + if (tbl) { + npages = iommu_num_pages(dma_handle, size); + iommu_free(tbl, dma_handle, npages); + } } /* Allocates a contiguous real buffer and creates mappings over it. 
@@ -570,11 +580,11 @@ void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size, { void *ret = NULL; dma_addr_t mapping; - unsigned int npages, order; + unsigned int order; + unsigned int nio_pages, io_order; struct page *page; size = PAGE_ALIGN(size); - npages = size >> PAGE_SHIFT; order = get_order(size); /* @@ -598,8 +608,10 @@ void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size, memset(ret, 0, size); /* Set up tces to cover the allocated range */ - mapping = iommu_alloc(tbl, ret, npages, DMA_BIDIRECTIONAL, - mask >> PAGE_SHIFT, order); + nio_pages = size >> IOMMU_PAGE_SHIFT; + io_order = get_iommu_order(size); + mapping = iommu_alloc(tbl, ret, nio_pages, DMA_BIDIRECTIONAL, + mask >> IOMMU_PAGE_SHIFT, io_order); if (mapping == DMA_ERROR_CODE) { free_pages((unsigned long)ret, order); return NULL; @@ -611,12 +623,13 @@ void *iommu_alloc_coherent(struct iommu_table *tbl, size_t size, void iommu_free_coherent(struct iommu_table *tbl, size_t size, void *vaddr, dma_addr_t dma_handle) { - unsigned int npages; - if (tbl) { + unsigned int nio_pages; + + size = PAGE_ALIGN(size); + nio_pages = size >> IOMMU_PAGE_SHIFT; + iommu_free(tbl, dma_handle, nio_pages); size = PAGE_ALIGN(size); - npages = size >> PAGE_SHIFT; - iommu_free(tbl, dma_handle, npages); free_pages((unsigned long)vaddr, get_order(size)); } } diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c index cb87e71eec66..ed007878d1bf 100644 --- a/arch/powerpc/kernel/vio.c +++ b/arch/powerpc/kernel/vio.c @@ -92,9 +92,9 @@ static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) &tbl->it_index, &offset, &size); /* TCE table size - measured in tce entries */ - tbl->it_size = size >> PAGE_SHIFT; + tbl->it_size = size >> IOMMU_PAGE_SHIFT; /* offset for VIO should always be 0 */ - tbl->it_offset = offset >> PAGE_SHIFT; + tbl->it_offset = offset >> IOMMU_PAGE_SHIFT; tbl->it_busno = 0; tbl->it_type = TCE_VB; diff --git a/arch/powerpc/platforms/iseries/iommu.c b/arch/powerpc/platforms/iseries/iommu.c index f4cbbcf8773a..218817d13c5c 100644 --- a/arch/powerpc/platforms/iseries/iommu.c +++ b/arch/powerpc/platforms/iseries/iommu.c @@ -43,9 +43,6 @@ static void tce_build_iSeries(struct iommu_table *tbl, long index, long npages, u64 rc; u64 tce, rpn; - index <<= TCE_PAGE_FACTOR; - npages <<= TCE_PAGE_FACTOR; - while (npages--) { rpn = virt_to_abs(uaddr) >> TCE_SHIFT; tce = (rpn & TCE_RPN_MASK) << TCE_RPN_SHIFT; @@ -75,9 +72,6 @@ static void tce_free_iSeries(struct iommu_table *tbl, long index, long npages) { u64 rc; - npages <<= TCE_PAGE_FACTOR; - index <<= TCE_PAGE_FACTOR; - while (npages--) { rc = HvCallXm_setTce((u64)tbl->it_index, (u64)index, 0); if (rc) @@ -136,10 +130,9 @@ void iommu_table_getparms_iSeries(unsigned long busno, panic("PCI_DMA: parms->size is zero, parms is 0x%p", parms); /* itc_size is in pages worth of table, it_size is in # of entries */ - tbl->it_size = ((parms->itc_size * TCE_PAGE_SIZE) / - TCE_ENTRY_SIZE) >> TCE_PAGE_FACTOR; + tbl->it_size = (parms->itc_size * TCE_PAGE_SIZE) / TCE_ENTRY_SIZE; tbl->it_busno = parms->itc_busno; - tbl->it_offset = parms->itc_offset >> TCE_PAGE_FACTOR; + tbl->it_offset = parms->itc_offset; tbl->it_index = parms->itc_index; tbl->it_blocksize = 1; tbl->it_type = virtbus ? 
TCE_VB : TCE_PCI; diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c index d24ba547e53f..556c279a789d 100644 --- a/arch/powerpc/platforms/pseries/iommu.c +++ b/arch/powerpc/platforms/pseries/iommu.c @@ -57,9 +57,6 @@ static void tce_build_pSeries(struct iommu_table *tbl, long index, u64 *tcep; u64 rpn; - index <<= TCE_PAGE_FACTOR; - npages <<= TCE_PAGE_FACTOR; - proto_tce = TCE_PCI_READ; // Read allowed if (direction != DMA_TO_DEVICE) @@ -82,9 +79,6 @@ static void tce_free_pSeries(struct iommu_table *tbl, long index, long npages) { u64 *tcep; - npages <<= TCE_PAGE_FACTOR; - index <<= TCE_PAGE_FACTOR; - tcep = ((u64 *)tbl->it_base) + index; while (npages--) @@ -95,7 +89,6 @@ static unsigned long tce_get_pseries(struct iommu_table *tbl, long index) { u64 *tcep; - index <<= TCE_PAGE_FACTOR; tcep = ((u64 *)tbl->it_base) + index; return *tcep; @@ -109,9 +102,6 @@ static void tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum, u64 proto_tce, tce; u64 rpn; - tcenum <<= TCE_PAGE_FACTOR; - npages <<= TCE_PAGE_FACTOR; - rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; proto_tce = TCE_PCI_READ; if (direction != DMA_TO_DEVICE) @@ -146,7 +136,7 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, u64 rpn; long l, limit; - if (TCE_PAGE_FACTOR == 0 && npages == 1) + if (npages == 1) return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr, direction); @@ -164,9 +154,6 @@ static void tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum, __get_cpu_var(tce_page) = tcep; } - tcenum <<= TCE_PAGE_FACTOR; - npages <<= TCE_PAGE_FACTOR; - rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT; proto_tce = TCE_PCI_READ; if (direction != DMA_TO_DEVICE) @@ -207,9 +194,6 @@ static void tce_free_pSeriesLP(struct iommu_table *tbl, long tcenum, long npages { u64 rc; - tcenum <<= TCE_PAGE_FACTOR; - npages <<= TCE_PAGE_FACTOR; - while (npages--) { rc = plpar_tce_put((u64)tbl->it_index, (u64)tcenum << 12, 0); @@ -229,9 +213,6 @@ static void tce_freemulti_pSeriesLP(struct iommu_table *tbl, long tcenum, long n { u64 rc; - tcenum <<= TCE_PAGE_FACTOR; - npages <<= TCE_PAGE_FACTOR; - rc = plpar_tce_stuff((u64)tbl->it_index, (u64)tcenum << 12, 0, npages); if (rc && printk_ratelimit()) { @@ -248,7 +229,6 @@ static unsigned long tce_get_pSeriesLP(struct iommu_table *tbl, long tcenum) u64 rc; unsigned long tce_ret; - tcenum <<= TCE_PAGE_FACTOR; rc = plpar_tce_get((u64)tbl->it_index, (u64)tcenum << 12, &tce_ret); if (rc && printk_ratelimit()) { @@ -289,7 +269,7 @@ static void iommu_table_setparms(struct pci_controller *phb, tbl->it_busno = phb->bus->number; /* Units of tce entries */ - tbl->it_offset = phb->dma_window_base_cur >> PAGE_SHIFT; + tbl->it_offset = phb->dma_window_base_cur >> IOMMU_PAGE_SHIFT; /* Test if we are going over 2GB of DMA space */ if (phb->dma_window_base_cur + phb->dma_window_size > 0x80000000ul) { @@ -300,7 +280,7 @@ static void iommu_table_setparms(struct pci_controller *phb, phb->dma_window_base_cur += phb->dma_window_size; /* Set the tce table size - measured in entries */ - tbl->it_size = phb->dma_window_size >> PAGE_SHIFT; + tbl->it_size = phb->dma_window_size >> IOMMU_PAGE_SHIFT; tbl->it_index = 0; tbl->it_blocksize = 16; @@ -325,8 +305,8 @@ static void iommu_table_setparms_lpar(struct pci_controller *phb, tbl->it_base = 0; tbl->it_blocksize = 16; tbl->it_type = TCE_PCI; - tbl->it_offset = offset >> PAGE_SHIFT; - tbl->it_size = size >> PAGE_SHIFT; + tbl->it_offset = offset >> IOMMU_PAGE_SHIFT; + tbl->it_size = size >> IOMMU_PAGE_SHIFT; } static 
void iommu_bus_setup_pSeries(struct pci_bus *bus) @@ -522,8 +502,6 @@ static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev) const void *dma_window = NULL; struct pci_dn *pci; - DBG("iommu_dev_setup_pSeriesLP, dev %p (%s)\n", dev, pci_name(dev)); - /* dev setup for LPAR is a little tricky, since the device tree might * contain the dma-window properties per-device and not neccesarily * for the bus. So we need to search upwards in the tree until we @@ -532,6 +510,9 @@ static void iommu_dev_setup_pSeriesLP(struct pci_dev *dev) */ dn = pci_device_to_OF_node(dev); + DBG("iommu_dev_setup_pSeriesLP, dev %p (%s) %s\n", + dev, pci_name(dev), dn->full_name); + for (pdn = dn; pdn && PCI_DN(pdn) && !PCI_DN(pdn)->iommu_table; pdn = pdn->parent) { dma_window = get_property(pdn, "ibm,dma-window", NULL); diff --git a/arch/powerpc/sysdev/dart.h b/arch/powerpc/sysdev/dart.h index 1c8817c4835e..ff202edb0591 100644 --- a/arch/powerpc/sysdev/dart.h +++ b/arch/powerpc/sysdev/dart.h @@ -72,7 +72,6 @@ #define DART_PAGE_SHIFT 12 #define DART_PAGE_SIZE (1 << DART_PAGE_SHIFT) -#define DART_PAGE_FACTOR (PAGE_SHIFT - DART_PAGE_SHIFT) #endif /* _POWERPC_SYSDEV_DART_H */ diff --git a/arch/powerpc/sysdev/dart_iommu.c b/arch/powerpc/sysdev/dart_iommu.c index 03b4477dd7f0..572b7846cc77 100644 --- a/arch/powerpc/sysdev/dart_iommu.c +++ b/arch/powerpc/sysdev/dart_iommu.c @@ -156,9 +156,6 @@ static void dart_build(struct iommu_table *tbl, long index, DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr); - index <<= DART_PAGE_FACTOR; - npages <<= DART_PAGE_FACTOR; - dp = ((unsigned int*)tbl->it_base) + index; /* On U3, all memory is contigous, so we can move this @@ -199,9 +196,6 @@ static void dart_free(struct iommu_table *tbl, long index, long npages) DBG("dart: free at: %lx, %lx\n", index, npages); - index <<= DART_PAGE_FACTOR; - npages <<= DART_PAGE_FACTOR; - dp = ((unsigned int *)tbl->it_base) + index; while (npages--) @@ -281,7 +275,7 @@ static void iommu_table_dart_setup(void) iommu_table_dart.it_busno = 0; iommu_table_dart.it_offset = 0; /* it_size is in number of entries */ - iommu_table_dart.it_size = (dart_tablesize / sizeof(u32)) >> DART_PAGE_FACTOR; + iommu_table_dart.it_size = dart_tablesize / sizeof(u32); /* Initialize the common IOMMU code */ iommu_table_dart.it_base = (unsigned long)dart_vbase; diff --git a/include/asm-powerpc/iommu.h b/include/asm-powerpc/iommu.h index a5e98641a2ae..39fad685ffab 100644 --- a/include/asm-powerpc/iommu.h +++ b/include/asm-powerpc/iommu.h @@ -22,17 +22,35 @@ #define _ASM_IOMMU_H #ifdef __KERNEL__ -#include <asm/types.h> +#include <linux/compiler.h> #include <linux/spinlock.h> #include <linux/device.h> #include <linux/dma-mapping.h> +#include <asm/types.h> +#include <asm/bitops.h> + +#define IOMMU_PAGE_SHIFT 12 +#define IOMMU_PAGE_SIZE (ASM_CONST(1) << IOMMU_PAGE_SHIFT) +#define IOMMU_PAGE_MASK (~((1 << IOMMU_PAGE_SHIFT) - 1)) +#define IOMMU_PAGE_ALIGN(addr) _ALIGN_UP(addr, IOMMU_PAGE_SIZE) + +#ifndef __ASSEMBLY__ + +/* Pure 2^n version of get_order */ +static __inline__ __attribute_const__ int get_iommu_order(unsigned long size) +{ + return __ilog2((size - 1) >> IOMMU_PAGE_SHIFT) + 1; +} + +#endif /* __ASSEMBLY__ */ + /* * IOMAP_MAX_ORDER defines the largest contiguous block * of dma space we can get. 
IOMAP_MAX_ORDER = 13 * allows up to 2**12 pages (4096 * 4096) = 16 MB */ -#define IOMAP_MAX_ORDER 13 +#define IOMAP_MAX_ORDER 13 struct iommu_table { unsigned long it_busno; /* Bus number this table belongs to */ diff --git a/include/asm-powerpc/tce.h b/include/asm-powerpc/tce.h index c9483adbf599..f663634cccc9 100644 --- a/include/asm-powerpc/tce.h +++ b/include/asm-powerpc/tce.h @@ -22,6 +22,8 @@ #define _ASM_POWERPC_TCE_H #ifdef __KERNEL__ +#include <asm/iommu.h> + /* * Tces come in two formats, one for the virtual bus and a different * format for PCI @@ -33,7 +35,6 @@ #define TCE_SHIFT 12 #define TCE_PAGE_SIZE (1 << TCE_SHIFT) -#define TCE_PAGE_FACTOR (PAGE_SHIFT - TCE_SHIFT) #define TCE_ENTRY_SIZE 8 /* each TCE is 64 bits */ -- cgit v1.2.3 From 5fe8e8b88e68e517637e3f8287f1fee89e2d9252 Mon Sep 17 00:00:00 2001 From: Hugh Dickins <hugh@veritas.com> Date: Tue, 31 Oct 2006 18:39:31 +0000 Subject: [POWERPC] Make current preempt-safe Repeated -j20 kernel builds on a G5 Quad running an SMP PREEMPT kernel would often collapse within a day, some exec failing with "Bad address". In each case examined, load_elf_binary was doing a kernel_read, but generic_file_aio_read's access_ok saw current->thread.fs.seg as USER_DS instead of KERNEL_DS. objdump of filemap.o shows gcc 4.1.0 emitting "mr r5,r13 ... ld r9,416(r5)" here for get_paca()->__current, instead of the expected and much more usual "ld r9,416(r13)"; I've seen other gcc4s do the same, but perhaps not gcc3s. So, if the task is preempted and rescheduled on a different cpu in between the mr and the ld, r5 will be looking at a different paca_struct from the one it's now on, pick up the wrong __current, and perhaps the wrong seg. Presumably much worse could happen elsewhere, though that split is rare. Other architectures appear to be safe (x86_64's read_pda is more limiting than get_paca), but ppc64 needs to force "current" into one instruction. Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Paul Mackerras <paulus@samba.org> --- include/asm-powerpc/current.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-powerpc/current.h b/include/asm-powerpc/current.h index 1938d6abd255..b8708aedf925 100644 --- a/include/asm-powerpc/current.h +++ b/include/asm-powerpc/current.h @@ -14,7 +14,17 @@ struct task_struct; #ifdef __powerpc64__ #include <asm/paca.h> -#define current (get_paca()->__current) +static inline struct task_struct *get_current(void) +{ + struct task_struct *task; + + __asm__ __volatile__("ld %0,%1(13)" + : "=r" (task) + : "i" (offsetof(struct paca_struct, __current))); + + return task; +} +#define current get_current() #else -- cgit v1.2.3 From 292f86f005e3867277b2126c2399eea3e773a4fc Mon Sep 17 00:00:00 2001 From: Hugh Dickins <hugh@veritas.com> Date: Tue, 31 Oct 2006 18:41:51 +0000 Subject: [POWERPC] Make mmiowb's io_sync preempt safe If mmiowb() is always used prior to releasing spinlock as Doc suggests, then it's safe against preemption; but I'm not convinced that's always the case. If preemption occurs between sync and get_paca()->io_sync = 0, I believe there's no problem. But in the unlikely event that gcc does the store relative to another register than r13 (as it did with current), then there's a small danger of setting another cpu's io_sync to 0, after it had just set it to 1. Rewrite ppc64 mmiowb to prevent that. 
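Schematically, the danger is a compiler-scheduled copy of the paca
pointer (a hypothetical instruction sequence for illustration; OFF
stands for offsetof(struct paca_struct, io_sync)):

	sync
	mr	r5,r13		# copy paca pointer out of r13
				# <-- preempt + migrate here
	li	r0,0
	stb	r0,OFF(r5)	# clears the *previous* cpu's io_sync

The rewritten mmiowb() (added below, shown here with commentary) does
the sync and the store in a single asm that addresses off r13
directly, so there is no window for such a copy:

	static inline void mmiowb(void)
	{
		unsigned long tmp;

		__asm__ __volatile__("sync; li %0,0; stb %0,%1(13)"
			: "=&r" (tmp)
			: "i" (offsetof(struct paca_struct, io_sync))
			: "memory");
	}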
The remaining io_sync assignments in io.h all get_paca()->io_sync = 1, which is harmless even if preempted to the wrong cpu (the context switch itself syncs); and those in spinlock.h are while preemption is disabled. Signed-off-by: Hugh Dickins <hugh@veritas.com> Signed-off-by: Paul Mackerras <paulus@samba.org> --- include/asm-powerpc/io.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h index 3baff8b0fd5a..c2c5f14b5f5f 100644 --- a/include/asm-powerpc/io.h +++ b/include/asm-powerpc/io.h @@ -163,8 +163,11 @@ extern void _outsl_ns(volatile u32 __iomem *port, const void *buf, long count); static inline void mmiowb(void) { - __asm__ __volatile__ ("sync" : : : "memory"); - get_paca()->io_sync = 0; + unsigned long tmp; + + __asm__ __volatile__("sync; li %0,0; stb %0,%1(13)" + : "=&r" (tmp) : "i" (offsetof(struct paca_struct, io_sync)) + : "memory"); } /* -- cgit v1.2.3 From 130fe05dbc0114609cfef9815c0c5580b42decfa Mon Sep 17 00:00:00 2001 From: Linus Torvalds <torvalds@macmini.osdl.org> Date: Wed, 1 Nov 2006 09:11:00 -0800 Subject: i386: clean up io-apic accesses This is preparation for fixing the ordering of the accesses that got broken by the commit cf4c6a2f27f5db810b69dcb1da7f194489e8ff88 when factoring out the "common" io apic routing entry accesses. Move the accessor function (that were only used by io_apic.c) out of a header file, and use proper memory-mapped accesses rather than making up our own "volatile" pointers. Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- arch/i386/kernel/io_apic.c | 40 ++++++++++++++++++++++++++++++++++++++++ include/asm-i386/io_apic.h | 29 +---------------------------- 2 files changed, 41 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 350192d6ab98..eb10bd5da64e 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -91,6 +91,46 @@ static struct irq_pin_list { int apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; +struct io_apic { + unsigned int index; + unsigned int unused[3]; + unsigned int data; +}; + +static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) +{ + return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) + + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); +} + +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + writel(reg, &io_apic->index); + return readl(&io_apic->data); +} + +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + writel(reg, &io_apic->index); + writel(value, &io_apic->data); +} + +/* + * Re-write a value: to be used for read-modify-write + * cycles where the read already set up the index register. 
+ * + * Older SiS APIC requires we rewrite the index register + */ +static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) +{ + volatile struct io_apic *io_apic = io_apic_base(apic); + if (sis_apic_bug) + writel(reg, &io_apic->index); + writel(value, &io_apic->data); +} + union entry_union { struct { u32 w1, w2; }; struct IO_APIC_route_entry entry; diff --git a/include/asm-i386/io_apic.h b/include/asm-i386/io_apic.h index 276ea7e8144a..059a9ff28b4d 100644 --- a/include/asm-i386/io_apic.h +++ b/include/asm-i386/io_apic.h @@ -12,10 +12,6 @@ #ifdef CONFIG_X86_IO_APIC -#define IO_APIC_BASE(idx) \ - ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \ - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK))) - /* * The structure of the IO-APIC: */ @@ -119,31 +115,8 @@ extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* non-0 if default (table-less) MP configuration */ extern int mpc_default_type; -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) -{ - *IO_APIC_BASE(apic) = reg; - return *(IO_APIC_BASE(apic)+4); -} - -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) -{ - *IO_APIC_BASE(apic) = reg; - *(IO_APIC_BASE(apic)+4) = value; -} - -/* - * Re-write a value: to be used for read-modify-write - * cycles where the read already set up the index register. - * - * Older SiS APIC requires we rewrite the index regiser - */ +/* Older SiS APIC requires we rewrite the index register */ extern int sis_apic_bug; -static inline void io_apic_modify(unsigned int apic, unsigned int reg, unsigned int value) -{ - if (sis_apic_bug) - *IO_APIC_BASE(apic) = reg; - *(IO_APIC_BASE(apic)+4) = value; -} /* 1 if "noapic" boot option passed */ extern int skip_ioapic_setup; -- cgit v1.2.3 From 242954b5aa8e5ec84f46a84637daf08ee4247c6e Mon Sep 17 00:00:00 2001 From: Ralf Baechle <ralf@linux-mips.org> Date: Tue, 24 Oct 2006 02:29:01 +0100 Subject: [MIPS] 16K & 64K page size fixes Derived from Peter Watkins <treestem@gmail.com>'s work. 
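Most of the hunks below work around one constraint: MIPS addiu/daddiu
take a signed 16-bit immediate, so constants derived from _PAGE_SIZE
stop fitting once 16kB or 64kB pages push them to 0x8000 or beyond.
The head.S change is representative (the patch's own before/after):

	/* before: only safe while _THREAD_SIZE - 32 fits in a
	 * signed 16-bit immediate */
	PTR_ADDIU sp, $28, _THREAD_SIZE - 32

	/* after: build the constant in a register, then add */
	PTR_LI	sp, _THREAD_SIZE - 32
	PTR_ADDU sp, $28

The new PTR_LI wrapper (li/dli) and the split addiu pair emitted in
pg-r4k.c serve the same purpose elsewhere.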
Signed-off-by: Ralf Baechle <ralf@linux-mips.org> --- arch/mips/kernel/asm-offsets.c | 2 +- arch/mips/kernel/head.S | 3 ++- arch/mips/kernel/r4k_switch.S | 5 +++++ arch/mips/kernel/vmlinux.lds.S | 10 ++++++++++ arch/mips/lib-64/dump_tlb.c | 6 +++--- arch/mips/mips-boards/generic/memory.c | 2 +- arch/mips/mm/pg-r4k.c | 30 ++++++++++++++++++++++++++++-- arch/mips/mm/tlbex.c | 13 ++++++++++--- include/asm-mips/asm.h | 2 ++ include/asm-mips/pgalloc.h | 2 +- include/asm-mips/pgtable-64.h | 2 +- 11 files changed, 64 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/arch/mips/kernel/asm-offsets.c b/arch/mips/kernel/asm-offsets.c index e9ce5b3721af..ff88b06f89df 100644 --- a/arch/mips/kernel/asm-offsets.c +++ b/arch/mips/kernel/asm-offsets.c @@ -22,7 +22,7 @@ #define offset(string, ptr, member) \ __asm__("\n@@@" string "%0" : : "i" (_offset(ptr, member))) #define constant(string, member) \ - __asm__("\n@@@" string "%x0" : : "ri" (member)) + __asm__("\n@@@" string "%X0" : : "ri" (member)) #define size(string, size) \ __asm__("\n@@@" string "%0" : : "i" (sizeof(size))) #define linefeed text("") diff --git a/arch/mips/kernel/head.S b/arch/mips/kernel/head.S index 8c6db0fc72f0..ddc1b71c9378 100644 --- a/arch/mips/kernel/head.S +++ b/arch/mips/kernel/head.S @@ -189,7 +189,8 @@ NESTED(kernel_entry, 16, sp) # kernel entry point MTC0 zero, CP0_CONTEXT # clear context register PTR_LA $28, init_thread_union - PTR_ADDIU sp, $28, _THREAD_SIZE - 32 + PTR_LI sp, _THREAD_SIZE - 32 + PTR_ADDU sp, $28 set_saved_sp sp, t0, t1 PTR_SUBU sp, 4 * SZREG # init stack pointer diff --git a/arch/mips/kernel/r4k_switch.S b/arch/mips/kernel/r4k_switch.S index d5c8b82fed72..cc566cf12246 100644 --- a/arch/mips/kernel/r4k_switch.S +++ b/arch/mips/kernel/r4k_switch.S @@ -85,7 +85,12 @@ move $28, a2 cpu_restore_nonscratch a1 +#if (_THREAD_SIZE - 32) < 0x10000 PTR_ADDIU t0, $28, _THREAD_SIZE - 32 +#else + PTR_LI t0, _THREAD_SIZE - 32 + PTR_ADDU t0, $28 +#endif set_saved_sp t0, t1, t2 #ifdef CONFIG_MIPS_MT_SMTC /* Read-modify-writes of Status must be atomic on a VPE */ diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 25ed3337ce35..79f0317d84ac 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -50,6 +50,16 @@ SECTIONS /* writeable */ .data : { /* Data */ . = . + DATAOFFSET; /* for CONFIG_MAPPED_KERNEL */ + /* + * This ALIGN is needed as a workaround for a bug a gcc bug upto 4.1 which + * limits the maximum alignment to at most 32kB and results in the following + * warning: + * + * CC arch/mips/kernel/init_task.o + * arch/mips/kernel/init_task.c:30: warning: alignment of ‘init_thread_union’ + * is greater than maximum object file alignment. Using 32768 + */ + . 
= ALIGN(_PAGE_SIZE); *(.data.init_task) *(.data) diff --git a/arch/mips/lib-64/dump_tlb.c b/arch/mips/lib-64/dump_tlb.c index be8261be679b..594df1a05ecc 100644 --- a/arch/mips/lib-64/dump_tlb.c +++ b/arch/mips/lib-64/dump_tlb.c @@ -149,7 +149,7 @@ void dump_list_process(struct task_struct *t, void *address) printk("Addr == %08lx\n", addr); printk("tasks->mm.pgd == %08lx\n", (unsigned long) t->mm->pgd); - page_dir = pgd_offset(t->mm, 0); + page_dir = pgd_offset(t->mm, 0UL); printk("page_dir == %016lx\n", (unsigned long) page_dir); pgd = pgd_offset(t->mm, addr); @@ -184,13 +184,13 @@ void dump_list_current(void *address) dump_list_process(current, address); } -unsigned int vtop(void *address) +unsigned long vtop(void *address) { pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; - unsigned int addr, paddr; + unsigned long addr, paddr; addr = (unsigned long) address; pgd = pgd_offset(current->mm, addr); diff --git a/arch/mips/mips-boards/generic/memory.c b/arch/mips/mips-boards/generic/memory.c index be80c5dd4a0c..eeed944e0f83 100644 --- a/arch/mips/mips-boards/generic/memory.c +++ b/arch/mips/mips-boards/generic/memory.c @@ -176,7 +176,7 @@ unsigned long __init prom_free_prom_memory(void) if (boot_mem_map.map[i].type != BOOT_MEM_ROM_DATA) continue; - addr = boot_mem_map.map[i].addr; + addr = PAGE_ALIGN(boot_mem_map.map[i].addr); while (addr < boot_mem_map.map[i].addr + boot_mem_map.map[i].size) { ClearPageReserved(virt_to_page(__va(addr))); diff --git a/arch/mips/mm/pg-r4k.c b/arch/mips/mm/pg-r4k.c index b7c749232ffe..d41fc5885e87 100644 --- a/arch/mips/mm/pg-r4k.c +++ b/arch/mips/mm/pg-r4k.c @@ -270,6 +270,20 @@ static inline void build_addiu_a2_a0(unsigned long offset) emit_instruction(mi); } +static inline void build_addiu_a2(unsigned long offset) +{ + union mips_instruction mi; + + BUG_ON(offset > 0x7fff); + + mi.i_format.opcode = cpu_has_64bit_gp_regs ? daddiu_op : addiu_op; + mi.i_format.rs = 6; /* $a2 */ + mi.i_format.rt = 6; /* $a2 */ + mi.i_format.simmediate = offset; + + emit_instruction(mi); +} + static inline void build_addiu_a1(unsigned long offset) { union mips_instruction mi; @@ -333,6 +347,7 @@ static inline void build_jr_ra(void) void __init build_clear_page(void) { unsigned int loop_start; + unsigned long off; epc = (unsigned int *) &clear_page_array; instruction_pending = 0; @@ -369,7 +384,12 @@ void __init build_clear_page(void) } } - build_addiu_a2_a0(PAGE_SIZE - (cpu_has_prefetch ? pref_offset_clear : 0)); + off = PAGE_SIZE - (cpu_has_prefetch ? pref_offset_clear : 0); + if (off > 0x7fff) { + build_addiu_a2_a0(off >> 1); + build_addiu_a2(off >> 1); + } else + build_addiu_a2_a0(off); if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) build_insn_word(0x3c01a000); /* lui $at, 0xa000 */ @@ -420,12 +440,18 @@ dest = label(); void __init build_copy_page(void) { unsigned int loop_start; + unsigned long off; epc = (unsigned int *) ©_page_array; store_offset = load_offset = 0; instruction_pending = 0; - build_addiu_a2_a0(PAGE_SIZE - (cpu_has_prefetch ? pref_offset_copy : 0)); + off = PAGE_SIZE - (cpu_has_prefetch ? 
pref_offset_copy : 0); + if (off > 0x7fff) { + build_addiu_a2_a0(off >> 1); + build_addiu_a2(off >> 1); + } else + build_addiu_a2_a0(off); if (R4600_V2_HIT_CACHEOP_WAR && cpu_is_r4600_v2_x()) build_insn_word(0x3c01a000); /* lui $at, 0xa000 */ diff --git a/arch/mips/mm/tlbex.c b/arch/mips/mm/tlbex.c index 6f8b25cfa6f0..fec318a1c8c5 100644 --- a/arch/mips/mm/tlbex.c +++ b/arch/mips/mm/tlbex.c @@ -102,7 +102,7 @@ enum opcode { insn_addu, insn_addiu, insn_and, insn_andi, insn_beq, insn_beql, insn_bgez, insn_bgezl, insn_bltz, insn_bltzl, insn_bne, insn_daddu, insn_daddiu, insn_dmfc0, insn_dmtc0, - insn_dsll, insn_dsll32, insn_dsra, insn_dsrl, + insn_dsll, insn_dsll32, insn_dsra, insn_dsrl, insn_dsrl32, insn_dsubu, insn_eret, insn_j, insn_jal, insn_jr, insn_ld, insn_ll, insn_lld, insn_lui, insn_lw, insn_mfc0, insn_mtc0, insn_ori, insn_rfe, insn_sc, insn_scd, insn_sd, insn_sll, @@ -145,6 +145,7 @@ static __initdata struct insn insn_table[] = { { insn_dsll32, M(spec_op,0,0,0,0,dsll32_op), RT | RD | RE }, { insn_dsra, M(spec_op,0,0,0,0,dsra_op), RT | RD | RE }, { insn_dsrl, M(spec_op,0,0,0,0,dsrl_op), RT | RD | RE }, + { insn_dsrl32, M(spec_op,0,0,0,0,dsrl32_op), RT | RD | RE }, { insn_dsubu, M(spec_op,0,0,0,0,dsubu_op), RS | RT | RD }, { insn_eret, M(cop0_op,cop_op,0,0,0,eret_op), 0 }, { insn_j, M(j_op,0,0,0,0,0), JIMM }, @@ -385,6 +386,7 @@ I_u2u1u3(_dsll); I_u2u1u3(_dsll32); I_u2u1u3(_dsra); I_u2u1u3(_dsrl); +I_u2u1u3(_dsrl32); I_u3u1u2(_dsubu); I_0(_eret); I_u1(_j); @@ -996,7 +998,12 @@ build_get_pmde64(u32 **p, struct label **l, struct reloc **r, #endif l_vmalloc_done(l, *p); - i_dsrl(p, tmp, tmp, PGDIR_SHIFT-3); /* get pgd offset in bytes */ + + if (PGDIR_SHIFT - 3 < 32) /* get pgd offset in bytes */ + i_dsrl(p, tmp, tmp, PGDIR_SHIFT-3); + else + i_dsrl32(p, tmp, tmp, PGDIR_SHIFT - 3 - 32); + i_andi(p, tmp, tmp, (PTRS_PER_PGD - 1)<<3); i_daddu(p, ptr, ptr, tmp); /* add in pgd offset */ i_dmfc0(p, tmp, C0_BADVADDR); /* get faulting address */ @@ -1073,7 +1080,7 @@ build_get_pgde32(u32 **p, unsigned int tmp, unsigned int ptr) static __init void build_adjust_context(u32 **p, unsigned int ctx) { - unsigned int shift = 4 - (PTE_T_LOG2 + 1); + unsigned int shift = 4 - (PTE_T_LOG2 + 1) + PAGE_SHIFT - 12; unsigned int mask = (PTRS_PER_PTE / 2 - 1) << (PTE_T_LOG2 + 1); switch (current_cpu_data.cputype) { diff --git a/include/asm-mips/asm.h b/include/asm-mips/asm.h index e3038a4599ee..838eb3144d81 100644 --- a/include/asm-mips/asm.h +++ b/include/asm-mips/asm.h @@ -344,6 +344,7 @@ symbol = value #define PTR_L lw #define PTR_S sw #define PTR_LA la +#define PTR_LI li #define PTR_SLL sll #define PTR_SLLV sllv #define PTR_SRL srl @@ -368,6 +369,7 @@ symbol = value #define PTR_L ld #define PTR_S sd #define PTR_LA dla +#define PTR_LI dli #define PTR_SLL dsll #define PTR_SLLV dsllv #define PTR_SRL dsrl diff --git a/include/asm-mips/pgalloc.h b/include/asm-mips/pgalloc.h index 582c1fe6cc4a..af121c67dc71 100644 --- a/include/asm-mips/pgalloc.h +++ b/include/asm-mips/pgalloc.h @@ -48,7 +48,7 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) ret = (pgd_t *) __get_free_pages(GFP_KERNEL, PGD_ORDER); if (ret) { - init = pgd_offset(&init_mm, 0); + init = pgd_offset(&init_mm, 0UL); pgd_init((unsigned long)ret); memcpy(ret + USER_PTRS_PER_PGD, init + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); diff --git a/include/asm-mips/pgtable-64.h b/include/asm-mips/pgtable-64.h index d05fb6f38aa7..7e7320300aa3 100644 --- a/include/asm-mips/pgtable-64.h +++ b/include/asm-mips/pgtable-64.h @@ 
-174,7 +174,7 @@ static inline void pud_clear(pud_t *pudp) #define __pmd_offset(address) pmd_index(address) /* to find an entry in a kernel page-table-directory */ -#define pgd_offset_k(address) pgd_offset(&init_mm, 0) +#define pgd_offset_k(address) pgd_offset(&init_mm, 0UL) #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) -- cgit v1.2.3 From c7fed9d75074f7c243ec8ff2c55d04de2839a6f6 Mon Sep 17 00:00:00 2001 From: "David S. Miller" <davem@davemloft.net> Date: Wed, 1 Nov 2006 16:30:39 -0800 Subject: [SPARC64]: Fix futex_atomic_cmpxchg_inatomic implementation. I copied the logic from ll/sc arch implementations, but that was wrong and makes no sense at all. Just do a straight compare-exchange instruction, just like x86. Based upon bug reports from Dennis Gilmore and Fabio Massimo. Signed-off-by: David S. Miller <davem@davemloft.net> --- include/asm-sparc64/futex.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/asm-sparc64/futex.h b/include/asm-sparc64/futex.h index dee40206b221..7392fc4a954e 100644 --- a/include/asm-sparc64/futex.h +++ b/include/asm-sparc64/futex.h @@ -87,24 +87,22 @@ static inline int futex_atomic_cmpxchg_inatomic(int __user *uaddr, int oldval, int newval) { __asm__ __volatile__( - "\n1: lduwa [%2] %%asi, %0\n" - "2: casa [%2] %%asi, %0, %1\n" - "3:\n" + "\n1: casa [%3] %%asi, %2, %0\n" + "2:\n" " .section .fixup,#alloc,#execinstr\n" " .align 4\n" - "4: ba 3b\n" - " mov %3, %0\n" + "3: ba 2b\n" + " mov %4, %0\n" " .previous\n" " .section __ex_table,\"a\"\n" " .align 4\n" - " .word 1b, 4b\n" - " .word 2b, 4b\n" + " .word 1b, 3b\n" " .previous\n" - : "=&r" (oldval) - : "r" (newval), "r" (uaddr), "i" (-EFAULT) + : "=r" (newval) + : "0" (newval), "r" (oldval), "r" (uaddr), "i" (-EFAULT) : "memory"); - return oldval; + return newval; } #endif /* !(_SPARC64_FUTEX_H) */ -- cgit v1.2.3 From 904880e717c5466041485ca6d6e8c6c1ef06d0fd Mon Sep 17 00:00:00 2001 From: Ralf Baechle <ralf@linux-mips.org> Date: Fri, 13 Oct 2006 11:32:50 +0100 Subject: [MIPS] Don't use R10000 llsc workaround version for all llsc-full processors. Found and original patch by bile@landofbile.com. Signed-off-by: Ralf Baechle <ralf@linux-mips.org> --- include/asm-mips/system.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-mips/system.h b/include/asm-mips/system.h index dcb4701d5728..3056feed5a36 100644 --- a/include/asm-mips/system.h +++ b/include/asm-mips/system.h @@ -392,7 +392,7 @@ static inline unsigned long __cmpxchg_u64(volatile int * m, unsigned long old, { __u64 retval; - if (cpu_has_llsc) { + if (cpu_has_llsc && R10000_LLSC_WAR) { __asm__ __volatile__( " .set push \n" " .set noat \n" -- cgit v1.2.3 From 7a118df3ea23820b9922a1b51cd2f24e464f4c17 Mon Sep 17 00:00:00 2001 From: Sean Hefty <sean.hefty@intel.com> Date: Tue, 31 Oct 2006 11:12:59 -0800 Subject: RDMA/addr: Use client registration to fix module unload race Require registration with ib_addr module to prevent caller from unloading while a callback is in progress. 
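The lifetime rule compresses to a small refcount-plus-completion
pattern; collecting the relevant hunks from below with commentary:

	void rdma_addr_register_client(struct rdma_addr_client *client)
	{
		/* one bias reference held by the client itself */
		atomic_set(&client->refcount, 1);
		init_completion(&client->comp);
	}

	static inline void put_client(struct rdma_addr_client *client)
	{
		if (atomic_dec_and_test(&client->refcount))
			complete(&client->comp);
	}

	void rdma_addr_unregister_client(struct rdma_addr_client *client)
	{
		put_client(client);	/* drop the bias reference */
		wait_for_completion(&client->comp);
	}

Each rdma_resolve_ip() call takes a reference which process_req() only
drops after the callback returns, so unregistration cannot complete
while a callback might still be running in the module being unloaded.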
Signed-off-by: Sean Hefty <sean.hefty@intel.com> Signed-off-by: Roland Dreier <rolandd@cisco.com> --- drivers/infiniband/core/addr.c | 28 +++++++++++++++++++++++++++- drivers/infiniband/core/cma.c | 8 ++++++-- include/rdma/ib_addr.h | 20 +++++++++++++++++++- 3 files changed, 52 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/core/addr.c b/drivers/infiniband/core/addr.c index 60d3fbdd216c..e11187ecc931 100644 --- a/drivers/infiniband/core/addr.c +++ b/drivers/infiniband/core/addr.c @@ -47,6 +47,7 @@ struct addr_req { struct sockaddr src_addr; struct sockaddr dst_addr; struct rdma_dev_addr *addr; + struct rdma_addr_client *client; void *context; void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context); @@ -61,6 +62,26 @@ static LIST_HEAD(req_list); static DECLARE_WORK(work, process_req, NULL); static struct workqueue_struct *addr_wq; +void rdma_addr_register_client(struct rdma_addr_client *client) +{ + atomic_set(&client->refcount, 1); + init_completion(&client->comp); +} +EXPORT_SYMBOL(rdma_addr_register_client); + +static inline void put_client(struct rdma_addr_client *client) +{ + if (atomic_dec_and_test(&client->refcount)) + complete(&client->comp); +} + +void rdma_addr_unregister_client(struct rdma_addr_client *client) +{ + put_client(client); + wait_for_completion(&client->comp); +} +EXPORT_SYMBOL(rdma_addr_unregister_client); + int rdma_copy_addr(struct rdma_dev_addr *dev_addr, struct net_device *dev, const unsigned char *dst_dev_addr) { @@ -229,6 +250,7 @@ static void process_req(void *data) list_del(&req->list); req->callback(req->status, &req->src_addr, req->addr, req->context); + put_client(req->client); kfree(req); } } @@ -264,7 +286,8 @@ static int addr_resolve_local(struct sockaddr_in *src_in, return ret; } -int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, +int rdma_resolve_ip(struct rdma_addr_client *client, + struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), @@ -285,6 +308,8 @@ int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, req->addr = addr; req->callback = callback; req->context = context; + req->client = client; + atomic_inc(&client->refcount); src_in = (struct sockaddr_in *) &req->src_addr; dst_in = (struct sockaddr_in *) &req->dst_addr; @@ -305,6 +330,7 @@ int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, break; default: ret = req->status; + atomic_dec(&client->refcount); kfree(req); break; } diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index d8ca3c1368b5..845090b0859c 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -63,6 +63,7 @@ static struct ib_client cma_client = { }; static struct ib_sa_client sa_client; +static struct rdma_addr_client addr_client; static LIST_HEAD(dev_list); static LIST_HEAD(listen_any_list); static DEFINE_MUTEX(lock); @@ -1625,8 +1626,8 @@ int rdma_resolve_addr(struct rdma_cm_id *id, struct sockaddr *src_addr, if (cma_any_addr(dst_addr)) ret = cma_resolve_loopback(id_priv); else - ret = rdma_resolve_ip(&id->route.addr.src_addr, dst_addr, - &id->route.addr.dev_addr, + ret = rdma_resolve_ip(&addr_client, &id->route.addr.src_addr, + dst_addr, &id->route.addr.dev_addr, timeout_ms, addr_handler, id_priv); if (ret) goto err; @@ -2217,6 +2218,7 @@ static int cma_init(void) return -ENOMEM; 
ib_sa_register_client(&sa_client); + rdma_addr_register_client(&addr_client); ret = ib_register_client(&cma_client); if (ret) @@ -2224,6 +2226,7 @@ static int cma_init(void) return 0; err: + rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); return ret; @@ -2232,6 +2235,7 @@ err: static void cma_cleanup(void) { ib_unregister_client(&cma_client); + rdma_addr_unregister_client(&addr_client); ib_sa_unregister_client(&sa_client); destroy_workqueue(cma_wq); idr_destroy(&sdp_ps); diff --git a/include/rdma/ib_addr.h b/include/rdma/ib_addr.h index 81b62307621d..c094e5012862 100644 --- a/include/rdma/ib_addr.h +++ b/include/rdma/ib_addr.h @@ -36,6 +36,22 @@ #include <linux/socket.h> #include <rdma/ib_verbs.h> +struct rdma_addr_client { + atomic_t refcount; + struct completion comp; +}; + +/** + * rdma_addr_register_client - Register an address client. + */ +void rdma_addr_register_client(struct rdma_addr_client *client); + +/** + * rdma_addr_unregister_client - Deregister an address client. + * @client: Client object to deregister. + */ +void rdma_addr_unregister_client(struct rdma_addr_client *client); + struct rdma_dev_addr { unsigned char src_dev_addr[MAX_ADDR_LEN]; unsigned char dst_dev_addr[MAX_ADDR_LEN]; @@ -52,6 +68,7 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr); /** * rdma_resolve_ip - Resolve source and destination IP addresses to * RDMA hardware addresses. + * @client: Address client associated with request. * @src_addr: An optional source address to use in the resolution. If a * source address is not provided, a usable address will be returned via * the callback. @@ -64,7 +81,8 @@ int rdma_translate_ip(struct sockaddr *addr, struct rdma_dev_addr *dev_addr); * or been canceled. A status of 0 indicates success. * @context: User-specified context associated with the call. */ -int rdma_resolve_ip(struct sockaddr *src_addr, struct sockaddr *dst_addr, +int rdma_resolve_ip(struct rdma_addr_client *client, + struct sockaddr *src_addr, struct sockaddr *dst_addr, struct rdma_dev_addr *addr, int timeout_ms, void (*callback)(int status, struct sockaddr *src_addr, struct rdma_dev_addr *addr, void *context), -- cgit v1.2.3 From 4fa2eeeac5e13a8579ee45bc172eed690d28fbb7 Mon Sep 17 00:00:00 2001 From: Peer Chen <pchen@nvidia.com> Date: Thu, 2 Nov 2006 18:55:48 -0500 Subject: pci_ids.h: Add NVIDIA PCI ID Signed-off-by: Jeff Garzik <jeff@garzik.org> --- include/linux/pci_ids.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index f3a168f3c9df..fa4e1d799782 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -1213,6 +1213,7 @@ #define PCI_DEVICE_ID_NVIDIA_NVENET_21 0x0451 #define PCI_DEVICE_ID_NVIDIA_NVENET_22 0x0452 #define PCI_DEVICE_ID_NVIDIA_NVENET_23 0x0453 +#define PCI_DEVICE_ID_NVIDIA_NFORCE_MCP67_IDE 0x0560 #define PCI_VENDOR_ID_IMS 0x10e0 #define PCI_DEVICE_ID_IMS_TT128 0x9128 -- cgit v1.2.3 From 1f4a39319e9226c3b1d5b91a1e4d3559ef8740e4 Mon Sep 17 00:00:00 2001 From: Enrico Scholz <enrico.scholz@de.rmk.(none)> Date: Fri, 3 Nov 2006 13:47:39 +0100 Subject: [ARM] 3919/1: Fixed definition of some PXA270 CIF related registers Fixed definition of some CIF registers; see PXA27x Developer\'s Manual. 
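The widened masks matter because drivers extract these fields by
mask-and-shift; e.g. (a hypothetical accessor, not part of the patch):

	/* CICR1_PPL now covers bits 25:15 -- an 11-bit field, so up
	 * to 2047 pixels per line; the old 0x3f mask would have
	 * truncated anything wider than 63 pixels. */
	static inline unsigned int cicr1_ppl(unsigned int cicr1)
	{
		return (cicr1 & CICR1_PPL) >> 15;
	}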
Signed-off-by: Enrico Scholz <enrico.scholz@sigma-chemnitz.de> Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> --- include/asm-arm/arch-pxa/pxa-regs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/asm-arm/arch-pxa/pxa-regs.h b/include/asm-arm/arch-pxa/pxa-regs.h index 68731e0923a4..cff752f35230 100644 --- a/include/asm-arm/arch-pxa/pxa-regs.h +++ b/include/asm-arm/arch-pxa/pxa-regs.h @@ -2242,7 +2242,7 @@ #define CICR1_TBIT (1 << 31) /* Transparency bit */ #define CICR1_RGBT_CONV (0x3 << 30) /* RGBT conversion mask */ -#define CICR1_PPL (0x3f << 15) /* Pixels per line mask */ +#define CICR1_PPL (0x7ff << 15) /* Pixels per line mask */ #define CICR1_RGB_CONV (0x7 << 12) /* RGB conversion mask */ #define CICR1_RGB_F (1 << 11) /* RGB format */ #define CICR1_YCBCR_F (1 << 10) /* YCbCr format */ @@ -2268,7 +2268,7 @@ #define CICR3_VSW (0x3f << 10) /* Vertical sync pulse width mask */ #define CICR3_BFPW (0x3f << 3) /* Beginning-of-frame pixel clock wait count mask */ -#define CICR3_LPF (0x3ff << 0) /* Lines per frame mask */ +#define CICR3_LPF (0x7ff << 0) /* Lines per frame mask */ #define CICR4_MCLK_DLY (0x3 << 24) /* MCLK Data Capture Delay mask */ #define CICR4_PCLK_EN (1 << 23) /* Pixel clock enable */ @@ -2289,8 +2289,8 @@ #define CISR_EOL (1 << 8) /* End of line */ #define CISR_PAR_ERR (1 << 7) /* Parity error */ #define CISR_CQD (1 << 6) /* Camera interface quick disable */ -#define CISR_SOF (1 << 5) /* Start of frame */ -#define CISR_CDD (1 << 4) /* Camera interface disable done */ +#define CISR_CDD (1 << 5) /* Camera interface disable done */ +#define CISR_SOF (1 << 4) /* Start of frame */ #define CISR_EOF (1 << 3) /* End of frame */ #define CISR_IFO_2 (1 << 2) /* FIFO overrun for Channel 2 */ #define CISR_IFO_1 (1 << 1) /* FIFO overrun for Channel 1 */ -- cgit v1.2.3 From 86f4f0f9ba6e35fbbc409dfc3d8615c1a9822482 Mon Sep 17 00:00:00 2001 From: Eric Sandeen <sandeen@redhat.com> Date: Thu, 2 Nov 2006 22:07:05 -0800 Subject: [PATCH] fix UFS superblock alignment issues ufs2 fails to mount on x86_64, claiming bad magic. This is because ufs_super_block_third's fs_un1 member is padded out by 4 bytes for 8-byte alignment, pushing down the rest of the struct. Forcing this to be packed solves it. I took a quick look over other on-disk structures and didn't immediately find other problems. I was able to mount & ls a populated ufs2 filesystem w/ this change. 
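The padding effect is easy to demonstrate in isolation (a standalone
sketch, not kernel code):

	#include <stdio.h>

	struct unpacked {
		unsigned int a;		/* 4 bytes */
		unsigned long long b;	/* 8-byte aligned on x86_64, so
					 * 4 pad bytes precede it */
	};

	struct packed_layout {
		unsigned int a;
		unsigned long long b;
	} __attribute__ ((packed));

	int main(void)
	{
		/* prints "16 12" on x86_64 */
		printf("%zu %zu\n", sizeof(struct unpacked),
		       sizeof(struct packed_layout));
		return 0;
	}

On i386 the ABI aligns 64-bit members to only 4 bytes, which is why
the unpacked layout happened to match the on-disk format there and the
bad magic only surfaced on 64-bit.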
Signed-off-by: Eric Sandeen <sandeen@redhat.com> Cc: Evgeniy Dushistov <dushistov@mail.ru> Cc: <stable@kernel.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/ufs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ufs_fs.h b/include/linux/ufs_fs.h index 61eef508b041..28967eda9d7b 100644 --- a/include/linux/ufs_fs.h +++ b/include/linux/ufs_fs.h @@ -908,7 +908,7 @@ struct ufs_super_block_third { __fs64 fs_csaddr; /* blk addr of cyl grp summary area */ __fs64 fs_pendingblocks;/* blocks in process of being freed */ __fs32 fs_pendinginodes;/*inodes in process of being freed */ - } fs_u2; + } __attribute__ ((packed)) fs_u2; } fs_un1; union { struct { -- cgit v1.2.3 From f46c483357c2d87606bbefb511321e3efd4baae0 Mon Sep 17 00:00:00 2001 From: Andrew Morton <akpm@osdl.org> Date: Thu, 2 Nov 2006 22:07:16 -0800 Subject: [PATCH] Add printk_timed_ratelimit() printk_ratelimit() has global state which makes it not useful for callers which wish to perform ratelimiting at a particular frequency. Add a printk_timed_ratelimit() which utilises caller-provided state storage to permit more flexibility. This function can in fact be used for things other than printk ratelimiting and is perhaps poorly named. Cc: Ulrich Drepper <drepper@redhat.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Rusty Russell <rusty@rustcorp.com.au> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/kernel.h | 2 ++ kernel/printk.c | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 80f39cab470a..24b611147adb 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -171,6 +171,8 @@ __attribute_const__ roundup_pow_of_two(unsigned long x) extern int printk_ratelimit(void); extern int __printk_ratelimit(int ratelimit_jiffies, int ratelimit_burst); +extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, + unsigned int interval_msec); static inline void console_silent(void) { diff --git a/kernel/printk.c b/kernel/printk.c index f7d427ef5038..66426552fbfe 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -31,6 +31,7 @@ #include <linux/security.h> #include <linux/bootmem.h> #include <linux/syscalls.h> +#include <linux/jiffies.h> #include <asm/uaccess.h> @@ -1101,3 +1102,23 @@ int printk_ratelimit(void) printk_ratelimit_burst); } EXPORT_SYMBOL(printk_ratelimit); + +/** + * printk_timed_ratelimit - caller-controlled printk ratelimiting + * @caller_jiffies: pointer to caller's state + * @interval_msecs: minimum interval between prints + * + * printk_timed_ratelimit() returns true if more than @interval_msecs + * milliseconds have elapsed since the last time printk_timed_ratelimit() + * returned true. + */ +bool printk_timed_ratelimit(unsigned long *caller_jiffies, + unsigned int interval_msecs) +{ + if (*caller_jiffies == 0 || time_after(jiffies, *caller_jiffies)) { + *caller_jiffies = jiffies + msecs_to_jiffies(interval_msecs); + return true; + } + return false; +} +EXPORT_SYMBOL(printk_timed_ratelimit); -- cgit v1.2.3 From b918f6e62cd46774f9fc0a3fbba6bd10ad85ee14 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" <rjw@sisk.pl> Date: Thu, 2 Nov 2006 22:07:19 -0800 Subject: [PATCH] swsusp: debugging Add a swsusp debugging mode. This does everything that's needed for a suspend except for actually suspending. 
So we can look in the log messages and work out a) what code is being slow and b) which drivers are misbehaving. (1) # echo testproc > /sys/power/disk # echo disk > /sys/power/state This should turn off the non-boot CPU, freeze all processes, wait for 5 seconds and then thaw the processes and the CPU. (2) # echo test > /sys/power/disk # echo disk > /sys/power/state This should turn off the non-boot CPU, freeze all processes, shrink memory, suspend all devices, wait for 5 seconds, resume the devices etc. Cc: Pavel Machek <pavel@ucw.cz> Cc: Stefan Seyfried <seife@suse.de> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- Documentation/ABI/testing/sysfs-power | 17 +++++++++++++++- Documentation/power/interface.txt | 13 ++++++++++++ include/linux/pm.h | 4 +++- kernel/power/disk.c | 37 ++++++++++++++++++++++++++--------- 4 files changed, 60 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power index d882f8093871..dcff4d0623ad 100644 --- a/Documentation/ABI/testing/sysfs-power +++ b/Documentation/ABI/testing/sysfs-power @@ -21,7 +21,7 @@ Description: these states. What: /sys/power/disk -Date: August 2006 +Date: September 2006 Contact: Rafael J. Wysocki <rjw@sisk.pl> Description: The /sys/power/disk file controls the operating mode of the @@ -39,6 +39,19 @@ Description: 'reboot' - the memory image will be saved by the kernel and the system will be rebooted. + Additionally, /sys/power/disk can be used to turn on one of the + two testing modes of the suspend-to-disk mechanism: 'testproc' + or 'test'. If the suspend-to-disk mechanism is in the + 'testproc' mode, writing 'disk' to /sys/power/state will cause + the kernel to disable nonboot CPUs and freeze tasks, wait for 5 + seconds, unfreeze tasks and enable nonboot CPUs. If it is in + the 'test' mode, writing 'disk' to /sys/power/state will cause + the kernel to disable nonboot CPUs and freeze tasks, shrink + memory, suspend devices, wait for 5 seconds, resume devices, + unfreeze tasks and enable nonboot CPUs. Then, we are able to + look in the log messages and work out, for example, which code + is being slow and which device drivers are misbehaving. + The suspend-to-disk method may be chosen by writing to this file one of the accepted strings: @@ -46,6 +59,8 @@ Description: 'platform' 'shutdown' 'reboot' + 'testproc' + 'test' It will only change to 'firmware' or 'platform' if the system supports that. diff --git a/Documentation/power/interface.txt b/Documentation/power/interface.txt index a66bec222b16..74311d7e0f3c 100644 --- a/Documentation/power/interface.txt +++ b/Documentation/power/interface.txt @@ -30,6 +30,17 @@ testing). The system will support either 'firmware' or 'platform', and that is known a priori. But, the user may choose 'shutdown' or 'reboot' as alternatives. +Additionally, /sys/power/disk can be used to turn on one of the two testing +modes of the suspend-to-disk mechanism: 'testproc' or 'test'. If the +suspend-to-disk mechanism is in the 'testproc' mode, writing 'disk' to +/sys/power/state will cause the kernel to disable nonboot CPUs and freeze +tasks, wait for 5 seconds, unfreeze tasks and enable nonboot CPUs. If it is +in the 'test' mode, writing 'disk' to /sys/power/state will cause the kernel +to disable nonboot CPUs and freeze tasks, shrink memory, suspend devices, wait +for 5 seconds, resume devices, unfreeze tasks and enable nonboot CPUs. 
Then, +we are able to look in the log messages and work out, for example, which code +is being slow and which device drivers are misbehaving. + Reading from this file will display what the mode is currently set to. Writing to this file will accept one of @@ -37,6 +48,8 @@ to. Writing to this file will accept one of 'platform' 'shutdown' 'reboot' + 'testproc' + 'test' It will only change to 'firmware' or 'platform' if the system supports it. diff --git a/include/linux/pm.h b/include/linux/pm.h index 6b27e07aef19..070394e846d0 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -116,7 +116,9 @@ typedef int __bitwise suspend_disk_method_t; #define PM_DISK_PLATFORM ((__force suspend_disk_method_t) 2) #define PM_DISK_SHUTDOWN ((__force suspend_disk_method_t) 3) #define PM_DISK_REBOOT ((__force suspend_disk_method_t) 4) -#define PM_DISK_MAX ((__force suspend_disk_method_t) 5) +#define PM_DISK_TEST ((__force suspend_disk_method_t) 5) +#define PM_DISK_TESTPROC ((__force suspend_disk_method_t) 6) +#define PM_DISK_MAX ((__force suspend_disk_method_t) 7) struct pm_ops { suspend_disk_method_t pm_disk_mode; diff --git a/kernel/power/disk.c b/kernel/power/disk.c index d3a158a60312..b1fb7866b0b3 100644 --- a/kernel/power/disk.c +++ b/kernel/power/disk.c @@ -71,7 +71,7 @@ static inline void platform_finish(void) static int prepare_processes(void) { - int error; + int error = 0; pm_prepare_console(); @@ -84,6 +84,12 @@ static int prepare_processes(void) goto thaw; } + if (pm_disk_mode == PM_DISK_TESTPROC) { + printk("swsusp debug: Waiting for 5 seconds.\n"); + mdelay(5000); + goto thaw; + } + /* Free memory before shutting down devices. */ if (!(error = swsusp_shrink_memory())) return 0; @@ -120,13 +126,21 @@ int pm_suspend_disk(void) if (error) return error; + if (pm_disk_mode == PM_DISK_TESTPROC) + goto Thaw; + suspend_console(); error = device_suspend(PMSG_FREEZE); if (error) { resume_console(); printk("Some devices failed to suspend\n"); - unprepare_processes(); - return error; + goto Thaw; + } + + if (pm_disk_mode == PM_DISK_TEST) { + printk("swsusp debug: Waiting for 5 seconds.\n"); + mdelay(5000); + goto Done; } pr_debug("PM: snapshotting memory.\n"); @@ -143,16 +157,17 @@ int pm_suspend_disk(void) power_down(pm_disk_mode); else { swsusp_free(); - unprepare_processes(); - return error; + goto Thaw; } - } else + } else { pr_debug("PM: Image restored successfully.\n"); + } swsusp_free(); Done: device_resume(); resume_console(); + Thaw: unprepare_processes(); return error; } @@ -249,6 +264,8 @@ static const char * const pm_disk_modes[] = { [PM_DISK_PLATFORM] = "platform", [PM_DISK_SHUTDOWN] = "shutdown", [PM_DISK_REBOOT] = "reboot", + [PM_DISK_TEST] = "test", + [PM_DISK_TESTPROC] = "testproc", }; /** @@ -303,17 +320,19 @@ static ssize_t disk_store(struct subsystem * s, const char * buf, size_t n) } } if (mode) { - if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT) + if (mode == PM_DISK_SHUTDOWN || mode == PM_DISK_REBOOT || + mode == PM_DISK_TEST || mode == PM_DISK_TESTPROC) { pm_disk_mode = mode; - else { + } else { if (pm_ops && pm_ops->enter && (mode == pm_ops->pm_disk_mode)) pm_disk_mode = mode; else error = -EINVAL; } - } else + } else { error = -EINVAL; + } pr_debug("PM: suspend-to-disk mode set to '%s'\n", pm_disk_modes[mode]); -- cgit v1.2.3 From 3fd593979802f81ff6452596ac61e3840f917589 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell <sfr@canb.auug.org.au> Date: Thu, 2 Nov 2006 22:07:24 -0800 Subject: [PATCH] Create compat_sys_migrate_pages This is needed on bigendian 64bit 
architectures. Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> Acked-by: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/compat.h | 4 ++++ kernel/compat.c | 33 +++++++++++++++++++++++++++++++++ kernel/sys_ni.c | 1 + 3 files changed, 38 insertions(+) (limited to 'include') diff --git a/include/linux/compat.h b/include/linux/compat.h index f1553196826f..80b17f440ec1 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -230,5 +230,9 @@ asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); extern int compat_printk(const char *fmt, ...); extern void sigset_from_compat(sigset_t *set, compat_sigset_t *compat); +asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, + compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes, + const compat_ulong_t __user *new_nodes); + #endif /* CONFIG_COMPAT */ #endif /* _LINUX_COMPAT_H */ diff --git a/kernel/compat.c b/kernel/compat.c index d4898aad6cfa..6952dd057300 100644 --- a/kernel/compat.c +++ b/kernel/compat.c @@ -982,4 +982,37 @@ asmlinkage long compat_sys_move_pages(pid_t pid, unsigned long nr_pages, } return sys_move_pages(pid, nr_pages, pages, nodes, status, flags); } + +asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, + compat_ulong_t maxnode, + const compat_ulong_t __user *old_nodes, + const compat_ulong_t __user *new_nodes) +{ + unsigned long __user *old = NULL; + unsigned long __user *new = NULL; + nodemask_t tmp_mask; + unsigned long nr_bits; + unsigned long size; + + nr_bits = min_t(unsigned long, maxnode - 1, MAX_NUMNODES); + size = ALIGN(nr_bits, BITS_PER_LONG) / 8; + if (old_nodes) { + if (compat_get_bitmap(nodes_addr(tmp_mask), old_nodes, nr_bits)) + return -EFAULT; + old = compat_alloc_user_space(new_nodes ? 
size * 2 : size); + if (new_nodes) + new = old + size / sizeof(unsigned long); + if (copy_to_user(old, nodes_addr(tmp_mask), size)) + return -EFAULT; + } + if (new_nodes) { + if (compat_get_bitmap(nodes_addr(tmp_mask), new_nodes, nr_bits)) + return -EFAULT; + if (new == NULL) + new = compat_alloc_user_space(size); + if (copy_to_user(new, nodes_addr(tmp_mask), size)) + return -EFAULT; + } + return sys_migrate_pages(pid, nr_bits + 1, old, new); +} #endif diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 0e53314b14de..d7306d0f3dfc 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -135,6 +135,7 @@ cond_syscall(sys_madvise); cond_syscall(sys_mremap); cond_syscall(sys_remap_file_pages); cond_syscall(compat_sys_move_pages); +cond_syscall(compat_sys_migrate_pages); /* block-layer dependent */ cond_syscall(sys_bdflush); -- cgit v1.2.3 From 43530d2b04b63ac4bb4ac25deee5f1180ccedc2e Mon Sep 17 00:00:00 2001 From: Stephen Rothwell <sfr@canb.auug.org.au> Date: Thu, 2 Nov 2006 22:07:24 -0800 Subject: [PATCH] powerpc: wire up sys_migrate_pages Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> Cc: Christoph Lameter <clameter@sgi.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/asm-powerpc/systbl.h | 2 +- include/asm-powerpc/unistd.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-powerpc/systbl.h b/include/asm-powerpc/systbl.h index eac85ce101b6..c6a03187f932 100644 --- a/include/asm-powerpc/systbl.h +++ b/include/asm-powerpc/systbl.h @@ -261,7 +261,7 @@ SYSX(sys_ni_syscall, ppc_fadvise64_64, ppc_fadvise64_64) PPC_SYS_SPU(rtas) OLDSYS(debug_setcontext) SYSCALL(ni_syscall) -SYSCALL(ni_syscall) +COMPAT_SYS(migrate_pages) COMPAT_SYS(mbind) COMPAT_SYS(get_mempolicy) COMPAT_SYS(set_mempolicy) diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h index 464a48cce7f5..b5fe93291c96 100644 --- a/include/asm-powerpc/unistd.h +++ b/include/asm-powerpc/unistd.h @@ -276,7 +276,7 @@ #define __NR_rtas 255 #define __NR_sys_debug_setcontext 256 /* Number 257 is reserved for vserver */ -/* 258 currently unused */ +#define __NR_migrate_pages 258 #define __NR_mbind 259 #define __NR_get_mempolicy 260 #define __NR_set_mempolicy 261 -- cgit v1.2.3 From 4833ed094097323f5f219820f6ebdc8dd66f501f Mon Sep 17 00:00:00 2001 From: Al Viro <viro@zeniv.linux.org.uk> Date: Fri, 3 Nov 2006 00:27:06 -0800 Subject: [IPX]: Trivial parts of endianness annotations Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: David S. 
Miller <davem@davemloft.net> --- include/linux/ipx.h | 14 +++++++------- include/net/ipx.h | 22 +++++++++++----------- net/ipx/af_ipx.c | 54 ++++++++++++++++++++++++++--------------------------- net/ipx/ipx_proc.c | 12 ++++++------ net/ipx/ipx_route.c | 10 +++++----- 5 files changed, 56 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/include/linux/ipx.h b/include/linux/ipx.h index 4f29c60964c4..eb19b4ea84f4 100644 --- a/include/linux/ipx.h +++ b/include/linux/ipx.h @@ -7,8 +7,8 @@ struct sockaddr_ipx { sa_family_t sipx_family; - __u16 sipx_port; - __u32 sipx_network; + __be16 sipx_port; + __be32 sipx_network; unsigned char sipx_node[IPX_NODE_LEN]; __u8 sipx_type; unsigned char sipx_zero; /* 16 byte fill */ @@ -23,13 +23,13 @@ struct sockaddr_ipx { #define IPX_CRTITF 1 struct ipx_route_definition { - __u32 ipx_network; - __u32 ipx_router_network; + __be32 ipx_network; + __be32 ipx_router_network; unsigned char ipx_router_node[IPX_NODE_LEN]; }; struct ipx_interface_definition { - __u32 ipx_network; + __be32 ipx_network; unsigned char ipx_device[16]; unsigned char ipx_dlink_type; #define IPX_FRAME_NONE 0 @@ -55,8 +55,8 @@ struct ipx_config_data { */ struct ipx_route_def { - __u32 ipx_network; - __u32 ipx_router_network; + __be32 ipx_network; + __be32 ipx_router_network; #define IPX_ROUTE_NO_ROUTER 0 unsigned char ipx_router_node[IPX_NODE_LEN]; unsigned char ipx_device[16]; diff --git a/include/net/ipx.h b/include/net/ipx.h index 5c0cf33826c5..4a423d2695ba 100644 --- a/include/net/ipx.h +++ b/include/net/ipx.h @@ -15,9 +15,9 @@ #include <linux/list.h> struct ipx_address { - __u32 net; + __be32 net; __u8 node[IPX_NODE_LEN]; - __u16 sock; + __be16 sock; }; #define ipx_broadcast_node "\377\377\377\377\377\377" @@ -28,7 +28,7 @@ struct ipx_address { struct ipxhdr { __u16 ipx_checksum __attribute__ ((packed)); #define IPX_NO_CHECKSUM 0xFFFF - __u16 ipx_pktsize __attribute__ ((packed)); + __be16 ipx_pktsize __attribute__ ((packed)); __u8 ipx_tctrl; __u8 ipx_type; #define IPX_TYPE_UNKNOWN 0x00 @@ -48,14 +48,14 @@ static __inline__ struct ipxhdr *ipx_hdr(struct sk_buff *skb) struct ipx_interface { /* IPX address */ - __u32 if_netnum; + __be32 if_netnum; unsigned char if_node[IPX_NODE_LEN]; atomic_t refcnt; /* physical device info */ struct net_device *if_dev; struct datalink_proto *if_dlink; - unsigned short if_dlink_type; + __be16 if_dlink_type; /* socket support */ unsigned short if_sknum; @@ -71,7 +71,7 @@ struct ipx_interface { }; struct ipx_route { - __u32 ir_net; + __be32 ir_net; struct ipx_interface *ir_intrfc; unsigned char ir_routed; unsigned char ir_router_node[IPX_NODE_LEN]; @@ -82,10 +82,10 @@ struct ipx_route { #ifdef __KERNEL__ struct ipx_cb { u8 ipx_tctrl; - u32 ipx_dest_net; - u32 ipx_source_net; + __be32 ipx_dest_net; + __be32 ipx_source_net; struct { - u32 netnum; + __be32 netnum; int index; } last_hop; }; @@ -97,7 +97,7 @@ struct ipx_sock { struct sock sk; struct ipx_address dest_addr; struct ipx_interface *intrfc; - unsigned short port; + __be16 port; #ifdef CONFIG_IPX_INTERN unsigned char node[IPX_NODE_LEN]; #endif @@ -132,7 +132,7 @@ extern struct ipx_interface *ipx_primary_net; extern int ipx_proc_init(void); extern void ipx_proc_exit(void); -extern const char *ipx_frame_name(unsigned short); +extern const char *ipx_frame_name(__be16); extern const char *ipx_device_name(struct ipx_interface *intrfc); static __inline__ void ipxitf_hold(struct ipx_interface *intrfc) diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index bef3f61569f7..c272a38af325 100644 --- 
a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -83,13 +83,13 @@ DEFINE_SPINLOCK(ipx_interfaces_lock); struct ipx_interface *ipx_primary_net; struct ipx_interface *ipx_internal_net; -extern int ipxrtr_add_route(__u32 network, struct ipx_interface *intrfc, +extern int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc, unsigned char *node); extern void ipxrtr_del_routes(struct ipx_interface *intrfc); extern int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, struct iovec *iov, int len, int noblock); extern int ipxrtr_route_skb(struct sk_buff *skb); -extern struct ipx_route *ipxrtr_lookup(__u32 net); +extern struct ipx_route *ipxrtr_lookup(__be32 net); extern int ipxrtr_ioctl(unsigned int cmd, void __user *arg); #undef IPX_REFCNT_DEBUG @@ -177,7 +177,7 @@ static void ipxitf_clear_primary_net(void) } static struct ipx_interface *__ipxitf_find_using_phys(struct net_device *dev, - unsigned short datalink) + __be16 datalink) { struct ipx_interface *i; @@ -190,7 +190,7 @@ out: } static struct ipx_interface *ipxitf_find_using_phys(struct net_device *dev, - unsigned short datalink) + __be16 datalink) { struct ipx_interface *i; @@ -202,7 +202,7 @@ static struct ipx_interface *ipxitf_find_using_phys(struct net_device *dev, return i; } -struct ipx_interface *ipxitf_find_using_net(__u32 net) +struct ipx_interface *ipxitf_find_using_net(__be32 net) { struct ipx_interface *i; @@ -237,7 +237,7 @@ static void ipxitf_insert_socket(struct ipx_interface *intrfc, struct sock *sk) /* caller must hold intrfc->if_sklist_lock */ static struct sock *__ipxitf_find_socket(struct ipx_interface *intrfc, - unsigned short port) + __be16 port) { struct sock *s; struct hlist_node *node; @@ -252,7 +252,7 @@ found: /* caller must hold a reference to intrfc */ static struct sock *ipxitf_find_socket(struct ipx_interface *intrfc, - unsigned short port) + __be16 port) { struct sock *s; @@ -268,7 +268,7 @@ static struct sock *ipxitf_find_socket(struct ipx_interface *intrfc, #ifdef CONFIG_IPX_INTERN static struct sock *ipxitf_find_internal_socket(struct ipx_interface *intrfc, unsigned char *ipx_node, - unsigned short port) + __be16 port) { struct sock *s; struct hlist_node *node; @@ -600,10 +600,10 @@ int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb, char *node) /* see if we need to include the netnum in the route list */ if (IPX_SKB_CB(skb)->last_hop.index >= 0) { - u32 *last_hop = (u32 *)(((u8 *) skb->data) + + __be32 *last_hop = (__be32 *)(((u8 *) skb->data) + sizeof(struct ipxhdr) + IPX_SKB_CB(skb)->last_hop.index * - sizeof(u32)); + sizeof(__be32)); *last_hop = IPX_SKB_CB(skb)->last_hop.netnum; IPX_SKB_CB(skb)->last_hop.index = -1; } @@ -772,7 +772,7 @@ static void ipxitf_discover_netnum(struct ipx_interface *intrfc, } else { printk(KERN_WARNING "IPX: Network number collision " "%lx\n %s %s and %s %s\n", - (unsigned long) htonl(cb->ipx_source_net), + (unsigned long) ntohl(cb->ipx_source_net), ipx_device_name(i), ipx_frame_name(i->if_dlink_type), ipx_device_name(intrfc), @@ -812,7 +812,7 @@ static int ipxitf_pprop(struct ipx_interface *intrfc, struct sk_buff *skb) int i, rc = -EINVAL; struct ipx_interface *ifcs; char *c; - u32 *l; + __be32 *l; /* Illegal packet - too many hops or too short */ /* We decide to throw it away: no broadcasting, no local processing. 
@@ -833,7 +833,7 @@ static int ipxitf_pprop(struct ipx_interface *intrfc, struct sk_buff *skb) goto out; c = ((u8 *) ipx) + sizeof(struct ipxhdr); - l = (u32 *) c; + l = (__be32 *) c; /* Don't broadcast packet if already seen this net */ for (i = 0; i < IPX_SKB_CB(skb)->ipx_tctrl; i++) @@ -855,7 +855,7 @@ static int ipxitf_pprop(struct ipx_interface *intrfc, struct sk_buff *skb) /* That aren't in the list */ if (ifcs == intrfc) continue; - l = (__u32 *) c; + l = (__be32 *) c; /* don't consider the last entry in the packet list, * it is our netnum, and it is not there yet */ for (i = 0; i < IPX_SKB_CB(skb)->ipx_tctrl; i++) @@ -885,8 +885,8 @@ static void ipxitf_insert(struct ipx_interface *intrfc) ipx_primary_net = intrfc; } -static struct ipx_interface *ipxitf_alloc(struct net_device *dev, __u32 netnum, - unsigned short dlink_type, +static struct ipx_interface *ipxitf_alloc(struct net_device *dev, __be32 netnum, + __be16 dlink_type, struct datalink_proto *dlink, unsigned char internal, int ipx_offset) @@ -960,7 +960,7 @@ static __be16 ipx_map_frame_type(unsigned char type) static int ipxitf_create(struct ipx_interface_definition *idef) { struct net_device *dev; - unsigned short dlink_type = 0; + __be16 dlink_type = 0; struct datalink_proto *datalink = NULL; struct ipx_interface *intrfc; int rc; @@ -1073,7 +1073,7 @@ out: static int ipxitf_delete(struct ipx_interface_definition *idef) { struct net_device *dev = NULL; - unsigned short dlink_type = 0; + __be16 dlink_type = 0; struct ipx_interface *intrfc; int rc = 0; @@ -1110,7 +1110,7 @@ out: } static struct ipx_interface *ipxitf_auto_create(struct net_device *dev, - unsigned short dlink_type) + __be16 dlink_type) { struct ipx_interface *intrfc = NULL; struct datalink_proto *datalink; @@ -1122,7 +1122,7 @@ static struct ipx_interface *ipxitf_auto_create(struct net_device *dev, if (dev->addr_len > IPX_NODE_LEN) goto out; - switch (htons(dlink_type)) { + switch (ntohs(dlink_type)) { case ETH_P_IPX: datalink = pEII_datalink; break; case ETH_P_802_2: datalink = p8022_datalink; break; case ETH_P_SNAP: datalink = pSNAP_datalink; break; @@ -1266,7 +1266,7 @@ __u16 ipx_cksum(struct ipxhdr *packet, int length) return ~sum; } -const char *ipx_frame_name(unsigned short frame) +const char *ipx_frame_name(__be16 frame) { char* rc = "None"; @@ -1401,7 +1401,7 @@ out: /* caller must hold a reference to intrfc */ -static unsigned short ipx_first_free_socketnum(struct ipx_interface *intrfc) +static __be16 ipx_first_free_socketnum(struct ipx_interface *intrfc) { unsigned short socketNum = intrfc->if_sknum; @@ -1410,7 +1410,7 @@ static unsigned short ipx_first_free_socketnum(struct ipx_interface *intrfc) if (socketNum < IPX_MIN_EPHEMERAL_SOCKET) socketNum = IPX_MIN_EPHEMERAL_SOCKET; - while (__ipxitf_find_socket(intrfc, ntohs(socketNum))) + while (__ipxitf_find_socket(intrfc, htons(socketNum))) if (socketNum > IPX_MAX_EPHEMERAL_SOCKET) socketNum = IPX_MIN_EPHEMERAL_SOCKET; else @@ -1419,7 +1419,7 @@ static unsigned short ipx_first_free_socketnum(struct ipx_interface *intrfc) spin_unlock_bh(&intrfc->if_sklist_lock); intrfc->if_sknum = socketNum; - return ntohs(socketNum); + return htons(socketNum); } static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) @@ -1473,7 +1473,7 @@ static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) ipxs->port)) { SOCK_DEBUG(sk, "IPX: bind failed because port %X in use.\n", - ntohs((int)addr->sipx_port)); + ntohs(addr->sipx_port)); goto out_put; } } else { @@ -1488,7 +1488,7 @@ 
static int ipx_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (ipxitf_find_socket(intrfc, addr->sipx_port)) { SOCK_DEBUG(sk, "IPX: bind failed because port %X in use.\n", - ntohs((int)addr->sipx_port)); + ntohs(addr->sipx_port)); goto out_put; } } @@ -1665,7 +1665,7 @@ static int ipx_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_ty intrfc = ipxitf_find_using_phys(dev, pt->type); if (!intrfc) { if (ipxcfg_auto_create_interfaces && - ntohl(IPX_SKB_CB(skb)->ipx_dest_net)) { + IPX_SKB_CB(skb)->ipx_dest_net) { intrfc = ipxitf_auto_create(dev, pt->type); if (intrfc) ipxitf_hold(intrfc); diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 4c0c71206e54..b7463dfca63e 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -260,22 +260,22 @@ static int ipx_seq_socket_show(struct seq_file *seq, void *v) ipxs = ipx_sk(s); #ifdef CONFIG_IPX_INTERN seq_printf(seq, "%08lX:%02X%02X%02X%02X%02X%02X:%04X ", - (unsigned long)htonl(ipxs->intrfc->if_netnum), + (unsigned long)ntohl(ipxs->intrfc->if_netnum), ipxs->node[0], ipxs->node[1], ipxs->node[2], ipxs->node[3], - ipxs->node[4], ipxs->node[5], htons(ipxs->port)); + ipxs->node[4], ipxs->node[5], ntohs(ipxs->port)); #else - seq_printf(seq, "%08lX:%04X ", (unsigned long) htonl(ipxs->intrfc->if_netnum), - htons(ipxs->port)); + seq_printf(seq, "%08lX:%04X ", (unsigned long) ntohl(ipxs->intrfc->if_netnum), + ntohs(ipxs->port)); #endif /* CONFIG_IPX_INTERN */ if (s->sk_state != TCP_ESTABLISHED) seq_printf(seq, "%-28s", "Not_Connected"); else { seq_printf(seq, "%08lX:%02X%02X%02X%02X%02X%02X:%04X ", - (unsigned long)htonl(ipxs->dest_addr.net), + (unsigned long)ntohl(ipxs->dest_addr.net), ipxs->dest_addr.node[0], ipxs->dest_addr.node[1], ipxs->dest_addr.node[2], ipxs->dest_addr.node[3], ipxs->dest_addr.node[4], ipxs->dest_addr.node[5], - htons(ipxs->dest_addr.sock)); + ntohs(ipxs->dest_addr.sock)); } seq_printf(seq, "%08X %08X %02X %03d\n", diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c index a30dbb1e08fb..8bfaefaf8841 100644 --- a/net/ipx/ipx_route.c +++ b/net/ipx/ipx_route.c @@ -20,16 +20,16 @@ DEFINE_RWLOCK(ipx_routes_lock); extern struct ipx_interface *ipx_internal_net; extern __u16 ipx_cksum(struct ipxhdr *packet, int length); -extern struct ipx_interface *ipxitf_find_using_net(__u32 net); +extern struct ipx_interface *ipxitf_find_using_net(__be32 net); extern int ipxitf_demux_socket(struct ipx_interface *intrfc, struct sk_buff *skb, int copy); extern int ipxitf_demux_socket(struct ipx_interface *intrfc, struct sk_buff *skb, int copy); extern int ipxitf_send(struct ipx_interface *intrfc, struct sk_buff *skb, char *node); -extern struct ipx_interface *ipxitf_find_using_net(__u32 net); +extern struct ipx_interface *ipxitf_find_using_net(__be32 net); -struct ipx_route *ipxrtr_lookup(__u32 net) +struct ipx_route *ipxrtr_lookup(__be32 net) { struct ipx_route *r; @@ -48,7 +48,7 @@ unlock: /* * Caller must hold a reference to intrfc */ -int ipxrtr_add_route(__u32 network, struct ipx_interface *intrfc, +int ipxrtr_add_route(__be32 network, struct ipx_interface *intrfc, unsigned char *node) { struct ipx_route *rt; @@ -118,7 +118,7 @@ out: return rc; } -static int ipxrtr_delete(__u32 net) +static int ipxrtr_delete(__be32 net) { struct ipx_route *r, *tmp; int rc; -- cgit v1.2.3 From 02e60370d4dac83f22d5ae75d5512bcb9a3f24b7 Mon Sep 17 00:00:00 2001 From: Al Viro <viro@zeniv.linux.org.uk> Date: Fri, 3 Nov 2006 00:28:23 -0800 Subject: [IPX]: Annotate and fix IPX checksum Calculation of IPX checksum got buggered about 
2.4.0. The old variant mangled the packet; that got fixed, but the calculation itself got buggered. Restored the correct logic, fixed a subtle breakage we used to have even back then: if the sum is 0 mod 0xffff, we want to return 0, not 0xffff. The latter has special meaning for IPX (checksum disabled). Observation (and obvious fix) nicked from history of FreeBSD ipx_cksum.c... Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/ipx.h | 4 ++-- net/ipx/af_ipx.c | 31 +++++++++++++++++++------------ net/ipx/ipx_route.c | 4 ++-- 3 files changed, 23 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/ipx.h b/include/net/ipx.h index 4a423d2695ba..c6b2ee610866 100644 --- a/include/net/ipx.h +++ b/include/net/ipx.h @@ -26,8 +26,8 @@ struct ipx_address { #define IPX_MAX_PPROP_HOPS 8 struct ipxhdr { - __u16 ipx_checksum __attribute__ ((packed)); -#define IPX_NO_CHECKSUM 0xFFFF + __be16 ipx_checksum __attribute__ ((packed)); +#define IPX_NO_CHECKSUM __constant_htons(0xFFFF) __be16 ipx_pktsize __attribute__ ((packed)); __u8 ipx_tctrl; __u8 ipx_type; diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index c272a38af325..76c661566dfd 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1234,27 +1234,27 @@ static int ipxitf_ioctl(unsigned int cmd, void __user *arg) /* Note: We assume ipx_tctrl==0 and htons(length)==ipx_pktsize */ /* This functions should *not* mess with packet contents */ -__u16 ipx_cksum(struct ipxhdr *packet, int length) +__be16 ipx_cksum(struct ipxhdr *packet, int length) { /* * NOTE: sum is a net byte order quantity, which optimizes the * loop. This only works on big and little endian machines. (I * don't know of a machine that isn't.) */ - /* start at ipx_dest - We skip the checksum field and start with - * ipx_type before the loop, not considering ipx_tctrl in the calc */ - __u16 *p = (__u16 *)&packet->ipx_dest; - __u32 i = (length >> 1) - 1; /* Number of complete words */ - __u32 sum = packet->ipx_type << sizeof(packet->ipx_tctrl); - - /* Loop through all complete words except the checksum field, - * ipx_type (accounted above) and ipx_tctrl (not used in the cksum) */ - while (--i) + /* handle the first 3 words separately; checksum should be skipped + * and ipx_tctrl masked out */ + __u16 *p = (__u16 *)packet; + __u32 sum = p[1] + (p[2] & (__force u16)htons(0x00ff)); + __u32 i = (length >> 1) - 3; /* Number of remaining complete words */ + + /* Loop through them */ + p += 3; + while (i--) sum += *p++; /* Add on the last part word if it exists */ if (packet->ipx_pktsize & htons(1)) - sum += ntohs(0xff00) & *p; + sum += (__force u16)htons(0xff00) & *p; /* Do final fixup */ sum = (sum & 0xffff) + (sum >> 16); @@ -1263,7 +1263,14 @@ __u16 ipx_cksum(struct ipxhdr *packet, int length) if (sum >= 0x10000) sum++; - return ~sum; + /* + * Leave 0 alone; we don't want 0xffff here.
Note that we can't get + * here with 0x10000, so this check is the same as ((__u16)sum) + */ + if (sum) + sum = ~sum; + + return (__force __be16)sum; } const char *ipx_frame_name(__be16 frame) diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c index 8bfaefaf8841..68560ee0d797 100644 --- a/net/ipx/ipx_route.c +++ b/net/ipx/ipx_route.c @@ -19,7 +19,7 @@ DEFINE_RWLOCK(ipx_routes_lock); extern struct ipx_interface *ipx_internal_net; -extern __u16 ipx_cksum(struct ipxhdr *packet, int length); +extern __be16 ipx_cksum(struct ipxhdr *packet, int length); extern struct ipx_interface *ipxitf_find_using_net(__be32 net); extern int ipxitf_demux_socket(struct ipx_interface *intrfc, struct sk_buff *skb, int copy); @@ -238,7 +238,7 @@ int ipxrtr_route_packet(struct sock *sk, struct sockaddr_ipx *usipx, /* Apply checksum. Not allowed on 802.3 links. */ if (sk->sk_no_check || intrfc->if_dlink_type == htons(IPX_FRAME_8023)) - ipx->ipx_checksum = 0xFFFF; + ipx->ipx_checksum = htons(0xFFFF); else ipx->ipx_checksum = ipx_cksum(ipx, len + sizeof(struct ipxhdr)); -- cgit v1.2.3 From 95026cd242bd4188a036f2eba20994113ed5a5d7 Mon Sep 17 00:00:00 2001 From: Al Viro <viro@zeniv.linux.org.uk> Date: Fri, 3 Nov 2006 00:55:35 -0800 Subject: [IPV6]: Fix ECN bug on big-endian __constant_htons(2<<4) is not a replacement for htonl(2<<20). Signed-off-by: Al Viro <viro@zeniv.linux.org.uk> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/inet_ecn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index d599c6bfbb86..7849844a4911 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -48,7 +48,7 @@ static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) #define IP6_ECN_flow_xmit(sk, label) do { \ if (INET_ECN_is_capable(inet_sk(sk)->tos)) \ - (label) |= __constant_htons(INET_ECN_ECT_0 << 4); \ + (label) |= htonl(INET_ECN_ECT_0 << 20); \ } while (0) static inline int IP_ECN_set_ce(struct iphdr *iph) -- cgit v1.2.3 From 59359ff87700f5e742c96a55da9cf0819984c128 Mon Sep 17 00:00:00 2001 From: "David S. Miller" <davem@davemloft.net> Date: Sun, 5 Nov 2006 16:51:03 -0800 Subject: [SPARC]: Fix robust futex syscalls and wire up migrate_pages. When I added the entries for the robust futex syscall entries, I forgot to bump NR_SYSCALLS. The current situation is error-prone because NR_SYSCALLS lives in entry.S where the system call limit checks are enforced. Move the definition to asm/unistd.h in order to make this mistake much more difficult to make. And wire up sys_migrate_pages since the powerpc folks implemented the compat wrapper for us. Signed-off-by: David S. Miller <davem@davemloft.net> --- arch/sparc/kernel/entry.S | 3 +-- arch/sparc/kernel/systbls.S | 5 +++-- arch/sparc64/kernel/entry.S | 3 +-- arch/sparc64/kernel/systbls.S | 8 +++++--- include/asm-sparc/unistd.h | 9 ++++++--- include/asm-sparc64/unistd.h | 10 +++++++--- 6 files changed, 23 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/arch/sparc/kernel/entry.S b/arch/sparc/kernel/entry.S index a4edff4c3be3..831f540251f8 100644 --- a/arch/sparc/kernel/entry.S +++ b/arch/sparc/kernel/entry.S @@ -32,13 +32,12 @@ #include <asm/mxcc.h> #include <asm/thread_info.h> #include <asm/param.h> +#include <asm/unistd.h> #include <asm/asmmacro.h> #define curptr g6 -#define NR_SYSCALLS 300 /* Each OS is different... */ - /* These are just handy. 
*/ #define _SV save %sp, -STACKFRAME_SZ, %sp #define _RS restore diff --git a/arch/sparc/kernel/systbls.S b/arch/sparc/kernel/systbls.S index 10df38eeae08..ea75ca569052 100644 --- a/arch/sparc/kernel/systbls.S +++ b/arch/sparc/kernel/systbls.S @@ -78,7 +78,7 @@ sys_call_table: /*285*/ .long sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_fstatat64 /*290*/ .long sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat /*295*/ .long sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare -/*300*/ .long sys_set_robust_list, sys_get_robust_list +/*300*/ .long sys_set_robust_list, sys_get_robust_list, sys_migrate_pages #ifdef CONFIG_SUNOS_EMUL /* Now the SunOS syscall table. */ @@ -190,6 +190,7 @@ sunos_sys_table: /*290*/ .long sunos_nosys, sunos_nosys, sunos_nosys .long sunos_nosys, sunos_nosys, sunos_nosys .long sunos_nosys, sunos_nosys, sunos_nosys - .long sunos_nosys, sunos_nosys, sunos_nosys + .long sunos_nosys +/*300*/ .long sunos_nosys, sunos_nosys, sunos_nosys #endif diff --git a/arch/sparc64/kernel/entry.S b/arch/sparc64/kernel/entry.S index 0aaa35fc5a9c..6f28bec0a9bf 100644 --- a/arch/sparc64/kernel/entry.S +++ b/arch/sparc64/kernel/entry.S @@ -22,11 +22,10 @@ #include <asm/auxio.h> #include <asm/sfafsr.h> #include <asm/pil.h> +#include <asm/unistd.h> #define curptr g6 -#define NR_SYSCALLS 300 /* Each OS is different... */ - .text .align 32 diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S index 419a63fca172..9a8026797ac0 100644 --- a/arch/sparc64/kernel/systbls.S +++ b/arch/sparc64/kernel/systbls.S @@ -79,7 +79,7 @@ sys_call_table32: .word sys_mkdirat, sys_mknodat, sys_fchownat, compat_sys_futimesat, compat_sys_fstatat64 /*290*/ .word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat .word sys_fchmodat, sys_faccessat, compat_sys_pselect6, compat_sys_ppoll, sys_unshare -/*300*/ .word compat_sys_set_robust_list, compat_sys_get_robust_list +/*300*/ .word compat_sys_set_robust_list, compat_sys_get_robust_list, compat_sys_migrate_pages #endif /* CONFIG_COMPAT */ @@ -149,7 +149,7 @@ sys_call_table: .word sys_mkdirat, sys_mknodat, sys_fchownat, sys_futimesat, sys_fstatat64 /*290*/ .word sys_unlinkat, sys_renameat, sys_linkat, sys_symlinkat, sys_readlinkat .word sys_fchmodat, sys_faccessat, sys_pselect6, sys_ppoll, sys_unshare -/*300*/ .word sys_set_robust_list, sys_get_robust_list +/*300*/ .word sys_set_robust_list, sys_get_robust_list, sys_migrate_pages #if defined(CONFIG_SUNOS_EMUL) || defined(CONFIG_SOLARIS_EMUL) || \ defined(CONFIG_SOLARIS_EMUL_MODULE) @@ -262,5 +262,7 @@ sunos_sys_table: /*290*/ .word sunos_nosys, sunos_nosys, sunos_nosys .word sunos_nosys, sunos_nosys, sunos_nosys .word sunos_nosys, sunos_nosys, sunos_nosys - .word sunos_nosys, sunos_nosys, sunos_nosys + .word sunos_nosys +/*300*/ .word sunos_nosys, sunos_nosys, sunos_nosys + #endif diff --git a/include/asm-sparc/unistd.h b/include/asm-sparc/unistd.h index c7a495afc82e..f7827fa4cd5e 100644 --- a/include/asm-sparc/unistd.h +++ b/include/asm-sparc/unistd.h @@ -318,12 +318,15 @@ #define __NR_unshare 299 #define __NR_set_robust_list 300 #define __NR_get_robust_list 301 +#define __NR_migrate_pages 302 + +#define NR_SYSCALLS 303 #ifdef __KERNEL__ -/* WARNING: You MAY NOT add syscall numbers larger than 301, since +/* WARNING: You MAY NOT add syscall numbers larger than 302, since * all of the syscall tables in the Sparc kernel are - * sized to have 301 entries (starting at zero). Therefore - * find a free slot in the 0-301 range. 
+ * sized to have 302 entries (starting at zero). Therefore + * find a free slot in the 0-302 range. */ #define _syscall0(type,name) \ diff --git a/include/asm-sparc64/unistd.h b/include/asm-sparc64/unistd.h index 124cf076717f..63669dad0d72 100644 --- a/include/asm-sparc64/unistd.h +++ b/include/asm-sparc64/unistd.h @@ -320,12 +320,16 @@ #define __NR_unshare 299 #define __NR_set_robust_list 300 #define __NR_get_robust_list 301 +#define __NR_migrate_pages 302 + +#define NR_SYSCALLS 303 #ifdef __KERNEL__ -/* WARNING: You MAY NOT add syscall numbers larger than 301, since + +/* WARNING: You MAY NOT add syscall numbers larger than 302, since * all of the syscall tables in the Sparc kernel are - * sized to have 301 entries (starting at zero). Therefore - * find a free slot in the 0-301 range. + * sized to have 302 entries (starting at zero). Therefore + * find a free slot in the 0-302 range. */ #define _syscall0(type,name) \ -- cgit v1.2.3 From d99f160ac53e51090f015a8f0617cea25f81a191 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" <ebiederm@xmission.com> Date: Sun, 5 Nov 2006 23:52:12 -0800 Subject: [PATCH] sysctl: allow a zero ctl_name in the middle of a sysctl table Since it is becoming clear that there are just enough users of the binary sysctl interface that completely removing the binary interface from the kernel will not be an option for foreseeable future, we need to find a way to address the sysctl maintenance issues. The basic problem is that sysctl requires one central authority to allocate sysctl numbers, or else conflicts and ABI breakage occur. The proc interface to sysctl does not have that problem, as names are not densely allocated. By not terminating a sysctl table until I have neither a ctl_name nor a procname, it becomes simple to add sysctl entries that don't show up in the binary sysctl interface. Which allows people to avoid allocating a binary sysctl value when not needed. I have audited the kernel code and in my reading I have not found a single sysctl table that wasn't terminated by a completely zero filled entry. So this change in behavior should not affect anything. I think this mechanism eases the pain enough that combined with a little disciple we can solve the reoccurring sysctl ABI breakage. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Acked-by: Alan Cox <alan@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/sysctl.h | 9 ++++++--- kernel/sysctl.c | 8 +++++--- 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 1b24bd45e080..c184732a70fc 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -961,8 +961,8 @@ extern ctl_handler sysctl_ms_jiffies; /* * Register a set of sysctl names by calling register_sysctl_table * with an initialised array of ctl_table's. An entry with zero - * ctl_name terminates the table. table->de will be set up by the - * registration and need not be initialised in advance. + * ctl_name and NULL procname terminates the table. table->de will be + * set up by the registration and need not be initialised in advance. * * sysctl names can be mirrored automatically under /proc/sys. The * procname supplied controls /proc naming. @@ -973,7 +973,10 @@ extern ctl_handler sysctl_ms_jiffies; * Leaf nodes in the sysctl tree will be represented by a single file * under /proc; non-leaf nodes will be represented by directories. 
A * null procname disables /proc mirroring at this node. - * + * + * sysctl entries with a zero ctl_name will not be available through + * the binary sysctl interface. + * * sysctl(2) can automatically manage read and write requests through * the sysctl table. The data and maxlen fields of the ctl_table * struct enable minimal validation of the values being written to be diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 0c8e805bbd6f..09e569f4792b 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1315,7 +1315,9 @@ repeat: return -ENOTDIR; if (get_user(n, name)) return -EFAULT; - for ( ; table->ctl_name; table++) { + for ( ; table->ctl_name || table->procname; table++) { + if (!table->ctl_name) + continue; if (n == table->ctl_name || table->ctl_name == CTL_ANY) { int error; if (table->child) { @@ -1532,7 +1534,7 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, int len; mode_t mode; - for (; table->ctl_name; table++) { + for (; table->ctl_name || table->procname; table++) { /* Can't do anything without a proc name. */ if (!table->procname) continue; @@ -1579,7 +1581,7 @@ static void register_proc_table(ctl_table * table, struct proc_dir_entry *root, static void unregister_proc_table(ctl_table * table, struct proc_dir_entry *root) { struct proc_dir_entry *de; - for (; table->ctl_name; table++) { + for (; table->ctl_name || table->procname; table++) { if (!(de = table->de)) continue; if (de->mode & S_IFDIR) { -- cgit v1.2.3 From 7cc13edc139108bb527b692f0548dce6bc648572 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" <ebiederm@xmission.com> Date: Sun, 5 Nov 2006 23:52:13 -0800 Subject: [PATCH] sysctl: implement CTL_UNNUMBERED This patch takes the CTL_UNNUMBERD concept from NFS and makes it available to all new sysctl users. At the same time the sysctl binary interface maintenance documentation is updated to mention and to describe what is needed to successfully maintain the sysctl binary interface. Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Acked-by: Alan Cox <alan@redhat.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/lockd/svc.c | 3 --- fs/nfs/sysctl.c | 5 ----- include/linux/sysctl.h | 14 +++++++++++--- 3 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 634139232aaf..8ca18085e68d 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -353,9 +353,6 @@ EXPORT_SYMBOL(lockd_down); * Sysctl parameters (same as module parameters, different interface). */ -/* Something that isn't CTL_ANY, CTL_NONE or a value that may clash. */ -#define CTL_UNNUMBERED -2 - static ctl_table nlm_sysctls[] = { { .ctl_name = CTL_UNNUMBERED, diff --git a/fs/nfs/sysctl.c b/fs/nfs/sysctl.c index 2fe3403c2409..3ea50ac64820 100644 --- a/fs/nfs/sysctl.c +++ b/fs/nfs/sysctl.c @@ -18,11 +18,6 @@ static const int nfs_set_port_min = 0; static const int nfs_set_port_max = 65535; static struct ctl_table_header *nfs_callback_sysctl_table; -/* - * Something that isn't CTL_ANY, CTL_NONE or a value that may clash. 
- * Use the same values as fs/lockd/svc.c - */ -#define CTL_UNNUMBERED -2 static ctl_table nfs_cb_sysctls[] = { #ifdef CONFIG_NFS_V4 diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index c184732a70fc..d98562f1df76 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -6,10 +6,17 @@ **************************************************************** **************************************************************** ** + ** WARNING: ** The values in this file are exported to user space via - ** the sysctl() binary interface. However this interface - ** is unstable and deprecated and will be removed in the future. - ** For a stable interface use /proc/sys. + ** the sysctl() binary interface. Do *NOT* change the + ** numbering of any existing values here, and do not change + ** any numbers within any one set of values. If you have to + ** have to redefine an existing interface, use a new number for it. + ** The kernel will then return -ENOTDIR to any application using + ** the old binary interface. + ** + ** For new interfaces unless you really need a binary number + ** please use CTL_UNNUMBERED. ** **************************************************************** **************************************************************** @@ -48,6 +55,7 @@ struct __sysctl_args { #ifdef __KERNEL__ #define CTL_ANY -1 /* Matches any name */ #define CTL_NONE 0 +#define CTL_UNNUMBERED CTL_NONE /* sysctl without a binary number */ #endif enum -- cgit v1.2.3 From bbfd2bf9028a55cb85ff51eb34dc663b7e49ed55 Mon Sep 17 00:00:00 2001 From: Haavard Skinnemoen <hskinnemoen@atmel.com> Date: Mon, 6 Nov 2006 14:02:44 +0100 Subject: AVR32: Wire up sys_epoll_pwait Signed-off-by: Haavard Skinnemoen <hskinnemoen@atmel.com> --- arch/avr32/kernel/syscall-stubs.S | 9 +++++++++ arch/avr32/kernel/syscall_table.S | 1 + include/asm-avr32/unistd.h | 3 ++- 3 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/avr32/kernel/syscall-stubs.S b/arch/avr32/kernel/syscall-stubs.S index 7589a9b426cb..890286a1e62b 100644 --- a/arch/avr32/kernel/syscall-stubs.S +++ b/arch/avr32/kernel/syscall-stubs.S @@ -100,3 +100,12 @@ __sys_splice: rcall sys_splice sub sp, -4 popm pc + + .global __sys_epoll_pwait + .type __sys_epoll_pwait,@function +__sys_epoll_pwait: + pushm lr + st.w --sp, ARG6 + rcall sys_epoll_pwait + sub sp, -4 + popm pc diff --git a/arch/avr32/kernel/syscall_table.S b/arch/avr32/kernel/syscall_table.S index 63b206965d05..db8f8b55ffdf 100644 --- a/arch/avr32/kernel/syscall_table.S +++ b/arch/avr32/kernel/syscall_table.S @@ -286,4 +286,5 @@ sys_call_table: .long sys_sync_file_range .long sys_tee .long sys_vmsplice + .long __sys_epoll_pwait /* 265 */ .long sys_ni_syscall /* r8 is saturated at nr_syscalls */ diff --git a/include/asm-avr32/unistd.h b/include/asm-avr32/unistd.h index a50e5004550c..56ed1f9d348a 100644 --- a/include/asm-avr32/unistd.h +++ b/include/asm-avr32/unistd.h @@ -280,9 +280,10 @@ #define __NR_sync_file_range 262 #define __NR_tee 263 #define __NR_vmsplice 264 +#define __NR_epoll_pwait 265 #ifdef __KERNEL__ -#define NR_syscalls 265 +#define NR_syscalls 266 #define __ARCH_WANT_IPC_PARSE_VERSION -- cgit v1.2.3 From 4a4cf77923eeb3cec40a302656d6ab5ced04ba48 Mon Sep 17 00:00:00 2001 From: Ralf Baechle <ralf@linux-mips.org> Date: Mon, 6 Nov 2006 17:41:06 +0000 Subject: [MIPS] Make irq number allocator generally available for fixing EV64120. 
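The allocator itself is a simple bitmap: allocate_irqno() scans irq_map with find_first_zero_bit() and claims the slot with test_and_set_bit(), retrying if it races with a concurrent allocation, and free_irqno() clears the bit again. A minimal sketch of how a board port might use it follows; the handler and the "mydev" name are illustrative assumptions, not code from this patch:

	static irqreturn_t mydev_interrupt(int irq, void *dev_id)
	{
		/* acknowledge the device, then report the irq as handled */
		return IRQ_HANDLED;
	}

	static int __init mydev_init(void)
	{
		int irqno = allocate_irqno();

		if (irqno < 0)
			return irqno;	/* -ENOSPC: all NR_IRQS numbers taken */

		if (request_irq(irqno, mydev_interrupt, IRQF_DISABLED,
				"mydev", NULL)) {
			free_irqno(irqno);
			return -EBUSY;
		}

		return 0;
	}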
Signed-off-by: Ralf Baechle <ralf@linux-mips.org> --- arch/mips/kernel/irq.c | 42 +++++++++++++++++++++++++++++++++++++++++ arch/mips/sgi-ip27/ip27-irq.c | 23 ---------------------- arch/mips/sgi-ip27/ip27-timer.c | 2 -- include/asm-mips/irq.h | 4 ++++ 4 files changed, 46 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/arch/mips/kernel/irq.c b/arch/mips/kernel/irq.c index dd24434392b6..9b0e49d63d7b 100644 --- a/arch/mips/kernel/irq.c +++ b/arch/mips/kernel/irq.c @@ -26,6 +26,48 @@ #include <asm/system.h> #include <asm/uaccess.h> +static unsigned long irq_map[NR_IRQS / BITS_PER_LONG]; + +int __devinit allocate_irqno(void) +{ + int irq; + +again: + irq = find_first_zero_bit(irq_map, NR_IRQS); + + if (irq >= NR_IRQS) + return -ENOSPC; + + if (test_and_set_bit(irq, irq_map)) + goto again; + + return irq; +} + +EXPORT_SYMBOL_GPL(allocate_irqno); + +/* + * Allocate the 16 legacy interrupts for i8259 devices. This happens early + * in the kernel initialization so treating allocation failure as BUG() is + * ok. + */ +void __init alloc_legacy_irqno(void) +{ + int i; + + for (i = 0; i <= 16; i++) + BUG_ON(test_and_set_bit(i, irq_map)); +} + +void __devinit free_irqno(unsigned int irq) +{ + smp_mb__before_clear_bit(); + clear_bit(irq, irq_map); + smp_mb__after_clear_bit(); +} + +EXPORT_SYMBOL_GPL(free_irqno); + /* * 'what should we do if we get a hw irq event on an illegal vector'. * each architecture has to answer this themselves. diff --git a/arch/mips/sgi-ip27/ip27-irq.c b/arch/mips/sgi-ip27/ip27-irq.c index f01ba1f90770..270ecd3e6b4a 100644 --- a/arch/mips/sgi-ip27/ip27-irq.c +++ b/arch/mips/sgi-ip27/ip27-irq.c @@ -354,29 +354,6 @@ static struct irq_chip bridge_irq_type = { .end = end_bridge_irq, }; -static unsigned long irq_map[NR_IRQS / BITS_PER_LONG]; - -int allocate_irqno(void) -{ - int irq; - -again: - irq = find_first_zero_bit(irq_map, NR_IRQS); - - if (irq >= NR_IRQS) - return -ENOSPC; - - if (test_and_set_bit(irq, irq_map)) - goto again; - - return irq; -} - -void free_irqno(unsigned int irq) -{ - clear_bit(irq, irq_map); -} - void __devinit register_bridge_irq(unsigned int irq) { irq_desc[irq].status = IRQ_DISABLED; diff --git a/arch/mips/sgi-ip27/ip27-timer.c b/arch/mips/sgi-ip27/ip27-timer.c index c965705f3427..5e82a268e3c9 100644 --- a/arch/mips/sgi-ip27/ip27-timer.c +++ b/arch/mips/sgi-ip27/ip27-timer.c @@ -214,8 +214,6 @@ static struct irqaction rt_irqaction = { .name = "timer" }; -extern int allocate_irqno(void); - void __init plat_timer_setup(struct irqaction *irq) { int irqno = allocate_irqno(); diff --git a/include/asm-mips/irq.h b/include/asm-mips/irq.h index 0ce2a80b689e..35a05ca5560c 100644 --- a/include/asm-mips/irq.h +++ b/include/asm-mips/irq.h @@ -74,4 +74,8 @@ extern int setup_irq_smtc(unsigned int irq, struct irqaction * new, unsigned long hwmask); #endif /* CONFIG_MIPS_MT_SMTC */ +extern int allocate_irqno(void); +extern void alloc_legacy_irqno(void); +extern void free_irqno(unsigned int irq); + #endif /* _ASM_IRQ_H */ -- cgit v1.2.3 From 6c0ffb9d2fd987c79c6cbb81c3f3011c63749b1a Mon Sep 17 00:00:00 2001 From: Linus Torvalds <torvalds@merom.osdl.org> Date: Wed, 8 Nov 2006 10:23:03 -0800 Subject: x86-64: clean up io-apic accesses This is just commit 130fe05dbc0114609cfef9815c0c5580b42decfa ported to x86-64, for all the same reasons. It cleans up the IO-APIC accesses in order to then fix the ordering issues. 
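The shape of the new accessors, condensed from the hunk below: the IO-APIC's index/data register pair becomes a real structure accessed through readl()/writel() on an __iomem pointer, instead of dereferencing a hand-rolled "volatile" pointer into the fixmap:

	struct io_apic {
		unsigned int index;
		unsigned int unused[3];
		unsigned int data;
	};

	static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
	{
		struct io_apic __iomem *io_apic = io_apic_base(apic);
		writel(reg, &io_apic->index);
		return readl(&io_apic->data);
	}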
We move the accessor functions (that were only used by io_apic.c) out of a header file, and use proper memory-mapped accesses rather than making up our own "volatile" pointers. Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- arch/x86_64/kernel/io_apic.c | 46 ++++++++++++++++++++++++++++++++++++++++++++ include/asm-x86_64/io_apic.h | 34 -------------------------------- 2 files changed, 46 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index fe429e5d6b29..96e02d845716 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -88,6 +88,52 @@ static struct irq_pin_list { short apic, pin, next; } irq_2_pin[PIN_MAP_SIZE]; +struct io_apic { + unsigned int index; + unsigned int unused[3]; + unsigned int data; +}; + +static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) +{ + return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) + + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK); +} + +static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + writel(reg, &io_apic->index); + return readl(&io_apic->data); +} + +static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + writel(reg, &io_apic->index); + writel(value, &io_apic->data); +} + +/* + * Re-write a value: to be used for read-modify-write + * cycles where the read already set up the index register. + */ +static inline void io_apic_modify(unsigned int apic, unsigned int value) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + writel(value, &io_apic->data); +} + +/* + * Synchronize the IO-APIC and the CPU by doing + * a dummy read from the IO-APIC + */ +static inline void io_apic_sync(unsigned int apic) +{ + struct io_apic __iomem *io_apic = io_apic_base(apic); + readl(&io_apic->data); +} + #define __DO_ACTION(R, ACTION, FINAL) \ \ { \ diff --git a/include/asm-x86_64/io_apic.h b/include/asm-x86_64/io_apic.h index 171ec2dc8c04..561ecbfd4cb5 100644 --- a/include/asm-x86_64/io_apic.h +++ b/include/asm-x86_64/io_apic.h @@ -12,10 +12,6 @@ #define APIC_MISMATCH_DEBUG -#define IO_APIC_BASE(idx) \ - ((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \ - + (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK))) - /* * The structure of the IO-APIC: */ @@ -119,36 +115,6 @@ extern struct mpc_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; /* non-0 if default (table-less) MP configuration */ extern int mpc_default_type; -static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) -{ - *IO_APIC_BASE(apic) = reg; - return *(IO_APIC_BASE(apic)+4); -} - -static inline void io_apic_write(unsigned int apic, unsigned int reg, unsigned int value) -{ - *IO_APIC_BASE(apic) = reg; - *(IO_APIC_BASE(apic)+4) = value; -} - -/* - * Re-write a value: to be used for read-modify-write - * cycles where the read already set up the index register. - */ -static inline void io_apic_modify(unsigned int apic, unsigned int value) -{ - *(IO_APIC_BASE(apic)+4) = value; -} - -/* - * Synchronize the IO-APIC and the CPU by doing - * a dummy read from the IO-APIC - */ -static inline void io_apic_sync(unsigned int apic) -{ - (void) *(IO_APIC_BASE(apic)+4); -} - /* 1 if "noapic" boot option passed */ extern int skip_ioapic_setup; -- cgit v1.2.3 From 81ac95c5569d7a60ab5db6c1ccec56c12b3ebcb5 Mon Sep 17 00:00:00 2001 From: "J. 
Bruce Fields" <bfields@fieldses.org> Date: Wed, 8 Nov 2006 17:44:40 -0800 Subject: [PATCH] nfsd4: fix open-create permissions In the case where an open creates the file, we shouldn't be rechecking permissions to open the file; the open succeeds regardless of what the new file's mode bits say. This patch fixes the problem, but only by introducing yet another parameter to nfsd_create_v3. This is ugly. This will be fixed by later patches. Signed-off-by: J. Bruce Fields <bfields@citi.umich.edu> Acked-by: Neil Brown <neilb@suse.de> Cc: Jeff Garzik <jeff@garzik.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/nfsd/nfs3proc.c | 2 +- fs/nfsd/nfs4proc.c | 6 ++++-- fs/nfsd/vfs.c | 4 +++- include/linux/nfsd/nfsd.h | 2 +- 4 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c index 64db601c2bd2..7f5bad0393b1 100644 --- a/fs/nfsd/nfs3proc.c +++ b/fs/nfsd/nfs3proc.c @@ -258,7 +258,7 @@ nfsd3_proc_create(struct svc_rqst *rqstp, struct nfsd3_createargs *argp, /* Now create the file and set attributes */ nfserr = nfsd_create_v3(rqstp, dirfhp, argp->name, argp->len, attr, newfhp, - argp->createmode, argp->verf, NULL); + argp->createmode, argp->verf, NULL, NULL); RETURN_STATUS(nfserr); } diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 4a73f5b2546f..50bc94243ca1 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -93,6 +93,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o { struct svc_fh resfh; __be32 status; + int created = 0; fh_init(&resfh, NFS4_FHSIZE); open->op_truncate = 0; @@ -105,7 +106,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o status = nfsd_create_v3(rqstp, current_fh, open->op_fname.data, open->op_fname.len, &open->op_iattr, &resfh, open->op_createmode, - (u32 *)open->op_verf.data, &open->op_truncate); + (u32 *)open->op_verf.data, &open->op_truncate, &created); } else { status = nfsd_lookup(rqstp, current_fh, open->op_fname.data, open->op_fname.len, &resfh); @@ -122,7 +123,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o memcpy(open->op_stateowner->so_replay.rp_openfh, &resfh.fh_handle.fh_base, resfh.fh_handle.fh_size); - status = do_open_permission(rqstp, current_fh, open, MAY_NOP); + if (!created) + status = do_open_permission(rqstp, current_fh, open, MAY_NOP); out: fh_put(&resfh); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index f21e917bb8ed..1a7ad8c983d1 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1237,7 +1237,7 @@ __be32 nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, char *fname, int flen, struct iattr *iap, struct svc_fh *resfhp, int createmode, u32 *verifier, - int *truncp) + int *truncp, int *created) { struct dentry *dentry, *dchild = NULL; struct inode *dirp; @@ -1331,6 +1331,8 @@ nfsd_create_v3(struct svc_rqst *rqstp, struct svc_fh *fhp, host_err = vfs_create(dirp, dchild, iap->ia_mode, NULL); if (host_err < 0) goto out_nfserr; + if (created) + *created = 1; if (EX_ISSYNC(fhp->fh_export)) { err = nfserrno(nfsd_sync_dir(dentry)); diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index eb231143d579..edb54c3171b3 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -89,7 +89,7 @@ __be32 nfsd_access(struct svc_rqst *, struct svc_fh *, u32 *, u32 *); __be32 nfsd_create_v3(struct svc_rqst *, struct svc_fh *, char *name, int len, struct iattr *attrs, struct svc_fh *res, int 
createmode, - u32 *verifier, int *truncp); + u32 *verifier, int *truncp, int *created); __be32 nfsd_commit(struct svc_rqst *, struct svc_fh *, loff_t, unsigned long); #endif /* CONFIG_NFSD_V3 */ -- cgit v1.2.3 From 46d52b09fa6a2d1e313cb75ca352d6f466e67bd1 Mon Sep 17 00:00:00 2001 From: Corey Minyard <minyard@acm.org> Date: Wed, 8 Nov 2006 17:44:55 -0800 Subject: [PATCH] IPMI: retry messages on certain error returns Some more errors from the IPMI send message command are retryable, but are not being retried by the IPMI code. Make sure they get retried. Signed-off-by: Corey Minyard <minyard@acm.org> Cc: Frederic Lelievre <Frederic.Lelievre@ca.kontron.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- drivers/char/ipmi/ipmi_msghandler.c | 4 +++- include/linux/ipmi_msgdefs.h | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/char/ipmi/ipmi_msghandler.c b/drivers/char/ipmi/ipmi_msghandler.c index e55a0d276729..0b07ca1b71fa 100644 --- a/drivers/char/ipmi/ipmi_msghandler.c +++ b/drivers/char/ipmi/ipmi_msghandler.c @@ -3242,7 +3242,9 @@ void ipmi_smi_msg_received(ipmi_smi_t intf, report the error immediately. */ if ((msg->rsp_size >= 3) && (msg->rsp[2] != 0) && (msg->rsp[2] != IPMI_NODE_BUSY_ERR) - && (msg->rsp[2] != IPMI_LOST_ARBITRATION_ERR)) + && (msg->rsp[2] != IPMI_LOST_ARBITRATION_ERR) + && (msg->rsp[2] != IPMI_BUS_ERR) + && (msg->rsp[2] != IPMI_NAK_ON_WRITE_ERR)) { int chan = msg->rsp[3] & 0xf; diff --git a/include/linux/ipmi_msgdefs.h b/include/linux/ipmi_msgdefs.h index 22f5e2afda4f..4d04d8b58a0a 100644 --- a/include/linux/ipmi_msgdefs.h +++ b/include/linux/ipmi_msgdefs.h @@ -75,6 +75,8 @@ #define IPMI_INVALID_COMMAND_ERR 0xc1 #define IPMI_ERR_MSG_TRUNCATED 0xc6 #define IPMI_LOST_ARBITRATION_ERR 0x81 +#define IPMI_BUS_ERR 0x82 +#define IPMI_NAK_ON_WRITE_ERR 0x83 #define IPMI_ERR_UNSPECIFIED 0xff #define IPMI_CHANNEL_PROTOCOL_IPMB 1 -- cgit v1.2.3 From ec68307cc5a8dc499e48693843bb42f6b6028458 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" <ebiederm@xmission.com> Date: Wed, 8 Nov 2006 17:44:57 -0800 Subject: [PATCH] htirq: refactor so we only have one function that writes to the chip This refactoring actually optimizes the code a little by caching the value that we think the device is programmed with instead of reading it back from the hardware. Which simplifies the code a little and should speed things up a bit. This patch introduces the concept of a ht_irq_msg and modifies the architecture read/write routines to update this code. There is a minor consistency fix here as well as x86_64 forgot to initialize the htirq as masked. Signed-off-by: Eric W. 
Biederman <ebiederm@xmission.com> Cc: Andi Kleen <ak@suse.de> Acked-by: Bryan O'Sullivan <bos@pathscale.com> Cc: <olson@pathscale.com> Cc: Roland Dreier <rolandd@cisco.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- arch/i386/kernel/io_apic.c | 26 ++++++++-------- arch/x86_64/kernel/io_apic.c | 31 +++++++++---------- drivers/pci/htirq.c | 72 +++++++++++++++----------------------------- include/linux/htirq.h | 11 ++++--- 4 files changed, 58 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 507983c513c3..ad84bc2802a6 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c @@ -2624,18 +2624,16 @@ void arch_teardown_msi_irq(unsigned int irq) static void target_ht_irq(unsigned int irq, unsigned int dest) { - u32 low, high; - low = read_ht_irq_low(irq); - high = read_ht_irq_high(irq); + struct ht_irq_msg msg; + fetch_ht_irq_msg(irq, &msg); - low &= ~(HT_IRQ_LOW_DEST_ID_MASK); - high &= ~(HT_IRQ_HIGH_DEST_ID_MASK); + msg.address_lo &= ~(HT_IRQ_LOW_DEST_ID_MASK); + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - low |= HT_IRQ_LOW_DEST_ID(dest); - high |= HT_IRQ_HIGH_DEST_ID(dest); + msg.address_lo |= HT_IRQ_LOW_DEST_ID(dest); + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); - write_ht_irq_low(irq, low); - write_ht_irq_high(irq, high); + write_ht_irq_msg(irq, &msg); } static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) @@ -2673,7 +2671,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) vector = assign_irq_vector(irq); if (vector >= 0) { - u32 low, high; + struct ht_irq_msg msg; unsigned dest; cpumask_t tmp; @@ -2681,9 +2679,10 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) cpu_set(vector >> 8, tmp); dest = cpu_mask_to_apicid(tmp); - high = HT_IRQ_HIGH_DEST_ID(dest); + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); - low = HT_IRQ_LOW_BASE | + msg.address_lo = + HT_IRQ_LOW_BASE | HT_IRQ_LOW_DEST_ID(dest) | HT_IRQ_LOW_VECTOR(vector) | ((INT_DEST_MODE == 0) ? 
@@ -2695,8 +2694,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) HT_IRQ_LOW_MT_ARBITRATED) | HT_IRQ_LOW_IRQ_MASKED; - write_ht_irq_low(irq, low); - write_ht_irq_high(irq, high); + write_ht_irq_msg(irq, &msg); set_irq_chip_and_handler_name(irq, &ht_irq_chip, handle_edge_irq, "edge"); diff --git a/arch/x86_64/kernel/io_apic.c b/arch/x86_64/kernel/io_apic.c index 3b8f9c68ad3c..41bfc49301ad 100644 --- a/arch/x86_64/kernel/io_apic.c +++ b/arch/x86_64/kernel/io_apic.c @@ -1955,18 +1955,16 @@ void arch_teardown_msi_irq(unsigned int irq) static void target_ht_irq(unsigned int irq, unsigned int dest, u8 vector) { - u32 low, high; - low = read_ht_irq_low(irq); - high = read_ht_irq_high(irq); + struct ht_irq_msg msg; + fetch_ht_irq_msg(irq, &msg); - low &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); - high &= ~(HT_IRQ_HIGH_DEST_ID_MASK); + msg.address_lo &= ~(HT_IRQ_LOW_VECTOR_MASK | HT_IRQ_LOW_DEST_ID_MASK); + msg.address_hi &= ~(HT_IRQ_HIGH_DEST_ID_MASK); - low |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); - high |= HT_IRQ_HIGH_DEST_ID(dest); + msg.address_lo |= HT_IRQ_LOW_VECTOR(vector) | HT_IRQ_LOW_DEST_ID(dest); + msg.address_hi |= HT_IRQ_HIGH_DEST_ID(dest); - write_ht_irq_low(irq, low); - write_ht_irq_high(irq, high); + write_ht_irq_msg(irq, &msg); } static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) @@ -1987,7 +1985,7 @@ static void set_ht_irq_affinity(unsigned int irq, cpumask_t mask) dest = cpu_mask_to_apicid(tmp); - target_ht_irq(irq, dest, vector & 0xff); + target_ht_irq(irq, dest, vector); set_native_irq_info(irq, mask); } #endif @@ -2010,14 +2008,15 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) vector = assign_irq_vector(irq, TARGET_CPUS, &tmp); if (vector >= 0) { - u32 low, high; + struct ht_irq_msg msg; unsigned dest; dest = cpu_mask_to_apicid(tmp); - high = HT_IRQ_HIGH_DEST_ID(dest); + msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); - low = HT_IRQ_LOW_BASE | + msg.address_lo = + HT_IRQ_LOW_BASE | HT_IRQ_LOW_DEST_ID(dest) | HT_IRQ_LOW_VECTOR(vector) | ((INT_DEST_MODE == 0) ? @@ -2026,10 +2025,10 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev) HT_IRQ_LOW_RQEOI_EDGE | ((INT_DELIVERY_MODE != dest_LowestPrio) ? 
HT_IRQ_LOW_MT_FIXED : - HT_IRQ_LOW_MT_ARBITRATED); + HT_IRQ_LOW_MT_ARBITRATED) | + HT_IRQ_LOW_IRQ_MASKED; - write_ht_irq_low(irq, low); - write_ht_irq_high(irq, high); + write_ht_irq_msg(irq, &msg); set_irq_chip_and_handler_name(irq, &ht_irq_chip, handle_edge_irq, "edge"); diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c index 0e27f2404a83..e346fe31f97a 100644 --- a/drivers/pci/htirq.c +++ b/drivers/pci/htirq.c @@ -27,82 +27,55 @@ struct ht_irq_cfg { struct pci_dev *dev; unsigned pos; unsigned idx; + struct ht_irq_msg msg; }; -void write_ht_irq_low(unsigned int irq, u32 data) -{ - struct ht_irq_cfg *cfg = get_irq_data(irq); - unsigned long flags; - spin_lock_irqsave(&ht_irq_lock, flags); - pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx); - pci_write_config_dword(cfg->dev, cfg->pos + 4, data); - spin_unlock_irqrestore(&ht_irq_lock, flags); -} - -void write_ht_irq_high(unsigned int irq, u32 data) -{ - struct ht_irq_cfg *cfg = get_irq_data(irq); - unsigned long flags; - spin_lock_irqsave(&ht_irq_lock, flags); - pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx + 1); - pci_write_config_dword(cfg->dev, cfg->pos + 4, data); - spin_unlock_irqrestore(&ht_irq_lock, flags); -} -u32 read_ht_irq_low(unsigned int irq) +void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg) { struct ht_irq_cfg *cfg = get_irq_data(irq); unsigned long flags; - u32 data; spin_lock_irqsave(&ht_irq_lock, flags); - pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx); - pci_read_config_dword(cfg->dev, cfg->pos + 4, &data); + if (cfg->msg.address_lo != msg->address_lo) { + pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx); + pci_write_config_dword(cfg->dev, cfg->pos + 4, msg->address_lo); + } + if (cfg->msg.address_hi != msg->address_hi) { + pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx + 1); + pci_write_config_dword(cfg->dev, cfg->pos + 4, msg->address_hi); + } spin_unlock_irqrestore(&ht_irq_lock, flags); - return data; + cfg->msg = *msg; } -u32 read_ht_irq_high(unsigned int irq) +void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg) { struct ht_irq_cfg *cfg = get_irq_data(irq); - unsigned long flags; - u32 data; - spin_lock_irqsave(&ht_irq_lock, flags); - pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx + 1); - pci_read_config_dword(cfg->dev, cfg->pos + 4, &data); - spin_unlock_irqrestore(&ht_irq_lock, flags); - return data; + *msg = cfg->msg; } void mask_ht_irq(unsigned int irq) { struct ht_irq_cfg *cfg; - unsigned long flags; - u32 data; + struct ht_irq_msg msg; cfg = get_irq_data(irq); - spin_lock_irqsave(&ht_irq_lock, flags); - pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx); - pci_read_config_dword(cfg->dev, cfg->pos + 4, &data); - data |= 1; - pci_write_config_dword(cfg->dev, cfg->pos + 4, data); - spin_unlock_irqrestore(&ht_irq_lock, flags); + msg = cfg->msg; + msg.address_lo |= 1; + write_ht_irq_msg(irq, &msg); } void unmask_ht_irq(unsigned int irq) { struct ht_irq_cfg *cfg; - unsigned long flags; - u32 data; + struct ht_irq_msg msg; cfg = get_irq_data(irq); - spin_lock_irqsave(&ht_irq_lock, flags); - pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx); - pci_read_config_dword(cfg->dev, cfg->pos + 4, &data); - data &= ~1; - pci_write_config_dword(cfg->dev, cfg->pos + 4, data); - spin_unlock_irqrestore(&ht_irq_lock, flags); + msg = cfg->msg; + msg.address_lo &= ~1; + write_ht_irq_msg(irq, &msg); } /** @@ -152,6 +125,9 @@ int ht_create_irq(struct pci_dev *dev, int idx) cfg->dev = dev; cfg->pos = pos; cfg->idx = 0x10 + (idx * 2); + /* Initialize 
msg to a value that will never match the first write. */ + cfg->msg.address_lo = 0xffffffff; + cfg->msg.address_hi = 0xffffffff; irq = create_irq(); if (irq < 0) { diff --git a/include/linux/htirq.h b/include/linux/htirq.h index 1f15ce279a23..108f0d91e11e 100644 --- a/include/linux/htirq.h +++ b/include/linux/htirq.h @@ -1,11 +1,14 @@ #ifndef LINUX_HTIRQ_H #define LINUX_HTIRQ_H +struct ht_irq_msg { + u32 address_lo; /* low 32 bits of the ht irq message */ + u32 address_hi; /* high 32 bits of the ht irq message */ +}; + /* Helper functions.. */ -void write_ht_irq_low(unsigned int irq, u32 data); -void write_ht_irq_high(unsigned int irq, u32 data); -u32 read_ht_irq_low(unsigned int irq); -u32 read_ht_irq_high(unsigned int irq); +void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); +void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); void mask_ht_irq(unsigned int irq); void unmask_ht_irq(unsigned int irq); -- cgit v1.2.3 From 43539c38cd8edb915d1f0e1f55dcb70638b4cc8e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" <ebiederm@xmission.com> Date: Wed, 8 Nov 2006 17:44:57 -0800 Subject: [PATCH] htirq: allow buggy drivers of buggy hardware to write the registers This patch adds a variant of ht_create_irq, __ht_create_irq, that takes an additional parameter, update: a function that is called whenever we want to write to a driver's htirq configuration registers. This is needed to support the ipath_iba6110 because its registers in the proper location are not actually connected to the hardware that controls interrupt delivery. [bos@serpentine.com: fixes] Signed-off-by: Eric W. Biederman <ebiederm@xmission.com> Cc: Andi Kleen <ak@suse.de> Cc: <olson@pathscale.com> Cc: Roland Dreier <rolandd@cisco.com> Signed-off-by: Bryan O'Sullivan <bryan.osullivan@qlogic.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- drivers/pci/htirq.c | 29 ++++++++++++++++++++++++----- include/linux/htirq.h | 5 +++++ 2 files changed, 29 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/pci/htirq.c b/drivers/pci/htirq.c index e346fe31f97a..0a8d1cce9fa0 100644 --- a/drivers/pci/htirq.c +++ b/drivers/pci/htirq.c @@ -25,6 +25,8 @@ static DEFINE_SPINLOCK(ht_irq_lock); struct ht_irq_cfg { struct pci_dev *dev; + /* Update callback used to cope with buggy hardware */ + ht_irq_update_t *update; unsigned pos; unsigned idx; struct ht_irq_msg msg; @@ -44,6 +46,8 @@ void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg) pci_write_config_byte(cfg->dev, cfg->pos + 2, cfg->idx + 1); pci_write_config_dword(cfg->dev, cfg->pos + 4, msg->address_hi); } + if (cfg->update) + cfg->update(cfg->dev, irq, msg); spin_unlock_irqrestore(&ht_irq_lock, flags); cfg->msg = *msg; } @@ -79,16 +83,14 @@ void unmask_ht_irq(unsigned int irq) } /** - * ht_create_irq - create an irq and attach it to a device. + * __ht_create_irq - create an irq and attach it to a device. * @dev: The hypertransport device to find the irq capability on. * @idx: Which of the possible irqs to attach to. - * - * ht_create_irq is needs to be called for all hypertransport devices - * that generate irqs. + * @update: Function to be called when changing the htirq message * * The irq number of the new irq or a negative error value is returned. 
*/ -int ht_create_irq(struct pci_dev *dev, int idx) +int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update) { struct ht_irq_cfg *cfg; unsigned long flags; @@ -123,6 +125,7 @@ int ht_create_irq(struct pci_dev *dev, int idx) return -ENOMEM; cfg->dev = dev; + cfg->update = update; cfg->pos = pos; cfg->idx = 0x10 + (idx * 2); /* Initialize msg to a value that will never match the first write. */ @@ -144,6 +147,21 @@ int ht_create_irq(struct pci_dev *dev, int idx) return irq; } +/** + * ht_create_irq - create an irq and attach it to a device. + * @dev: The hypertransport device to find the irq capability on. + * @idx: Which of the possible irqs to attach to. + * + * ht_create_irq needs to be called for all hypertransport devices + * that generate irqs. + * + * The irq number of the new irq or a negative error value is returned. + */ +int ht_create_irq(struct pci_dev *dev, int idx) +{ + return __ht_create_irq(dev, idx, NULL); +} + /** * ht_destroy_irq - destroy an irq created with ht_create_irq * @@ -162,5 +180,6 @@ void ht_destroy_irq(unsigned int irq) kfree(cfg); } +EXPORT_SYMBOL(__ht_create_irq); EXPORT_SYMBOL(ht_create_irq); EXPORT_SYMBOL(ht_destroy_irq); diff --git a/include/linux/htirq.h b/include/linux/htirq.h index 108f0d91e11e..c96ea46737d0 100644 --- a/include/linux/htirq.h +++ b/include/linux/htirq.h @@ -15,4 +15,9 @@ void unmask_ht_irq(unsigned int irq); /* The arch hook for getting things started */ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev); +/* For drivers of buggy hardware */ +typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq, + struct ht_irq_msg *msg); +int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update); + #endif /* LINUX_HTIRQ_H */ -- cgit v1.2.3 From da413908d5e9ebdd0889a599e80d21d7237021c6 Mon Sep 17 00:00:00 2001 From: Simon Horman <horms@verge.net.au> Date: Thu, 9 Nov 2006 20:00:55 -0800 Subject: [IPVS]: Compile fix for annotations in userland. This change makes __beXX available to user-space applications, such as ipvsadm, which include ip_vs.h. Signed-Off-By: Simon Horman <horms@verge.net.au> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/net/ip_vs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 49c717e3b040..903108e583f8 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -7,6 +7,7 @@ #define _IP_VS_H #include <asm/types.h> /* For __uXX types */ +#include <linux/types.h> /* For __beXX types in userland */ #define IP_VS_VERSION_CODE 0x010201 #define NVERSION(version) \ -- cgit v1.2.3 From 2b4ac44e7c7e16cf9411b81693ff3e604f332bf1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet <dada1@cosmosbay.com> Date: Fri, 10 Nov 2006 12:27:48 -0800 Subject: [PATCH] vmalloc: optimization, cleanup, bugfixes - reorder 'struct vm_struct' to speed up lookups on CPUs with small cache lines. The fields 'next,addr,size' should now be in the same cache line, to speed up lookups. - One minor cleanup in __get_vm_area_node() - Bugfixes in vmalloc_user() and vmalloc_32_user(): NULL returns from __vmalloc() and __find_vm_area() were not tested. 
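To make the failure mode concrete, here is a minimal, hypothetical caller sketch; the function name and buffer size are illustrative, only vmalloc_user()'s behaviour comes from the patch. Before this fix, a failed __vmalloc() would oops inside vmalloc_user() itself (a NULL handed to __find_vm_area()) instead of surfacing as a clean NULL here:

	#include <linux/vmalloc.h>

	/* Hypothetical helper: allocate a zeroed buffer that may later be
	 * mapped into user space (vmalloc_user() tags it VM_USERMAP).
	 */
	static void *example_alloc_user_buf(void)
	{
		void *buf = vmalloc_user(64 * 1024);

		if (!buf)	/* now reachable on allocation failure */
			return NULL;
		return buf;
	}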
[akpm@osdl.org: remove redundant BUG_ONs] Signed-off-by: Eric Dumazet <dada1@cosmosbay.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/vmalloc.h | 3 ++- mm/vmalloc.c | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index dc9a29d84abc..924e502905d4 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -23,13 +23,14 @@ struct vm_area_struct; #endif struct vm_struct { + /* keep next,addr,size together to speedup lookups */ + struct vm_struct *next; void *addr; unsigned long size; unsigned long flags; struct page **pages; unsigned int nr_pages; unsigned long phys_addr; - struct vm_struct *next; }; /* diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 46606c133e82..7dc6aa745166 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -186,10 +186,8 @@ static struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long fl if (unlikely(!area)) return NULL; - if (unlikely(!size)) { - kfree (area); + if (unlikely(!size)) return NULL; - } /* * We always allocate a guard page. @@ -532,11 +530,12 @@ void *vmalloc_user(unsigned long size) void *ret; ret = __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO, PAGE_KERNEL); - write_lock(&vmlist_lock); - area = __find_vm_area(ret); - area->flags |= VM_USERMAP; - write_unlock(&vmlist_lock); - + if (ret) { + write_lock(&vmlist_lock); + area = __find_vm_area(ret); + area->flags |= VM_USERMAP; + write_unlock(&vmlist_lock); + } return ret; } EXPORT_SYMBOL(vmalloc_user); @@ -605,11 +604,12 @@ void *vmalloc_32_user(unsigned long size) void *ret; ret = __vmalloc(size, GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); - write_lock(&vmlist_lock); - area = __find_vm_area(ret); - area->flags |= VM_USERMAP; - write_unlock(&vmlist_lock); - + if (ret) { + write_lock(&vmlist_lock); + area = __find_vm_area(ret); + area->flags |= VM_USERMAP; + write_unlock(&vmlist_lock); + } return ret; } EXPORT_SYMBOL(vmalloc_32_user); -- cgit v1.2.3 From 1a4b0fc503ff4149f5915be4aeb179b9453cf485 Mon Sep 17 00:00:00 2001 From: Jes Sorensen <jes@sgi.com> Date: Fri, 10 Nov 2006 12:27:49 -0800 Subject: [PATCH] mspec driver build fix Fix MSPEC driver to build for non SN2 enabled configs as the driver should work in cached and uncached modes (no fetchop) on these systems. In addition make MSPEC select IA64_UNCACHED_ALLOCATOR, which is required for it and move it to arch/ia64/Kconfig to avoid warnings on non ia64 architectures running allmodconfig. Once the Kconfig code is fixed, we can move it back. 
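The build fix leans on a standard kernel idiom: when the config option is off, the runtime flag becomes a compile-time 0, so the compiler drops the hardware-specific branch while still parsing and type-checking it. A generic sketch of the idiom (CONFIG_FOO, have_foo and the helpers are placeholders, not names from the driver):

	static void foo_only_op(void);	/* hardware-specific path */
	static void generic_op(void);

	#ifdef CONFIG_FOO
	static int have_foo;		/* set at runtime by probing */
	#else
	#define have_foo 0		/* constant 0: branch below is elided */
	#endif

	static void do_work(void)
	{
		if (have_foo)
			foo_only_op();	/* discarded when CONFIG_FOO=n */
		else
			generic_op();
	}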
Signed-off-by: Jes Sorensen <jes@sgi.com> Cc: Fernando Luis Vázquez Cao <fernando@oss.ntt.co.jp> Cc: "Luck, Tony" <tony.luck@intel.com> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- arch/ia64/Kconfig | 9 +++++++++ drivers/char/Kconfig | 8 -------- drivers/char/mspec.c | 8 +++++++- include/asm-ia64/sn/addrs.h | 6 +++++- 4 files changed, 21 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index 14682396f7f7..683b12c6f76c 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -484,6 +484,15 @@ source "net/Kconfig" source "drivers/Kconfig" +config MSPEC + tristate "Memory special operations driver" + depends on IA64 + select IA64_UNCACHED_ALLOCATOR + help + If you have an ia64 and you want to enable memory special + operations support (formerly known as fetchop), say Y here, + otherwise say N. + source "fs/Kconfig" source "lib/Kconfig" diff --git a/drivers/char/Kconfig b/drivers/char/Kconfig index 39a9f8cc6412..2af12fc45115 100644 --- a/drivers/char/Kconfig +++ b/drivers/char/Kconfig @@ -409,14 +409,6 @@ config SGI_MBCS If you have an SGI Altix with an attached SABrick say Y or M here, otherwise say N. -config MSPEC - tristate "Memory special operations driver" - depends on IA64 - help - If you have an ia64 and you want to enable memory special - operations support (formerly known as fetchop), say Y here, - otherwise say N. - source "drivers/serial/Kconfig" config UNIX98_PTYS diff --git a/drivers/char/mspec.c b/drivers/char/mspec.c index 5c0dec39cf6c..235e89226112 100644 --- a/drivers/char/mspec.c +++ b/drivers/char/mspec.c @@ -72,7 +72,11 @@ enum { MSPEC_UNCACHED }; +#ifdef CONFIG_SGI_SN static int is_sn2; +#else +#define is_sn2 0 +#endif /* * One of these structures is allocated when an mspec region is mmaped. The @@ -211,7 +215,7 @@ mspec_nopfn(struct vm_area_struct *vma, unsigned long address) if (vdata->type == MSPEC_FETCHOP) paddr = TO_AMO(maddr); else - paddr = __pa(TO_CAC(maddr)); + paddr = maddr & ~__IA64_UNCACHED_OFFSET; pfn = paddr >> PAGE_SHIFT; @@ -335,6 +339,7 @@ mspec_init(void) * The fetchop device only works on SN2 hardware, uncached and cached * memory drivers should both be valid on all ia64 hardware */ +#ifdef CONFIG_SGI_SN if (ia64_platform_is("sn2")) { is_sn2 = 1; if (is_shub2()) { @@ -363,6 +368,7 @@ mspec_init(void) goto free_scratch_pages; } } +#endif ret = misc_register(&cached_miscdev); if (ret) { printk(KERN_ERR "%s: failed to register device %i\n", diff --git a/include/asm-ia64/sn/addrs.h b/include/asm-ia64/sn/addrs.h index 1d9efe541662..e715c794b186 100644 --- a/include/asm-ia64/sn/addrs.h +++ b/include/asm-ia64/sn/addrs.h @@ -136,9 +136,13 @@ */ #define TO_PHYS(x) (TO_PHYS_MASK & (x)) #define TO_CAC(x) (CAC_BASE | TO_PHYS(x)) +#ifdef CONFIG_SGI_SN #define TO_AMO(x) (AMO_BASE | TO_PHYS(x)) #define TO_GET(x) (GET_BASE | TO_PHYS(x)) - +#else +#define TO_AMO(x) ({ BUG(); x; }) +#define TO_GET(x) ({ BUG(); x; }) +#endif /* * Covert from processor physical address to II/TIO physical address: -- cgit v1.2.3 From d8b295f29091310d746509bb6d5828aaf4907a18 Mon Sep 17 00:00:00 2001 From: Russell King <rmk+lkml@arm.linux.org.uk> Date: Fri, 10 Nov 2006 12:27:53 -0800 Subject: [PATCH] Fix missing parens in set_personality() If you call set_personality() with an expression such as: set_personality(foo ? PERS_FOO1 : PERS_FOO2); then this evaluates to: ((current->personality == foo ? PERS_FOO1 : PERS_FOO2) ? ... which is obviously not the intended result. 
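Spelling the precedence problem out (foo, PERS_FOO1 and PERS_FOO2 are the hypothetical names from the example above):

	#define set_personality(pers) \
		((current->personality == pers) ? 0 : __set_personality(pers))

	/* set_personality(foo ? PERS_FOO1 : PERS_FOO2) expands to
	 *
	 *	((current->personality == foo ? PERS_FOO1 : PERS_FOO2) ? ...
	 *
	 * '==' binds tighter than '?:', so the macro compares the current
	 * personality with 'foo' alone, uses that comparison to pick
	 * PERS_FOO1 or PERS_FOO2, and feeds the pick to the outer
	 * conditional -- not what the caller meant.
	 */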
Add the missing parens to ensure this gets evaluated as expected: ((current->personality == (foo ? PERS_FOO1 : PERS_FOO2)) ? ... Signed-off-by: Russell King <rmk+kernel@arm.linux.org.uk> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- include/linux/personality.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/personality.h b/include/linux/personality.h index bf4cf2080e5c..012cd558189b 100644 --- a/include/linux/personality.h +++ b/include/linux/personality.h @@ -114,7 +114,7 @@ struct exec_domain { * Change personality of the currently running process. */ #define set_personality(pers) \ - ((current->personality == pers) ? 0 : __set_personality(pers)) + ((current->personality == (pers)) ? 0 : __set_personality(pers)) #endif /* __KERNEL__ */ -- cgit v1.2.3 From fa18f477d0987c011cce047a7c3cd1284f547a14 Mon Sep 17 00:00:00 2001 From: Andi Kleen <ak@suse.de> Date: Tue, 14 Nov 2006 16:57:46 +0100 Subject: [PATCH] x86: Add acpi_user_timer_override option for Asus boards Timer overrides are normally disabled on Nvidia boards because they are commonly wrong, except on new ones with HPET support. Unfortunately there are quite a few Asus boards around that don't have HPET, but need a timer override. We don't know yet how to handle this transparently, but at least add a command line option to force the timer override and let them boot. Cc: len.brown@intel.com Signed-off-by: Andi Kleen <ak@suse.de> --- Documentation/kernel-parameters.txt | 4 ++++ arch/i386/kernel/acpi/boot.c | 8 ++++++++ arch/i386/kernel/acpi/earlyquirk.c | 8 +++++++- arch/x86_64/kernel/early-quirks.c | 8 ++++++++ include/asm-i386/acpi.h | 1 + include/asm-x86_64/acpi.h | 1 + 6 files changed, 29 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index dd00fd556a60..67473849f20e 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -164,6 +164,10 @@ and is between 256 and 4096 characters. It is defined in the file acpi_skip_timer_override [HW,ACPI] Recognize and ignore IRQ0/pin2 Interrupt Override. For broken nForce2 BIOS resulting in XT-PIC timer. + acpi_use_timer_override [HW,ACPI] + Use timer override. 
For some broken Nvidia NF5 boards + that require a timer override, but don't have + HPET acpi_dbg_layer= [HW,ACPI] Format: <int> diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 22e4c466e5a3..d12fb97a5337 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c @@ -82,6 +82,7 @@ EXPORT_SYMBOL(acpi_strict); acpi_interrupt_flags acpi_sci_flags __initdata; int acpi_sci_override_gsi __initdata; int acpi_skip_timer_override __initdata; +int acpi_use_timer_override __initdata; #ifdef CONFIG_X86_LOCAL_APIC static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE; @@ -1300,6 +1301,13 @@ static int __init parse_acpi_skip_timer_override(char *arg) return 0; } early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override); + +static int __init parse_acpi_use_timer_override(char *arg) +{ + acpi_use_timer_override = 1; + return 0; +} +early_param("acpi_use_timer_override", parse_acpi_use_timer_override); #endif /* CONFIG_X86_IO_APIC */ static int __init setup_acpi_sci(char *s) diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c index fe799b11ac0a..c9841692bb7c 100644 --- a/arch/i386/kernel/acpi/earlyquirk.c +++ b/arch/i386/kernel/acpi/earlyquirk.c @@ -27,11 +27,17 @@ static int __init check_bridge(int vendor, int device) #ifdef CONFIG_ACPI /* According to Nvidia all timer overrides are bogus unless HPET is enabled. */ - if (vendor == PCI_VENDOR_ID_NVIDIA) { + if (!acpi_use_timer_override && vendor == PCI_VENDOR_ID_NVIDIA) { nvidia_hpet_detected = 0; acpi_table_parse(ACPI_HPET, nvidia_hpet_check); if (nvidia_hpet_detected == 0) { acpi_skip_timer_override = 1; + printk(KERN_INFO "Nvidia board " + "detected. Ignoring ACPI " + "timer override.\n"); + printk(KERN_INFO "If you got timer trouble " + "try acpi_use_timer_override\n"); + } } #endif diff --git a/arch/x86_64/kernel/early-quirks.c b/arch/x86_64/kernel/early-quirks.c index 2b1245d86258..68273bff58cc 100644 --- a/arch/x86_64/kernel/early-quirks.c +++ b/arch/x86_64/kernel/early-quirks.c @@ -45,7 +45,13 @@ static void nvidia_bugs(void) /* * All timer overrides on Nvidia are * wrong unless HPET is enabled. + * Unfortunately that's not true on many Asus boards. + * We don't know yet how to detect this automatically, but + * at least allow a command line override. */ + if (acpi_use_timer_override) + return; + nvidia_hpet_detected = 0; acpi_table_parse(ACPI_HPET, nvidia_hpet_check); if (nvidia_hpet_detected == 0) { @@ -53,6 +59,8 @@ static void nvidia_bugs(void) printk(KERN_INFO "Nvidia board " "detected. Ignoring ACPI " "timer override.\n"); + printk(KERN_INFO "If you got timer trouble " + "try acpi_use_timer_override\n"); } #endif /* RED-PEN skip them on mptables too? 
*/ diff --git a/include/asm-i386/acpi.h b/include/asm-i386/acpi.h index 6016632d032f..c80b3a94511a 100644 --- a/include/asm-i386/acpi.h +++ b/include/asm-i386/acpi.h @@ -132,6 +132,7 @@ extern int acpi_gsi_to_irq(u32 gsi, unsigned int *irq); #ifdef CONFIG_X86_IO_APIC extern int acpi_skip_timer_override; +extern int acpi_use_timer_override; #endif static inline void acpi_noirq_set(void) { acpi_noirq = 1; } diff --git a/include/asm-x86_64/acpi.h b/include/asm-x86_64/acpi.h index ed59aa4c6ff9..9d1916e59c04 100644 --- a/include/asm-x86_64/acpi.h +++ b/include/asm-x86_64/acpi.h @@ -163,6 +163,7 @@ extern u8 x86_acpiid_to_apicid[]; #define ARCH_HAS_POWER_INIT 1 extern int acpi_skip_timer_override; +extern int acpi_use_timer_override; #endif /*__KERNEL__*/ -- cgit v1.2.3 From 8c131af1db510793f87dc43edbc8950a35370df3 Mon Sep 17 00:00:00 2001 From: Andi Kleen <ak@suse.de> Date: Tue, 14 Nov 2006 16:57:46 +0100 Subject: [PATCH] x86-64: Fix vgetcpu when CONFIG_HOTPLUG_CPU is disabled The vgetcpu per CPU initialization previously relied on CPU hotplug events for all CPUs to initialize the per CPU state. That worked only on kernels with CONFIG_HOTPLUG_CPU enabled. On the others some CPUs didn't get their state initialized properly and vgetcpu wouldn't work. Change the initialization sequence to instead run in a normal initcall (which runs after the normal CPU bootup) and initialize all running CPUs there. Later hotplug CPUs are still handled with a hotplug notifier. This actually simplifies the code somewhat. Signed-off-by: Andi Kleen <ak@suse.de> --- arch/x86_64/kernel/smp.c | 3 +-- arch/x86_64/kernel/time.c | 11 ----------- arch/x86_64/kernel/vsyscall.c | 45 ++++++++++++++++++++++++------------------- include/asm-x86_64/vsyscall.h | 2 -- 4 files changed, 26 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/smp.c b/arch/x86_64/kernel/smp.c index 4f67697f5036..9f74c883568c 100644 --- a/arch/x86_64/kernel/smp.c +++ b/arch/x86_64/kernel/smp.c @@ -376,9 +376,8 @@ int smp_call_function_single (int cpu, void (*func) (void *info), void *info, /* prevent preemption and reschedule on another processor */ int me = get_cpu(); if (cpu == me) { - WARN_ON(1); put_cpu(); - return -EBUSY; + return 0; } spin_lock_bh(&call_lock); __smp_call_function_single(cpu, func, info, nonatomic, wait); diff --git a/arch/x86_64/kernel/time.c b/arch/x86_64/kernel/time.c index 88722f11ca13..e3ef544d2cfb 100644 --- a/arch/x86_64/kernel/time.c +++ b/arch/x86_64/kernel/time.c @@ -876,15 +876,6 @@ static struct irqaction irq0 = { timer_interrupt, IRQF_DISABLED, CPU_MASK_NONE, "timer", NULL, NULL }; -static int __cpuinit -time_cpu_notifier(struct notifier_block *nb, unsigned long action, void *hcpu) -{ - unsigned cpu = (unsigned long) hcpu; - if (action == CPU_ONLINE) - vsyscall_set_cpu(cpu); - return NOTIFY_DONE; -} - void __init time_init(void) { if (nohpet) @@ -925,8 +916,6 @@ void __init time_init(void) vxtime.last_tsc = get_cycles_sync(); set_cyc2ns_scale(cpu_khz); setup_irq(0, &irq0); - hotcpu_notifier(time_cpu_notifier, 0); - time_cpu_notifier(NULL, CPU_ONLINE, (void *)(long)smp_processor_id()); #ifndef CONFIG_SMP time_init_gtod(); diff --git a/arch/x86_64/kernel/vsyscall.c b/arch/x86_64/kernel/vsyscall.c index a98b460af6a1..a730bacecb0b 100644 --- a/arch/x86_64/kernel/vsyscall.c +++ b/arch/x86_64/kernel/vsyscall.c @@ -27,6 +27,9 @@ #include <linux/jiffies.h> #include <linux/sysctl.h> #include <linux/getcpu.h> +#include <linux/cpu.h> +#include <linux/smp.h> +#include <linux/notifier.h> 
#include <asm/vsyscall.h> #include <asm/pgtable.h> @@ -243,32 +246,17 @@ static ctl_table kernel_root_table2[] = { #endif -static void __cpuinit write_rdtscp_cb(void *info) -{ - write_rdtscp_aux((unsigned long)info); -} - -void __cpuinit vsyscall_set_cpu(int cpu) +/* Assume __initcall executes before all user space. Hopefully kmod + doesn't violate that. We'll find out if it does. */ +static void __cpuinit vsyscall_set_cpu(int cpu) { unsigned long *d; unsigned long node = 0; #ifdef CONFIG_NUMA node = cpu_to_node[cpu]; #endif - if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) { - void *info = (void *)((node << 12) | cpu); - /* Can happen on preemptive kernel */ - if (get_cpu() == cpu) - write_rdtscp_cb(info); -#ifdef CONFIG_SMP - else { - /* the notifier is unfortunately not executed on the - target CPU */ - smp_call_function_single(cpu,write_rdtscp_cb,info,0,1); - } -#endif - put_cpu(); - } + if (cpu_has(&cpu_data[cpu], X86_FEATURE_RDTSCP)) + write_rdtscp_aux((node << 12) | cpu); /* Store cpu number in limit so that it can be loaded quickly in user space in vgetcpu. @@ -280,6 +268,21 @@ void __cpuinit vsyscall_set_cpu(int cpu) *d |= (node >> 4) << 48; } +static void __cpuinit cpu_vsyscall_init(void *arg) +{ + /* preemption should be already off */ + vsyscall_set_cpu(raw_smp_processor_id()); +} + +static int __cpuinit +cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg) +{ + long cpu = (long)arg; + if (action == CPU_ONLINE) + smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1); + return NOTIFY_DONE; +} + static void __init map_vsyscall(void) { extern char __vsyscall_0; @@ -299,6 +302,8 @@ static int __init vsyscall_init(void) #ifdef CONFIG_SYSCTL register_sysctl_table(kernel_root_table2, 0); #endif + on_each_cpu(cpu_vsyscall_init, NULL, 0, 1); + hotcpu_notifier(cpu_vsyscall_notifier, 0); return 0; } diff --git a/include/asm-x86_64/vsyscall.h b/include/asm-x86_64/vsyscall.h index fd452fc2c037..01d1c17e2849 100644 --- a/include/asm-x86_64/vsyscall.h +++ b/include/asm-x86_64/vsyscall.h @@ -59,8 +59,6 @@ extern seqlock_t xtime_lock; extern int sysctl_vsyscall; -extern void vsyscall_set_cpu(int cpu); - #define ARCH_HAVE_XTIME_LOCK 1 #endif /* __KERNEL__ */ -- cgit v1.2.3 From 9446868b5383eb87f76b2d4389dea4bb968a6657 Mon Sep 17 00:00:00 2001 From: Andi Kleen <ak@suse.de> Date: Tue, 14 Nov 2006 16:57:46 +0100 Subject: [PATCH] x86-64: Fix race in exit_idle When another interrupt happens in exit_idle the exit idle notifier could be called an incorrect number of times. Add a test_and_clear_bit_pda and use it to handle the bit atomically against interrupts to avoid this. 
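For contrast, the racy pattern and its atomic replacement, condensed from the diff below:

	/* Before: non-atomic read-then-clear.  An interrupt arriving in
	 * the window can run __exit_idle() again, still see isidle set,
	 * and fire the IDLE_END notifiers twice.
	 */
	if (read_pda(isidle) == 0)
		return;
	write_pda(isidle, 0);		/* <-- interrupt window here */

	/* After: a single btr instruction tests and clears the bit, so
	 * exactly one path observes it set.
	 */
	if (test_and_clear_bit_pda(0, isidle) == 0)
		return;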
Pointed out by Stephane Eranian Signed-off-by: Andi Kleen <ak@suse.de> --- arch/x86_64/kernel/process.c | 3 +-- include/asm-x86_64/pda.h | 9 +++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86_64/kernel/process.c b/arch/x86_64/kernel/process.c index 49f7fac6229e..f6226055d53d 100644 --- a/arch/x86_64/kernel/process.c +++ b/arch/x86_64/kernel/process.c @@ -88,9 +88,8 @@ void enter_idle(void) static void __exit_idle(void) { - if (read_pda(isidle) == 0) + if (test_and_clear_bit_pda(0, isidle) == 0) return; - write_pda(isidle, 0); atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); } diff --git a/include/asm-x86_64/pda.h b/include/asm-x86_64/pda.h index 14996d962bac..5642634843c4 100644 --- a/include/asm-x86_64/pda.h +++ b/include/asm-x86_64/pda.h @@ -109,6 +109,15 @@ extern struct x8664_pda _proxy_pda; #define sub_pda(field,val) pda_to_op("sub",field,val) #define or_pda(field,val) pda_to_op("or",field,val) +/* This is not atomic against other CPUs -- CPU preemption needs to be off */ +#define test_and_clear_bit_pda(bit,field) ({ \ + int old__; \ + asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0" \ + : "=r" (old__), "+m" (_proxy_pda.field) \ + : "dIr" (bit), "i" (pda_offset(field)) : "memory"); \ + old__; \ +}) + #endif #define PDA_STACKOFFSET (5*8) -- cgit v1.2.3 From 68589bc353037f233fe510ad9ff432338c95db66 Mon Sep 17 00:00:00 2001 From: Hugh Dickins <hugh@veritas.com> Date: Tue, 14 Nov 2006 02:03:32 -0800 Subject: [PATCH] hugetlb: prepare_hugepage_range check offset too (David:) If hugetlbfs_file_mmap() returns a failure to do_mmap_pgoff() - for example, because the given file offset is not hugepage aligned - then do_mmap_pgoff will go to the unmap_and_free_vma backout path. But at this stage the vma hasn't been marked as hugepage, and the backout path will call unmap_region() on it. That will eventually call down to the non-hugepage version of unmap_page_range(). On ppc64, at least, that will cause serious problems if there are any existing hugepage pagetable entries in the vicinity - for example if there are any other hugepage mappings under the same PUD. unmap_page_range() will trigger a bad_pud() on the hugepage pud entries. I suspect this will also cause bad problems on ia64, though I don't have a machine to test it on. (Hugh:) prepare_hugepage_range() should check file offset alignment when it checks virtual address and length, to stop MAP_FIXED with a bad huge offset from unmapping before it fails further down. PowerPC should apply the same prepare_hugepage_range alignment checks as ia64 and all the others do. Then none of the alignment checks in hugetlbfs_file_mmap are required (nor is the check for too small a mapping); but even so, move up setting of VM_HUGETLB and add a comment to warn of what David Gibson discovered - if hugetlbfs_file_mmap fails before setting it, do_mmap_pgoff's unmap_region when unwinding from error will go the non-huge way, which may cause bad behaviour on architectures (powerpc and ia64) which segregate their huge mappings into a separate region of the address space. Signed-off-by: Hugh Dickins <hugh@veritas.com> Cc: "Luck, Tony" <tony.luck@intel.com> Cc: "David S. 
Miller" <davem@davemloft.net> Acked-by: Adam Litke <agl@us.ibm.com> Acked-by: David Gibson <david@gibson.dropbear.id.au> Cc: Paul Mackerras <paulus@samba.org> Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- arch/ia64/mm/hugetlbpage.c | 4 +++- arch/powerpc/mm/hugetlbpage.c | 8 ++++++-- fs/hugetlbfs/inode.c | 21 ++++++++------------- include/linux/hugetlb.h | 10 +++++++--- mm/mmap.c | 2 +- 5 files changed, 25 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/arch/ia64/mm/hugetlbpage.c b/arch/ia64/mm/hugetlbpage.c index eee5c1cfbe32..f3a9585e98a8 100644 --- a/arch/ia64/mm/hugetlbpage.c +++ b/arch/ia64/mm/hugetlbpage.c @@ -70,8 +70,10 @@ huge_pte_offset (struct mm_struct *mm, unsigned long addr) * Don't actually need to do any preparation, but need to make sure * the address is in the right region. */ -int prepare_hugepage_range(unsigned long addr, unsigned long len) +int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff) { + if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT)) + return -EINVAL; if (len & ~HPAGE_MASK) return -EINVAL; if (addr & ~HPAGE_MASK) diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index fd68b74c07c3..506d89768d45 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -491,11 +491,15 @@ static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) return 0; } -int prepare_hugepage_range(unsigned long addr, unsigned long len) +int prepare_hugepage_range(unsigned long addr, unsigned long len, pgoff_t pgoff) { int err = 0; - if ( (addr+len) < addr ) + if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT)) + return -EINVAL; + if (len & ~HPAGE_MASK) + return -EINVAL; + if (addr & ~HPAGE_MASK) return -EINVAL; if (addr < 0x100000000UL) diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 0bea6a619e10..7f4756963d05 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -62,24 +62,19 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma) loff_t len, vma_len; int ret; - if (vma->vm_pgoff & (HPAGE_SIZE / PAGE_SIZE - 1)) - return -EINVAL; - - if (vma->vm_start & ~HPAGE_MASK) - return -EINVAL; - - if (vma->vm_end & ~HPAGE_MASK) - return -EINVAL; - - if (vma->vm_end - vma->vm_start < HPAGE_SIZE) - return -EINVAL; + /* + * vma alignment has already been checked by prepare_hugepage_range. + * If you add any error returns here, do so after setting VM_HUGETLB, + * so is_vm_hugetlb_page tests below unmap_region go the right way + * when do_mmap_pgoff unwinds (may be important on powerpc and ia64). + */ + vma->vm_flags |= VM_HUGETLB | VM_RESERVED; + vma->vm_ops = &hugetlb_vm_ops; vma_len = (loff_t)(vma->vm_end - vma->vm_start); mutex_lock(&inode->i_mutex); file_accessed(file); - vma->vm_flags |= VM_HUGETLB | VM_RESERVED; - vma->vm_ops = &hugetlb_vm_ops; ret = -ENOMEM; len = vma_len + ((loff_t)vma->vm_pgoff << PAGE_SHIFT); diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5081d27bfa27..ace64e57e17f 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -60,8 +60,11 @@ void hugetlb_free_pgd_range(struct mmu_gather **tlb, unsigned long addr, * If the arch doesn't supply something else, assume that hugepage * size aligned regions are ok without further preparation. 
*/ -static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) +static inline int prepare_hugepage_range(unsigned long addr, unsigned long len, + pgoff_t pgoff) { + if (pgoff & (~HPAGE_MASK >> PAGE_SHIFT)) + return -EINVAL; if (len & ~HPAGE_MASK) return -EINVAL; if (addr & ~HPAGE_MASK) @@ -69,7 +72,8 @@ static inline int prepare_hugepage_range(unsigned long addr, unsigned long len) return 0; } #else -int prepare_hugepage_range(unsigned long addr, unsigned long len); +int prepare_hugepage_range(unsigned long addr, unsigned long len, + pgoff_t pgoff); #endif #ifndef ARCH_HAS_SETCLEAR_HUGE_PTE @@ -107,7 +111,7 @@ static inline unsigned long hugetlb_total_pages(void) #define hugetlb_report_meminfo(buf) 0 #define hugetlb_report_node_meminfo(n, buf) 0 #define follow_huge_pmd(mm, addr, pmd, write) NULL -#define prepare_hugepage_range(addr, len) (-EINVAL) +#define prepare_hugepage_range(addr,len,pgoff) (-EINVAL) #define pmd_huge(x) 0 #define is_hugepage_only_range(mm, addr, len) 0 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; }) diff --git a/mm/mmap.c b/mm/mmap.c index 497e502dfd6b..bdace87d7c01 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1379,7 +1379,7 @@ get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, * Check if the given range is hugepage aligned, and * can be made suitable for hugepages. */ - ret = prepare_hugepage_range(addr, len); + ret = prepare_hugepage_range(addr, len, pgoff); } else { /* * Ensure that a normal request is not falling in a -- cgit v1.2.3 From 056f4faa572f64fa926491a7d42c627c9dc507a7 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell <sfr@canb.auug.org.au> Date: Mon, 13 Nov 2006 14:51:46 +1100 Subject: [POWERPC] Add the thread_siblings files to sysfs This adds the /sys/devices/system/cpu/*/topology/thread_siblings files on powerpc. These files are already available on other architectures. Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Paul Mackerras <paulus@samba.org> --- include/asm-powerpc/topology.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-powerpc/topology.h b/include/asm-powerpc/topology.h index 8f7ee16781a4..9fe7894ee035 100644 --- a/include/asm-powerpc/topology.h +++ b/include/asm-powerpc/topology.h @@ -96,7 +96,13 @@ static inline void sysfs_remove_device_from_node(struct sys_device *dev, #ifdef CONFIG_SMP #include <asm/cputable.h> -#define smt_capable() (cpu_has_feature(CPU_FTR_SMT)) +#define smt_capable() (cpu_has_feature(CPU_FTR_SMT)) + +#ifdef CONFIG_PPC64 +#include <asm/smp.h> + +#define topology_thread_siblings(cpu) (cpu_sibling_map[cpu]) +#endif #endif #endif /* __KERNEL__ */ -- cgit v1.2.3 From 9716a340310a383751a06589d0775fad04bd3f54 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell <sfr@canb.auug.org.au> Date: Mon, 13 Nov 2006 14:52:42 +1100 Subject: [POWERPC] Wire up sys_move_pages All the infrastructure is already in place for this, so we only need to allocate a syscall number and hook it up. 
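Reduced to its three ingredients, the hookup looks like this (extracted from the diff below for readability):

	/* include/asm-powerpc/systbl.h: the table entry; COMPAT_SYS routes
	 * 32-bit tasks through the compat wrapper.
	 */
	COMPAT_SYS(move_pages)

	/* include/asm-powerpc/unistd.h: allocate the number ... */
	#define __NR_move_pages	301

	/* ... and bump the syscall count to match. */
	#define __NR_syscalls	302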
Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au> Signed-off-by: Paul Mackerras <paulus@samba.org> --- include/asm-powerpc/systbl.h | 1 + include/asm-powerpc/unistd.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/asm-powerpc/systbl.h b/include/asm-powerpc/systbl.h index c6a03187f932..97b435484177 100644 --- a/include/asm-powerpc/systbl.h +++ b/include/asm-powerpc/systbl.h @@ -304,3 +304,4 @@ SYSCALL_SPU(fchmodat) SYSCALL_SPU(faccessat) COMPAT_SYS_SPU(get_robust_list) COMPAT_SYS_SPU(set_robust_list) +COMPAT_SYS(move_pages) diff --git a/include/asm-powerpc/unistd.h b/include/asm-powerpc/unistd.h index b5fe93291c96..0e4ea37f6466 100644 --- a/include/asm-powerpc/unistd.h +++ b/include/asm-powerpc/unistd.h @@ -323,10 +323,11 @@ #define __NR_faccessat 298 #define __NR_get_robust_list 299 #define __NR_set_robust_list 300 +#define __NR_move_pages 301 #ifdef __KERNEL__ -#define __NR_syscalls 301 +#define __NR_syscalls 302 #define __NR__exit __NR_exit #define NR_syscalls __NR_syscalls -- cgit v1.2.3 From b96e7ecbd052a0916b6078e7600604d7e276a336 Mon Sep 17 00:00:00 2001 From: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp> Date: Tue, 14 Nov 2006 19:48:48 -0800 Subject: [NETFILTER]: ip6_tables: fixed conflicted optname for getsockopt Optnames 66 and 67 for getsockopt on IPv6 sockets are each used twice, by the IPv6 Advanced API and by ip6tables. This moves the numbers for ip6tables to 68 and 69. This also kills XT_SO_* because {ip,ip6,arp}_tables don't share so many common numbers now. The old userland tools keep behaving as before, because old kernels always call the IPv6 Advanced API functions for those numbers. Signed-off-by: Yasuyuki Kozakai <yasuyuki.kozakai@toshiba.co.jp> Signed-off-by: Patrick McHardy <kaber@trash.net> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/linux/in6.h | 12 +++++++++++- include/linux/netfilter/x_tables.h | 16 ---------------- include/linux/netfilter_arp/arp_tables.h | 25 +++++++++++++------------ include/linux/netfilter_ipv4/ip_tables.h | 27 +++++++++++++++------------ include/linux/netfilter_ipv6/ip6_tables.h | 27 +++++++++++++++------------ 5 files changed, 54 insertions(+), 53 deletions(-) (limited to 'include') diff --git a/include/linux/in6.h b/include/linux/in6.h index 9be6a4756f0b..f28621f638e0 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -225,7 +225,7 @@ struct in6_flowlabel_req #endif /* - * Netfilter + * Netfilter (1) * * Following socket options are used in ip6_tables; * see include/linux/netfilter_ipv6/ip6_tables.h. @@ -240,4 +240,14 @@ struct in6_flowlabel_req #define IPV6_RECVTCLASS 66 #define IPV6_TCLASS 67 +/* + * Netfilter (2) + * + * Following socket options are used in ip6_tables; + * see include/linux/netfilter_ipv6/ip6_tables.h. + * + * IP6T_SO_GET_REVISION_MATCH 68 + * IP6T_SO_GET_REVISION_TARGET 69 + */ + #endif diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 04319a76103a..022edfa97ed9 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -96,22 +96,6 @@ struct _xt_align /* Error verdict. */ #define XT_ERROR_TARGET "ERROR" -/* - * New IP firewall options for [gs]etsockopt at the RAW IP level. - * Unlike BSD Linux inherits IP options so you don't have to use a raw - * socket for this. Instead we check rights in the calls. 
*/ -#define XT_BASE_CTL 64 /* base for firewall socket options */ - -#define XT_SO_SET_REPLACE (XT_BASE_CTL) -#define XT_SO_SET_ADD_COUNTERS (XT_BASE_CTL + 1) -#define XT_SO_SET_MAX XT_SO_SET_ADD_COUNTERS - -#define XT_SO_GET_INFO (XT_BASE_CTL) -#define XT_SO_GET_ENTRIES (XT_BASE_CTL + 1) -#define XT_SO_GET_REVISION_MATCH (XT_BASE_CTL + 2) -#define XT_SO_GET_REVISION_TARGET (XT_BASE_CTL + 3) -#define XT_SO_GET_MAX XT_SO_GET_REVISION_TARGET - #define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0) #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0) diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index 44e39b61d9e7..0be235418a2f 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -112,19 +112,20 @@ struct arpt_entry * New IP firewall options for [gs]etsockopt at the RAW IP level. * Unlike BSD Linux inherits IP options so you don't have to use a raw * socket for this. Instead we check rights in the calls. + * + * ATTENTION: check linux/in.h before adding new number here. */ -#define ARPT_CTL_OFFSET 32 -#define ARPT_BASE_CTL (XT_BASE_CTL+ARPT_CTL_OFFSET) - -#define ARPT_SO_SET_REPLACE (XT_SO_SET_REPLACE+ARPT_CTL_OFFSET) -#define ARPT_SO_SET_ADD_COUNTERS (XT_SO_SET_ADD_COUNTERS+ARPT_CTL_OFFSET) -#define ARPT_SO_SET_MAX (XT_SO_SET_MAX+ARPT_CTL_OFFSET) - -#define ARPT_SO_GET_INFO (XT_SO_GET_INFO+ARPT_CTL_OFFSET) -#define ARPT_SO_GET_ENTRIES (XT_SO_GET_ENTRIES+ARPT_CTL_OFFSET) -/* #define ARPT_SO_GET_REVISION_MATCH XT_SO_GET_REVISION_MATCH */ -#define ARPT_SO_GET_REVISION_TARGET (XT_SO_GET_REVISION_TARGET+ARPT_CTL_OFFSET) -#define ARPT_SO_GET_MAX (XT_SO_GET_REVISION_TARGET+ARPT_CTL_OFFSET) +#define ARPT_BASE_CTL 96 + +#define ARPT_SO_SET_REPLACE (ARPT_BASE_CTL) +#define ARPT_SO_SET_ADD_COUNTERS (ARPT_BASE_CTL + 1) +#define ARPT_SO_SET_MAX ARPT_SO_SET_ADD_COUNTERS + +#define ARPT_SO_GET_INFO (ARPT_BASE_CTL) +#define ARPT_SO_GET_ENTRIES (ARPT_BASE_CTL + 1) +/* #define ARPT_SO_GET_REVISION_MATCH (APRT_BASE_CTL + 2) */ +#define ARPT_SO_GET_REVISION_TARGET (ARPT_BASE_CTL + 3) +#define ARPT_SO_GET_MAX (ARPT_SO_GET_REVISION_TARGET) /* CONTINUE verdict for targets */ #define ARPT_CONTINUE XT_CONTINUE diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index a536bbdef145..4f06dad0bde9 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -101,18 +101,21 @@ struct ipt_entry /* * New IP firewall options for [gs]etsockopt at the RAW IP level. * Unlike BSD Linux inherits IP options so you don't have to use a raw - * socket for this. Instead we check rights in the calls. */ -#define IPT_BASE_CTL XT_BASE_CTL - -#define IPT_SO_SET_REPLACE XT_SO_SET_REPLACE -#define IPT_SO_SET_ADD_COUNTERS XT_SO_SET_ADD_COUNTERS -#define IPT_SO_SET_MAX XT_SO_SET_MAX - -#define IPT_SO_GET_INFO XT_SO_GET_INFO -#define IPT_SO_GET_ENTRIES XT_SO_GET_ENTRIES -#define IPT_SO_GET_REVISION_MATCH XT_SO_GET_REVISION_MATCH -#define IPT_SO_GET_REVISION_TARGET XT_SO_GET_REVISION_TARGET -#define IPT_SO_GET_MAX XT_SO_GET_REVISION_TARGET + * socket for this. Instead we check rights in the calls. + * + * ATTENTION: check linux/in.h before adding new number here. 
+ */ +#define IPT_BASE_CTL 64 + +#define IPT_SO_SET_REPLACE (IPT_BASE_CTL) +#define IPT_SO_SET_ADD_COUNTERS (IPT_BASE_CTL + 1) +#define IPT_SO_SET_MAX IPT_SO_SET_ADD_COUNTERS + +#define IPT_SO_GET_INFO (IPT_BASE_CTL) +#define IPT_SO_GET_ENTRIES (IPT_BASE_CTL + 1) +#define IPT_SO_GET_REVISION_MATCH (IPT_BASE_CTL + 2) +#define IPT_SO_GET_REVISION_TARGET (IPT_BASE_CTL + 3) +#define IPT_SO_GET_MAX IPT_SO_GET_REVISION_TARGET #define IPT_CONTINUE XT_CONTINUE #define IPT_RETURN XT_RETURN diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index d7a8e9c0dad0..4aed340401db 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -107,18 +107,21 @@ struct ip6t_entry /* * New IP firewall options for [gs]etsockopt at the RAW IP level. * Unlike BSD Linux inherits IP options so you don't have to use - * a raw socket for this. Instead we check rights in the calls. */ -#define IP6T_BASE_CTL XT_BASE_CTL - -#define IP6T_SO_SET_REPLACE XT_SO_SET_REPLACE -#define IP6T_SO_SET_ADD_COUNTERS XT_SO_SET_ADD_COUNTERS -#define IP6T_SO_SET_MAX XT_SO_SET_MAX - -#define IP6T_SO_GET_INFO XT_SO_GET_INFO -#define IP6T_SO_GET_ENTRIES XT_SO_GET_ENTRIES -#define IP6T_SO_GET_REVISION_MATCH XT_SO_GET_REVISION_MATCH -#define IP6T_SO_GET_REVISION_TARGET XT_SO_GET_REVISION_TARGET -#define IP6T_SO_GET_MAX XT_SO_GET_REVISION_TARGET + * a raw socket for this. Instead we check rights in the calls. + * + * ATTENTION: check linux/in6.h before adding new number here. + */ +#define IP6T_BASE_CTL 64 + +#define IP6T_SO_SET_REPLACE (IP6T_BASE_CTL) +#define IP6T_SO_SET_ADD_COUNTERS (IP6T_BASE_CTL + 1) +#define IP6T_SO_SET_MAX IP6T_SO_SET_ADD_COUNTERS + +#define IP6T_SO_GET_INFO (IP6T_BASE_CTL) +#define IP6T_SO_GET_ENTRIES (IP6T_BASE_CTL + 1) +#define IP6T_SO_GET_REVISION_MATCH (IP6T_BASE_CTL + 4) +#define IP6T_SO_GET_REVISION_TARGET (IP6T_BASE_CTL + 5) +#define IP6T_SO_GET_MAX IP6T_SO_GET_REVISION_TARGET /* CONTINUE verdict for targets */ #define IP6T_CONTINUE XT_CONTINUE -- cgit v1.2.3 From c7835a77c86422d276b0d1a4c70924d933014c13 Mon Sep 17 00:00:00 2001 From: Michael Chan <mchan@broadcom.com> Date: Wed, 15 Nov 2006 21:14:42 -0800 Subject: [TG3]: Disable TSO on 5906 if CLKREQ is enabled. Due to hardware errata, TSO must be disabled if the PCI Express clock request is enabled on 5906. The chip may hang when transmitting TSO frames if CLKREQ is enabled. Update version to 3.69. Signed-off-by: Michael Chan <mchan@broadcom.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- drivers/net/tg3.c | 20 ++++++++++++++++---- include/linux/pci_regs.h | 1 + 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c index 6e86866bd3fa..1dbdd6bb587b 100644 --- a/drivers/net/tg3.c +++ b/drivers/net/tg3.c @@ -68,8 +68,8 @@ #define DRV_MODULE_NAME "tg3" #define PFX DRV_MODULE_NAME ": " -#define DRV_MODULE_VERSION "3.68" -#define DRV_MODULE_RELDATE "November 02, 2006" +#define DRV_MODULE_VERSION "3.69" +#define DRV_MODULE_RELDATE "November 15, 2006" #define TG3_DEF_MAC_MODE 0 #define TG3_DEF_RX_MODE 0 @@ -10366,7 +10366,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) u32 pci_state_reg, grc_misc_cfg; u32 val; u16 pci_cmd; - int err; + int err, pcie_cap; /* Force memory write invalidate off. If we leave it on, * then on 5700_BX chips we have to enable a workaround. 
@@ -10541,8 +10541,19 @@ static int __devinit tg3_get_invariants(struct tg3 *tp) GET_ASIC_REV(tp->pci_chip_rev_id) != ASIC_REV_5906) tp->tg3_flags2 |= TG3_FLG2_JUMBO_CAPABLE; - if (pci_find_capability(tp->pdev, PCI_CAP_ID_EXP) != 0) + pcie_cap = pci_find_capability(tp->pdev, PCI_CAP_ID_EXP); + if (pcie_cap != 0) { tp->tg3_flags2 |= TG3_FLG2_PCI_EXPRESS; + if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906) { + u16 lnkctl; + + pci_read_config_word(tp->pdev, + pcie_cap + PCI_EXP_LNKCTL, + &lnkctl); + if (lnkctl & PCI_EXP_LNKCTL_CLKREQ_EN) + tp->tg3_flags2 &= ~TG3_FLG2_HW_TSO_2; + } + } /* If we have an AMD 762 or VIA K8T800 chipset, write * reordering to the mailbox registers done by the host @@ -11809,6 +11820,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev, else if (GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5700 || GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5701 || tp->pci_chip_rev_id == CHIPREV_ID_5705_A0 || + GET_ASIC_REV(tp->pci_chip_rev_id) == ASIC_REV_5906 || (tp->tg3_flags & TG3_FLAG_ENABLE_ASF) != 0) { tp->tg3_flags2 &= ~TG3_FLG2_TSO_CAPABLE; } else { diff --git a/include/linux/pci_regs.h b/include/linux/pci_regs.h index c312a12ad2d6..c321316f1bc7 100644 --- a/include/linux/pci_regs.h +++ b/include/linux/pci_regs.h @@ -371,6 +371,7 @@ #define PCI_EXP_DEVSTA_TRPND 0x20 /* Transactions Pending */ #define PCI_EXP_LNKCAP 12 /* Link Capabilities */ #define PCI_EXP_LNKCTL 16 /* Link Control */ +#define PCI_EXP_LNKCTL_CLKREQ_EN 0x100 /* Enable clkreq */ #define PCI_EXP_LNKSTA 18 /* Link Status */ #define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ #define PCI_EXP_SLTCTL 24 /* Slot Control */ -- cgit v1.2.3 From da63fc7ce63b43426dc3c69c05e28de2872c159a Mon Sep 17 00:00:00 2001 From: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Date: Thu, 16 Nov 2006 01:19:28 -0800 Subject: [PATCH] fat: add fat_getattr() This adds fat_getattr() for setting stat->blksize. 
(FAT uses the cluster size for proper I/O) Signed-off-by: OGAWA Hirofumi <hirofumi@mail.parknet.co.jp> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- fs/fat/file.c | 10 ++++++++++ fs/msdos/namei.c | 1 + fs/vfat/namei.c | 1 + include/linux/msdos_fs.h | 2 ++ 4 files changed, 14 insertions(+) (limited to 'include') diff --git a/fs/fat/file.c b/fs/fat/file.c index 8337451e7897..0aa813d944a6 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -303,7 +303,17 @@ void fat_truncate(struct inode *inode) fat_flush_inodes(inode->i_sb, inode, NULL); } +int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +{ + struct inode *inode = dentry->d_inode; + generic_fillattr(inode, stat); + stat->blksize = MSDOS_SB(inode->i_sb)->cluster_size; + return 0; +} +EXPORT_SYMBOL_GPL(fat_getattr); + struct inode_operations fat_file_inode_operations = { .truncate = fat_truncate, .setattr = fat_notify_change, + .getattr = fat_getattr, }; diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index b0f01b3b0536..452461955cbd 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ -654,6 +654,7 @@ static struct inode_operations msdos_dir_inode_operations = { .rmdir = msdos_rmdir, .rename = msdos_rename, .setattr = fat_notify_change, + .getattr = fat_getattr, }; static int msdos_fill_super(struct super_block *sb, void *data, int silent) diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index edb711ff7b05..0afd745a37cd 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -1004,6 +1004,7 @@ static struct inode_operations vfat_dir_inode_operations = { .rmdir = vfat_rmdir, .rename = vfat_rename, .setattr = fat_notify_change, + .getattr = fat_getattr, }; static int vfat_fill_super(struct super_block *sb, void *data, int silent) diff --git a/include/linux/msdos_fs.h b/include/linux/msdos_fs.h index ce6c85815cbd..24a9ef1506b6 100644 --- a/include/linux/msdos_fs.h +++ b/include/linux/msdos_fs.h @@ -402,6 +402,8 @@ extern const struct file_operations fat_file_operations; extern struct inode_operations fat_file_inode_operations; extern int fat_notify_change(struct dentry * dentry, struct iattr * attr); extern void fat_truncate(struct inode *inode); +extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, + struct kstat *stat); /* fat/inode.c */ extern void fat_attach(struct inode *inode, loff_t i_pos); -- cgit v1.2.3 From 610a5b742e9df4e59047f22d13d8bd83cafce388 Mon Sep 17 00:00:00 2001 From: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp> Date: Fri, 17 Nov 2006 11:51:41 +1100 Subject: [CRYPTO] api: Remove one too many semicolon This patch removes one semicolon too many in crypto.h. 
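Why the extra semicolon mattered, in reduced form (foo() stands in for crypto_alg_available()): the first ';' terminates the declaration, leaving the attribute on an empty declaration that the compiler ignores (and may warn about).

	/* Before: the attribute is detached from the prototype. */
	static int foo(const char *name, u32 flags);
		__deprecated_for_modules;	/* empty declaration */

	/* After: the attribute is part of the declaration. */
	static int foo(const char *name, u32 flags)
		__deprecated_for_modules;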
Signed-off-by: Yoichi Yuasa <yoichi_yuasa@tripeaks.co.jp> Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> --- include/linux/crypto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 8f2ffa4caabf..6485e9716b36 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -245,7 +245,7 @@ int crypto_alg_available(const char *name, u32 flags) __deprecated_for_modules; int crypto_has_alg(const char *name, u32 type, u32 mask); #else -static int crypto_alg_available(const char *name, u32 flags); +static int crypto_alg_available(const char *name, u32 flags) __deprecated_for_modules; static inline int crypto_alg_available(const char *name, u32 flags) { -- cgit v1.2.3 From ace5f1d425beaa272b6e91cecc87b2c075d7feb2 Mon Sep 17 00:00:00 2001 From: Greg Ungerer <gerg@snapgear.com> Date: Mon, 20 Nov 2006 15:46:22 +1000 Subject: [PATCH] m68knommu: fix up for the irq_handler_t changes Switch to using irq_handler_t for interrupt function handler pointers. Change name of m68knommu's irq_handler_t data structure so it doesn't clash with the common type (include/linux/interrupt.h). Signed-off-by: Greg Ungerer <gerg@uclinux.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- arch/m68knommu/kernel/setup.c | 2 +- arch/m68knommu/kernel/time.c | 2 +- arch/m68knommu/platform/5307/ints.c | 17 ++++++++--------- include/asm-m68knommu/irq_regs.h | 1 + include/asm-m68knommu/irqnode.h | 8 ++++---- include/asm-m68knommu/machdep.h | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) create mode 100644 include/asm-m68knommu/irq_regs.h (limited to 'include') diff --git a/arch/m68knommu/kernel/setup.c b/arch/m68knommu/kernel/setup.c index bde9811cf98c..7b21959eaeae 100644 --- a/arch/m68knommu/kernel/setup.c +++ b/arch/m68knommu/kernel/setup.c @@ -62,7 +62,7 @@ int (*mach_kbdrate) (struct kbd_repeat *); void (*mach_kbd_leds) (unsigned int); /* machine dependent irq functions */ void (*mach_init_IRQ) (void); -irqreturn_t (*(*mach_default_handler)[]) (int, void *, struct pt_regs *); +irq_handler_t mach_default_handler; int (*mach_get_irq_list) (struct seq_file *, void *); void (*mach_process_int) (int irq, struct pt_regs *fp); void (*mach_trap_init) (void); diff --git a/arch/m68knommu/kernel/time.c b/arch/m68knommu/kernel/time.c index c5667bdddd5e..9226264abf1a 100644 --- a/arch/m68knommu/kernel/time.c +++ b/arch/m68knommu/kernel/time.c @@ -54,7 +54,7 @@ static irqreturn_t timer_interrupt(int irq, void *dummy, struct pt_regs * regs) update_process_times(user_mode(regs)); #endif if (current->pid) - profile_tick(CPU_PROFILING, regs); + profile_tick(CPU_PROFILING); /* * If we have an externally synchronized Linux clock, then update diff --git a/arch/m68knommu/platform/5307/ints.c b/arch/m68knommu/platform/5307/ints.c index b4b55093ae7e..a57239ec6c8c 100644 --- a/arch/m68knommu/platform/5307/ints.c +++ b/arch/m68knommu/platform/5307/ints.c @@ -33,7 +33,7 @@ /* * This table stores the address info for each vector handler. 
*/ -irq_handler_t irq_list[SYS_IRQS]; +struct irq_entry irq_list[SYS_IRQS]; #define NUM_IRQ_NODES 16 static irq_node_t nodes[NUM_IRQ_NODES]; @@ -44,7 +44,7 @@ volatile unsigned int num_spurious; unsigned int local_bh_count[NR_CPUS]; unsigned int local_irq_count[NR_CPUS]; -static irqreturn_t default_irq_handler(int irq, void *ptr, struct pt_regs *regs) +static irqreturn_t default_irq_handler(int irq, void *ptr) { #if 1 printk(KERN_INFO "%s(%d): default irq handler vec=%d [0x%x]\n", @@ -70,7 +70,7 @@ void __init init_IRQ(void) for (i = 0; i < SYS_IRQS; i++) { if (mach_default_handler) - irq_list[i].handler = (*mach_default_handler)[i]; + irq_list[i].handler = mach_default_handler; else irq_list[i].handler = default_irq_handler; irq_list[i].flags = IRQ_FLG_STD; @@ -100,7 +100,7 @@ irq_node_t *new_irq_node(void) int request_irq( unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), + irq_handler_t handler, unsigned long flags, const char *devname, void *dev_id) @@ -157,7 +157,7 @@ void free_irq(unsigned int irq, void *dev_id) } if (mach_default_handler) - irq_list[irq].handler = (*mach_default_handler)[irq]; + irq_list[irq].handler = mach_default_handler; else irq_list[irq].handler = default_irq_handler; irq_list[irq].flags = IRQ_FLG_STD; @@ -168,8 +168,7 @@ void free_irq(unsigned int irq, void *dev_id) EXPORT_SYMBOL(free_irq); -int sys_request_irq(unsigned int irq, - irqreturn_t (*handler)(int, void *, struct pt_regs *), +int sys_request_irq(unsigned int irq, irq_handler_t handler, unsigned long flags, const char *devname, void *dev_id) { if (irq > IRQ7) { @@ -211,7 +210,7 @@ void sys_free_irq(unsigned int irq, void *dev_id) printk(KERN_WARNING "%s: Removing probably wrong IRQ %d from %s\n", __FUNCTION__, irq, irq_list[irq].devname); - irq_list[irq].handler = (*mach_default_handler)[irq]; + irq_list[irq].handler = mach_default_handler; irq_list[irq].flags = 0; irq_list[irq].dev_id = NULL; irq_list[irq].devname = NULL; @@ -241,7 +240,7 @@ asmlinkage void process_int(unsigned long vec, struct pt_regs *fp) if (vec >= VEC_INT1 && vec <= VEC_INT7) { vec -= VEC_SPUR; kstat_cpu(0).irqs[vec]++; - irq_list[vec].handler(vec, irq_list[vec].dev_id, fp); + irq_list[vec].handler(vec, irq_list[vec].dev_id); } else { if (mach_process_int) mach_process_int(vec, fp); diff --git a/include/asm-m68knommu/irq_regs.h b/include/asm-m68knommu/irq_regs.h new file mode 100644 index 000000000000..3dd9c0b70270 --- /dev/null +++ b/include/asm-m68knommu/irq_regs.h @@ -0,0 +1 @@ +#include <asm-generic/irq_regs.h> diff --git a/include/asm-m68knommu/irqnode.h b/include/asm-m68knommu/irqnode.h index a2503dfc554c..6132a9858b52 100644 --- a/include/asm-m68knommu/irqnode.h +++ b/include/asm-m68knommu/irqnode.h @@ -8,7 +8,7 @@ * interrupt source (if it supports chaining). 
*/ typedef struct irq_node { - irqreturn_t (*handler)(int, void *, struct pt_regs *); + irq_handler_t handler; unsigned long flags; void *dev_id; const char *devname; @@ -18,12 +18,12 @@ typedef struct irq_node { /* * This structure has only 4 elements for speed reasons */ -typedef struct irq_handler { - irqreturn_t (*handler)(int, void *, struct pt_regs *); +struct irq_entry { + irq_handler_t handler; unsigned long flags; void *dev_id; const char *devname; -} irq_handler_t; +}; /* count of spurious interrupts */ extern volatile unsigned int num_spurious; diff --git a/include/asm-m68knommu/machdep.h b/include/asm-m68knommu/machdep.h index 27c90afd3339..6ce28f8e0ead 100644 --- a/include/asm-m68knommu/machdep.h +++ b/include/asm-m68knommu/machdep.h @@ -18,7 +18,7 @@ extern int (*mach_kbdrate) (struct kbd_repeat *); extern void (*mach_kbd_leds) (unsigned int); /* machine dependent irq functions */ extern void (*mach_init_IRQ) (void); -extern irqreturn_t (*(*mach_default_handler)[]) (int, void *, struct pt_regs *); +extern irq_handler_t mach_default_handler; extern int (*mach_request_irq) (unsigned int irq, void (*handler)(int, void *, struct pt_regs *), unsigned long flags, const char *devname, void *dev_id); extern void (*mach_free_irq) (unsigned int irq, void *dev_id); -- cgit v1.2.3 From b3438f8266cb1f5010085ac47d7ad6a36a212164 Mon Sep 17 00:00:00 2001 From: Linus Torvalds <torvalds@woody.osdl.org> Date: Mon, 20 Nov 2006 11:47:18 -0800 Subject: Add "pure_initcall" for static variable initialization This is a quick hack to overcome the fact that SRCU currently does not allow static initializers, and we need to sometimes initialize those things before any other initializers (even "core" ones) can do so. Currently we don't allow this at all for modules, and the only user that needs it right now is cpufreq. As reported by Thomas Gleixner: "Commit b4dfdbb3c707474a2254c5b4d7e62be31a4b7da9 ("[PATCH] cpufreq: make the transition_notifier chain use SRCU") breaks cpu frequency notification users, which register the callback on core_init level." 
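How the ordering works, condensed from the diff below: initcall sections are linked lowest level first, so the new level 0 runs before every core_initcall (level 1), which is exactly what the SRCU notifier head needs.

	/* include/linux/init.h -- built-in code only, no modules. */
	#define pure_initcall(fn)	__define_initcall("0",fn,1)
	#define core_initcall(fn)	__define_initcall("1",fn,1)

	/* drivers/cpufreq/cpufreq.c -- the notifier head is therefore
	 * initialized before any core-level registration can run.
	 */
	pure_initcall(init_cpufreq_transition_notifier_list);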
Cc: Thomas Gleixner <tglx@timesys.com> Cc: Ingo Molnar <mingo@elte.hu> Cc: Arjan van de Ven <arjan@infradead.org> Cc: Andrew Morton <akpm@osdl.org>, Signed-off-by: Linus Torvalds <torvalds@osdl.org> --- drivers/cpufreq/cpufreq.c | 2 +- include/asm-generic/vmlinux.lds.h | 2 ++ include/linux/init.h | 8 ++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 86e69b7f9122..dd0c2623e27b 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -59,7 +59,7 @@ static int __init init_cpufreq_transition_notifier_list(void) srcu_init_notifier_head(&cpufreq_transition_notifier_list); return 0; } -core_initcall(init_cpufreq_transition_notifier_list); +pure_initcall(init_cpufreq_transition_notifier_list); static LIST_HEAD(cpufreq_governor_list); static DEFINE_MUTEX (cpufreq_governor_mutex); diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 9d873163a7ab..e60d6f21fa62 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -215,6 +215,8 @@ .notes : { *(.note.*) } :note #define INITCALLS \ + *(.initcall0.init) \ + *(.initcall0s.init) \ *(.initcall1.init) \ *(.initcall1s.init) \ *(.initcall2.init) \ diff --git a/include/linux/init.h b/include/linux/init.h index ff40ea118e3a..5eb5d24b7680 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -93,6 +93,14 @@ extern void setup_arch(char **); static initcall_t __initcall_##fn##id __attribute_used__ \ __attribute__((__section__(".initcall" level ".init"))) = fn +/* + * A "pure" initcall has no dependencies on anything else, and purely + * initializes variables that couldn't be statically initialized. + * + * This only exists for built-in code, not for modules. + */ +#define pure_initcall(fn) __define_initcall("0",fn,1) + #define core_initcall(fn) __define_initcall("1",fn,1) #define core_initcall_sync(fn) __define_initcall("1s",fn,1s) #define postcore_initcall(fn) __define_initcall("2",fn,2) -- cgit v1.2.3 From fb47ddb2db9c18664bd7b06c201a2398885b64fc Mon Sep 17 00:00:00 2001 From: David L Stevens <dlstevens@us.ibm.com> Date: Sun, 19 Nov 2006 10:38:39 -0800 Subject: [IGMP]: Fix IGMPV3_EXP() normalization bit shift value. The IGMPV3_EXP() macro doesn't correctly shift the normalization bit, so time-out values are longer than they should be. Thanks to Dirk Ooms for finding the problem in IGMPv3 - MLDv2 had a similar problem that was already fixed a year ago. :-( Signed-off-by: David L Stevens <dlstevens@us.ibm.com> Signed-off-by: David S. Miller <davem@davemloft.net> --- include/linux/igmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 03f43e2893a4..21dd56905271 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -191,7 +191,7 @@ struct ip_mc_list #define IGMPV3_MASK(value, nb) ((nb)>=32 ? (value) : ((1<<(nb))-1) & (value)) #define IGMPV3_EXP(thresh, nbmant, nbexp, value) \ ((value) < (thresh) ? 
(value) : \ - ((IGMPV3_MASK(value, nbmant) | (1<<(nbmant+nbexp))) << \ + ((IGMPV3_MASK(value, nbmant) | (1<<(nbmant))) << \ (IGMPV3_MASK((value) >> (nbmant), nbexp) + (nbexp)))) #define IGMPV3_QQIC(value) IGMPV3_EXP(0x80, 4, 3, value) -- cgit v1.2.3 From df9c23095fc8652798c41dd860676d3dafb2f1dc Mon Sep 17 00:00:00 2001 From: Kim Phillips <kim.phillips@freescale.com> Date: Tue, 21 Nov 2006 10:31:14 -0600 Subject: [POWERPC] Revert "[POWERPC] Add powerpc get/set_rtc_time interface to new generic rtc class" This reverts commit 7a69af63e788a324d162201a0b23df41bcf158dd. As advised by David Brownell: http://marc.theaimsgroup.com/?l=linux-kernel&m=116387226902131&w=2 Signed-off-by: Kim Phillips <kim.phillips@freescale.com> Signed-off-by: Paul Mackerras <paulus@samba.org> --- arch/powerpc/kernel/time.c | 42 ------------------------------------------ include/asm-powerpc/time.h | 4 ---- 2 files changed, 46 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index a1b5e4b16151..46a24de36fec 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -1014,48 +1014,6 @@ void __init time_init(void) set_dec(tb_ticks_per_jiffy); } -#ifdef CONFIG_RTC_CLASS -static int set_rtc_class_time(struct rtc_time *tm) -{ - int err; - struct class_device *class_dev = - rtc_class_open(CONFIG_RTC_HCTOSYS_DEVICE); - - if (class_dev == NULL) - return -ENODEV; - - err = rtc_set_time(class_dev, tm); - - rtc_class_close(class_dev); - - return 0; -} - -static void get_rtc_class_time(struct rtc_time *tm) -{ - int err; - struct class_device *class_dev = - rtc_class_open(CONFIG_RTC_HCTOSYS_DEVICE); - - if (class_dev == NULL) - return; - - err = rtc_read_time(class_dev, tm); - - rtc_class_close(class_dev); - - return; -} - -int __init rtc_class_hookup(void) -{ - ppc_md.get_rtc_time = get_rtc_class_time; - ppc_md.set_rtc_time = set_rtc_class_time; - - return 0; -} -#endif /* CONFIG_RTC_CLASS */ - #define FEBRUARY 2 #define STARTOFTIME 1970 diff --git a/include/asm-powerpc/time.h b/include/asm-powerpc/time.h index a78285010d62..4cff977ad526 100644 --- a/include/asm-powerpc/time.h +++ b/include/asm-powerpc/time.h @@ -39,10 +39,6 @@ extern void generic_calibrate_decr(void); extern void wakeup_decrementer(void); extern void snapshot_timebase(void); -#ifdef CONFIG_RTC_CLASS -extern int __init rtc_class_hookup(void); -#endif - /* Some sane defaults: 125 MHz timebase, 1GHz processor */ extern unsigned long ppc_proc_freq; #define DEFAULT_PROC_FREQ (DEFAULT_TB_FREQ * 8) -- cgit v1.2.3