From 68575476718ed1c6d6ddafeec8310b109e7a7a7f Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@austin.rr.com>
Date: Sat, 30 Apr 2005 11:10:57 -0700
Subject: [PATCH] cifs: append \* properly on ASCII servers

For older servers which do not support Unicode

Signed-off-by: Steve French (sfrench@us.ibm.com)
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cifs/CHANGES   | 4 +++-
 fs/cifs/cifssmb.c | 6 ++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/CHANGES b/fs/cifs/CHANGES
index 4d2404305ab6..95483baab706 100644
--- a/fs/cifs/CHANGES
+++ b/fs/cifs/CHANGES
@@ -4,7 +4,9 @@ Fix error mapping of the TOO_MANY_LINKS (hardlinks) case.
 Do not oops if root user kills cifs oplock kernel thread or
 kills the cifsd thread (NB: killing the cifs kernel threads is not
 recommended, unmount and rmmod cifs will kill them when they are
-no longer needed).
+no longer needed).  Fix readdir to ASCII servers (ie older servers
+which do not support Unicode) and also require asterik.
+
 
 Version 1.33
 ------------
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index b004fef0a42b..741ff0c69f37 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -2451,12 +2451,14 @@ findFirstRetry:
 		name_len += 2;
 	} else {	/* BB add check for overrun of SMB buf BB */
 		name_len = strnlen(searchName, PATH_MAX);
-		name_len++;	/* trailing null */
 /* BB fix here and in unicode clause above ie
 		if(name_len > buffersize-header)
 			free buffer exit; BB */
 		strncpy(pSMB->FileName, searchName, name_len);
-		pSMB->FileName[name_len] = 0; /* just in case */
+		pSMB->FileName[name_len] = '\\';
+		pSMB->FileName[name_len+1] = '*';
+		pSMB->FileName[name_len+2] = 0;
+		name_len += 3;
 	}
 
 	params = 12 + name_len /* includes null */ ;
-- 
cgit v1.2.3


From 9ea1f8f505f6f770bd593e689960ac4f893509b2 Mon Sep 17 00:00:00 2001
From: Steve French <smfrench@austin.rr.com>
Date: Sat, 30 Apr 2005 11:10:58 -0700
Subject: [PATCH] cifs: Update cifs todo list

Signed-off-by: Steve French (sfrench@us.ibm.com)
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/cifs/TODO | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/cifs/TODO b/fs/cifs/TODO
index 1e8490ed6948..8cc881694e29 100644
--- a/fs/cifs/TODO
+++ b/fs/cifs/TODO
@@ -1,4 +1,4 @@
-version 1.32 April 3, 2005
+version 1.34 April 29, 2005
 
 A Partial List of Missing Features
 ==================================
@@ -70,7 +70,15 @@ r) Implement O_DIRECT flag on open (already supported on mount)
 s) Allow remapping of last remaining character (\) to +0xF000 which
 (this character is valid for POSIX but not for Windows)
 
-KNOWN BUGS (updated April 3, 2005)
+t) Create UID mapping facility so server UIDs can be mapped on a per
+mount or a per server basis to client UIDs or nobody if no mapping
+exists.  This is helpful when Unix extensions are negotiated to
+allow better permission checking when UIDs differ on the server
+and client.  Add new protocol request to the CIFS protocol 
+standard for asking the server for the corresponding name of a
+particular uid.
+
+KNOWN BUGS (updated April 29, 2005)
 ====================================
 See http://bugzilla.samba.org - search on product "CifsVFS" for
 current bug list.
-- 
cgit v1.2.3


From 552fca4cbe552520d85b21e839f289c880fa48d2 Mon Sep 17 00:00:00 2001
From: Nikita Danilov <nikita@clusterfs.com>
Date: Sun, 1 May 2005 08:58:39 -0700
Subject: [PATCH] mpage_writepages() page locking fix

When ->writepage() returns WRITEPAGE_ACTIVATE, the page is still locked.
Explicitly unlock the page in mpage_writepages().

Signed-off-by: Nikita Danilov <nikita@clusterfs.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/mpage.c | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'fs')

diff --git a/fs/mpage.c b/fs/mpage.c
index e7d8d1a77606..3923facf94eb 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -727,6 +727,8 @@ retry:
 						&last_block_in_bio, &ret, wbc,
 						writepage_fn);
 			}
+			if (unlikely(ret == WRITEPAGE_ACTIVATE))
+				unlock_page(page);
 			if (ret || (--(wbc->nr_to_write) <= 0))
 				done = 1;
 			if (wbc->nonblocking && bdi_write_congested(bdi)) {
-- 
cgit v1.2.3


From de7d5a3b6c9ff8429bf046c36b56d3192b75c3da Mon Sep 17 00:00:00 2001
From: "akpm@osdl.org" <akpm@osdl.org>
Date: Sun, 1 May 2005 08:58:39 -0700
Subject: [PATCH] drop_buffers() oops fix

In rare situations, drop_buffers() can be called for a page which has buffers,
but no ->mapping (it was truncated, but the buffers were left behind because
ext3 was still fiddling with them).

But if there was an I/O error in a buffer_head, drop_buffers() will try to get
at the address_space and will oops.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 3b12cf947aba..665db84a1f9f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2917,7 +2917,7 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free)
 
 	bh = head;
 	do {
-		if (buffer_write_io_error(bh))
+		if (buffer_write_io_error(bh) && page->mapping)
 			set_bit(AS_EIO, &page->mapping->flags);
 		if (buffer_busy(bh))
 			goto failed;
-- 
cgit v1.2.3


From d59dd4620fb8d6422555a9e2b82a707718e68327 Mon Sep 17 00:00:00 2001
From: "akpm@osdl.org" <akpm@osdl.org>
Date: Sun, 1 May 2005 08:58:47 -0700
Subject: [PATCH] use smp_mb/wmb/rmb where possible

Replace a number of memory barriers with smp_ variants.  This means we won't
take the unnecessary hit on UP machines.

Signed-off-by: Anton Blanchard <anton@samba.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/buffer.c           |  6 +++---
 ipc/mqueue.c          |  4 ++--
 kernel/kthread.c      |  2 +-
 kernel/profile.c      |  2 +-
 kernel/ptrace.c       |  2 +-
 kernel/stop_machine.c | 10 +++++-----
 kernel/sys.c          | 20 ++++++++++----------
 kernel/timer.c        |  2 +-
 lib/rwsem-spinlock.c  |  6 +++---
 lib/rwsem.c           |  4 ++--
 mm/mempool.c          |  4 ++--
 11 files changed, 31 insertions(+), 31 deletions(-)

(limited to 'fs')

diff --git a/fs/buffer.c b/fs/buffer.c
index 665db84a1f9f..188365c79204 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -218,7 +218,7 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 	sb = get_super(bdev);
 	if (sb && !(sb->s_flags & MS_RDONLY)) {
 		sb->s_frozen = SB_FREEZE_WRITE;
-		wmb();
+		smp_wmb();
 
 		sync_inodes_sb(sb, 0);
 		DQUOT_SYNC(sb);
@@ -235,7 +235,7 @@ struct super_block *freeze_bdev(struct block_device *bdev)
 		sync_inodes_sb(sb, 1);
 
 		sb->s_frozen = SB_FREEZE_TRANS;
-		wmb();
+		smp_wmb();
 
 		sync_blockdev(sb->s_bdev);
 
@@ -263,7 +263,7 @@ void thaw_bdev(struct block_device *bdev, struct super_block *sb)
 		if (sb->s_op->unlockfs)
 			sb->s_op->unlockfs(sb);
 		sb->s_frozen = SB_UNFROZEN;
-		wmb();
+		smp_wmb();
 		wake_up(&sb->s_wait_unfrozen);
 		drop_super(sb);
 	}
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index cb0cd3cf3b5a..33f71520b89c 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -767,7 +767,7 @@ static inline void pipelined_send(struct mqueue_inode_info *info,
 	list_del(&receiver->list);
 	receiver->state = STATE_PENDING;
 	wake_up_process(receiver->task);
-	wmb();
+	smp_wmb();
 	receiver->state = STATE_READY;
 }
 
@@ -786,7 +786,7 @@ static inline void pipelined_receive(struct mqueue_inode_info *info)
 	list_del(&sender->list);
 	sender->state = STATE_PENDING;
 	wake_up_process(sender->task);
-	wmb();
+	smp_wmb();
 	sender->state = STATE_READY;
 }
 
diff --git a/kernel/kthread.c b/kernel/kthread.c
index e377e2244103..f50f174e92da 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -174,7 +174,7 @@ int kthread_stop(struct task_struct *k)
 
 	/* Must init completion *before* thread sees kthread_stop_info.k */
 	init_completion(&kthread_stop_info.done);
-	wmb();
+	smp_wmb();
 
 	/* Now set kthread_should_stop() to true, and wake it up. */
 	kthread_stop_info.k = k;
diff --git a/kernel/profile.c b/kernel/profile.c
index a38fa70075fe..a66be468c422 100644
--- a/kernel/profile.c
+++ b/kernel/profile.c
@@ -522,7 +522,7 @@ static int __init create_hash_tables(void)
 	return 0;
 out_cleanup:
 	prof_on = 0;
-	mb();
+	smp_mb();
 	on_each_cpu(profile_nop, NULL, 0, 1);
 	for_each_online_cpu(cpu) {
 		struct page *page;
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 88b306c4e841..f5cc1cec0fb4 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -135,7 +135,7 @@ int ptrace_attach(struct task_struct *task)
  	    (current->gid != task->sgid) ||
  	    (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE))
 		goto bad;
-	rmb();
+	smp_rmb();
 	if (!task->mm->dumpable && !capable(CAP_SYS_PTRACE))
 		goto bad;
 	/* the same process cannot be attached many times */
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index c39ed70af174..6116b25aa7cf 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -33,7 +33,7 @@ static int stopmachine(void *cpu)
 	set_cpus_allowed(current, cpumask_of_cpu((int)(long)cpu));
 
 	/* Ack: we are alive */
-	mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
+	smp_mb(); /* Theoretically the ack = 0 might not be on this CPU yet. */
 	atomic_inc(&stopmachine_thread_ack);
 
 	/* Simple state machine */
@@ -43,14 +43,14 @@ static int stopmachine(void *cpu)
 			local_irq_disable();
 			irqs_disabled = 1;
 			/* Ack: irqs disabled. */
-			mb(); /* Must read state first. */
+			smp_mb(); /* Must read state first. */
 			atomic_inc(&stopmachine_thread_ack);
 		} else if (stopmachine_state == STOPMACHINE_PREPARE
 			   && !prepared) {
 			/* Everyone is in place, hold CPU. */
 			preempt_disable();
 			prepared = 1;
-			mb(); /* Must read state first. */
+			smp_mb(); /* Must read state first. */
 			atomic_inc(&stopmachine_thread_ack);
 		}
 		/* Yield in first stage: migration threads need to
@@ -62,7 +62,7 @@ static int stopmachine(void *cpu)
 	}
 
 	/* Ack: we are exiting. */
-	mb(); /* Must read state first. */
+	smp_mb(); /* Must read state first. */
 	atomic_inc(&stopmachine_thread_ack);
 
 	if (irqs_disabled)
@@ -77,7 +77,7 @@ static int stopmachine(void *cpu)
 static void stopmachine_set_state(enum stopmachine_state state)
 {
 	atomic_set(&stopmachine_thread_ack, 0);
-	wmb();
+	smp_wmb();
 	stopmachine_state = state;
 	while (atomic_read(&stopmachine_thread_ack) != stopmachine_num_threads)
 		cpu_relax();
diff --git a/kernel/sys.c b/kernel/sys.c
index 462d78d55895..df2ddcc6863b 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -525,7 +525,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid)
 	if (new_egid != old_egid)
 	{
 		current->mm->dumpable = 0;
-		wmb();
+		smp_wmb();
 	}
 	if (rgid != (gid_t) -1 ||
 	    (egid != (gid_t) -1 && egid != old_rgid))
@@ -556,7 +556,7 @@ asmlinkage long sys_setgid(gid_t gid)
 		if(old_egid != gid)
 		{
 			current->mm->dumpable=0;
-			wmb();
+			smp_wmb();
 		}
 		current->gid = current->egid = current->sgid = current->fsgid = gid;
 	}
@@ -565,7 +565,7 @@ asmlinkage long sys_setgid(gid_t gid)
 		if(old_egid != gid)
 		{
 			current->mm->dumpable=0;
-			wmb();
+			smp_wmb();
 		}
 		current->egid = current->fsgid = gid;
 	}
@@ -596,7 +596,7 @@ static int set_user(uid_t new_ruid, int dumpclear)
 	if(dumpclear)
 	{
 		current->mm->dumpable = 0;
-		wmb();
+		smp_wmb();
 	}
 	current->uid = new_ruid;
 	return 0;
@@ -653,7 +653,7 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid)
 	if (new_euid != old_euid)
 	{
 		current->mm->dumpable=0;
-		wmb();
+		smp_wmb();
 	}
 	current->fsuid = current->euid = new_euid;
 	if (ruid != (uid_t) -1 ||
@@ -703,7 +703,7 @@ asmlinkage long sys_setuid(uid_t uid)
 	if (old_euid != uid)
 	{
 		current->mm->dumpable = 0;
-		wmb();
+		smp_wmb();
 	}
 	current->fsuid = current->euid = uid;
 	current->suid = new_suid;
@@ -748,7 +748,7 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid)
 		if (euid != current->euid)
 		{
 			current->mm->dumpable = 0;
-			wmb();
+			smp_wmb();
 		}
 		current->euid = euid;
 	}
@@ -798,7 +798,7 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid)
 		if (egid != current->egid)
 		{
 			current->mm->dumpable = 0;
-			wmb();
+			smp_wmb();
 		}
 		current->egid = egid;
 	}
@@ -845,7 +845,7 @@ asmlinkage long sys_setfsuid(uid_t uid)
 		if (uid != old_fsuid)
 		{
 			current->mm->dumpable = 0;
-			wmb();
+			smp_wmb();
 		}
 		current->fsuid = uid;
 	}
@@ -875,7 +875,7 @@ asmlinkage long sys_setfsgid(gid_t gid)
 		if (gid != old_fsgid)
 		{
 			current->mm->dumpable = 0;
-			wmb();
+			smp_wmb();
 		}
 		current->fsgid = gid;
 		key_fsgid_changed(current);
diff --git a/kernel/timer.c b/kernel/timer.c
index ecb3d67c0e14..207aa4f0aa10 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1007,7 +1007,7 @@ asmlinkage long sys_getppid(void)
 		 * Make sure we read the pid before re-reading the
 		 * parent pointer:
 		 */
-		rmb();
+		smp_rmb();
 		parent = me->group_leader->real_parent;
 		if (old != parent)
 			continue;
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
index 21f0db2c9711..40ffde940a86 100644
--- a/lib/rwsem-spinlock.c
+++ b/lib/rwsem-spinlock.c
@@ -76,7 +76,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
 		list_del(&waiter->list);
 		tsk = waiter->task;
 		/* Don't touch waiter after ->task has been NULLed */
-		mb();
+		smp_mb();
 		waiter->task = NULL;
 		wake_up_process(tsk);
 		put_task_struct(tsk);
@@ -91,7 +91,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
 
 		list_del(&waiter->list);
 		tsk = waiter->task;
-		mb();
+		smp_mb();
 		waiter->task = NULL;
 		wake_up_process(tsk);
 		put_task_struct(tsk);
@@ -123,7 +123,7 @@ __rwsem_wake_one_writer(struct rw_semaphore *sem)
 	list_del(&waiter->list);
 
 	tsk = waiter->task;
-	mb();
+	smp_mb();
 	waiter->task = NULL;
 	wake_up_process(tsk);
 	put_task_struct(tsk);
diff --git a/lib/rwsem.c b/lib/rwsem.c
index 7644089ec8fa..62fa4eba9ffe 100644
--- a/lib/rwsem.c
+++ b/lib/rwsem.c
@@ -74,7 +74,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
 	 */
 	list_del(&waiter->list);
 	tsk = waiter->task;
-	mb();
+	smp_mb();
 	waiter->task = NULL;
 	wake_up_process(tsk);
 	put_task_struct(tsk);
@@ -117,7 +117,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, int downgrading)
 		waiter = list_entry(next, struct rwsem_waiter, list);
 		next = waiter->list.next;
 		tsk = waiter->task;
-		mb();
+		smp_mb();
 		waiter->task = NULL;
 		wake_up_process(tsk);
 		put_task_struct(tsk);
diff --git a/mm/mempool.c b/mm/mempool.c
index e9a0a6337b21..c9f3d4620428 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -229,7 +229,7 @@ repeat_alloc:
 	/* Now start performing page reclaim */
 	gfp_temp = gfp_mask;
 	prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE);
-	mb();
+	smp_mb();
 	if (!pool->curr_nr)
 		io_schedule();
 	finish_wait(&pool->wait, &wait);
@@ -250,7 +250,7 @@ void mempool_free(void *element, mempool_t *pool)
 {
 	unsigned long flags;
 
-	mb();
+	smp_mb();
 	if (pool->curr_nr < pool->min_nr) {
 		spin_lock_irqsave(&pool->lock, flags);
 		if (pool->curr_nr < pool->min_nr) {
-- 
cgit v1.2.3


From ffa0aea681a5f3c8aecbb86f1cfd3486043805de Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Sun, 1 May 2005 08:58:56 -0700
Subject: [PATCH] uml - hostfs: avoid buffers

Use this:
	.set_page_dirty = __set_page_dirty_nobuffers,

We already dropped the inclusion of <linux/buffer_head.h>, and we don't have a
backing block device for this FS.

"Without having looked at it, I'm sure that hostfs does not use buffer_heads.
So setting your ->set_page_dirty a_op to point at __set_page_dirty_nobuffers()
is a reasonable thing to do - it'll provide a slight speedup."

This speedup is one less spinlock held and one less conditional branch, which
isn't bad.

Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hostfs/hostfs_kern.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index a88ad2924851..e6c63d9cac7b 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -521,7 +521,7 @@ int hostfs_commit_write(struct file *file, struct page *page, unsigned from,
 static struct address_space_operations hostfs_aops = {
 	.writepage 	= hostfs_writepage,
 	.readpage	= hostfs_readpage,
-/* 	.set_page_dirty = __set_page_dirty_nobuffers, */
+	.set_page_dirty = __set_page_dirty_nobuffers,
 	.prepare_write	= hostfs_prepare_write,
 	.commit_write	= hostfs_commit_write
 };
-- 
cgit v1.2.3


From cd7619d6bf36564cf54ff7218ef54e558a741913 Mon Sep 17 00:00:00 2001
From: Matt Mackall <mpm@selenic.com>
Date: Sun, 1 May 2005 08:59:01 -0700
Subject: [PATCH] Exterminate PAGE_BUG

Remove PAGE_BUG - repalce it with BUG and BUG_ON.

Signed-off-by: Matt Mackall <mpm@selenic.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/arm26/mm/small_page.c |  6 ++----
 fs/afs/file.c              |  3 +--
 fs/buffer.c                |  3 +--
 fs/jffs2/file.c            |  3 +--
 fs/udf/file.c              |  6 ++----
 fs/udf/inode.c             |  4 ++--
 include/asm-cris/page.h    |  4 ----
 include/asm-generic/bug.h  | 11 -----------
 include/asm-sh64/bug.h     |  4 ----
 mm/filemap.c               |  3 +--
 10 files changed, 10 insertions(+), 37 deletions(-)

(limited to 'fs')

diff --git a/arch/arm26/mm/small_page.c b/arch/arm26/mm/small_page.c
index 77be86cca789..30447106c25f 100644
--- a/arch/arm26/mm/small_page.c
+++ b/arch/arm26/mm/small_page.c
@@ -92,8 +92,7 @@ static unsigned long __get_small_page(int priority, struct order *order)
 		page = list_entry(order->queue.next, struct page, lru);
 again:
 #ifdef PEDANTIC
-		if (USED_MAP(page) & ~order->all_used)
-			PAGE_BUG(page);
+		BUG_ON(USED_MAP(page) & ~order->all_used);
 #endif
 		offset = ffz(USED_MAP(page));
 		SET_USED(page, offset);
@@ -141,8 +140,7 @@ static void __free_small_page(unsigned long spage, struct order *order)
 			goto non_small;
 
 #ifdef PEDANTIC
-		if (USED_MAP(page) & ~order->all_used)
-			PAGE_BUG(page);
+		BUG_ON(USED_MAP(page) & ~order->all_used);
 #endif
 
 		spage = spage >> order->shift;
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 6b6bb7c8abf6..23c125128024 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -131,8 +131,7 @@ static int afs_file_readpage(struct file *file, struct page *page)
 
 	vnode = AFS_FS_I(inode);
 
-	if (!PageLocked(page))
-		PAGE_BUG(page);
+	BUG_ON(!PageLocked(page));
 
 	ret = -ESTALE;
 	if (vnode->flags & AFS_VNODE_DELETED)
diff --git a/fs/buffer.c b/fs/buffer.c
index 188365c79204..792cbacbbf41 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2078,8 +2078,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
 	int nr, i;
 	int fully_mapped = 1;
 
-	if (!PageLocked(page))
-		PAGE_BUG(page);
+	BUG_ON(!PageLocked(page));
 	blocksize = 1 << inode->i_blkbits;
 	if (!page_has_buffers(page))
 		create_empty_buffers(page, blocksize, 0);
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index 0c607c1388f4..771a554701d6 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -79,8 +79,7 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
 
 	D2(printk(KERN_DEBUG "jffs2_do_readpage_nolock(): ino #%lu, page at offset 0x%lx\n", inode->i_ino, pg->index << PAGE_CACHE_SHIFT));
 
-	if (!PageLocked(pg))
-                PAGE_BUG(pg);
+	BUG_ON(!PageLocked(pg));
 
 	pg_buf = kmap(pg);
 	/* FIXME: Can kmap fail? */
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 2faa4172b9f7..bb40d63f328f 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -49,8 +49,7 @@ static int udf_adinicb_readpage(struct file *file, struct page * page)
 	struct inode *inode = page->mapping->host;
 	char *kaddr;
 
-	if (!PageLocked(page))
-		PAGE_BUG(page);
+	BUG_ON(!PageLocked(page));
 
 	kaddr = kmap(page);
 	memset(kaddr, 0, PAGE_CACHE_SIZE);
@@ -67,8 +66,7 @@ static int udf_adinicb_writepage(struct page *page, struct writeback_control *wb
 	struct inode *inode = page->mapping->host;
 	char *kaddr;
 
-	if (!PageLocked(page))
-		PAGE_BUG(page);
+	BUG_ON(!PageLocked(page));
 
 	kaddr = kmap(page);
 	memcpy(UDF_I_DATA(inode) + UDF_I_LENEATTR(inode), kaddr, inode->i_size);
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 0506e1173784..3d68de39fad6 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -167,8 +167,8 @@ void udf_expand_file_adinicb(struct inode * inode, int newsize, int * err)
 	}
 
 	page = grab_cache_page(inode->i_mapping, 0);
-	if (!PageLocked(page))
-		PAGE_BUG(page);
+	BUG_ON(!PageLocked(page));
+
 	if (!PageUptodate(page))
 	{
 		kaddr = kmap(page);
diff --git a/include/asm-cris/page.h b/include/asm-cris/page.h
index ddd8915e41e6..c767da1ef8f5 100644
--- a/include/asm-cris/page.h
+++ b/include/asm-cris/page.h
@@ -77,10 +77,6 @@ typedef struct { unsigned long pgprot; } pgprot_t;
   printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
 } while (0)
 
-#define PAGE_BUG(page) do { \
-         BUG(); \
-} while (0)
-
 /* Pure 2^n version of get_order */
 static inline int get_order(unsigned long size)
 {
diff --git a/include/asm-generic/bug.h b/include/asm-generic/bug.h
index 6e5aaaa9a2fb..400c2b41896e 100644
--- a/include/asm-generic/bug.h
+++ b/include/asm-generic/bug.h
@@ -12,13 +12,6 @@
 } while (0)
 #endif
 
-#ifndef HAVE_ARCH_PAGE_BUG
-#define PAGE_BUG(page) do { \
-	printk("page BUG for page at %p\n", page); \
-	BUG(); \
-} while (0)
-#endif
-
 #ifndef HAVE_ARCH_BUG_ON
 #define BUG_ON(condition) do { if (unlikely((condition)!=0)) BUG(); } while(0)
 #endif
@@ -37,10 +30,6 @@
 #define BUG()
 #endif
 
-#ifndef HAVE_ARCH_PAGE_BUG
-#define PAGE_BUG(page) do { if (page) ; } while (0)
-#endif
-
 #ifndef HAVE_ARCH_BUG_ON
 #define BUG_ON(condition) do { if (condition) ; } while(0)
 #endif
diff --git a/include/asm-sh64/bug.h b/include/asm-sh64/bug.h
index 3acd54d59566..5d659ec28e10 100644
--- a/include/asm-sh64/bug.h
+++ b/include/asm-sh64/bug.h
@@ -17,10 +17,6 @@
 		BUG(); \
 } while(0)
 
-#define PAGE_BUG(page) do { \
-	BUG(); \
-} while (0)
-
 #define WARN_ON(condition) do { \
 	if (unlikely((condition)!=0)) { \
 		printk("Badness in %s at %s:%d\n", __FUNCTION__, __FILE__, __LINE__); \
diff --git a/mm/filemap.c b/mm/filemap.c
index ee79b5d3439f..c085af2332d8 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -123,8 +123,7 @@ void remove_from_page_cache(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 
-	if (unlikely(!PageLocked(page)))
-		PAGE_BUG(page);
+	BUG_ON(!PageLocked(page));
 
 	write_lock_irq(&mapping->tree_lock);
 	__remove_from_page_cache(page);
-- 
cgit v1.2.3


From bcf88e1163623e8e8ef2ba7feface9c826a890c9 Mon Sep 17 00:00:00 2001
From: Daniel Drake <dsd@gentoo.org>
Date: Sun, 1 May 2005 08:59:03 -0700
Subject: [PATCH] procfs: Fix hardlink counts

The pid directories in /proc/ currently return the wrong hardlink count - 3,
when there are actually 4 : ".", "..", "fd", and "task".

This is easy to notice using find(1):
	cd /proc/<pid>
	find

In the output, you'll see a message similar to:

find: WARNING: Hard link count is wrong for .: this may be a bug in your
filesystem driver.  Automatically turning on find's -noleaf option.
Earlier results may have failed to include directories that should have
been searched.

http://bugs.gentoo.org/show_bug.cgi?id=86031

I also noticed that CONFIG_SECURITY can add a 5th: attr, and performed a
similar fix on the task directories too.

Signed-off-by: Daniel Drake <dsd@gentoo.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/base.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 39fd336cfdb9..4718173af2c8 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1800,8 +1800,12 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct
 	inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
 	inode->i_op = &proc_tgid_base_inode_operations;
 	inode->i_fop = &proc_tgid_base_operations;
-	inode->i_nlink = 3;
 	inode->i_flags|=S_IMMUTABLE;
+#ifdef CONFIG_SECURITY
+	inode->i_nlink = 5;
+#else
+	inode->i_nlink = 4;
+#endif
 
 	dentry->d_op = &pid_base_dentry_operations;
 
@@ -1855,8 +1859,12 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry
 	inode->i_mode = S_IFDIR|S_IRUGO|S_IXUGO;
 	inode->i_op = &proc_tid_base_inode_operations;
 	inode->i_fop = &proc_tid_base_operations;
-	inode->i_nlink = 3;
 	inode->i_flags|=S_IMMUTABLE;
+#ifdef CONFIG_SECURITY
+	inode->i_nlink = 4;
+#else
+	inode->i_nlink = 3;
+#endif
 
 	dentry->d_op = &pid_base_dentry_operations;
 
-- 
cgit v1.2.3


From f246315e1ab96c40978777d1e159820ecca45aa8 Mon Sep 17 00:00:00 2001
From: Daniel Drake <dsd@gentoo.org>
Date: Sun, 1 May 2005 08:59:03 -0700
Subject: [PATCH] procfs: Fix hardlink counts for /proc/<PID>/task

The current logic assumes that a /proc/<PID>/task directory should have a
hardlink count of 3, probably counting ".", "..", and a directory for a
single child task.

It's fairly obvious that this doesn't work out correctly when a PID has
more than one child task, which is quite often the case.

Signed-off-by: Daniel Drake <dsd@gentoo.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/proc/base.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index 4718173af2c8..2eac86d46c51 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1419,6 +1419,8 @@ static struct file_operations proc_tgid_attr_operations;
 static struct inode_operations proc_tgid_attr_inode_operations;
 #endif
 
+static int get_tid_list(int index, unsigned int *tids, struct inode *dir);
+
 /* SMP-safe */
 static struct dentry *proc_pident_lookup(struct inode *dir, 
 					 struct dentry *dentry,
@@ -1458,7 +1460,7 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
 	 */
 	switch(p->type) {
 		case PROC_TGID_TASK:
-			inode->i_nlink = 3;
+			inode->i_nlink = 2 + get_tid_list(2, NULL, dir);
 			inode->i_op = &proc_task_inode_operations;
 			inode->i_fop = &proc_task_operations;
 			break;
@@ -1943,7 +1945,8 @@ static int get_tid_list(int index, unsigned int *tids, struct inode *dir)
 
 		if (--index >= 0)
 			continue;
-		tids[nr_tids] = tid;
+		if (tids != NULL)
+			tids[nr_tids] = tid;
 		nr_tids++;
 		if (nr_tids >= PROC_MAXPIDS)
 			break;
@@ -2043,6 +2046,7 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
 	}
 
 	nr_tids = get_tid_list(pos, tid_array, inode);
+	inode->i_nlink = pos + nr_tids;
 
 	for (i = 0; i < nr_tids; i++) {
 		unsigned long j = PROC_NUMBUF;
-- 
cgit v1.2.3


From 9a3bb3017383fbb6fe56431d17f60bd0d50f0717 Mon Sep 17 00:00:00 2001
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Date: Sun, 1 May 2005 08:59:05 -0700
Subject: [PATCH] reiserfs: make resize option auto-get new device size

It's trivial for the resize option to auto-get the underlying device size,
while it's harder for the user.  I've copied the code from jfs.

Since of the different reiserfs option parser (which does not use the
superior match_token used by almost every other filesystem), I've had to
use the "resize=auto" and not "resize" option to specify this behaviour.
Changing the option parser to the kernel one wouldn't be bad but I've no
time to do this cleanup in this moment.

Btw, the mount(8) man page should be updated to include this option.  Cc
the relevant people, please (I hope I cc'ed the right people).

Cc: <reiserfs-dev@namesys.com>
Cc: <reiserfs-list@namesys.com>
Cc: <mtk-manpages@gmx.net>
Cc: Alex Zarochentsev <zam@namesys.com>
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/super.c | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index bcdf2438d152..bc5e8893b5d5 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -889,12 +889,18 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
 	    char * p;
 	    
 	    p = NULL;
-	    /* "resize=NNN" */
-	    *blocks = simple_strtoul (arg, &p, 0);
-	    if (*p != '\0') {
-		/* NNN does not look like a number */
-		reiserfs_warning (s, "reiserfs_parse_options: bad value %s", arg);
-		return 0;
+	    /* "resize=NNN" or "resize=auto" */
+
+	    if (!strcmp(arg, "auto")) {
+		    /* From JFS code, to auto-get the size.*/
+		    *blocks = s->s_bdev->bd_inode->i_size >> s->s_blocksize_bits;
+	    } else {
+		    *blocks = simple_strtoul (arg, &p, 0);
+		    if (*p != '\0') {
+			/* NNN does not look like a number */
+			reiserfs_warning (s, "reiserfs_parse_options: bad value %s", arg);
+			return 0;
+		    }
 	    }
 	}
 
@@ -903,7 +909,8 @@ static int reiserfs_parse_options (struct super_block * s, char * options, /* st
 		unsigned long val = simple_strtoul (arg, &p, 0);
 		/* commit=NNN (time in seconds) */
 		if ( *p != '\0' || val >= (unsigned int)-1) {
-			reiserfs_warning (s, "reiserfs_parse_options: bad value %s", arg);			return 0;
+			reiserfs_warning (s, "reiserfs_parse_options: bad value %s", arg);
+			return 0;
 		}
 		*commit_max_age = (unsigned int)val;
 	}
-- 
cgit v1.2.3


From 127144df4ce817ad648af15a3983c8d52aacf670 Mon Sep 17 00:00:00 2001
From: Jan Kara <jack@suse.cz>
Date: Sun, 1 May 2005 08:59:07 -0700
Subject: [PATCH] Fix rewriting on a full reiserfs filesystem

Allow rewriting of a file and extending a file upto the end of the
allocated block on a full filesystem.

From: Chris Mason <mason@suse.com>
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/file.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 26950113af8c..f6860e83521d 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -1284,10 +1284,11 @@ static ssize_t reiserfs_file_write( struct file *file, /* the file we are going
 	reiserfs_claim_blocks_to_be_allocated(inode->i_sb, num_pages << (PAGE_CACHE_SHIFT - inode->i_blkbits));
 	reiserfs_write_unlock(inode->i_sb);
 
-	if ( !num_pages ) { /* If we do not have enough space even for */
-	    res = -ENOSPC;  /* single page, return -ENOSPC */
-	    if ( pos > (inode->i_size & (inode->i_sb->s_blocksize-1)))
-		break; // In case we are writing past the file end, break.
+	if ( !num_pages ) { /* If we do not have enough space even for a single page... */
+	    if ( pos > inode->i_size+inode->i_sb->s_blocksize-(pos & (inode->i_sb->s_blocksize-1))) {
+		res = -ENOSPC;
+		break; // In case we are writing past the end of the last file block, break.
+	    }
 	    // Otherwise we are possibly overwriting the file, so
 	    // let's set write size to be equal or less than blocksize.
 	    // This way we get it correctly for file holes.
-- 
cgit v1.2.3


From 74f9f974a64dc3de554aa1977bf108334436e47b Mon Sep 17 00:00:00 2001
From: Edward Shishkin <edward@namesys.com>
Date: Sun, 1 May 2005 08:59:09 -0700
Subject: [PATCH] reiserfs: journal_init fix

This fixes segmentation fault when specifying bad journal device via
a mount option.

Don't pass a zero pointer to bdevname() if filp_open() returns error.

Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/journal.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index c9ad3a7849f4..b16d65acb550 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2306,13 +2306,16 @@ static int journal_init_dev( struct super_block *super,
 	if( !IS_ERR( journal -> j_dev_file ) ) {
 		struct inode *jdev_inode = journal->j_dev_file->f_mapping->host;
 		if( !S_ISBLK( jdev_inode -> i_mode ) ) {
-			reiserfs_warning  (super, "journal_init_dev: '%s' is "
-					   "not a block device", jdev_name );
+			reiserfs_warning(super, "journal_init_dev: '%s' is "
+					 "not a block device", jdev_name );
 			result = -ENOTBLK;
+			release_journal_dev( super, journal );
 		} else  {
 			/* ok */
 			journal->j_dev_bd = I_BDEV(jdev_inode);
 			set_blocksize(journal->j_dev_bd, super->s_blocksize);
+			reiserfs_info(super, "journal_init_dev: journal device: %s\n",
+				      bdevname(journal->j_dev_bd, b));
 		}
 	} else {
 		result = PTR_ERR( journal -> j_dev_file );
@@ -2321,11 +2324,6 @@ static int journal_init_dev( struct super_block *super,
 				  "journal_init_dev: Cannot open '%s': %i",
 				  jdev_name, result );
 	}
-	if( result != 0 ) {
-		release_journal_dev( super, journal );
-	}
-	reiserfs_info(super, "journal_init_dev: journal device: %s\n",
-		bdevname(journal->j_dev_bd, b));
 	return result;
 }
 
-- 
cgit v1.2.3


From 7ed20e1ad521b5f5df61bf6559ae60738e393741 Mon Sep 17 00:00:00 2001
From: Jesper Juhl <juhl-lkml@dif.dk>
Date: Sun, 1 May 2005 08:59:14 -0700
Subject: [PATCH] convert that currently tests _NSIG directly to use
 valid_signal()

Convert most of the current code that uses _NSIG directly to instead use
valid_signal().  This avoids gcc -W warnings and off-by-one errors.

Signed-off-by: Jesper Juhl <juhl-lkml@dif.dk>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 arch/alpha/kernel/ptrace.c         | 5 +++--
 arch/arm/kernel/ptrace.c           | 5 +++--
 arch/arm26/kernel/ptrace.c         | 5 +++--
 arch/cris/arch-v10/kernel/ptrace.c | 5 +++--
 arch/frv/kernel/ptrace.c           | 5 +++--
 arch/h8300/kernel/ptrace.c         | 5 +++--
 arch/i386/kernel/ptrace.c          | 5 +++--
 arch/ia64/kernel/ptrace.c          | 5 +++--
 arch/m32r/kernel/ptrace.c          | 5 +++--
 arch/m68k/kernel/ptrace.c          | 5 +++--
 arch/m68knommu/kernel/ptrace.c     | 5 +++--
 arch/mips/kernel/ptrace.c          | 3 ++-
 arch/mips/kernel/ptrace32.c        | 3 ++-
 arch/parisc/kernel/ptrace.c        | 7 ++++---
 arch/ppc/kernel/ptrace.c           | 5 +++--
 arch/ppc64/kernel/ptrace.c         | 5 +++--
 arch/ppc64/kernel/ptrace32.c       | 5 +++--
 arch/s390/kernel/ptrace.c          | 5 +++--
 arch/sh/kernel/ptrace.c            | 5 +++--
 arch/sh64/kernel/ptrace.c          | 5 +++--
 arch/sparc/kernel/ptrace.c         | 3 ++-
 arch/sparc64/kernel/ptrace.c       | 3 ++-
 arch/um/kernel/ptrace.c            | 4 ++--
 arch/v850/kernel/ptrace.c          | 3 ++-
 arch/x86_64/kernel/ptrace.c        | 5 +++--
 drivers/char/vt_ioctl.c            | 3 ++-
 fs/fcntl.c                         | 3 ++-
 ipc/mqueue.c                       | 4 ++--
 kernel/exit.c                      | 5 +++--
 kernel/futex.c                     | 3 ++-
 kernel/ptrace.c                    | 3 ++-
 kernel/signal.c                    | 9 +++++----
 kernel/sys.c                       | 3 ++-
 33 files changed, 90 insertions(+), 59 deletions(-)

(limited to 'fs')

diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c
index d00583161574..bbd37536d14e 100644
--- a/arch/alpha/kernel/ptrace.c
+++ b/arch/alpha/kernel/ptrace.c
@@ -14,6 +14,7 @@
 #include <linux/user.h>
 #include <linux/slab.h>
 #include <linux/security.h>
+#include <linux/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -335,7 +336,7 @@ do_sys_ptrace(long request, long pid, long addr, long data,
 		/* continue and stop at next (return from) syscall */
 	case PTRACE_CONT:    /* restart after signal. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		if (request == PTRACE_SYSCALL)
 			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
@@ -365,7 +366,7 @@ do_sys_ptrace(long request, long pid, long addr, long data,
 
 	case PTRACE_SINGLESTEP:  /* execute single instruction. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		/* Mark single stepping.  */
 		child->thread_info->bpt_nsaved = -1;
diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c
index efd7a341614b..cd99b83f14c2 100644
--- a/arch/arm/kernel/ptrace.c
+++ b/arch/arm/kernel/ptrace.c
@@ -19,6 +19,7 @@
 #include <linux/user.h>
 #include <linux/security.h>
 #include <linux/init.h>
+#include <linux/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -693,7 +694,7 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat
 		case PTRACE_SYSCALL:
 		case PTRACE_CONT:
 			ret = -EIO;
-			if ((unsigned long) data > _NSIG)
+			if (!valid_signal(data))
 				break;
 			if (request == PTRACE_SYSCALL)
 				set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
@@ -728,7 +729,7 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat
 		 */
 		case PTRACE_SINGLESTEP:
 			ret = -EIO;
-			if ((unsigned long) data > _NSIG)
+			if (!valid_signal(data))
 				break;
 			child->ptrace |= PT_SINGLESTEP;
 			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
diff --git a/arch/arm26/kernel/ptrace.c b/arch/arm26/kernel/ptrace.c
index 2a137146a77c..8a52124de0e1 100644
--- a/arch/arm26/kernel/ptrace.c
+++ b/arch/arm26/kernel/ptrace.c
@@ -18,6 +18,7 @@
 #include <linux/ptrace.h>
 #include <linux/user.h>
 #include <linux/security.h>
+#include <linux/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -591,7 +592,7 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat
 		case PTRACE_SYSCALL:
 		case PTRACE_CONT:
 			ret = -EIO;
-			if ((unsigned long) data > _NSIG)
+			if (!valid_signal(data))
 				break;
 			if (request == PTRACE_SYSCALL)
 				set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
@@ -626,7 +627,7 @@ static int do_ptrace(int request, struct task_struct *child, long addr, long dat
 		 */
 		case PTRACE_SINGLESTEP:
 			ret = -EIO;
-			if ((unsigned long) data > _NSIG)
+			if (!valid_signal(data))
 				break;
 			child->ptrace |= PT_SINGLESTEP;
 			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
diff --git a/arch/cris/arch-v10/kernel/ptrace.c b/arch/cris/arch-v10/kernel/ptrace.c
index da15db8ae482..581ecabaae53 100644
--- a/arch/cris/arch-v10/kernel/ptrace.c
+++ b/arch/cris/arch-v10/kernel/ptrace.c
@@ -10,6 +10,7 @@
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/user.h>
+#include <linux/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -184,7 +185,7 @@ sys_ptrace(long request, long pid, long addr, long data)
 		case PTRACE_CONT:
 			ret = -EIO;
 			
-			if ((unsigned long) data > _NSIG)
+			if (!valid_signal(data))
 				break;
                         
 			if (request == PTRACE_SYSCALL) {
@@ -219,7 +220,7 @@ sys_ptrace(long request, long pid, long addr, long data)
 		case PTRACE_SINGLESTEP:
 			ret = -EIO;
 			
-			if ((unsigned long) data > _NSIG)
+			if (!valid_signal(data))
 				break;
 			
 			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
diff --git a/arch/frv/kernel/ptrace.c b/arch/frv/kernel/ptrace.c
index 2a0efb739adc..cbe03cba9f02 100644
--- a/arch/frv/kernel/ptrace.c
+++ b/arch/frv/kernel/ptrace.c
@@ -20,6 +20,7 @@
 #include <linux/user.h>
 #include <linux/config.h>
 #include <linux/security.h>
+#include <linux/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -239,7 +240,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
 	case PTRACE_CONT: /* restart after signal. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		if (request == PTRACE_SYSCALL)
 			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
@@ -267,7 +268,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 
 	case PTRACE_SINGLESTEP:  /* set the trap flag. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		ptrace_enable(child);
diff --git a/arch/h8300/kernel/ptrace.c b/arch/h8300/kernel/ptrace.c
index 5f19d774a288..05c15e869777 100644
--- a/arch/h8300/kernel/ptrace.c
+++ b/arch/h8300/kernel/ptrace.c
@@ -24,6 +24,7 @@
 #include <linux/ptrace.h>
 #include <linux/user.h>
 #include <linux/config.h>
+#include <linux/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -171,7 +172,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 		case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
 		case PTRACE_CONT: { /* restart after signal. */
 			ret = -EIO;
-			if ((unsigned long) data >= _NSIG)
+			if (!valid_signal(data))
 				break ;
 			if (request == PTRACE_SYSCALL)
 				set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
@@ -202,7 +203,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 
 		case PTRACE_SINGLESTEP: {  /* set the trap flag. */
 			ret = -EIO;
-			if ((unsigned long) data > _NSIG)
+			if (!valid_signal(data))
 				break;
 			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 			child->exit_code = data;
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index b2f17640ceff..e8c965ce86eb 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -16,6 +16,7 @@
 #include <linux/security.h>
 #include <linux/audit.h>
 #include <linux/seccomp.h>
+#include <linux/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -511,7 +512,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 	case PTRACE_SYSCALL:	/* continue and stop at next (return from) syscall */
 	case PTRACE_CONT:	/* restart after signal. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		if (request == PTRACE_SYSCALL) {
 			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
@@ -543,7 +544,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 
 	case PTRACE_SINGLESTEP:	/* set the trap flag. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		set_singlestep(child);
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 55789fcd7210..c253fd5914fc 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -17,6 +17,7 @@
 #include <linux/user.h>
 #include <linux/security.h>
 #include <linux/audit.h>
+#include <linux/signal.h>
 
 #include <asm/pgtable.h>
 #include <asm/processor.h>
@@ -1481,7 +1482,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)
 	      case PTRACE_CONT:
 		/* restart after signal. */
 		ret = -EIO;
-		if (data > _NSIG)
+		if (!valid_signal(data))
 			goto out_tsk;
 		if (request == PTRACE_SYSCALL)
 			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
@@ -1520,7 +1521,7 @@ sys_ptrace (long request, pid_t pid, unsigned long addr, unsigned long data)
 		/* let child execute for one instruction */
 	      case PTRACE_SINGLEBLOCK:
 		ret = -EIO;
-		if (data > _NSIG)
+		if (!valid_signal(data))
 			goto out_tsk;
 
 		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c
index 8b40f362dd6f..124f7c1b775e 100644
--- a/arch/m32r/kernel/ptrace.c
+++ b/arch/m32r/kernel/ptrace.c
@@ -24,6 +24,7 @@
 #include <linux/ptrace.h>
 #include <linux/user.h>
 #include <linux/string.h>
+#include <linux/signal.h>
 
 #include <asm/cacheflush.h>
 #include <asm/io.h>
@@ -665,7 +666,7 @@ do_ptrace(long request, struct task_struct *child, long addr, long data)
 	case PTRACE_SYSCALL:
 	case PTRACE_CONT:
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		if (request == PTRACE_SYSCALL)
 			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
@@ -700,7 +701,7 @@ do_ptrace(long request, struct task_struct *child, long addr, long data)
 		unsigned long pc, insn;
 
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		if ((child->ptrace & PT_DTRACE) == 0) {
diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c
index 0beb53333ba3..f4e1e5eb8e12 100644
--- a/arch/m68k/kernel/ptrace.c
+++ b/arch/m68k/kernel/ptrace.c
@@ -19,6 +19,7 @@
 #include <linux/ptrace.h>
 #include <linux/user.h>
 #include <linux/config.h>
+#include <linux/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -251,7 +252,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 			long tmp;
 
 			ret = -EIO;
-			if ((unsigned long) data > _NSIG)
+			if (!valid_signal(data))
 				break;
 			if (request == PTRACE_SYSCALL) {
 					child->thread.work.syscall_trace = ~0;
@@ -292,7 +293,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 			long tmp;
 
 			ret = -EIO;
-			if ((unsigned long) data > _NSIG)
+			if (!valid_signal(data))
 				break;
 			child->thread.work.syscall_trace = 0;
 			tmp = get_reg(child, PT_SR) | (TRACE_BITS << 16);
diff --git a/arch/m68knommu/kernel/ptrace.c b/arch/m68knommu/kernel/ptrace.c
index 15cf79080b15..9724e1cd82e5 100644
--- a/arch/m68knommu/kernel/ptrace.c
+++ b/arch/m68knommu/kernel/ptrace.c
@@ -19,6 +19,7 @@
 #include <linux/ptrace.h>
 #include <linux/user.h>
 #include <linux/config.h>
+#include <linux/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -240,7 +241,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 			long tmp;
 
 			ret = -EIO;
-			if ((unsigned long) data > _NSIG)
+			if (!valid_signal(data))
 				break;
 			if (request == PTRACE_SYSCALL)
 				set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
@@ -278,7 +279,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 			long tmp;
 
 			ret = -EIO;
-			if ((unsigned long) data > _NSIG)
+			if (!valid_signal(data))
 				break;
 			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 			tmp = get_reg(child, PT_SR) | (TRACE_BITS << 16);
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 92f2c39afe27..a2f899c2f4d4 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -26,6 +26,7 @@
 #include <linux/smp_lock.h>
 #include <linux/user.h>
 #include <linux/security.h>
+#include <linux/signal.h>
 
 #include <asm/cpu.h>
 #include <asm/fpu.h>
@@ -257,7 +258,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
 	case PTRACE_CONT: { /* restart after signal. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		if (request == PTRACE_SYSCALL) {
 			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c
index 611dee919d50..eee207969c21 100644
--- a/arch/mips/kernel/ptrace32.c
+++ b/arch/mips/kernel/ptrace32.c
@@ -24,6 +24,7 @@
 #include <linux/smp_lock.h>
 #include <linux/user.h>
 #include <linux/security.h>
+#include <linux/signal.h>
 
 #include <asm/cpu.h>
 #include <asm/fpu.h>
@@ -241,7 +242,7 @@ asmlinkage int sys32_ptrace(int request, int pid, int addr, int data)
 	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
 	case PTRACE_CONT: { /* restart after signal. */
 		ret = -EIO;
-		if ((unsigned int) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		if (request == PTRACE_SYSCALL) {
 			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c
index 2937a9236384..c07db9dff7cd 100644
--- a/arch/parisc/kernel/ptrace.c
+++ b/arch/parisc/kernel/ptrace.c
@@ -17,6 +17,7 @@
 #include <linux/personality.h>
 #include <linux/security.h>
 #include <linux/compat.h>
+#include <linux/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -285,7 +286,7 @@ long sys_ptrace(long request, pid_t pid, long addr, long data)
 		ret = -EIO;
 		DBG("sys_ptrace(%s)\n",
 			request == PTRACE_SYSCALL ? "SYSCALL" : "CONT");
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			goto out_tsk;
 		child->ptrace &= ~(PT_SINGLESTEP|PT_BLOCKSTEP);
 		if (request == PTRACE_SYSCALL) {
@@ -311,7 +312,7 @@ long sys_ptrace(long request, pid_t pid, long addr, long data)
 	case PTRACE_SINGLEBLOCK:
 		DBG("sys_ptrace(SINGLEBLOCK)\n");
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			goto out_tsk;
 		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		child->ptrace &= ~PT_SINGLESTEP;
@@ -328,7 +329,7 @@ long sys_ptrace(long request, pid_t pid, long addr, long data)
 	case PTRACE_SINGLESTEP:
 		DBG("sys_ptrace(SINGLESTEP)\n");
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			goto out_tsk;
 
 		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
diff --git a/arch/ppc/kernel/ptrace.c b/arch/ppc/kernel/ptrace.c
index 426b6f7d9de3..59d59a8dc249 100644
--- a/arch/ppc/kernel/ptrace.c
+++ b/arch/ppc/kernel/ptrace.c
@@ -26,6 +26,7 @@
 #include <linux/ptrace.h>
 #include <linux/user.h>
 #include <linux/security.h>
+#include <linux/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -356,7 +357,7 @@ int sys_ptrace(long request, long pid, long addr, long data)
 	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
 	case PTRACE_CONT: { /* restart after signal. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		if (request == PTRACE_SYSCALL) {
 			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
@@ -389,7 +390,7 @@ int sys_ptrace(long request, long pid, long addr, long data)
 
 	case PTRACE_SINGLESTEP: {  /* set the trap flag. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		set_single_step(child);
diff --git a/arch/ppc64/kernel/ptrace.c b/arch/ppc64/kernel/ptrace.c
index 354a287c67eb..5a846324ca8c 100644
--- a/arch/ppc64/kernel/ptrace.c
+++ b/arch/ppc64/kernel/ptrace.c
@@ -28,6 +28,7 @@
 #include <linux/security.h>
 #include <linux/audit.h>
 #include <linux/seccomp.h>
+#include <linux/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -162,7 +163,7 @@ int sys_ptrace(long request, long pid, long addr, long data)
 	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
 	case PTRACE_CONT: { /* restart after signal. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		if (request == PTRACE_SYSCALL)
 			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
@@ -194,7 +195,7 @@ int sys_ptrace(long request, long pid, long addr, long data)
 
 	case PTRACE_SINGLESTEP: {  /* set the trap flag. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		set_single_step(child);
diff --git a/arch/ppc64/kernel/ptrace32.c b/arch/ppc64/kernel/ptrace32.c
index ee81b1b776cc..16436426c7e2 100644
--- a/arch/ppc64/kernel/ptrace32.c
+++ b/arch/ppc64/kernel/ptrace32.c
@@ -26,6 +26,7 @@
 #include <linux/ptrace.h>
 #include <linux/user.h>
 #include <linux/security.h>
+#include <linux/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/page.h>
@@ -293,7 +294,7 @@ int sys32_ptrace(long request, long pid, unsigned long addr, unsigned long data)
 	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
 	case PTRACE_CONT: { /* restart after signal. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		if (request == PTRACE_SYSCALL)
 			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
@@ -325,7 +326,7 @@ int sys32_ptrace(long request, long pid, unsigned long addr, unsigned long data)
 
 	case PTRACE_SINGLESTEP: {  /* set the trap flag. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		set_single_step(child);
diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
index 647233c02fc8..9f0d73e3f5f7 100644
--- a/arch/s390/kernel/ptrace.c
+++ b/arch/s390/kernel/ptrace.c
@@ -32,6 +32,7 @@
 #include <linux/user.h>
 #include <linux/security.h>
 #include <linux/audit.h>
+#include <linux/signal.h>
 
 #include <asm/segment.h>
 #include <asm/page.h>
@@ -609,7 +610,7 @@ do_ptrace(struct task_struct *child, long request, long addr, long data)
 		/* continue and stop at next (return from) syscall */
 	case PTRACE_CONT:
 		/* restart after signal. */
-		if ((unsigned long) data >= _NSIG)
+		if (!valid_signal(data))
 			return -EIO;
 		if (request == PTRACE_SYSCALL)
 			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
@@ -637,7 +638,7 @@ do_ptrace(struct task_struct *child, long request, long addr, long data)
 
 	case PTRACE_SINGLESTEP:
 		/* set the trap flag. */
-		if ((unsigned long) data >= _NSIG)
+		if (!valid_signal(data))
 			return -EIO;
 		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		child->exit_code = data;
diff --git a/arch/sh/kernel/ptrace.c b/arch/sh/kernel/ptrace.c
index 1b0dfb4d8ea4..b28919b65682 100644
--- a/arch/sh/kernel/ptrace.c
+++ b/arch/sh/kernel/ptrace.c
@@ -20,6 +20,7 @@
 #include <linux/user.h>
 #include <linux/slab.h>
 #include <linux/security.h>
+#include <linux/signal.h>
 
 #include <asm/io.h>
 #include <asm/uaccess.h>
@@ -197,7 +198,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
 	case PTRACE_CONT: { /* restart after signal. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		if (request == PTRACE_SYSCALL)
 			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
@@ -228,7 +229,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 		struct pt_regs *dummy = NULL;
 
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		if ((child->ptrace & PT_DTRACE) == 0) {
diff --git a/arch/sh64/kernel/ptrace.c b/arch/sh64/kernel/ptrace.c
index 800288c1562b..fd2000956dae 100644
--- a/arch/sh64/kernel/ptrace.c
+++ b/arch/sh64/kernel/ptrace.c
@@ -27,6 +27,7 @@
 #include <linux/errno.h>
 #include <linux/ptrace.h>
 #include <linux/user.h>
+#include <linux/signal.h>
 
 #include <asm/io.h>
 #include <asm/uaccess.h>
@@ -255,7 +256,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
 	case PTRACE_CONT: { /* restart after signal. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		if (request == PTRACE_SYSCALL)
 			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
@@ -285,7 +286,7 @@ asmlinkage int sys_ptrace(long request, long pid, long addr, long data)
 		struct pt_regs *regs;
 
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		if ((child->ptrace & PT_DTRACE) == 0) {
diff --git a/arch/sparc/kernel/ptrace.c b/arch/sparc/kernel/ptrace.c
index c4f93bd2daf2..475c4c13462c 100644
--- a/arch/sparc/kernel/ptrace.c
+++ b/arch/sparc/kernel/ptrace.c
@@ -18,6 +18,7 @@
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/security.h>
+#include <linux/signal.h>
 
 #include <asm/pgtable.h>
 #include <asm/system.h>
@@ -526,7 +527,7 @@ asmlinkage void do_ptrace(struct pt_regs *regs)
 		addr = 1;
 
 	case PTRACE_CONT: { /* restart after signal. */
-		if (data > _NSIG) {
+		if (!valid_signal(data)) {
 			pt_error_return(regs, EIO);
 			goto out_tsk;
 		}
diff --git a/arch/sparc64/kernel/ptrace.c b/arch/sparc64/kernel/ptrace.c
index 5f080cf04b33..80a76e2ad732 100644
--- a/arch/sparc64/kernel/ptrace.c
+++ b/arch/sparc64/kernel/ptrace.c
@@ -19,6 +19,7 @@
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/security.h>
+#include <linux/signal.h>
 
 #include <asm/asi.h>
 #include <asm/pgtable.h>
@@ -559,7 +560,7 @@ asmlinkage void do_ptrace(struct pt_regs *regs)
 		addr = 1;
 
 	case PTRACE_CONT: { /* restart after signal. */
-		if (data > _NSIG) {
+		if (!valid_signal(data)) {
 			pt_error_return(regs, EIO);
 			goto out_tsk;
 		}
diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
index 3a99ee6d94eb..e50e60ff5d27 100644
--- a/arch/um/kernel/ptrace.c
+++ b/arch/um/kernel/ptrace.c
@@ -143,7 +143,7 @@ long sys_ptrace(long request, long pid, long addr, long data)
 	case PTRACE_SYSCALL: /* continue and stop at next (return from) syscall */
 	case PTRACE_CONT: { /* restart after signal. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 
 		child->ptrace &= ~PT_DTRACE;
@@ -179,7 +179,7 @@ long sys_ptrace(long request, long pid, long addr, long data)
 
 	case PTRACE_SINGLESTEP: {  /* set the trap flag. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
 		child->ptrace |= PT_DTRACE;
diff --git a/arch/v850/kernel/ptrace.c b/arch/v850/kernel/ptrace.c
index 8fa780757dcd..4726b87f5e5a 100644
--- a/arch/v850/kernel/ptrace.c
+++ b/arch/v850/kernel/ptrace.c
@@ -23,6 +23,7 @@
 #include <linux/sched.h>
 #include <linux/smp_lock.h>
 #include <linux/ptrace.h>
+#include <linux/signal.h>
 
 #include <asm/errno.h>
 #include <asm/ptrace.h>
@@ -208,7 +209,7 @@ int sys_ptrace(long request, long pid, long addr, long data)
 	/* Execute a single instruction. */
 	case PTRACE_SINGLESTEP:
 		rval = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 
 		/* Turn CHILD's single-step flag on or off.  */
diff --git a/arch/x86_64/kernel/ptrace.c b/arch/x86_64/kernel/ptrace.c
index c7011675007d..c64b9c97c745 100644
--- a/arch/x86_64/kernel/ptrace.c
+++ b/arch/x86_64/kernel/ptrace.c
@@ -18,6 +18,7 @@
 #include <linux/security.h>
 #include <linux/audit.h>
 #include <linux/seccomp.h>
+#include <linux/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -467,7 +468,7 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data
 	case PTRACE_CONT:    /* restart after signal. */
 
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		if (request == PTRACE_SYSCALL)
 			set_tsk_thread_flag(child,TIF_SYSCALL_TRACE);
@@ -529,7 +530,7 @@ asmlinkage long sys_ptrace(long request, long pid, unsigned long addr, long data
 
 	case PTRACE_SINGLESTEP:    /* set the trap flag. */
 		ret = -EIO;
-		if ((unsigned long) data > _NSIG)
+		if (!valid_signal(data))
 			break;
 		clear_tsk_thread_flag(child,TIF_SYSCALL_TRACE);
 		set_singlestep(child);
diff --git a/drivers/char/vt_ioctl.c b/drivers/char/vt_ioctl.c
index 5d386f4bea49..8971484b956b 100644
--- a/drivers/char/vt_ioctl.c
+++ b/drivers/char/vt_ioctl.c
@@ -24,6 +24,7 @@
 #include <linux/major.h>
 #include <linux/fs.h>
 #include <linux/console.h>
+#include <linux/signal.h>
 
 #include <asm/io.h>
 #include <asm/uaccess.h>
@@ -641,7 +642,7 @@ int vt_ioctl(struct tty_struct *tty, struct file * file,
 		extern int spawnpid, spawnsig;
 		if (!perm || !capable(CAP_KILL))
 		  return -EPERM;
-		if (arg < 1 || arg > _NSIG || arg == SIGKILL)
+		if (!valid_signal(arg) || arg < 1 || arg == SIGKILL)
 		  return -EINVAL;
 		spawnpid = current->pid;
 		spawnsig = arg;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 3e7ab16ed154..286a9f8f3d49 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -15,6 +15,7 @@
 #include <linux/module.h>
 #include <linux/security.h>
 #include <linux/ptrace.h>
+#include <linux/signal.h>
 
 #include <asm/poll.h>
 #include <asm/siginfo.h>
@@ -308,7 +309,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		break;
 	case F_SETSIG:
 		/* arg == 0 restores default behaviour. */
-		if (arg < 0 || arg > _NSIG) {
+		if (!valid_signal(arg)) {
 			break;
 		}
 		err = 0;
diff --git a/ipc/mqueue.c b/ipc/mqueue.c
index 33f71520b89c..0acf245f441d 100644
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -23,6 +23,7 @@
 #include <linux/skbuff.h>
 #include <linux/netlink.h>
 #include <linux/syscalls.h>
+#include <linux/signal.h>
 #include <net/sock.h>
 #include "util.h"
 
@@ -976,8 +977,7 @@ asmlinkage long sys_mq_notify(mqd_t mqdes,
 			     notification.sigev_notify != SIGEV_THREAD))
 			return -EINVAL;
 		if (notification.sigev_notify == SIGEV_SIGNAL &&
-			(notification.sigev_signo < 0 ||
-			 notification.sigev_signo > _NSIG)) {
+			!valid_signal(notification.sigev_signo)) {
 			return -EINVAL;
 		}
 		if (notification.sigev_notify == SIGEV_THREAD) {
diff --git a/kernel/exit.c b/kernel/exit.c
index 93851bcd9584..eb8da36e13df 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -27,6 +27,7 @@
 #include <linux/mempolicy.h>
 #include <linux/cpuset.h>
 #include <linux/syscalls.h>
+#include <linux/signal.h>
 
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -277,7 +278,7 @@ void set_special_pids(pid_t session, pid_t pgrp)
  */
 int allow_signal(int sig)
 {
-	if (sig < 1 || sig > _NSIG)
+	if (!valid_signal(sig) || sig < 1)
 		return -EINVAL;
 
 	spin_lock_irq(&current->sighand->siglock);
@@ -298,7 +299,7 @@ EXPORT_SYMBOL(allow_signal);
 
 int disallow_signal(int sig)
 {
-	if (sig < 1 || sig > _NSIG)
+	if (!valid_signal(sig) || sig < 1)
 		return -EINVAL;
 
 	spin_lock_irq(&current->sighand->siglock);
diff --git a/kernel/futex.c b/kernel/futex.c
index 7b54a672d0ad..c7130f86106c 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -39,6 +39,7 @@
 #include <linux/mount.h>
 #include <linux/pagemap.h>
 #include <linux/syscalls.h>
+#include <linux/signal.h>
 
 #define FUTEX_HASHBITS (CONFIG_BASE_SMALL ? 4 : 8)
 
@@ -654,7 +655,7 @@ static int futex_fd(unsigned long uaddr, int signal)
 	int ret, err;
 
 	ret = -EINVAL;
-	if (signal < 0 || signal > _NSIG)
+	if (!valid_signal(signal))
 		goto out;
 
 	ret = get_unused_fd();
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index f5cc1cec0fb4..8dcb8f6288bc 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -16,6 +16,7 @@
 #include <linux/smp_lock.h>
 #include <linux/ptrace.h>
 #include <linux/security.h>
+#include <linux/signal.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -166,7 +167,7 @@ bad:
 
 int ptrace_detach(struct task_struct *child, unsigned int data)
 {
-	if ((unsigned long) data > _NSIG)
+	if (!valid_signal(data))
 		return	-EIO;
 
 	/* Architecture-specific hardware disable .. */
diff --git a/kernel/signal.c b/kernel/signal.c
index e6567d7f2b62..8f3debc77c5b 100644
--- a/kernel/signal.c
+++ b/kernel/signal.c
@@ -23,6 +23,7 @@
 #include <linux/syscalls.h>
 #include <linux/ptrace.h>
 #include <linux/posix-timers.h>
+#include <linux/signal.h>
 #include <asm/param.h>
 #include <asm/uaccess.h>
 #include <asm/unistd.h>
@@ -646,7 +647,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
 				 struct task_struct *t)
 {
 	int error = -EINVAL;
-	if (sig < 0 || sig > _NSIG)
+	if (!valid_signal(sig))
 		return error;
 	error = -EPERM;
 	if ((!info || ((unsigned long)info != 1 &&
@@ -1245,7 +1246,7 @@ send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 	 * Make sure legacy kernel users don't send in bad values
 	 * (normal paths check this in check_kill_permission).
 	 */
-	if (sig < 0 || sig > _NSIG)
+	if (!valid_signal(sig))
 		return -EINVAL;
 
 	/*
@@ -1520,7 +1521,7 @@ void do_notify_parent(struct task_struct *tsk, int sig)
 		if (psig->action[SIGCHLD-1].sa.sa_handler == SIG_IGN)
 			sig = 0;
 	}
-	if (sig > 0 && sig <= _NSIG)
+	if (valid_signal(sig) && sig > 0)
 		__group_send_sig_info(sig, &info, tsk->parent);
 	__wake_up_parent(tsk, tsk->parent);
 	spin_unlock_irqrestore(&psig->siglock, flags);
@@ -2364,7 +2365,7 @@ do_sigaction(int sig, const struct k_sigaction *act, struct k_sigaction *oact)
 {
 	struct k_sigaction *k;
 
-	if (sig < 1 || sig > _NSIG || (act && sig_kernel_only(sig)))
+	if (!valid_signal(sig) || sig < 1 || (act && sig_kernel_only(sig)))
 		return -EINVAL;
 
 	k = &current->sighand->action[sig-1];
diff --git a/kernel/sys.c b/kernel/sys.c
index 7f43d6e62c7a..f64e97cabe25 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -25,6 +25,7 @@
 #include <linux/dcookies.h>
 #include <linux/suspend.h>
 #include <linux/tty.h>
+#include <linux/signal.h>
 
 #include <linux/compat.h>
 #include <linux/syscalls.h>
@@ -1637,7 +1638,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3,
 	switch (option) {
 		case PR_SET_PDEATHSIG:
 			sig = arg2;
-			if (sig < 0 || sig > _NSIG) {
+			if (!valid_signal(sig)) {
 				error = -EINVAL;
 				break;
 			}
-- 
cgit v1.2.3


From 212079cf4ee99e492a57b817e796825d423a30bb Mon Sep 17 00:00:00 2001
From: Ken Chen <kenneth.w.chen@intel.com>
Date: Sun, 1 May 2005 08:59:15 -0700
Subject: [PATCH] aio: remove superfluous kiocb member initialization

This patch removes superfluous kiocb member initialization in the AIO
allocation and deallocation path.  For example, in really_put_req(),
right before kiocb is returned to slab, 5 variables are reset to NULL.
The same variables will be initialized at the kiocb allocation time,
so why bother reset them knowing that they will be set to valid data
at alloc time?  Another example: ki_retry is initialized in __aio_get_req,
but is initialized again in io_submit_one.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Suparna Bhattacharya <suparna@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/aio.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index a82214d2e46d..9f807a541fbe 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -405,7 +405,6 @@ static struct kiocb fastcall *__aio_get_req(struct kioctx *ctx)
 	req->ki_ctx = ctx;
 	req->ki_cancel = NULL;
 	req->ki_retry = NULL;
-	req->ki_obj.user = NULL;
 	req->ki_dtor = NULL;
 	req->private = NULL;
 	INIT_LIST_HEAD(&req->ki_run_list);
@@ -451,11 +450,6 @@ static inline void really_put_req(struct kioctx *ctx, struct kiocb *req)
 {
 	if (req->ki_dtor)
 		req->ki_dtor(req);
-	req->ki_ctx = NULL;
-	req->ki_filp = NULL;
-	req->ki_obj.user = NULL;
-	req->ki_dtor = NULL;
-	req->private = NULL;
 	kmem_cache_free(kiocb_cachep, req);
 	ctx->reqs_active--;
 
@@ -1515,8 +1509,7 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	}
 
 	req->ki_filp = file;
-	iocb->aio_key = req->ki_key;
-	ret = put_user(iocb->aio_key, &user_iocb->aio_key);
+	ret = put_user(req->ki_key, &user_iocb->aio_key);
 	if (unlikely(ret)) {
 		dprintk("EFAULT: aio_key\n");
 		goto out_put_req;
@@ -1531,8 +1524,6 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	req->ki_opcode = iocb->aio_lio_opcode;
 	init_waitqueue_func_entry(&req->ki_wait, aio_wake_function);
 	INIT_LIST_HEAD(&req->ki_wait.task_list);
-	req->ki_run_list.next = req->ki_run_list.prev = NULL;
-	req->ki_retry = NULL;
 	req->ki_retried = 0;
 	req->ki_kicked = 0;
 	req->ki_queued = 0;
-- 
cgit v1.2.3


From 4bf69b2a06090c01c27f25ea5cd1440f7bf9256f Mon Sep 17 00:00:00 2001
From: Ken Chen <kenneth.w.chen@intel.com>
Date: Sun, 1 May 2005 08:59:15 -0700
Subject: [PATCH] aio: ring wrapping simplification

Since the tail pointer in aio_ring structure never wrap ring size more than
once, so a simple compare is sufficient to wrap the index around.  This avoid
a more expensive mod operation.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Suparna Bhattacharya <suparna@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/aio.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 9f807a541fbe..40517f35daae 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -978,7 +978,8 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2)
 
 	tail = info->tail;
 	event = aio_ring_event(info, tail, KM_IRQ0);
-	tail = (tail + 1) % info->nr;
+	if (++tail >= info->nr)
+		tail = 0;
 
 	event->obj = (u64)(unsigned long)iocb->ki_obj.user;
 	event->data = iocb->ki_user_data;
-- 
cgit v1.2.3


From 644d3a088a3b862ed0b57c286cf58a6bd338ce08 Mon Sep 17 00:00:00 2001
From: Ken Chen <kenneth.w.chen@intel.com>
Date: Sun, 1 May 2005 08:59:15 -0700
Subject: [PATCH] aio: clean up debug code

Clean up code that was previously used for debug purpose.  Remove aio_run,
aio_wakeups, iocb->ki_queued and iocb->ki_kicked.  Also clean up unused
variable count in __aio_run_iocbs() and debug code in read_events().

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Suparna Bhattacharya <suparna@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/aio.c | 33 +++++----------------------------
 1 file changed, 5 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 40517f35daae..674bb47fed29 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -40,9 +40,6 @@
 #define dprintk(x...)	do { ; } while (0)
 #endif
 
-static long aio_run = 0; /* for testing only */
-static long aio_wakeups = 0; /* for testing only */
-
 /*------ sysctl variables----*/
 atomic_t aio_nr = ATOMIC_INIT(0);	/* current system wide number of aio requests */
 unsigned aio_max_nr = 0x10000;	/* system wide maximum number of aio requests */
@@ -617,7 +614,6 @@ static inline int __queue_kicked_iocb(struct kiocb *iocb)
 	if (list_empty(&iocb->ki_run_list)) {
 		list_add_tail(&iocb->ki_run_list,
 			&ctx->run_list);
-		iocb->ki_queued++;
 		return 1;
 	}
 	return 0;
@@ -658,10 +654,8 @@ static ssize_t aio_run_iocb(struct kiocb *iocb)
 	}
 
 	if (!(iocb->ki_retried & 0xff)) {
-		pr_debug("%ld retry: %d of %d (kick %ld, Q %ld run %ld, wake %ld)\n",
-			iocb->ki_retried,
-			iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes,
-			iocb->ki_kicked, iocb->ki_queued, aio_run, aio_wakeups);
+		pr_debug("%ld retry: %d of %d\n", iocb->ki_retried,
+			iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes);
 	}
 
 	if (!(retry = iocb->ki_retry)) {
@@ -768,7 +762,6 @@ out:
 static int __aio_run_iocbs(struct kioctx *ctx)
 {
 	struct kiocb *iocb;
-	int count = 0;
 	LIST_HEAD(run_list);
 
 	list_splice_init(&ctx->run_list, &run_list);
@@ -783,9 +776,7 @@ static int __aio_run_iocbs(struct kioctx *ctx)
 		aio_run_iocb(iocb);
 		if (__aio_put_req(ctx, iocb))  /* drop extra ref */
 			put_ioctx(ctx);
-		count++;
  	}
-	aio_run++;
 	if (!list_empty(&ctx->run_list))
 		return 1;
 	return 0;
@@ -884,10 +875,8 @@ static void queue_kicked_iocb(struct kiocb *iocb)
 	spin_lock_irqsave(&ctx->ctx_lock, flags);
 	run = __queue_kicked_iocb(iocb);
 	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
-	if (run) {
+	if (run)
 		aio_queue_work(ctx);
-		aio_wakeups++;
-	}
 }
 
 /*
@@ -907,7 +896,6 @@ void fastcall kick_iocb(struct kiocb *iocb)
 		return;
 	}
 
-	iocb->ki_kicked++;
 	/* If its already kicked we shouldn't queue it again */
 	if (!kiocbTryKick(iocb)) {
 		queue_kicked_iocb(iocb);
@@ -1003,10 +991,8 @@ int fastcall aio_complete(struct kiocb *iocb, long res, long res2)
 
 	pr_debug("added to ring %p at [%lu]\n", iocb, tail);
 
-	pr_debug("%ld retries: %d of %d (kicked %ld, Q %ld run %ld wake %ld)\n",
-		iocb->ki_retried,
-		iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes,
-		iocb->ki_kicked, iocb->ki_queued, aio_run, aio_wakeups);
+	pr_debug("%ld retries: %d of %d\n", iocb->ki_retried,
+		iocb->ki_nbytes - iocb->ki_left, iocb->ki_nbytes);
 put_rq:
 	/* everything turned out well, dispose of the aiocb. */
 	ret = __aio_put_req(ctx, iocb);
@@ -1114,7 +1100,6 @@ static int read_events(struct kioctx *ctx,
 	int			i = 0;
 	struct io_event		ent;
 	struct aio_timeout	to;
-	int 			event_loop = 0; /* testing only */
 	int			retry = 0;
 
 	/* needed to zero any padding within an entry (there shouldn't be 
@@ -1181,7 +1166,6 @@ retry:
 			if (to.timed_out)	/* Only check after read evt */
 				break;
 			schedule();
-			event_loop++;
 			if (signal_pending(tsk)) {
 				ret = -EINTR;
 				break;
@@ -1209,9 +1193,6 @@ retry:
 	if (timeout)
 		clear_timeout(&to);
 out:
-	pr_debug("event loop executed %d times\n", event_loop);
-	pr_debug("aio_run %ld\n", aio_run);
-	pr_debug("aio_wakeups %ld\n", aio_wakeups);
 	return i ? i : ret;
 }
 
@@ -1526,10 +1507,6 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	init_waitqueue_func_entry(&req->ki_wait, aio_wake_function);
 	INIT_LIST_HEAD(&req->ki_wait.task_list);
 	req->ki_retried = 0;
-	req->ki_kicked = 0;
-	req->ki_queued = 0;
-	aio_run = 0;
-	aio_wakeups = 0;
 
 	ret = aio_setup_iocb(req);
 
-- 
cgit v1.2.3


From 954d3e95369cf73b4bc1e570729f68264a0e6fe0 Mon Sep 17 00:00:00 2001
From: Ken Chen <kenneth.w.chen@intel.com>
Date: Sun, 1 May 2005 08:59:16 -0700
Subject: [PATCH] aio: optimize io_submit_one()

This patch optimizes io_submit_one to call aio_run_iocb() directly if
ctx->run_list is empty.  When the list is empty, the operation of adding to
the list, then call to __aio_run_iocbs() is unnecessary because these
operations are done in one atomic step.  ctx->run_list always has only one
element in this case.  This optimization speeds up industry standard db
transaction processing benchmark by 0.2%.

Signed-off-by: Ken Chen <kenneth.w.chen@intel.com>
Cc: Benjamin LaHaise <bcrl@kvack.org>
Cc: Suparna Bhattacharya <suparna@in.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/aio.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/aio.c b/fs/aio.c
index 674bb47fed29..7afa222f6802 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1514,10 +1514,14 @@ int fastcall io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 		goto out_put_req;
 
 	spin_lock_irq(&ctx->ctx_lock);
-	list_add_tail(&req->ki_run_list, &ctx->run_list);
-	/* drain the run list */
-	while (__aio_run_iocbs(ctx))
-		;
+	if (likely(list_empty(&ctx->run_list))) {
+		aio_run_iocb(req);
+	} else {
+		list_add_tail(&req->ki_run_list, &ctx->run_list);
+		/* drain the run list */
+		while (__aio_run_iocbs(ctx))
+			;
+	}
 	spin_unlock_irq(&ctx->ctx_lock);
 	aio_put_req(req);	/* drop extra ref to req */
 	return 0;
-- 
cgit v1.2.3


From 945b092011c6af71a0107be96e119c8c08776f3f Mon Sep 17 00:00:00 2001
From: Colin Leroy <colin@colino.net>
Date: Sun, 1 May 2005 08:59:16 -0700
Subject: [PATCH] hfs, hfsplus: don't leak s_fs_info and fix an oops

This patch fixes the leak of sb->s_fs_info in both the HFS and HFS+
modules.  In addition to this, it fixes an oops happening when trying to
mount a non-hfsplus filesystem using hfsplus.  This patch is from Roman
Zippel, based off patches sent by myself.

Signed-off-by: Colin Leroy <colin@colino.net>
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/hfs/mdb.c       | 5 +++++
 fs/hfs/super.c     | 8 +++-----
 fs/hfsplus/super.c | 6 +++++-
 3 files changed, 13 insertions(+), 6 deletions(-)

(limited to 'fs')

diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c
index 4efb640c4d0c..217e32f37e0b 100644
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -333,6 +333,8 @@ void hfs_mdb_close(struct super_block *sb)
  * Release the resources associated with the in-core MDB.  */
 void hfs_mdb_put(struct super_block *sb)
 {
+	if (!HFS_SB(sb))
+		return;
 	/* free the B-trees */
 	hfs_btree_close(HFS_SB(sb)->ext_tree);
 	hfs_btree_close(HFS_SB(sb)->cat_tree);
@@ -340,4 +342,7 @@ void hfs_mdb_put(struct super_block *sb)
 	/* free the buffers holding the primary and alternate MDBs */
 	brelse(HFS_SB(sb)->mdb_bh);
 	brelse(HFS_SB(sb)->alt_mdb_bh);
+
+	kfree(HFS_SB(sb));
+	sb->s_fs_info = NULL;
 }
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 1e2c193134cc..ab783f6afa3b 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -297,7 +297,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
 	res = -EINVAL;
 	if (!parse_options((char *)data, sbi)) {
 		hfs_warn("hfs_fs: unable to parse mount options.\n");
-		goto bail3;
+		goto bail;
 	}
 
 	sb->s_op = &hfs_super_operations;
@@ -310,7 +310,7 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
 			hfs_warn("VFS: Can't find a HFS filesystem on dev %s.\n",
 				hfs_mdb_name(sb));
 		res = -EINVAL;
-		goto bail2;
+		goto bail;
 	}
 
 	/* try to get the root inode */
@@ -340,10 +340,8 @@ bail_iput:
 	iput(root_inode);
 bail_no_root:
 	hfs_warn("hfs_fs: get root inode failed.\n");
+bail:
 	hfs_mdb_put(sb);
-bail2:
-bail3:
-	kfree(sbi);
 	return res;
 }
 
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 5f8044664a3c..d55ad67b8e42 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -208,7 +208,9 @@ static void hfsplus_write_super(struct super_block *sb)
 static void hfsplus_put_super(struct super_block *sb)
 {
 	dprint(DBG_SUPER, "hfsplus_put_super\n");
-	if (!(sb->s_flags & MS_RDONLY)) {
+	if (!sb->s_fs_info)
+		return;
+	if (!(sb->s_flags & MS_RDONLY) && HFSPLUS_SB(sb).s_vhdr) {
 		struct hfsplus_vh *vhdr = HFSPLUS_SB(sb).s_vhdr;
 
 		vhdr->modify_date = hfsp_now2mt();
@@ -226,6 +228,8 @@ static void hfsplus_put_super(struct super_block *sb)
 	brelse(HFSPLUS_SB(sb).s_vhbh);
 	if (HFSPLUS_SB(sb).nls)
 		unload_nls(HFSPLUS_SB(sb).nls);
+	kfree(sb->s_fs_info);
+	sb->s_fs_info = NULL;
 }
 
 static int hfsplus_statfs(struct super_block *sb, struct kstatfs *buf)
-- 
cgit v1.2.3


From 4dcd00b18118d174c4b8d838c11f437f0af3c20c Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Sun, 1 May 2005 08:59:16 -0700
Subject: [PATCH] autofs4: wait order fix

It's possible for an event wait request to arive before the event
requestor.  If this happens the daemon never gets notified and autofs
hangs.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/autofs_i.h |  1 +
 fs/autofs4/waitq.c    | 22 ++++++++++++----------
 2 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index f5a52c871726..978987735252 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -84,6 +84,7 @@ struct autofs_wait_queue {
 	char *name;
 	/* This is for status reporting upon return */
 	int status;
+	atomic_t notified;
 	atomic_t wait_ctr;
 };
 
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 1ab24a662e09..5a40d36e5a51 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -210,17 +210,8 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 		wq->len = len;
 		wq->status = -EINTR; /* Status return if interrupted */
 		atomic_set(&wq->wait_ctr, 2);
+		atomic_set(&wq->notified, 1);
 		up(&sbi->wq_sem);
-
-		DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d",
-			(unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
-		/* autofs4_notify_daemon() may block */
-		if (notify != NFY_NONE) {
-			autofs4_notify_daemon(sbi,wq, 
-					notify == NFY_MOUNT ?
-						  autofs_ptype_missing :
-						  autofs_ptype_expire_multi);
-		}
 	} else {
 		atomic_inc(&wq->wait_ctr);
 		up(&sbi->wq_sem);
@@ -229,6 +220,17 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
 			(unsigned long) wq->wait_queue_token, wq->len, wq->name, notify);
 	}
 
+	if (notify != NFY_NONE && atomic_dec_and_test(&wq->notified)) {
+		int type = (notify == NFY_MOUNT ?
+			autofs_ptype_missing : autofs_ptype_expire_multi);
+
+		DPRINTK(("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
+			(unsigned long) wq->wait_queue_token, wq->len, wq->name, notify));
+
+		/* autofs4_notify_daemon() may block */
+		autofs4_notify_daemon(sbi, wq, type);
+	}
+
 	/* wq->name is NULL if and only if the lock is already released */
 
 	if ( sbi->catatonic ) {
-- 
cgit v1.2.3


From 3a9720ce73c9247e5262922d65e90444ea75eb50 Mon Sep 17 00:00:00 2001
From: Ian Kent <raven@themaw.net>
Date: Sun, 1 May 2005 08:59:17 -0700
Subject: [PATCH] autofs4: tree race fix

For tree mount maps, a call to chdir or chroot, to a directory above the
moint point directories at a certain time during the expire results in the
expire incorrectly thinking the tree is not busy.  This patch adds a check
to see if the filesystem above the tree mount points is busy and also locks
the filesystem during the tree mount expire to prevent the race.

Signed-off-by: Ian Kent <raven@themaw.net>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/autofs4/autofs_i.h | 14 ++++++++++++--
 fs/autofs4/expire.c   | 16 ++++++++++++++--
 fs/autofs4/inode.c    |  1 +
 3 files changed, 27 insertions(+), 4 deletions(-)

(limited to 'fs')

diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index 978987735252..c7b2b8890188 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -102,6 +102,7 @@ struct autofs_sb_info {
 	int needs_reghost;
 	struct super_block *sb;
 	struct semaphore wq_sem;
+	spinlock_t fs_lock;
 	struct autofs_wait_queue *queues; /* Wait queue pointer */
 };
 
@@ -127,9 +128,18 @@ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
 static inline int autofs4_ispending(struct dentry *dentry)
 {
 	struct autofs_info *inf = autofs4_dentry_ino(dentry);
+	int pending = 0;
 
-	return (dentry->d_flags & DCACHE_AUTOFS_PENDING) ||
-		(inf != NULL && inf->flags & AUTOFS_INF_EXPIRING);
+	if (dentry->d_flags & DCACHE_AUTOFS_PENDING)
+		return 1;
+
+	if (inf) {
+		spin_lock(&inf->sbi->fs_lock);
+		pending = inf->flags & AUTOFS_INF_EXPIRING;
+		spin_unlock(&inf->sbi->fs_lock);
+	}
+
+	return pending;
 }
 
 static inline void autofs4_copy_atime(struct file *src, struct file *dst)
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 31540a6404d9..500425e24fba 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -99,6 +99,10 @@ static int autofs4_check_tree(struct vfsmount *mnt,
 	if (!autofs4_can_expire(top, timeout, do_now))
 		return 0;
 
+	/* Is someone visiting anywhere in the tree ? */
+	if (may_umount_tree(mnt))
+		return 0;
+
 	spin_lock(&dcache_lock);
 repeat:
 	next = this_parent->d_subdirs.next;
@@ -270,10 +274,18 @@ static struct dentry *autofs4_expire(struct super_block *sb,
 
 		/* Case 2: tree mount, expire iff entire tree is not busy */
 		if (!exp_leaves) {
+			/* Lock the tree as we must expire as a whole */
+			spin_lock(&sbi->fs_lock);
 			if (autofs4_check_tree(mnt, dentry, timeout, do_now)) {
-			expired = dentry;
-			break;
+				struct autofs_info *inf = autofs4_dentry_ino(dentry);
+
+				/* Set this flag early to catch sys_chdir and the like */
+				inf->flags |= AUTOFS_INF_EXPIRING;
+				spin_unlock(&sbi->fs_lock);
+				expired = dentry;
+				break;
 			}
+			spin_unlock(&sbi->fs_lock);
 		/* Case 3: direct mount, expire individual leaves */
 		} else {
 			expired = autofs4_check_leaves(mnt, dentry, timeout, do_now);
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index a52560746628..4bb14cc68040 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -206,6 +206,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
 	sbi->version = 0;
 	sbi->sub_version = 0;
 	init_MUTEX(&sbi->wq_sem);
+	spin_lock_init(&sbi->fs_lock);
 	sbi->queues = NULL;
 	s->s_blocksize = 1024;
 	s->s_blocksize_bits = 10;
-- 
cgit v1.2.3


From 6a3a16f2ef6f335286e2b2bf8284b0ab4ff38ec0 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@www.linux.org.uk>
Date: Sun, 1 May 2005 08:59:17 -0700
Subject: [PATCH] reiserfs endianness: clone struct reiserfs_key

struct reiserfs_key cloned; (currently) identical struct in_core_key added.
Places that expect host-endian data in reiserfs_key switched to in_core_key.
Basically, we get annotation of reiserfs_key users and keep the resulting tree
obviously equivalent to original.

Signed-off-by: Al Viro <viro@parcelfarce.linux.theplanet.co.uk>
Cc: <reiserfs-dev@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/bitmap.c        |  4 ++--
 fs/reiserfs/stree.c         |  1 +
 fs/reiserfs/super.c         |  4 ++--
 include/linux/reiserfs_fs.h | 32 +++++++++++++++++++++++++++++---
 4 files changed, 34 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index a4e2ed544bbe..f4f16fada14c 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -736,7 +736,7 @@ static inline int this_blocknr_allocation_would_make_it_a_large_file(reiserfs_bl
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
 static inline void displace_new_packing_locality (reiserfs_blocknr_hint_t *hint)
 {
-    struct reiserfs_key * key = &hint->key;
+    struct in_core_key * key = &hint->key;
 
     hint->th->displace_new_blocks = 0;
     hint->search_start = hint->beg + keyed_hash((char*)(&key->k_objectid),4) % (hint->end - hint->beg);
@@ -777,7 +777,7 @@ static inline int old_way (reiserfs_blocknr_hint_t * hint)
 
 static inline void hundredth_slices (reiserfs_blocknr_hint_t * hint)
 {
-    struct reiserfs_key * key = &hint->key;
+    struct in_core_key * key = &hint->key;
     b_blocknr_t slice_start;
 
     slice_start = (keyed_hash((char*)(&key->k_dir_id),4) % 100) * (hint->end / 100);
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 73ec5212178b..1d380a5da39c 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -229,6 +229,7 @@ const struct reiserfs_key  MIN_KEY = {0, 0, {{0, 0},}};
 
 /* Maximal possible key. It is never in the tree. */
 const struct reiserfs_key  MAX_KEY = {0xffffffff, 0xffffffff, {{0xffffffff, 0xffffffff},}};
+const struct in_core_key  MAX_IN_CORE_KEY = {0xffffffff, 0xffffffff, {{0xffffffff, 0xffffffff},}};
 
 
 /* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index bc5e8893b5d5..d6d1d7e2801f 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -110,7 +110,7 @@ static void reiserfs_unlockfs(struct super_block *s) {
   reiserfs_allow_writes(s) ;
 }
 
-extern const struct reiserfs_key  MAX_KEY;
+extern const struct in_core_key  MAX_IN_CORE_KEY;
 
 
 /* this is used to delete "save link" when there are no items of a
@@ -164,7 +164,7 @@ static int finish_unfinished (struct super_block * s)
  
     /* compose key to look for "save" links */
     max_cpu_key.version = KEY_FORMAT_3_5;
-    max_cpu_key.on_disk_key = MAX_KEY;
+    max_cpu_key.on_disk_key = MAX_IN_CORE_KEY;
     max_cpu_key.key_length = 3;
 
 #ifdef CONFIG_QUOTA
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index bccff8b17dc4..d0867873a1b5 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -433,6 +433,23 @@ static inline void set_offset_v2_k_offset( struct offset_v2 *v2, loff_t offset )
 # define set_offset_v2_k_offset(v2,val) (offset_v2_k_offset(v2) = (val))
 #endif
 
+struct in_core_offset_v1 {
+    __u32 k_offset;
+    __u32 k_uniqueness;
+} __attribute__ ((__packed__));
+
+struct in_core_offset_v2 {
+#ifdef __LITTLE_ENDIAN
+	    /* little endian version */
+	    __u64 k_offset:60;
+	    __u64 k_type: 4;
+#else
+	    /* big endian version */
+	    __u64 k_type: 4;
+	    __u64 k_offset:60;
+#endif
+} __attribute__ ((__packed__));
+
 /* Key of an item determines its location in the S+tree, and
    is composed of 4 components */
 struct reiserfs_key {
@@ -445,9 +462,18 @@ struct reiserfs_key {
     } __attribute__ ((__packed__)) u;
 } __attribute__ ((__packed__));
 
+struct in_core_key {
+    __u32 k_dir_id;    /* packing locality: by default parent
+			  directory object id */
+    __u32 k_objectid;  /* object identifier */
+    union {
+	struct in_core_offset_v1 k_offset_v1;
+	struct in_core_offset_v2 k_offset_v2;
+    } __attribute__ ((__packed__)) u;
+} __attribute__ ((__packed__));
 
 struct cpu_key {
-    struct reiserfs_key on_disk_key;
+    struct in_core_key on_disk_key;
     int version;
     int key_length; /* 3 in all cases but direct2indirect and
 		       indirect2direct conversion */
@@ -1476,7 +1502,7 @@ struct tree_balance
   int fs_gen;                  /* saved value of `reiserfs_generation' counter
 			          see FILESYSTEM_CHANGED() macro in reiserfs_fs.h */
 #ifdef DISPLACE_NEW_PACKING_LOCALITIES
-  struct reiserfs_key  key;	      /* key pointer, to pass to block allocator or
+  struct in_core_key  key;	      /* key pointer, to pass to block allocator or
 				 another low-level subsystem */
 #endif
 } ;
@@ -2117,7 +2143,7 @@ struct buffer_head * get_FEB (struct tree_balance *);
  struct __reiserfs_blocknr_hint {
      struct inode * inode;		/* inode passed to allocator, if we allocate unf. nodes */
      long block;			/* file offset, in blocks */
-     struct reiserfs_key key;
+     struct in_core_key key;
      struct path * path;		/* search path, used by allocator to deternine search_start by
 					 * various ways */
      struct reiserfs_transaction_handle * th; /* transaction handle is needed to log super blocks and
-- 
cgit v1.2.3


From 3e8962be915bacc1d70e4849a075041838d60a3f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@www.linux.org.uk>
Date: Sun, 1 May 2005 08:59:18 -0700
Subject: [PATCH] reiserfs endianness: annotate little-endian objects

little-endian objects annotated as such; again, obviously no changes of
resulting code, we only replace __u16 with __le16, etc.  in relevant places.

Signed-off-by: Al Viro <viro@parcelfarce.linux.theplanet.co.uk>
Cc: <reiserfs-dev@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/bitmap.c           |   7 +-
 fs/reiserfs/dir.c              |   8 +-
 fs/reiserfs/file.c             |   4 +-
 fs/reiserfs/inode.c            |  10 +--
 fs/reiserfs/item_ops.c         |   5 +-
 fs/reiserfs/objectid.c         |  18 ++---
 fs/reiserfs/procfs.c           |   4 +-
 fs/reiserfs/stree.c            |  22 ++++--
 fs/reiserfs/super.c            |   6 +-
 include/linux/reiserfs_acl.h   |  12 +--
 include/linux/reiserfs_fs.h    | 168 ++++++++++++++++++++---------------------
 include/linux/reiserfs_xattr.h |   4 +-
 12 files changed, 138 insertions(+), 130 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c
index f4f16fada14c..49c479c9454a 100644
--- a/fs/reiserfs/bitmap.c
+++ b/fs/reiserfs/bitmap.c
@@ -260,8 +260,9 @@ static inline int block_group_used(struct super_block *s, u32 id) {
 /*
  * the packing is returned in disk byte order
  */
-u32 reiserfs_choose_packing(struct inode *dir) {
-    u32 packing;
+__le32 reiserfs_choose_packing(struct inode *dir)
+{
+    __le32 packing;
     if (TEST_OPTION(packing_groups, dir->i_sb)) {
 	u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
 	/*
@@ -655,7 +656,7 @@ static int get_left_neighbor(reiserfs_blocknr_hint_t *hint)
     struct buffer_head * bh;
     struct item_head * ih;
     int pos_in_item;
-    __u32 * item;
+    __le32 * item;
     int ret = 0;
 
     if (!hint->path)		/* reiserfs code can call this function w/o pointer to path
diff --git a/fs/reiserfs/dir.c b/fs/reiserfs/dir.c
index d1514a9b0514..fbde4b01a325 100644
--- a/fs/reiserfs/dir.c
+++ b/fs/reiserfs/dir.c
@@ -209,8 +209,8 @@ static int reiserfs_readdir (struct file * filp, void * dirent, filldir_t filldi
 /* compose directory item containing "." and ".." entries (entries are
    not aligned to 4 byte boundary) */
 /* the last four params are LE */
-void make_empty_dir_item_v1 (char * body, __u32 dirid, __u32 objid,
-			     __u32 par_dirid, __u32 par_objid)
+void make_empty_dir_item_v1 (char * body, __le32 dirid, __le32 objid,
+			     __le32 par_dirid, __le32 par_objid)
 {
     struct reiserfs_de_head * deh;
 
@@ -242,8 +242,8 @@ void make_empty_dir_item_v1 (char * body, __u32 dirid, __u32 objid,
 }
 
 /* compose directory item containing "." and ".." entries */
-void make_empty_dir_item (char * body, __u32 dirid, __u32 objid,
-			  __u32 par_dirid, __u32 par_objid)
+void make_empty_dir_item (char * body, __le32 dirid, __le32 objid,
+			  __le32 par_dirid, __le32 par_objid)
 {
     struct reiserfs_de_head * deh;
 
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index f6860e83521d..2230afff1870 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -166,7 +166,7 @@ static int reiserfs_allocate_blocks_for_region(
     struct cpu_key key; // cpu key of item that we are going to deal with
     struct item_head *ih; // pointer to item head that we are going to deal with
     struct buffer_head *bh; // Buffer head that contains items that we are going to deal with
-    __u32 * item; // pointer to item we are going to deal with
+    __le32 * item; // pointer to item we are going to deal with
     INITIALIZE_PATH(path); // path to item, that we are going to deal with.
     b_blocknr_t *allocated_blocks; // Pointer to a place where allocated blocknumbers would be stored.
     reiserfs_blocknr_hint_t hint; // hint structure for block allocator.
@@ -891,7 +891,7 @@ static int reiserfs_prepare_file_region_for_write(
     struct item_head *ih = NULL; // pointer to item head that we are going to deal with
     struct buffer_head *itembuf=NULL; // Buffer head that contains items that we are going to deal with
     INITIALIZE_PATH(path); // path to item, that we are going to deal with.
-    __u32 * item=NULL; // pointer to item we are going to deal with
+    __le32 * item=NULL; // pointer to item we are going to deal with
     int item_pos=-1; /* Position in indirect item */
 
 
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 7543031396f4..5fdb9f97b99e 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -173,7 +173,7 @@ static inline void fix_tail_page_for_writing(struct page *page) {
    done already or non-hole position has been found in the indirect item */
 static inline int allocation_needed (int retval, b_blocknr_t allocated, 
 				     struct item_head * ih,
-				     __u32 * item, int pos_in_item)
+				     __le32 * item, int pos_in_item)
 {
   if (allocated)
 	 return 0;
@@ -278,7 +278,7 @@ research:
     bh = get_last_bh (&path);
     ih = get_ih (&path);
     if (is_indirect_le_ih (ih)) {
-	__u32 * ind_item = (__u32 *)B_I_PITEM (bh, ih);
+	__le32 * ind_item = (__le32 *)B_I_PITEM (bh, ih);
 	
 	/* FIXME: here we could cache indirect item or part of it in
 	   the inode to avoid search_by_key in case of subsequent
@@ -581,7 +581,7 @@ int reiserfs_get_block (struct inode * inode, sector_t block,
     struct cpu_key key;
     struct buffer_head * bh, * unbh = NULL;
     struct item_head * ih, tmp_ih;
-    __u32 * item;
+    __le32 * item;
     int done;
     int fs_gen;
     struct reiserfs_transaction_handle *th = NULL;
@@ -746,7 +746,7 @@ start_trans:
     done = 0;
     do {
 	if (is_statdata_le_ih (ih)) {
-	    __u32 unp = 0;
+	    __le32 unp = 0;
 	    struct cpu_key tmp_key;
 
 	    /* indirect item has to be inserted */
@@ -2067,7 +2067,7 @@ static int map_block_for_writepage(struct inode *inode,
     struct item_head tmp_ih ;
     struct item_head *ih ;
     struct buffer_head *bh ;
-    __u32 *item ;
+    __le32 *item ;
     struct cpu_key key ;
     INITIALIZE_PATH(path) ;
     int pos_in_item ;
diff --git a/fs/reiserfs/item_ops.c b/fs/reiserfs/item_ops.c
index 9cf7c13b120d..0ce33db1acdf 100644
--- a/fs/reiserfs/item_ops.c
+++ b/fs/reiserfs/item_ops.c
@@ -296,10 +296,11 @@ static void print_sequence (__u32 start, int len)
 static void indirect_print_item (struct item_head * ih, char * item)
 {
     int j;
-    __u32 * unp, prev = INT_MAX;
+    __le32 * unp;
+    __u32 prev = INT_MAX;
     int num;
 
-    unp = (__u32 *)item;
+    unp = (__le32 *)item;
 
     if (ih_item_len(ih) % UNFM_P_SIZE)
 	reiserfs_warning (NULL, "indirect_print_item: invalid item len");
diff --git a/fs/reiserfs/objectid.c b/fs/reiserfs/objectid.c
index 0785c43a7486..bfe8e25ef293 100644
--- a/fs/reiserfs/objectid.c
+++ b/fs/reiserfs/objectid.c
@@ -11,13 +11,13 @@
 
 // find where objectid map starts
 #define objectid_map(s,rs) (old_format_only (s) ? \
-                         (__u32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\
-			 (__u32 *)((rs) + 1))
+                         (__le32 *)((struct reiserfs_super_block_v1 *)(rs) + 1) :\
+			 (__le32 *)((rs) + 1))
 
 
 #ifdef CONFIG_REISERFS_CHECK
 
-static void check_objectid_map (struct super_block * s, __u32 * map)
+static void check_objectid_map (struct super_block * s, __le32 * map)
 {
     if (le32_to_cpu (map[0]) != 1)
 	reiserfs_panic (s, "vs-15010: check_objectid_map: map corrupted: %lx",
@@ -27,7 +27,7 @@ static void check_objectid_map (struct super_block * s, __u32 * map)
 }
 
 #else
-static void check_objectid_map (struct super_block * s, __u32 * map)
+static void check_objectid_map (struct super_block * s, __le32 * map)
 {;}
 #endif
 
@@ -52,7 +52,7 @@ __u32 reiserfs_get_unused_objectid (struct reiserfs_transaction_handle *th)
 {
     struct super_block * s = th->t_super;
     struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s);
-    __u32 * map = objectid_map (s, rs);
+    __le32 * map = objectid_map (s, rs);
     __u32 unused_objectid;
 
     BUG_ON (!th->t_trans_id);
@@ -97,7 +97,7 @@ void reiserfs_release_objectid (struct reiserfs_transaction_handle *th,
 {
     struct super_block * s = th->t_super;
     struct reiserfs_super_block * rs = SB_DISK_SUPER_BLOCK (s);
-    __u32 * map = objectid_map (s, rs);
+    __le32 * map = objectid_map (s, rs);
     int i = 0;
 
     BUG_ON (!th->t_trans_id);
@@ -172,12 +172,12 @@ int reiserfs_convert_objectid_map_v1(struct super_block *s) {
     int new_size = (s->s_blocksize - SB_SIZE) / sizeof(__u32) / 2 * 2 ;
     int old_max = sb_oid_maxsize(disk_sb);
     struct reiserfs_super_block_v1 *disk_sb_v1 ;
-    __u32 *objectid_map, *new_objectid_map ;
+    __le32 *objectid_map, *new_objectid_map ;
     int i ;
 
     disk_sb_v1=(struct reiserfs_super_block_v1 *)(SB_BUFFER_WITH_SB(s)->b_data);
-    objectid_map = (__u32 *)(disk_sb_v1 + 1) ;
-    new_objectid_map = (__u32 *)(disk_sb + 1) ;
+    objectid_map = (__le32 *)(disk_sb_v1 + 1) ;
+    new_objectid_map = (__le32 *)(disk_sb + 1) ;
 
     if (cur_size > new_size) {
 	/* mark everyone used that was listed as free at the end of the objectid
diff --git a/fs/reiserfs/procfs.c b/fs/reiserfs/procfs.c
index f4ea81ae0e0f..e242ebc7f6f6 100644
--- a/fs/reiserfs/procfs.c
+++ b/fs/reiserfs/procfs.c
@@ -73,8 +73,8 @@ int reiserfs_global_version_in_proc( char *buffer, char **start, off_t offset,
 #define DFL( x ) D4C( rs -> s_v1.x )
 
 #define objectid_map( s, rs ) (old_format_only (s) ?				\
-                         (__u32 *)((struct reiserfs_super_block_v1 *)rs + 1) :	\
-			 (__u32 *)(rs + 1))
+                         (__le32 *)((struct reiserfs_super_block_v1 *)rs + 1) :	\
+			 (__le32 *)(rs + 1))
 #define MAP( i ) D4C( objectid_map( sb, rs )[ i ] )
 
 #define DJF( x ) le32_to_cpu( rs -> x )
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 1d380a5da39c..15fa4cbdce3e 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -87,10 +87,11 @@ inline void copy_item_head(struct item_head * p_v_to,
 inline int  comp_short_keys (const struct reiserfs_key * le_key,
 			     const struct cpu_key * cpu_key)
 {
-  __u32 * p_s_le_u32, * p_s_cpu_u32;
+  __le32 * p_s_le_u32;
+  __u32 * p_s_cpu_u32;
   int n_key_length = REISERFS_SHORT_KEY_LEN;
 
-  p_s_le_u32 = (__u32 *)le_key;
+  p_s_le_u32 = (__le32 *)le_key;
   p_s_cpu_u32 = (__u32 *)&cpu_key->on_disk_key;
   for( ; n_key_length--; ++p_s_le_u32, ++p_s_cpu_u32 ) {
     if ( le32_to_cpu (*p_s_le_u32) < *p_s_cpu_u32 )
@@ -228,7 +229,12 @@ extern struct tree_balance * cur_tb;
 const struct reiserfs_key  MIN_KEY = {0, 0, {{0, 0},}};
 
 /* Maximal possible key. It is never in the tree. */
-const struct reiserfs_key  MAX_KEY = {0xffffffff, 0xffffffff, {{0xffffffff, 0xffffffff},}};
+const struct reiserfs_key  MAX_KEY = {
+	__constant_cpu_to_le32(0xffffffff),
+	__constant_cpu_to_le32(0xffffffff),
+	{{__constant_cpu_to_le32(0xffffffff),
+	__constant_cpu_to_le32(0xffffffff)},}
+};
 const struct in_core_key  MAX_IN_CORE_KEY = {0xffffffff, 0xffffffff, {{0xffffffff, 0xffffffff},}};
 
 
@@ -998,7 +1004,7 @@ static char  prepare_for_delete_or_cut(
 	int                   n_unfm_number,    /* Number of the item unformatted nodes. */
 	    n_counter,
 	    n_blk_size;
-	__u32               * p_n_unfm_pointer; /* Pointer to the unformatted node number. */
+	__le32               * p_n_unfm_pointer; /* Pointer to the unformatted node number. */
 	__u32 tmp;
 	struct item_head      s_ih;           /* Item header. */
 	char                  c_mode;           /* Returned mode of the balance. */
@@ -1060,7 +1066,7 @@ static char  prepare_for_delete_or_cut(
 	    /* pointers to be cut */
 	    n_unfm_number -= pos_in_item (p_s_path);
 	    /* Set pointer to the last unformatted node pointer that is to be cut. */
-	    p_n_unfm_pointer = (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1 - *p_n_removed;
+	    p_n_unfm_pointer = (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1 - *p_n_removed;
 
 
 	    /* We go through the unformatted nodes pointers of the indirect
@@ -1082,8 +1088,8 @@ static char  prepare_for_delete_or_cut(
 		    need_research = 1 ;
 		    break;
 		}
-		RFALSE( p_n_unfm_pointer < (__u32 *)B_I_PITEM(p_s_bh, &s_ih) ||
-			p_n_unfm_pointer > (__u32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1,
+		RFALSE( p_n_unfm_pointer < (__le32 *)B_I_PITEM(p_s_bh, &s_ih) ||
+			p_n_unfm_pointer > (__le32 *)B_I_PITEM(p_s_bh, &s_ih) + I_UNFM_NUM(&s_ih) - 1,
 			"vs-5265: pointer out of range");
 
 		/* Hole, nothing to remove. */
@@ -1432,7 +1438,7 @@ int reiserfs_delete_object (struct reiserfs_transaction_handle *th, struct inode
 #if defined( USE_INODE_GENERATION_COUNTER )
     if( !old_format_only ( th -> t_super ) )
       {
-       __u32 *inode_generation;
+       __le32 *inode_generation;
        
        inode_generation = 
          &REISERFS_SB(th -> t_super) -> s_rs -> s_inode_generation;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index d6d1d7e2801f..2283f18aa1dc 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -216,7 +216,7 @@ static int finish_unfinished (struct super_block * s)
  
         /* reiserfs_iget needs k_dirid and k_objectid only */
         item = B_I_PITEM (bh, ih);
-        obj_key.on_disk_key.k_dir_id = le32_to_cpu (*(__u32 *)item);
+        obj_key.on_disk_key.k_dir_id = le32_to_cpu (*(__le32 *)item);
         obj_key.on_disk_key.k_objectid = le32_to_cpu (ih->ih_key.k_objectid);
 	obj_key.on_disk_key.u.k_offset_v1.k_offset = 0;
 	obj_key.on_disk_key.u.k_offset_v1.k_uniqueness = 0;
@@ -304,7 +304,7 @@ void add_save_link (struct reiserfs_transaction_handle * th,
     int retval;
     struct cpu_key key;
     struct item_head ih;
-    __u32 link;
+    __le32 link;
 
     BUG_ON (!th->t_trans_id);
 
@@ -1336,7 +1336,7 @@ static int read_super_block (struct super_block * s, int offset)
 	return 1;
     }
 
-    if ( rs->s_v1.s_root_block == -1 ) {
+    if ( rs->s_v1.s_root_block == cpu_to_le32(-1) ) {
        brelse(bh) ;
        reiserfs_warning (s, "Unfinished reiserfsck --rebuild-tree run detected. Please run\n"
               "reiserfsck --rebuild-tree and wait for a completion. If that fails\n"
diff --git a/include/linux/reiserfs_acl.h b/include/linux/reiserfs_acl.h
index a57e973af0bd..2aef9c3f5ce8 100644
--- a/include/linux/reiserfs_acl.h
+++ b/include/linux/reiserfs_acl.h
@@ -5,18 +5,18 @@
 #define REISERFS_ACL_VERSION	0x0001
 
 typedef struct {
-	__u16		e_tag;
-	__u16		e_perm;
-	__u32		e_id;
+	__le16		e_tag;
+	__le16		e_perm;
+	__le32		e_id;
 } reiserfs_acl_entry;
 
 typedef struct {
-	__u16		e_tag;
-	__u16		e_perm;
+	__le16		e_tag;
+	__le16		e_perm;
 } reiserfs_acl_entry_short;
 
 typedef struct {
-	__u32		a_version;
+	__le32		a_version;
 } reiserfs_acl_header;
 
 static inline size_t reiserfs_acl_size(int count)
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index d0867873a1b5..cc39c5305b80 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -114,47 +114,47 @@ if( !( cond ) ) 								\
 
 
 struct journal_params {
-    __u32 jp_journal_1st_block;	      /* where does journal start from on its
+    __le32 jp_journal_1st_block;	      /* where does journal start from on its
 				       * device */
-    __u32 jp_journal_dev;	      /* journal device st_rdev */
-    __u32 jp_journal_size;	      /* size of the journal */
-    __u32 jp_journal_trans_max;	      /* max number of blocks in a transaction. */
-    __u32 jp_journal_magic; 	      /* random value made on fs creation (this
+    __le32 jp_journal_dev;	      /* journal device st_rdev */
+    __le32 jp_journal_size;	      /* size of the journal */
+    __le32 jp_journal_trans_max;	      /* max number of blocks in a transaction. */
+    __le32 jp_journal_magic; 	      /* random value made on fs creation (this
 				       * was sb_journal_block_count) */
-    __u32 jp_journal_max_batch;	      /* max number of blocks to batch into a
+    __le32 jp_journal_max_batch;	      /* max number of blocks to batch into a
 				       * trans */
-    __u32 jp_journal_max_commit_age;  /* in seconds, how old can an async
+    __le32 jp_journal_max_commit_age;  /* in seconds, how old can an async
 				       * commit be */
-    __u32 jp_journal_max_trans_age;   /* in seconds, how old can a transaction
+    __le32 jp_journal_max_trans_age;   /* in seconds, how old can a transaction
 				       * be */
 };
 
 /* this is the super from 3.5.X, where X >= 10 */
 struct reiserfs_super_block_v1
 {
-    __u32 s_block_count;	   /* blocks count         */
-    __u32 s_free_blocks;           /* free blocks count    */
-    __u32 s_root_block;            /* root block number    */
+    __le32 s_block_count;	   /* blocks count         */
+    __le32 s_free_blocks;           /* free blocks count    */
+    __le32 s_root_block;            /* root block number    */
     struct journal_params s_journal;
-    __u16 s_blocksize;             /* block size */
-    __u16 s_oid_maxsize;	   /* max size of object id array, see
+    __le16 s_blocksize;             /* block size */
+    __le16 s_oid_maxsize;	   /* max size of object id array, see
 				    * get_objectid() commentary  */
-    __u16 s_oid_cursize;	   /* current size of object id array */
-    __u16 s_umount_state;          /* this is set to 1 when filesystem was
+    __le16 s_oid_cursize;	   /* current size of object id array */
+    __le16 s_umount_state;          /* this is set to 1 when filesystem was
 				    * umounted, to 2 - when not */    
     char s_magic[10];              /* reiserfs magic string indicates that
 				    * file system is reiserfs:
 				    * "ReIsErFs" or "ReIsEr2Fs" or "ReIsEr3Fs" */
-    __u16 s_fs_state;	           /* it is set to used by fsck to mark which
+    __le16 s_fs_state;	           /* it is set to used by fsck to mark which
 				    * phase of rebuilding is done */
-    __u32 s_hash_function_code;    /* indicate, what hash function is being use
+    __le32 s_hash_function_code;    /* indicate, what hash function is being use
 				    * to sort names in a directory*/
-    __u16 s_tree_height;           /* height of disk tree */
-    __u16 s_bmap_nr;               /* amount of bitmap blocks needed to address
+    __le16 s_tree_height;           /* height of disk tree */
+    __le16 s_bmap_nr;               /* amount of bitmap blocks needed to address
 				    * each block of file system */
-    __u16 s_version;               /* this field is only reliable on filesystem
+    __le16 s_version;               /* this field is only reliable on filesystem
 				    * with non-standard journal */
-    __u16 s_reserved_for_journal;  /* size in blocks of journal area on main
+    __le16 s_reserved_for_journal;  /* size in blocks of journal area on main
 				    * device, we need to keep after
 				    * making fs with non-standard journal */	
 } __attribute__ ((__packed__));
@@ -165,8 +165,8 @@ struct reiserfs_super_block_v1
 struct reiserfs_super_block
 {
     struct reiserfs_super_block_v1 s_v1;
-    __u32 s_inode_generation;
-    __u32 s_flags;                  /* Right now used only by inode-attributes, if enabled */
+    __le32 s_inode_generation;
+    __le32 s_flags;                  /* Right now used only by inode-attributes, if enabled */
     unsigned char s_uuid[16];       /* filesystem unique identifier */
     unsigned char s_label[16];      /* filesystem volume label */
     char s_unused[88] ;             /* zero filled by mkreiserfs and
@@ -269,7 +269,7 @@ int is_reiserfs_jr (struct reiserfs_super_block * rs);
 #define QUOTA_EXCEEDED -6
 
 typedef __u32 b_blocknr_t;
-typedef __u32 unp_t;
+typedef __le32 unp_t;
 
 struct unfm_nodeinfo {
     unp_t unfm_nodenum;
@@ -376,8 +376,8 @@ static inline struct reiserfs_sb_info *REISERFS_SB(const struct super_block *sb)
 // directories use this key as well as old files
 //
 struct offset_v1 {
-    __u32 k_offset;
-    __u32 k_uniqueness;
+    __le32 k_offset;
+    __le32 k_uniqueness;
 } __attribute__ ((__packed__));
 
 struct offset_v2 {
@@ -453,9 +453,9 @@ struct in_core_offset_v2 {
 /* Key of an item determines its location in the S+tree, and
    is composed of 4 components */
 struct reiserfs_key {
-    __u32 k_dir_id;    /* packing locality: by default parent
+    __le32 k_dir_id;    /* packing locality: by default parent
 			  directory object id */
-    __u32 k_objectid;  /* object identifier */
+    __le32 k_objectid;  /* object identifier */
     union {
 	struct offset_v1 k_offset_v1;
 	struct offset_v2 k_offset_v2;
@@ -534,15 +534,15 @@ struct item_head
 		   item. Note that the key, not this field, is used to
 		   determine the item type, and thus which field this
 		   union contains. */
-		__u16 ih_free_space_reserved; 
+		__le16 ih_free_space_reserved;
 		/* Iff this is a directory item, this field equals the
 		   number of directory entries in the directory item. */
-		__u16 ih_entry_count; 
+		__le16 ih_entry_count;
 	} __attribute__ ((__packed__)) u;
-	__u16 ih_item_len;           /* total size of the item body */
-	__u16 ih_item_location;      /* an offset to the item body
+	__le16 ih_item_len;           /* total size of the item body */
+	__le16 ih_item_location;      /* an offset to the item body
 				      * within the block */
-	__u16 ih_version;	     /* 0 for all old items, 2 for new
+	__le16 ih_version;	     /* 0 for all old items, 2 for new
 					ones. Highest bit is set by fsck
 					temporary, cleaned after all
 					done */
@@ -778,10 +778,10 @@ extern struct reiserfs_key root_key;
 /* Header of a disk block.  More precisely, header of a formatted leaf
    or internal node, and not the header of an unformatted node. */
 struct block_head {       
-  __u16 blk_level;        /* Level of a block in the tree. */
-  __u16 blk_nr_item;      /* Number of keys/items in a block. */
-  __u16 blk_free_space;   /* Block free space in bytes. */
-  __u16 blk_reserved;
+  __le16 blk_level;        /* Level of a block in the tree. */
+  __le16 blk_nr_item;      /* Number of keys/items in a block. */
+  __le16 blk_free_space;   /* Block free space in bytes. */
+  __le16 blk_reserved;
 				/* dump this in v4/planA */
   struct reiserfs_key  blk_right_delim_key; /* kept only for compatibility */
 };
@@ -845,19 +845,19 @@ struct block_head {
 //
 struct stat_data_v1
 {
-    __u16 sd_mode;	/* file type, permissions */
-    __u16 sd_nlink;	/* number of hard links */
-    __u16 sd_uid;		/* owner */
-    __u16 sd_gid;		/* group */
-    __u32 sd_size;	/* file size */
-    __u32 sd_atime;	/* time of last access */
-    __u32 sd_mtime;	/* time file was last modified  */
-    __u32 sd_ctime;	/* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */
+    __le16 sd_mode;	/* file type, permissions */
+    __le16 sd_nlink;	/* number of hard links */
+    __le16 sd_uid;		/* owner */
+    __le16 sd_gid;		/* group */
+    __le32 sd_size;	/* file size */
+    __le32 sd_atime;	/* time of last access */
+    __le32 sd_mtime;	/* time file was last modified  */
+    __le32 sd_ctime;	/* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */
     union {
-	__u32 sd_rdev;
-	__u32 sd_blocks;	/* number of blocks file uses */
+	__le32 sd_rdev;
+	__le32 sd_blocks;	/* number of blocks file uses */
     } __attribute__ ((__packed__)) u;
-    __u32 sd_first_direct_byte; /* first byte of file which is stored
+    __le32 sd_first_direct_byte; /* first byte of file which is stored
 				   in a direct item: except that if it
 				   equals 1 it is a symlink and if it
 				   equals ~(__u32)0 there is no
@@ -923,20 +923,20 @@ struct stat_data_v1
 /* Stat Data on disk (reiserfs version of UFS disk inode minus the
    address blocks) */
 struct stat_data {
-    __u16 sd_mode;	/* file type, permissions */
-    __u16 sd_attrs;     /* persistent inode flags */
-    __u32 sd_nlink;	/* number of hard links */
-    __u64 sd_size;	/* file size */
-    __u32 sd_uid;		/* owner */
-    __u32 sd_gid;		/* group */
-    __u32 sd_atime;	/* time of last access */
-    __u32 sd_mtime;	/* time file was last modified  */
-    __u32 sd_ctime;	/* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */
-    __u32 sd_blocks;
+    __le16 sd_mode;	/* file type, permissions */
+    __le16 sd_attrs;     /* persistent inode flags */
+    __le32 sd_nlink;	/* number of hard links */
+    __le64 sd_size;	/* file size */
+    __le32 sd_uid;		/* owner */
+    __le32 sd_gid;		/* group */
+    __le32 sd_atime;	/* time of last access */
+    __le32 sd_mtime;	/* time file was last modified  */
+    __le32 sd_ctime;	/* time inode (stat data) was last changed (except changes to sd_atime and sd_mtime) */
+    __le32 sd_blocks;
     union {
-	__u32 sd_rdev;
-	__u32 sd_generation;
-      //__u32 sd_first_direct_byte; 
+	__le32 sd_rdev;
+	__le32 sd_generation;
+      //__le32 sd_first_direct_byte;
       /* first byte of file which is stored in a
 				       direct item: except that if it equals 1
 				       it is a symlink and if it equals
@@ -1019,12 +1019,12 @@ struct stat_data {
 
 struct reiserfs_de_head
 {
-  __u32 deh_offset;		/* third component of the directory entry key */
-  __u32 deh_dir_id;		/* objectid of the parent directory of the object, that is referenced
+  __le32 deh_offset;		/* third component of the directory entry key */
+  __le32 deh_dir_id;		/* objectid of the parent directory of the object, that is referenced
 					   by directory entry */
-  __u32 deh_objectid;		/* objectid of the object, that is referenced by directory entry */
-  __u16 deh_location;		/* offset of name in the whole item */
-  __u16 deh_state;		/* whether 1) entry contains stat data (for future), and 2) whether
+  __le32 deh_objectid;		/* objectid of the object, that is referenced by directory entry */
+  __le16 deh_location;		/* offset of name in the whole item */
+  __le16 deh_state;		/* whether 1) entry contains stat data (for future), and 2) whether
 					   entry is hidden (unlinked) */
 } __attribute__ ((__packed__));
 #define DEH_SIZE                  sizeof(struct reiserfs_de_head)
@@ -1084,10 +1084,10 @@ struct reiserfs_de_head
 #define de_visible(deh)	    	    test_bit_unaligned (DEH_Visible, &((deh)->deh_state))
 #define de_hidden(deh)	    	    !test_bit_unaligned (DEH_Visible, &((deh)->deh_state))
 
-extern void make_empty_dir_item_v1 (char * body, __u32 dirid, __u32 objid,
-				    __u32 par_dirid, __u32 par_objid);
-extern void make_empty_dir_item (char * body, __u32 dirid, __u32 objid,
-				 __u32 par_dirid, __u32 par_objid);
+extern void make_empty_dir_item_v1 (char * body, __le32 dirid, __le32 objid,
+				    __le32 par_dirid, __le32 par_objid);
+extern void make_empty_dir_item (char * body, __le32 dirid, __le32 objid,
+				 __le32 par_dirid, __le32 par_objid);
 
 /* array of the entry headers */
  /* get item body */
@@ -1186,9 +1186,9 @@ struct reiserfs_dir_entry
 /* Disk child pointer: The pointer from an internal node of the tree
    to a node that is on disk. */
 struct disk_child {
-  __u32       dc_block_number;              /* Disk child's block number. */
-  __u16       dc_size;		            /* Disk child's used space.   */
-  __u16       dc_reserved;
+  __le32       dc_block_number;              /* Disk child's block number. */
+  __le16       dc_size;		            /* Disk child's used space.   */
+  __le16       dc_reserved;
 };
 
 #define DC_SIZE (sizeof(struct disk_child))
@@ -1656,10 +1656,10 @@ struct reiserfs_iget_args {
 
 /* first block written in a commit.  */
 struct reiserfs_journal_desc {
-  __u32 j_trans_id ;			/* id of commit */
-  __u32 j_len ;			/* length of commit. len +1 is the commit block */
-  __u32 j_mount_id ;				/* mount id of this trans*/
-  __u32 j_realblock[1] ; /* real locations for each block */
+  __le32 j_trans_id ;			/* id of commit */
+  __le32 j_len ;			/* length of commit. len +1 is the commit block */
+  __le32 j_mount_id ;				/* mount id of this trans*/
+  __le32 j_realblock[1] ; /* real locations for each block */
 } ;
 
 #define get_desc_trans_id(d)   le32_to_cpu((d)->j_trans_id)
@@ -1672,9 +1672,9 @@ struct reiserfs_journal_desc {
 
 /* last block written in a commit */
 struct reiserfs_journal_commit {
-  __u32 j_trans_id ;			/* must match j_trans_id from the desc block */
-  __u32 j_len ;			/* ditto */
-  __u32 j_realblock[1] ; /* real locations for each block */
+  __le32 j_trans_id ;			/* must match j_trans_id from the desc block */
+  __le32 j_len ;			/* ditto */
+  __le32 j_realblock[1] ; /* real locations for each block */
 } ;
 
 #define get_commit_trans_id(c) le32_to_cpu((c)->j_trans_id)
@@ -1689,9 +1689,9 @@ struct reiserfs_journal_commit {
 ** and this transaction does not need to be replayed.
 */
 struct reiserfs_journal_header {
-  __u32 j_last_flush_trans_id ;		/* id of last fully flushed transaction */
-  __u32 j_first_unflushed_offset ;      /* offset in the log of where to start replay after a crash */
-  __u32 j_mount_id ;
+  __le32 j_last_flush_trans_id ;		/* id of last fully flushed transaction */
+  __le32 j_first_unflushed_offset ;      /* offset in the log of where to start replay after a crash */
+  __le32 j_mount_id ;
   /* 12 */ struct journal_params jh_journal;
 } ;
 
@@ -2170,7 +2170,7 @@ void reiserfs_init_alloc_options (struct super_block *s);
  * to use for a new object underneat it.  The locality is returned
  * in disk byte order (le).
  */
-u32 reiserfs_choose_packing(struct inode *dir);
+__le32 reiserfs_choose_packing(struct inode *dir);
 
 int is_reusable (struct super_block * s, b_blocknr_t block, int bit_value);
 void reiserfs_free_block (struct reiserfs_transaction_handle *th, struct inode *, b_blocknr_t, int for_unformatted);
diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h
index 1eaa48eca811..9244c5748820 100644
--- a/include/linux/reiserfs_xattr.h
+++ b/include/linux/reiserfs_xattr.h
@@ -10,8 +10,8 @@
 #define REISERFS_XATTR_MAGIC 0x52465841 /* "RFXA" */
 
 struct reiserfs_xattr_header {
-    __u32 h_magic;              /* magic number for identification */
-    __u32 h_hash;               /* hash of the value */
+    __le32 h_magic;              /* magic number for identification */
+    __le32 h_hash;               /* hash of the value */
 };
 
 #ifdef __KERNEL__
-- 
cgit v1.2.3


From b8cc936f6295bba23513a49d858ea82f64982faf Mon Sep 17 00:00:00 2001
From: Al Viro <viro@www.linux.org.uk>
Date: Sun, 1 May 2005 08:59:18 -0700
Subject: [PATCH] reiserfs endianness: fix endianness bugs

fixes for a couple of bugs exposed by the above: le32_to_cpu() used on 16bit
value and missing conversion in comparison of host- and little-endian values.

Signed-off-by: Al Viro <viro@parcelfarce.linux.theplanet.co.uk>
Cc: <reiserfs-dev@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/journal.c       | 2 +-
 include/linux/reiserfs_fs.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index b16d65acb550..3072cfdee959 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -2391,7 +2391,7 @@ int journal_init(struct super_block *p_s_sb, const char * j_dev_name, int old_fo
      jh = (struct reiserfs_journal_header *)(bhjh->b_data);
      
      /* make sure that journal matches to the super block */
-     if (is_reiserfs_jr(rs) && (jh->jh_journal.jp_journal_magic != sb_jp_journal_magic(rs))) {
+     if (is_reiserfs_jr(rs) && (le32_to_cpu(jh->jh_journal.jp_journal_magic) != sb_jp_journal_magic(rs))) {
 	 reiserfs_warning (p_s_sb, "sh-460: journal header magic %x "
 			   "(device %s) does not match to magic found in super "
 			   "block %x",
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index cc39c5305b80..2f7a34d636dd 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -225,7 +225,7 @@ struct reiserfs_super_block
 #define SB_ONDISK_JOURNAL_DEVICE(s) \
          le32_to_cpu ((SB_ONDISK_JP(s)->jp_journal_dev))
 #define SB_ONDISK_RESERVED_FOR_JOURNAL(s) \
-         le32_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_reserved_for_journal))
+         le16_to_cpu ((SB_V1_DISK_SUPER_BLOCK(s)->s_reserved_for_journal))
 
 #define is_block_in_log_or_reserved_area(s, block) \
          block >= SB_JOURNAL_1st_RESERVED_BLOCK(s) \
-- 
cgit v1.2.3


From 6b9f5829e6e3af44f20c681e26524c637d4f82ff Mon Sep 17 00:00:00 2001
From: Al Viro <viro@www.linux.org.uk>
Date: Sun, 1 May 2005 08:59:19 -0700
Subject: [PATCH] reiserfs endianness: comp_short_keys() cleanup

comp_short_keys() massaged into sane form, which kills the last place where
pointer to in_core_key (or any object containing such) would be cast to or
from something else.  At that point we are free to change layout of
in_core_key - nothing depends on it anymore.

So we drop the mess with union in there and simply use (unconditional) __u64
k_offset and __u8 k_type instead; places using in_core_key switched to those.
That gives _far_ better code than current mess - on all platforms.

Signed-off-by: Al Viro <viro@parcelfarce.linux.theplanet.co.uk>
Cc: <reiserfs-dev@namesys.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/reiserfs/inode.c         |  4 ++--
 fs/reiserfs/stree.c         | 36 ++++++++++++++-------------------
 fs/reiserfs/super.c         |  4 ++--
 include/linux/reiserfs_fs.h | 49 ++++++++-------------------------------------
 4 files changed, 27 insertions(+), 66 deletions(-)

(limited to 'fs')

diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index 5fdb9f97b99e..2711dff1b7b4 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -1341,8 +1341,8 @@ void reiserfs_read_locked_inode (struct inode * inode, struct reiserfs_iget_args
     key.version = KEY_FORMAT_3_5;
     key.on_disk_key.k_dir_id = dirino;
     key.on_disk_key.k_objectid = inode->i_ino;
-    key.on_disk_key.u.k_offset_v1.k_offset = SD_OFFSET;
-    key.on_disk_key.u.k_offset_v1.k_uniqueness = SD_UNIQUENESS;
+    key.on_disk_key.k_offset = 0;
+    key.on_disk_key.k_type = 0;
 
     /* look for the object's stat data */
     retval = search_item (inode->i_sb, &key, &path_to_sd);
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 15fa4cbdce3e..da23ba75f3d5 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -87,23 +87,20 @@ inline void copy_item_head(struct item_head * p_v_to,
 inline int  comp_short_keys (const struct reiserfs_key * le_key,
 			     const struct cpu_key * cpu_key)
 {
-  __le32 * p_s_le_u32;
-  __u32 * p_s_cpu_u32;
-  int n_key_length = REISERFS_SHORT_KEY_LEN;
-
-  p_s_le_u32 = (__le32 *)le_key;
-  p_s_cpu_u32 = (__u32 *)&cpu_key->on_disk_key;
-  for( ; n_key_length--; ++p_s_le_u32, ++p_s_cpu_u32 ) {
-    if ( le32_to_cpu (*p_s_le_u32) < *p_s_cpu_u32 )
+  __u32 n;
+  n = le32_to_cpu(le_key->k_dir_id);
+  if (n < cpu_key->on_disk_key.k_dir_id)
       return -1;
-    if ( le32_to_cpu (*p_s_le_u32) > *p_s_cpu_u32 )
+  if (n > cpu_key->on_disk_key.k_dir_id)
+      return 1;
+  n = le32_to_cpu(le_key->k_objectid);
+  if (n < cpu_key->on_disk_key.k_objectid)
+      return -1;
+  if (n > cpu_key->on_disk_key.k_objectid)
       return 1;
-  }
-
   return 0;
 }
 
-
 /* k1 is pointer to on-disk structure which is stored in little-endian
    form. k2 is pointer to cpu variable.
    Compare keys using all 4 key fields.
@@ -153,18 +150,15 @@ inline int comp_short_le_keys (const struct reiserfs_key * key1, const struct re
 
 inline void le_key2cpu_key (struct cpu_key * to, const struct reiserfs_key * from)
 {
+    int version;
     to->on_disk_key.k_dir_id = le32_to_cpu (from->k_dir_id);
     to->on_disk_key.k_objectid = le32_to_cpu (from->k_objectid);
     
     // find out version of the key
-    to->version = le_key_version (from);
-    if (to->version == KEY_FORMAT_3_5) {
-	to->on_disk_key.u.k_offset_v1.k_offset = le32_to_cpu (from->u.k_offset_v1.k_offset);
-	to->on_disk_key.u.k_offset_v1.k_uniqueness = le32_to_cpu (from->u.k_offset_v1.k_uniqueness);
-    } else {
-	to->on_disk_key.u.k_offset_v2.k_offset = offset_v2_k_offset(&from->u.k_offset_v2);
-	to->on_disk_key.u.k_offset_v2.k_type = offset_v2_k_type(&from->u.k_offset_v2);
-    } 
+    version = le_key_version (from);
+    to->version = version;
+    to->on_disk_key.k_offset = le_key_k_offset(version, from);
+    to->on_disk_key.k_type = le_key_k_type(version, from);
 }
 
 
@@ -235,8 +229,8 @@ const struct reiserfs_key  MAX_KEY = {
 	{{__constant_cpu_to_le32(0xffffffff),
 	__constant_cpu_to_le32(0xffffffff)},}
 };
-const struct in_core_key  MAX_IN_CORE_KEY = {0xffffffff, 0xffffffff, {{0xffffffff, 0xffffffff},}};
 
+const struct in_core_key  MAX_IN_CORE_KEY = {~0U, ~0U, ~0ULL>>4, 15};
 
 /* Get delimiting key of the buffer by looking for it in the buffers in the path, starting from the bottom
    of the path, and going upwards.  We must check the path's validity at each step.  If the key is not in
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index 2283f18aa1dc..31e75125f48b 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -218,8 +218,8 @@ static int finish_unfinished (struct super_block * s)
         item = B_I_PITEM (bh, ih);
         obj_key.on_disk_key.k_dir_id = le32_to_cpu (*(__le32 *)item);
         obj_key.on_disk_key.k_objectid = le32_to_cpu (ih->ih_key.k_objectid);
-	obj_key.on_disk_key.u.k_offset_v1.k_offset = 0;
-	obj_key.on_disk_key.u.k_offset_v1.k_uniqueness = 0;
+	obj_key.on_disk_key.k_offset = 0;
+	obj_key.on_disk_key.k_type = 0;
 	
         pathrelse (&path);
  
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h
index 2f7a34d636dd..d445b682ce00 100644
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -433,23 +433,6 @@ static inline void set_offset_v2_k_offset( struct offset_v2 *v2, loff_t offset )
 # define set_offset_v2_k_offset(v2,val) (offset_v2_k_offset(v2) = (val))
 #endif
 
-struct in_core_offset_v1 {
-    __u32 k_offset;
-    __u32 k_uniqueness;
-} __attribute__ ((__packed__));
-
-struct in_core_offset_v2 {
-#ifdef __LITTLE_ENDIAN
-	    /* little endian version */
-	    __u64 k_offset:60;
-	    __u64 k_type: 4;
-#else
-	    /* big endian version */
-	    __u64 k_type: 4;
-	    __u64 k_offset:60;
-#endif
-} __attribute__ ((__packed__));
-
 /* Key of an item determines its location in the S+tree, and
    is composed of 4 components */
 struct reiserfs_key {
@@ -466,11 +449,9 @@ struct in_core_key {
     __u32 k_dir_id;    /* packing locality: by default parent
 			  directory object id */
     __u32 k_objectid;  /* object identifier */
-    union {
-	struct in_core_offset_v1 k_offset_v1;
-	struct in_core_offset_v2 k_offset_v2;
-    } __attribute__ ((__packed__)) u;
-} __attribute__ ((__packed__));
+    __u64 k_offset;
+    __u8 k_type;
+};
 
 struct cpu_key {
     struct in_core_key on_disk_key;
@@ -696,43 +677,29 @@ static inline void set_le_ih_k_type (struct item_head * ih, int type)
 //
 static inline loff_t cpu_key_k_offset (const struct cpu_key * key)
 {
-    return (key->version == KEY_FORMAT_3_5) ?
-        key->on_disk_key.u.k_offset_v1.k_offset :
-	key->on_disk_key.u.k_offset_v2.k_offset;
+    return key->on_disk_key.k_offset;
 }
 
 static inline loff_t cpu_key_k_type (const struct cpu_key * key)
 {
-    return (key->version == KEY_FORMAT_3_5) ?
-        uniqueness2type (key->on_disk_key.u.k_offset_v1.k_uniqueness) :
-	key->on_disk_key.u.k_offset_v2.k_type;
+    return key->on_disk_key.k_type;
 }
 
 static inline void set_cpu_key_k_offset (struct cpu_key * key, loff_t offset)
 {
-    (key->version == KEY_FORMAT_3_5) ?
-        (key->on_disk_key.u.k_offset_v1.k_offset = offset) :
-	(key->on_disk_key.u.k_offset_v2.k_offset = offset);
+	key->on_disk_key.k_offset = offset;
 }
 
-
 static inline void set_cpu_key_k_type (struct cpu_key * key, int type)
 {
-    (key->version == KEY_FORMAT_3_5) ?
-        (key->on_disk_key.u.k_offset_v1.k_uniqueness = type2uniqueness (type)):
-	(key->on_disk_key.u.k_offset_v2.k_type = type);
+	key->on_disk_key.k_type = type;
 }
 
-
 static inline void cpu_key_k_offset_dec (struct cpu_key * key)
 {
-    if (key->version == KEY_FORMAT_3_5)
-	key->on_disk_key.u.k_offset_v1.k_offset --;
-    else
-	key->on_disk_key.u.k_offset_v2.k_offset --;
+	key->on_disk_key.k_offset --;
 }
 
-
 #define is_direntry_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRENTRY)
 #define is_direct_cpu_key(key) (cpu_key_k_type (key) == TYPE_DIRECT)
 #define is_indirect_cpu_key(key) (cpu_key_k_type (key) == TYPE_INDIRECT)
-- 
cgit v1.2.3


From fe55c452368af263a9beec38ed29f6be85280524 Mon Sep 17 00:00:00 2001
From: Mingming Cao <cmm@us.ibm.com>
Date: Sun, 1 May 2005 08:59:20 -0700
Subject: [PATCH] ext3: remove unnecessary race then retry in ext3_get_block

The extra race-with-truncate-then-retry logic around
ext3_get_block_handle(), which was inherited from ext2, becomes unecessary
for ext3, since we have already obtained the ei->truncate_sem in
ext3_get_block_handle() before calling ext3_alloc_branch().  The
ei->truncate_sem is already there to block concurrent truncate and block
allocation on the same inode.  So the inode's indirect addressing tree
won't be changed after we grab that semaphore.

We could, after get the semaphore, re-verify the branch is up-to-date or
not.  If it has been changed, then get the updated branch.  If we still
need block allocation, we will have a safe version of the branch to work
with in the ext3_find_goal()/ext3_splice_branch().

The code becomes more readable after remove those retry logic.  The patch
also clean up some gotos in ext3_get_block_handle() to make it more
readable.

Signed-off-by: Mingming Cao <cmm@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/ext3/inode.c | 144 ++++++++++++++++++++++++--------------------------------
 1 file changed, 61 insertions(+), 83 deletions(-)

(limited to 'fs')

diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c
index 040eb288bb1c..ea5888688f94 100644
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -455,12 +455,11 @@ static unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
  *	@goal:	place to store the result.
  *
  *	Normally this function find the prefered place for block allocation,
- *	stores it in *@goal and returns zero. If the branch had been changed
- *	under us we return -EAGAIN.
+ *	stores it in *@goal and returns zero.
  */
 
-static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4],
-			  Indirect *partial, unsigned long *goal)
+static unsigned long ext3_find_goal(struct inode *inode, long block,
+		Indirect chain[4], Indirect *partial)
 {
 	struct ext3_block_alloc_info *block_i =  EXT3_I(inode)->i_block_alloc_info;
 
@@ -470,15 +469,10 @@ static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4],
 	 */
 	if (block_i && (block == block_i->last_alloc_logical_block + 1)
 		&& (block_i->last_alloc_physical_block != 0)) {
-		*goal = block_i->last_alloc_physical_block + 1;
-		return 0;
+		return block_i->last_alloc_physical_block + 1;
 	}
 
-	if (verify_chain(chain, partial)) {
-		*goal = ext3_find_near(inode, partial);
-		return 0;
-	}
-	return -EAGAIN;
+	return ext3_find_near(inode, partial);
 }
 
 /**
@@ -582,12 +576,9 @@ static int ext3_alloc_branch(handle_t *handle, struct inode *inode,
  *	@where: location of missing link
  *	@num:   number of blocks we are adding
  *
- *	This function verifies that chain (up to the missing link) had not
- *	changed, fills the missing link and does all housekeeping needed in
+ *	This function fills the missing link and does all housekeeping needed in
  *	inode (->i_blocks, etc.). In case of success we end up with the full
- *	chain to new block and return 0. Otherwise (== chain had been changed)
- *	we free the new blocks (forgetting their buffer_heads, indeed) and
- *	return -EAGAIN.
+ *	chain to new block and return 0.
  */
 
 static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
@@ -608,12 +599,6 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
 		if (err)
 			goto err_out;
 	}
-	/* Verify that place we are splicing to is still there and vacant */
-
-	if (!verify_chain(chain, where-1) || *where->p)
-		/* Writer: end */
-		goto changed;
-
 	/* That's it */
 
 	*where->p = where->key;
@@ -657,26 +642,11 @@ static int ext3_splice_branch(handle_t *handle, struct inode *inode, long block,
 	}
 	return err;
 
-changed:
-	/*
-	 * AKPM: if where[i].bh isn't part of the current updating
-	 * transaction then we explode nastily.  Test this code path.
-	 */
-	jbd_debug(1, "the chain changed: try again\n");
-	err = -EAGAIN;
-
 err_out:
 	for (i = 1; i < num; i++) {
 		BUFFER_TRACE(where[i].bh, "call journal_forget");
 		ext3_journal_forget(handle, where[i].bh);
 	}
-	/* For the normal collision cleanup case, we free up the blocks.
-	 * On genuine filesystem errors we don't even think about doing
-	 * that. */
-	if (err == -EAGAIN)
-		for (i = 0; i < num; i++)
-			ext3_free_blocks(handle, inode, 
-					 le32_to_cpu(where[i].key), 1);
 	return err;
 }
 
@@ -708,7 +678,7 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 	unsigned long goal;
 	int left;
 	int boundary = 0;
-	int depth = ext3_block_to_path(inode, iblock, offsets, &boundary);
+	const int depth = ext3_block_to_path(inode, iblock, offsets, &boundary);
 	struct ext3_inode_info *ei = EXT3_I(inode);
 
 	J_ASSERT(handle != NULL || create == 0);
@@ -716,54 +686,55 @@ ext3_get_block_handle(handle_t *handle, struct inode *inode, sector_t iblock,
 	if (depth == 0)
 		goto out;
 
-reread:
 	partial = ext3_get_branch(inode, depth, offsets, chain, &err);
 
 	/* Simplest case - block found, no allocation needed */
 	if (!partial) {
 		clear_buffer_new(bh_result);
-got_it:
-		map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
-		if (boundary)
-			set_buffer_boundary(bh_result);
-		/* Clean up and exit */
-		partial = chain+depth-1; /* the whole chain */
-		goto cleanup;
+		goto got_it;
 	}
 
 	/* Next simple case - plain lookup or failed read of indirect block */
-	if (!create || err == -EIO) {
-cleanup:
+	if (!create || err == -EIO)
+		goto cleanup;
+
+	down(&ei->truncate_sem);
+
+	/*
+	 * If the indirect block is missing while we are reading
+	 * the chain(ext3_get_branch() returns -EAGAIN err), or
+	 * if the chain has been changed after we grab the semaphore,
+	 * (either because another process truncated this branch, or
+	 * another get_block allocated this branch) re-grab the chain to see if
+	 * the request block has been allocated or not.
+	 *
+	 * Since we already block the truncate/other get_block
+	 * at this point, we will have the current copy of the chain when we
+	 * splice the branch into the tree.
+	 */
+	if (err == -EAGAIN || !verify_chain(chain, partial)) {
 		while (partial > chain) {
-			BUFFER_TRACE(partial->bh, "call brelse");
 			brelse(partial->bh);
 			partial--;
 		}
-		BUFFER_TRACE(bh_result, "returned");
-out:
-		return err;
+		partial = ext3_get_branch(inode, depth, offsets, chain, &err);
+		if (!partial) {
+			up(&ei->truncate_sem);
+			if (err)
+				goto cleanup;
+			clear_buffer_new(bh_result);
+			goto got_it;
+		}
 	}
 
 	/*
-	 * Indirect block might be removed by truncate while we were
-	 * reading it. Handling of that case (forget what we've got and
-	 * reread) is taken out of the main path.
-	 */
-	if (err == -EAGAIN)
-		goto changed;
-
-	goal = 0;
-	down(&ei->truncate_sem);
-
-	/* lazy initialize the block allocation info here if necessary */
-	if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) {
+	 * Okay, we need to do block allocation.  Lazily initialize the block
+	 * allocation info here if necessary
+	*/
+	if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info))
 		ext3_init_block_alloc_info(inode);
-	}
 
-	if (ext3_find_goal(inode, iblock, chain, partial, &goal) < 0) {
-		up(&ei->truncate_sem);
-		goto changed;
-	}
+	goal = ext3_find_goal(inode, iblock, chain, partial);
 
 	left = (chain + depth) - partial;
 
@@ -771,38 +742,45 @@ out:
 	 * Block out ext3_truncate while we alter the tree
 	 */
 	err = ext3_alloc_branch(handle, inode, left, goal,
-					offsets+(partial-chain), partial);
+				offsets + (partial - chain), partial);
 
-	/* The ext3_splice_branch call will free and forget any buffers
+	/*
+	 * The ext3_splice_branch call will free and forget any buffers
 	 * on the new chain if there is a failure, but that risks using
 	 * up transaction credits, especially for bitmaps where the
 	 * credits cannot be returned.  Can we handle this somehow?  We
-	 * may need to return -EAGAIN upwards in the worst case.  --sct */
+	 * may need to return -EAGAIN upwards in the worst case.  --sct
+	 */
 	if (!err)
 		err = ext3_splice_branch(handle, inode, iblock, chain,
 					 partial, left);
-	/* i_disksize growing is protected by truncate_sem
-	 * don't forget to protect it if you're about to implement
-	 * concurrent ext3_get_block() -bzzz */
+	/*
+	 * i_disksize growing is protected by truncate_sem.  Don't forget to
+	 * protect it if you're about to implement concurrent
+	 * ext3_get_block() -bzzz
+	*/
 	if (!err && extend_disksize && inode->i_size > ei->i_disksize)
 		ei->i_disksize = inode->i_size;
 	up(&ei->truncate_sem);
-	if (err == -EAGAIN)
-		goto changed;
 	if (err)
 		goto cleanup;
 
 	set_buffer_new(bh_result);
-	goto got_it;
-
-changed:
+got_it:
+	map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
+	if (boundary)
+		set_buffer_boundary(bh_result);
+	/* Clean up and exit */
+	partial = chain + depth - 1;	/* the whole chain */
+cleanup:
 	while (partial > chain) {
-		jbd_debug(1, "buffer chain changed, retrying\n");
-		BUFFER_TRACE(partial->bh, "brelsing");
+		BUFFER_TRACE(partial->bh, "call brelse");
 		brelse(partial->bh);
 		partial--;
 	}
-	goto reread;
+	BUFFER_TRACE(bh_result, "returned");
+out:
+	return err;
 }
 
 static int ext3_get_block(struct inode *inode, sector_t iblock,
-- 
cgit v1.2.3


From 4dc3b16ba18c0f967ad100c52fa65b01a4f76ff0 Mon Sep 17 00:00:00 2001
From: Pavel Pisa <pisa@cmp.felk.cvut.cz>
Date: Sun, 1 May 2005 08:59:25 -0700
Subject: [PATCH] DocBook: changes and extensions to the kernel documentation

I have recompiled Linux kernel 2.6.11.5 documentation for me and our
university students again.  The documentation could be extended for more
sources which are equipped by structured comments for recent 2.6 kernels.  I
have tried to proceed with that task.  I have done that more times from 2.6.0
time and it gets boring to do same changes again and again.  Linux kernel
compiles after changes for i386 and ARM targets.  I have added references to
some more files into kernel-api book, I have added some section names as well.
 So please, check that changes do not break something and that categories are
not too much skewed.

I have changed kernel-doc to accept "fastcall" and "asmlinkage" words reserved
by kernel convention.  Most of the other changes are modifications in the
comments to make kernel-doc happy, accept some parameters description and do
not bail out on errors.  Changed <pid> to @pid in the description, moved some
#ifdef before comments to correct function to comments bindings, etc.

You can see result of the modified documentation build at
  http://cmp.felk.cvut.cz/~pisa/linux/lkdb-2.6.11.tar.gz

Some more sources are ready to be included into kernel-doc generated
documentation.  Sources has been added into kernel-api for now.  Some more
section names added and probably some more chaos introduced as result of quick
cleanup work.

Signed-off-by: Pavel Pisa <pisa@cmp.felk.cvut.cz>
Signed-off-by: Martin Waitz <tali@admingilde.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 Documentation/DocBook/kernel-api.tmpl | 186 ++++++++++++++++++++++++++++++++--
 drivers/video/fbmem.c                 |   5 +-
 fs/proc/base.c                        |  10 +-
 include/linux/fs.h                    |  12 +--
 include/linux/net.h                   |  38 +++----
 include/linux/skbuff.h                |   2 +-
 include/net/sock.h                    | 134 ++++++++++++------------
 kernel/exit.c                         |   2 +-
 kernel/power/swsusp.c                 |   2 +-
 mm/page_alloc.c                       |   3 +-
 mm/vmalloc.c                          |   8 +-
 net/core/datagram.c                   |  28 ++---
 net/core/sock.c                       |  12 +--
 net/core/stream.c                     |  12 +--
 net/sunrpc/xdr.c                      |  12 +--
 scripts/kernel-doc                    |   2 +
 16 files changed, 320 insertions(+), 148 deletions(-)

(limited to 'fs')

diff --git a/Documentation/DocBook/kernel-api.tmpl b/Documentation/DocBook/kernel-api.tmpl
index 1bd20c860285..757cef8f8491 100644
--- a/Documentation/DocBook/kernel-api.tmpl
+++ b/Documentation/DocBook/kernel-api.tmpl
@@ -49,13 +49,33 @@
 !Iinclude/asm-i386/unaligned.h
      </sect1>
 
-<!-- FIXME:
-  kernel/sched.c has no docs, which stuffs up the sgml.  Comment
-  out until somebody adds docs.  KAO
      <sect1><title>Delaying, scheduling, and timer routines</title>
-X!Ekernel/sched.c
+!Iinclude/linux/sched.h
+!Ekernel/sched.c
+!Ekernel/timer.c
      </sect1>
-KAO -->
+     <sect1><title>Internal Functions</title>
+!Ikernel/exit.c
+!Ikernel/signal.c
+     </sect1>
+
+     <sect1><title>Kernel objects manipulation</title>
+<!--
+X!Iinclude/linux/kobject.h
+-->
+!Elib/kobject.c
+     </sect1>
+
+     <sect1><title>Kernel utility functions</title>
+!Iinclude/linux/kernel.h
+<!-- This needs to clean up to make kernel-doc happy
+X!Ekernel/printk.c
+ -->
+!Ekernel/panic.c
+!Ekernel/sys.c
+!Ekernel/rcupdate.c
+     </sect1>
+
   </chapter>
 
   <chapter id="adt">
@@ -81,7 +101,9 @@ KAO -->
 !Elib/vsprintf.c
      </sect1>
      <sect1><title>String Manipulation</title>
-!Ilib/string.c
+<!-- All functions are exported at now
+X!Ilib/string.c
+ -->
 !Elib/string.c
      </sect1>
      <sect1><title>Bit Operations</title>
@@ -98,6 +120,25 @@ KAO -->
 !Iinclude/asm-i386/uaccess.h
 !Iarch/i386/lib/usercopy.c
      </sect1>
+     <sect1><title>More Memory Management Functions</title>
+!Iinclude/linux/rmap.h
+!Emm/readahead.c
+!Emm/filemap.c
+!Emm/memory.c
+!Emm/vmalloc.c
+!Emm/mempool.c
+!Emm/page-writeback.c
+!Emm/truncate.c
+     </sect1>
+  </chapter>
+
+
+  <chapter id="ipc">
+     <title>Kernel IPC facilities</title>
+
+     <sect1><title>IPC utilities</title>
+!Iipc/util.c
+     </sect1>
   </chapter>
 
   <chapter id="kfifo">
@@ -114,6 +155,10 @@ KAO -->
      <sect1><title>sysctl interface</title>
 !Ekernel/sysctl.c
      </sect1>
+
+     <sect1><title>proc filesystem interface</title>
+!Ifs/proc/base.c
+     </sect1>
   </chapter>
 
   <chapter id="debugfs">
@@ -127,6 +172,10 @@ KAO -->
 
   <chapter id="vfs">
      <title>The Linux VFS</title>
+     <sect1><title>The Filesystem types</title>
+!Iinclude/linux/fs.h
+!Einclude/linux/fs.h
+     </sect1>
      <sect1><title>The Directory Cache</title>
 !Efs/dcache.c
 !Iinclude/linux/dcache.h
@@ -142,13 +191,31 @@ KAO -->
 !Efs/locks.c
 !Ifs/locks.c
      </sect1>
+     <sect1><title>Other Functions</title>
+!Efs/mpage.c
+!Efs/namei.c
+!Efs/buffer.c
+!Efs/bio.c
+!Efs/seq_file.c
+!Efs/filesystems.c
+!Efs/fs-writeback.c
+!Efs/block_dev.c
+     </sect1>
   </chapter>
 
   <chapter id="netcore">
      <title>Linux Networking</title>
+     <sect1><title>Networking Base Types</title>
+!Iinclude/linux/net.h
+     </sect1>
      <sect1><title>Socket Buffer Functions</title>
 !Iinclude/linux/skbuff.h
+!Iinclude/net/sock.h
+!Enet/socket.c
 !Enet/core/skbuff.c
+!Enet/core/sock.c
+!Enet/core/datagram.c
+!Enet/core/stream.c
      </sect1>
      <sect1><title>Socket Filter</title>
 !Enet/core/filter.c
@@ -158,6 +225,14 @@ KAO -->
 !Enet/core/gen_stats.c
 !Enet/core/gen_estimator.c
      </sect1>
+     <sect1><title>SUN RPC subsystem</title>
+<!-- The !D functionality is not perfect, garbage has to be protected by comments
+!Dnet/sunrpc/sunrpc_syms.c
+-->
+!Enet/sunrpc/xdr.c
+!Enet/sunrpc/svcsock.c
+!Enet/sunrpc/sched.c
+     </sect1>
   </chapter>
 
   <chapter id="netdev">
@@ -194,11 +269,26 @@ X!Ekernel/module.c
 !Iarch/i386/kernel/irq.c
      </sect1>
 
+     <sect1><title>Resources Management</title>
+!Ekernel/resource.c
+     </sect1>
+
      <sect1><title>MTRR Handling</title>
 !Earch/i386/kernel/cpu/mtrr/main.c
      </sect1>
      <sect1><title>PCI Support Library</title>
 !Edrivers/pci/pci.c
+!Edrivers/pci/pci-driver.c
+!Edrivers/pci/remove.c
+!Edrivers/pci/pci-acpi.c
+<!-- kerneldoc does not understand to __devinit
+X!Edrivers/pci/search.c
+ -->
+!Edrivers/pci/msi.c
+!Edrivers/pci/bus.c
+!Edrivers/pci/hotplug.c
+!Edrivers/pci/probe.c
+!Edrivers/pci/rom.c
      </sect1>
      <sect1><title>PCI Hotplug Support Library</title>
 !Edrivers/pci/hotplug/pci_hotplug_core.c
@@ -223,6 +313,14 @@ X!Earch/i386/kernel/mca.c
 !Efs/devfs/base.c
   </chapter>
 
+  <chapter id="sysfs">
+     <title>The Filesystem for Exporting Kernel Objects</title>
+!Efs/sysfs/file.c
+!Efs/sysfs/dir.c
+!Efs/sysfs/symlink.c
+!Efs/sysfs/bin.c
+  </chapter>
+
   <chapter id="security">
      <title>Security Framework</title>
 !Esecurity/security.c
@@ -233,6 +331,61 @@ X!Earch/i386/kernel/mca.c
 !Ekernel/power/pm.c
   </chapter>
 
+  <chapter id="devdrivers">
+     <title>Device drivers infrastructure</title>
+     <sect1><title>Device Drivers Base</title>
+<!--
+X!Iinclude/linux/device.h
+-->
+!Edrivers/base/driver.c
+!Edrivers/base/class_simple.c
+!Edrivers/base/core.c
+!Edrivers/base/firmware_class.c
+!Edrivers/base/transport_class.c
+!Edrivers/base/dmapool.c
+<!-- Cannot be included, because
+     attribute_container_add_class_device_adapter
+ and attribute_container_classdev_to_container
+     exceed allowed 44 characters maximum
+X!Edrivers/base/attribute_container.c
+-->
+!Edrivers/base/sys.c
+<!--
+X!Edrivers/base/interface.c
+-->
+!Edrivers/base/platform.c
+!Edrivers/base/bus.c
+     </sect1>
+     <sect1><title>Device Drivers Power Management</title>
+!Edrivers/base/power/main.c
+!Edrivers/base/power/resume.c
+!Edrivers/base/power/suspend.c
+     </sect1>
+     <sect1><title>Device Drivers ACPI Support</title>
+<!-- Internal functions only
+X!Edrivers/acpi/sleep/main.c
+X!Edrivers/acpi/sleep/wakeup.c
+X!Edrivers/acpi/motherboard.c
+X!Edrivers/acpi/bus.c
+-->
+!Edrivers/acpi/scan.c
+<!-- No correct structured comments
+X!Edrivers/acpi/pci_bind.c
+-->
+     </sect1>
+     <sect1><title>Device drivers PnP support</title>
+!Edrivers/pnp/core.c
+<!-- No correct structured comments
+X!Edrivers/pnp/system.c
+ -->
+!Edrivers/pnp/card.c
+!Edrivers/pnp/driver.c
+!Edrivers/pnp/manager.c
+!Edrivers/pnp/support.c
+     </sect1>
+  </chapter>
+
+
   <chapter id="blkdev">
      <title>Block Devices</title>
 !Edrivers/block/ll_rw_blk.c
@@ -250,7 +403,23 @@ X!Earch/i386/kernel/mca.c
 
   <chapter id="snddev">
      <title>Sound Devices</title>
+!Iinclude/sound/core.h
 !Esound/sound_core.c
+!Iinclude/sound/pcm.h
+!Esound/core/pcm.c
+!Esound/core/device.c
+!Esound/core/info.c
+!Esound/core/rawmidi.c
+!Esound/core/sound.c
+!Esound/core/memory.c
+!Esound/core/pcm_memory.c
+!Esound/core/init.c
+!Esound/core/isadma.c
+!Esound/core/control.c
+!Esound/core/pcm_lib.c
+!Esound/core/hwdep.c
+!Esound/core/pcm_native.c
+!Esound/core/memalloc.c
 <!-- FIXME: Removed for now since no structured comments in source
 X!Isound/sound_firmware.c
 -->
@@ -258,6 +427,7 @@ X!Isound/sound_firmware.c
 
   <chapter id="uart16x50">
      <title>16x50 UART Driver</title>
+!Iinclude/linux/serial_core.h
 !Edrivers/serial/serial_core.c
 !Edrivers/serial/8250.c
   </chapter>
@@ -310,9 +480,11 @@ X!Isound/sound_firmware.c
      <sect1><title>Frame Buffer Memory</title>
 !Edrivers/video/fbmem.c
      </sect1>
+<!--
      <sect1><title>Frame Buffer Console</title>
-!Edrivers/video/console/fbcon.c
+X!Edrivers/video/console/fbcon.c
      </sect1>
+-->
      <sect1><title>Frame Buffer Colormap</title>
 !Edrivers/video/fbcmap.c
      </sect1>
diff --git a/drivers/video/fbmem.c b/drivers/video/fbmem.c
index 25f460ca0daf..208a68ceb63b 100644
--- a/drivers/video/fbmem.c
+++ b/drivers/video/fbmem.c
@@ -1257,6 +1257,8 @@ int fb_new_modelist(struct fb_info *info)
 static char *video_options[FB_MAX];
 static int ofonly;
 
+extern const char *global_mode_option;
+
 /**
  * fb_get_options - get kernel boot parameters
  * @name:   framebuffer name as it would appear in
@@ -1297,9 +1299,6 @@ int fb_get_options(char *name, char **option)
 	return retval;
 }
 
-
-extern const char *global_mode_option;
-
 /**
  *	video_setup - process command line options
  *	@options: string of options
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 2eac86d46c51..2b8cd045111c 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1703,13 +1703,13 @@ static struct inode_operations proc_self_inode_operations = {
 };
 
 /**
- * proc_pid_unhash -  Unhash /proc/<pid> entry from the dcache.
+ * proc_pid_unhash -  Unhash /proc/@pid entry from the dcache.
  * @p: task that should be flushed.
  *
- * Drops the /proc/<pid> dcache entry from the hash chains.
+ * Drops the /proc/@pid dcache entry from the hash chains.
  *
- * Dropping /proc/<pid> entries and detach_pid must be synchroneous,
- * otherwise e.g. /proc/<pid>/exe might point to the wrong executable,
+ * Dropping /proc/@pid entries and detach_pid must be synchroneous,
+ * otherwise e.g. /proc/@pid/exe might point to the wrong executable,
  * if the pid value is immediately reused. This is enforced by
  * - caller must acquire spin_lock(p->proc_lock)
  * - must be called before detach_pid()
@@ -1741,7 +1741,7 @@ struct dentry *proc_pid_unhash(struct task_struct *p)
 }
 
 /**
- * proc_pid_flush - recover memory used by stale /proc/<pid>/x entries
+ * proc_pid_flush - recover memory used by stale /proc/@pid/x entries
  * @proc_entry: directoy to prune.
  *
  * Shrink the /proc directory that was used by the just killed thread.
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 5df687d940fa..3f825b085c8d 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1053,12 +1053,12 @@ static inline void file_accessed(struct file *file)
 int sync_inode(struct inode *inode, struct writeback_control *wbc);
 
 /**
- * &export_operations - for nfsd to communicate with file systems
- * decode_fh:      decode a file handle fragment and return a &struct dentry
- * encode_fh:      encode a file handle fragment from a dentry
- * get_name:       find the name for a given inode in a given directory
- * get_parent:     find the parent of a given directory
- * get_dentry:     find a dentry for the inode given a file handle sub-fragment
+ * struct export_operations - for nfsd to communicate with file systems
+ * @decode_fh:      decode a file handle fragment and return a &struct dentry
+ * @encode_fh:      encode a file handle fragment from a dentry
+ * @get_name:       find the name for a given inode in a given directory
+ * @get_parent:     find the parent of a given directory
+ * @get_dentry:     find a dentry for the inode given a file handle sub-fragment
  *
  * Description:
  *    The export_operations structure provides a means for nfsd to communicate
diff --git a/include/linux/net.h b/include/linux/net.h
index 7823b3482506..e5914c1f0c4d 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -64,19 +64,19 @@ typedef enum {
 #define SOCK_PASSCRED		3
 
 #ifndef ARCH_HAS_SOCKET_TYPES
-/** sock_type - Socket types
- * 
+/**
+ * enum sock_type - Socket types
+ * @SOCK_STREAM: stream (connection) socket
+ * @SOCK_DGRAM: datagram (conn.less) socket
+ * @SOCK_RAW: raw socket
+ * @SOCK_RDM: reliably-delivered message
+ * @SOCK_SEQPACKET: sequential packet socket
+ * @SOCK_PACKET: linux specific way of getting packets at the dev level.
+ *		  For writing rarp and other similar things on the user level.
+ *
  * When adding some new socket type please
  * grep ARCH_HAS_SOCKET_TYPE include/asm-* /socket.h, at least MIPS
  * overrides this enum for binary compat reasons.
- * 
- * @SOCK_STREAM - stream (connection) socket
- * @SOCK_DGRAM - datagram (conn.less) socket
- * @SOCK_RAW - raw socket
- * @SOCK_RDM - reliably-delivered message
- * @SOCK_SEQPACKET - sequential packet socket 
- * @SOCK_PACKET - linux specific way of getting packets at the dev level.
- *		  For writing rarp and other similar things on the user level.
  */
 enum sock_type {
 	SOCK_STREAM	= 1,
@@ -93,15 +93,15 @@ enum sock_type {
 
 /**
  *  struct socket - general BSD socket
- *  @state - socket state (%SS_CONNECTED, etc)
- *  @flags - socket flags (%SOCK_ASYNC_NOSPACE, etc)
- *  @ops - protocol specific socket operations
- *  @fasync_list - Asynchronous wake up list
- *  @file - File back pointer for gc
- *  @sk - internal networking protocol agnostic socket representation
- *  @wait - wait queue for several uses
- *  @type - socket type (%SOCK_STREAM, etc)
- *  @passcred - credentials (used only in Unix Sockets (aka PF_LOCAL))
+ *  @state: socket state (%SS_CONNECTED, etc)
+ *  @flags: socket flags (%SOCK_ASYNC_NOSPACE, etc)
+ *  @ops: protocol specific socket operations
+ *  @fasync_list: Asynchronous wake up list
+ *  @file: File back pointer for gc
+ *  @sk: internal networking protocol agnostic socket representation
+ *  @wait: wait queue for several uses
+ *  @type: socket type (%SOCK_STREAM, etc)
+ *  @passcred: credentials (used only in Unix Sockets (aka PF_LOCAL))
  */
 struct socket {
 	socket_state		state;
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 22b701819619..c77d745cbd3f 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -968,6 +968,7 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
 		kfree_skb(skb);
 }
 
+#ifndef CONFIG_HAVE_ARCH_DEV_ALLOC_SKB
 /**
  *	__dev_alloc_skb - allocate an skbuff for sending
  *	@length: length to allocate
@@ -980,7 +981,6 @@ static inline void __skb_queue_purge(struct sk_buff_head *list)
  *
  *	%NULL is returned in there is no free memory.
  */
-#ifndef CONFIG_HAVE_ARCH_DEV_ALLOC_SKB
 static inline struct sk_buff *__dev_alloc_skb(unsigned int length,
 					      int gfp_mask)
 {
diff --git a/include/net/sock.h b/include/net/sock.h
index be81cabd0da3..5bc180adfb14 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -90,17 +90,17 @@ do {	spin_lock_init(&((__sk)->sk_lock.slock)); \
 struct sock;
 
 /**
-  *	struct sock_common - minimal network layer representation of sockets
-  *	@skc_family - network address family
-  *	@skc_state - Connection state
-  *	@skc_reuse - %SO_REUSEADDR setting
-  *	@skc_bound_dev_if - bound device index if != 0
-  *	@skc_node - main hash linkage for various protocol lookup tables
-  *	@skc_bind_node - bind hash linkage for various protocol lookup tables
-  *	@skc_refcnt - reference count
-  *
-  *	This is the minimal network layer representation of sockets, the header
-  *	for struct sock and struct tcp_tw_bucket.
+ *	struct sock_common - minimal network layer representation of sockets
+ *	@skc_family: network address family
+ *	@skc_state: Connection state
+ *	@skc_reuse: %SO_REUSEADDR setting
+ *	@skc_bound_dev_if: bound device index if != 0
+ *	@skc_node: main hash linkage for various protocol lookup tables
+ *	@skc_bind_node: bind hash linkage for various protocol lookup tables
+ *	@skc_refcnt: reference count
+ *
+ *	This is the minimal network layer representation of sockets, the header
+ *	for struct sock and struct tcp_tw_bucket.
   */
 struct sock_common {
 	unsigned short		skc_family;
@@ -114,60 +114,60 @@ struct sock_common {
 
 /**
   *	struct sock - network layer representation of sockets
-  *	@__sk_common - shared layout with tcp_tw_bucket
-  *	@sk_shutdown - mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
-  *	@sk_userlocks - %SO_SNDBUF and %SO_RCVBUF settings
-  *	@sk_lock -	synchronizer
-  *	@sk_rcvbuf - size of receive buffer in bytes
-  *	@sk_sleep - sock wait queue
-  *	@sk_dst_cache - destination cache
-  *	@sk_dst_lock - destination cache lock
-  *	@sk_policy - flow policy
-  *	@sk_rmem_alloc - receive queue bytes committed
-  *	@sk_receive_queue - incoming packets
-  *	@sk_wmem_alloc - transmit queue bytes committed
-  *	@sk_write_queue - Packet sending queue
-  *	@sk_omem_alloc - "o" is "option" or "other"
-  *	@sk_wmem_queued - persistent queue size
-  *	@sk_forward_alloc - space allocated forward
-  *	@sk_allocation - allocation mode
-  *	@sk_sndbuf - size of send buffer in bytes
-  *	@sk_flags - %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, %SO_OOBINLINE settings
-  *	@sk_no_check - %SO_NO_CHECK setting, wether or not checkup packets
-  *	@sk_route_caps - route capabilities (e.g. %NETIF_F_TSO)
-  *	@sk_lingertime - %SO_LINGER l_linger setting
-  *	@sk_hashent - hash entry in several tables (e.g. tcp_ehash)
-  *	@sk_backlog - always used with the per-socket spinlock held
-  *	@sk_callback_lock - used with the callbacks in the end of this struct
-  *	@sk_error_queue - rarely used
-  *	@sk_prot - protocol handlers inside a network family
-  *	@sk_err - last error
-  *	@sk_err_soft - errors that don't cause failure but are the cause of a persistent failure not just 'timed out'
-  *	@sk_ack_backlog - current listen backlog
-  *	@sk_max_ack_backlog - listen backlog set in listen()
-  *	@sk_priority - %SO_PRIORITY setting
-  *	@sk_type - socket type (%SOCK_STREAM, etc)
-  *	@sk_protocol - which protocol this socket belongs in this network family
-  *	@sk_peercred - %SO_PEERCRED setting
-  *	@sk_rcvlowat - %SO_RCVLOWAT setting
-  *	@sk_rcvtimeo - %SO_RCVTIMEO setting
-  *	@sk_sndtimeo - %SO_SNDTIMEO setting
-  *	@sk_filter - socket filtering instructions
-  *	@sk_protinfo - private area, net family specific, when not using slab
-  *	@sk_timer - sock cleanup timer
-  *	@sk_stamp - time stamp of last packet received
-  *	@sk_socket - Identd and reporting IO signals
-  *	@sk_user_data - RPC layer private data
-  *	@sk_sndmsg_page - cached page for sendmsg
-  *	@sk_sndmsg_off - cached offset for sendmsg
-  *	@sk_send_head - front of stuff to transmit
-  *	@sk_write_pending - a write to stream socket waits to start
-  *	@sk_state_change - callback to indicate change in the state of the sock
-  *	@sk_data_ready - callback to indicate there is data to be processed
-  *	@sk_write_space - callback to indicate there is bf sending space available
-  *	@sk_error_report - callback to indicate errors (e.g. %MSG_ERRQUEUE)
-  *	@sk_backlog_rcv - callback to process the backlog
-  *	@sk_destruct - called at sock freeing time, i.e. when all refcnt == 0
+  *	@__sk_common: shared layout with tcp_tw_bucket
+  *	@sk_shutdown: mask of %SEND_SHUTDOWN and/or %RCV_SHUTDOWN
+  *	@sk_userlocks: %SO_SNDBUF and %SO_RCVBUF settings
+  *	@sk_lock:	synchronizer
+  *	@sk_rcvbuf: size of receive buffer in bytes
+  *	@sk_sleep: sock wait queue
+  *	@sk_dst_cache: destination cache
+  *	@sk_dst_lock: destination cache lock
+  *	@sk_policy: flow policy
+  *	@sk_rmem_alloc: receive queue bytes committed
+  *	@sk_receive_queue: incoming packets
+  *	@sk_wmem_alloc: transmit queue bytes committed
+  *	@sk_write_queue: Packet sending queue
+  *	@sk_omem_alloc: "o" is "option" or "other"
+  *	@sk_wmem_queued: persistent queue size
+  *	@sk_forward_alloc: space allocated forward
+  *	@sk_allocation: allocation mode
+  *	@sk_sndbuf: size of send buffer in bytes
+  *	@sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE, %SO_OOBINLINE settings
+  *	@sk_no_check: %SO_NO_CHECK setting, wether or not checkup packets
+  *	@sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
+  *	@sk_lingertime: %SO_LINGER l_linger setting
+  *	@sk_hashent: hash entry in several tables (e.g. tcp_ehash)
+  *	@sk_backlog: always used with the per-socket spinlock held
+  *	@sk_callback_lock: used with the callbacks in the end of this struct
+  *	@sk_error_queue: rarely used
+  *	@sk_prot: protocol handlers inside a network family
+  *	@sk_err: last error
+  *	@sk_err_soft: errors that don't cause failure but are the cause of a persistent failure not just 'timed out'
+  *	@sk_ack_backlog: current listen backlog
+  *	@sk_max_ack_backlog: listen backlog set in listen()
+  *	@sk_priority: %SO_PRIORITY setting
+  *	@sk_type: socket type (%SOCK_STREAM, etc)
+  *	@sk_protocol: which protocol this socket belongs in this network family
+  *	@sk_peercred: %SO_PEERCRED setting
+  *	@sk_rcvlowat: %SO_RCVLOWAT setting
+  *	@sk_rcvtimeo: %SO_RCVTIMEO setting
+  *	@sk_sndtimeo: %SO_SNDTIMEO setting
+  *	@sk_filter: socket filtering instructions
+  *	@sk_protinfo: private area, net family specific, when not using slab
+  *	@sk_timer: sock cleanup timer
+  *	@sk_stamp: time stamp of last packet received
+  *	@sk_socket: Identd and reporting IO signals
+  *	@sk_user_data: RPC layer private data
+  *	@sk_sndmsg_page: cached page for sendmsg
+  *	@sk_sndmsg_off: cached offset for sendmsg
+  *	@sk_send_head: front of stuff to transmit
+  *	@sk_write_pending: a write to stream socket waits to start
+  *	@sk_state_change: callback to indicate change in the state of the sock
+  *	@sk_data_ready: callback to indicate there is data to be processed
+  *	@sk_write_space: callback to indicate there is bf sending space available
+  *	@sk_error_report: callback to indicate errors (e.g. %MSG_ERRQUEUE)
+  *	@sk_backlog_rcv: callback to process the backlog
+  *	@sk_destruct: called at sock freeing time, i.e. when all refcnt == 0
  */
 struct sock {
 	/*
@@ -1223,8 +1223,8 @@ sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb)
 
 /**
  * sk_eat_skb - Release a skb if it is no longer needed
- * @sk - socket to eat this skb from
- * @skb - socket buffer to eat
+ * @sk: socket to eat this skb from
+ * @skb: socket buffer to eat
  *
  * This routine must be called with interrupts disabled or with the socket
  * locked so that the sk_buff queue operation is ok.
diff --git a/kernel/exit.c b/kernel/exit.c
index eb8da36e13df..419d9d3c4c48 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -210,7 +210,7 @@ static inline int has_stopped_jobs(int pgrp)
 }
 
 /**
- * reparent_to_init() - Reparent the calling kernel thread to the init task.
+ * reparent_to_init - Reparent the calling kernel thread to the init task.
  *
  * If a kernel thread is launched as a result of a system call, or if
  * it ever exits, it should generally reparent itself to init so that
diff --git a/kernel/power/swsusp.c b/kernel/power/swsusp.c
index ae5bebc3b18f..90b3b68dee3f 100644
--- a/kernel/power/swsusp.c
+++ b/kernel/power/swsusp.c
@@ -1099,7 +1099,7 @@ static struct pbe * swsusp_pagedir_relocate(struct pbe *pblist)
 	return pblist;
 }
 
-/**
+/*
  *	Using bio to read from swap.
  *	This code requires a bit more work than just using buffer heads
  *	but, it is the recommended way for 2.5/2.6.
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 80ce7f2104df..fc1b1064c505 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1355,8 +1355,7 @@ static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zoneli
 #define MAX_NODE_LOAD (num_online_nodes())
 static int __initdata node_load[MAX_NUMNODES];
 /**
- * find_next_best_node - find the next node that should appear in a given
- *    node's fallback list
+ * find_next_best_node - find the next node that should appear in a given node's fallback list
  * @node: node whose fallback list we're appending
  * @used_node_mask: nodemask_t of already used nodes
  *
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index c6182f6f1305..2bd83e5c2bbf 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -475,6 +475,10 @@ void *vmalloc(unsigned long size)
 
 EXPORT_SYMBOL(vmalloc);
 
+#ifndef PAGE_KERNEL_EXEC
+# define PAGE_KERNEL_EXEC PAGE_KERNEL
+#endif
+
 /**
  *	vmalloc_exec  -  allocate virtually contiguous, executable memory
  *
@@ -488,10 +492,6 @@ EXPORT_SYMBOL(vmalloc);
  *	use __vmalloc() instead.
  */
 
-#ifndef PAGE_KERNEL_EXEC
-# define PAGE_KERNEL_EXEC PAGE_KERNEL
-#endif
-
 void *vmalloc_exec(unsigned long size)
 {
 	return __vmalloc(size, GFP_KERNEL | __GFP_HIGHMEM, PAGE_KERNEL_EXEC);
diff --git a/net/core/datagram.c b/net/core/datagram.c
index d1bfd279cc1a..27c5cd942820 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -115,10 +115,10 @@ out_noerr:
 
 /**
  *	skb_recv_datagram - Receive a datagram skbuff
- *	@sk - socket
- *	@flags - MSG_ flags
- *	@noblock - blocking operation?
- *	@err - error code returned
+ *	@sk: socket
+ *	@flags: MSG_ flags
+ *	@noblock: blocking operation?
+ *	@err: error code returned
  *
  *	Get a datagram skbuff, understands the peeking, nonblocking wakeups
  *	and possible races. This replaces identical code in packet, raw and
@@ -201,10 +201,10 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
 
 /**
  *	skb_copy_datagram_iovec - Copy a datagram to an iovec.
- *	@skb - buffer to copy
- *	@offset - offset in the buffer to start copying from
- *	@iovec - io vector to copy to
- *	@len - amount of data to copy from buffer to iovec
+ *	@skb: buffer to copy
+ *	@offset: offset in the buffer to start copying from
+ *	@iovec: io vector to copy to
+ *	@len: amount of data to copy from buffer to iovec
  *
  *	Note: the iovec is modified during the copy.
  */
@@ -377,9 +377,9 @@ fault:
 
 /**
  *	skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec.
- *	@skb - skbuff
- *	@hlen - hardware length
- *	@iovec - io vector
+ *	@skb: skbuff
+ *	@hlen: hardware length
+ *	@iovec: io vector
  * 
  *	Caller _must_ check that skb will fit to this iovec.
  *
@@ -425,9 +425,9 @@ fault:
 
 /**
  * 	datagram_poll - generic datagram poll
- *	@file - file struct
- *	@sock - socket
- *	@wait - poll table
+ *	@file: file struct
+ *	@sock: socket
+ *	@wait: poll table
  *
  *	Datagram poll: Again totally generic. This also handles
  *	sequenced packet sockets providing the socket receive queue
diff --git a/net/core/sock.c b/net/core/sock.c
index 5c2f72fa1013..98171ddd7e7d 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -616,10 +616,10 @@ lenout:
 
 /**
  *	sk_alloc - All socket objects are allocated here
- *	@family - protocol family
- *	@priority - for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
- *	@prot - struct proto associated with this new sock instance
- *	@zero_it - if we should zero the newly allocated sock
+ *	@family: protocol family
+ *	@priority: for allocation (%GFP_KERNEL, %GFP_ATOMIC, etc)
+ *	@prot: struct proto associated with this new sock instance
+ *	@zero_it: if we should zero the newly allocated sock
  */
 struct sock *sk_alloc(int family, int priority, struct proto *prot, int zero_it)
 {
@@ -970,8 +970,8 @@ static void __release_sock(struct sock *sk)
 
 /**
  * sk_wait_data - wait for data to arrive at sk_receive_queue
- * sk - sock to wait on
- * timeo - for how long
+ * @sk:    sock to wait on
+ * @timeo: for how long
  *
  * Now socket state including sk->sk_err is changed only under lock,
  * hence we may omit checks after joining wait queue.
diff --git a/net/core/stream.c b/net/core/stream.c
index 1e27a57b5a97..ac9edfdf8742 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -21,7 +21,7 @@
 
 /**
  * sk_stream_write_space - stream socket write_space callback.
- * sk - socket
+ * @sk: socket
  *
  * FIXME: write proper description
  */
@@ -43,8 +43,8 @@ EXPORT_SYMBOL(sk_stream_write_space);
 
 /**
  * sk_stream_wait_connect - Wait for a socket to get into the connected state
- * @sk - sock to wait on
- * @timeo_p - for how long to wait
+ * @sk: sock to wait on
+ * @timeo_p: for how long to wait
  *
  * Must be called with the socket locked.
  */
@@ -79,7 +79,7 @@ EXPORT_SYMBOL(sk_stream_wait_connect);
 
 /**
  * sk_stream_closing - Return 1 if we still have things to send in our buffers.
- * @sk - socket to verify
+ * @sk: socket to verify
  */
 static inline int sk_stream_closing(struct sock *sk)
 {
@@ -107,8 +107,8 @@ EXPORT_SYMBOL(sk_stream_wait_close);
 
 /**
  * sk_stream_wait_memory - Wait for more memory for a socket
- * @sk - socket to wait for memory
- * @timeo_p - for how long
+ * @sk: socket to wait for memory
+ * @timeo_p: for how long
  */
 int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
 {
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 4484931018eb..67b9f035ba86 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -46,9 +46,9 @@ xdr_decode_netobj(u32 *p, struct xdr_netobj *obj)
 
 /**
  * xdr_encode_opaque_fixed - Encode fixed length opaque data
- * @p - pointer to current position in XDR buffer.
- * @ptr - pointer to data to encode (or NULL)
- * @nbytes - size of data.
+ * @p: pointer to current position in XDR buffer.
+ * @ptr: pointer to data to encode (or NULL)
+ * @nbytes: size of data.
  *
  * Copy the array of data of length nbytes at ptr to the XDR buffer
  * at position p, then align to the next 32-bit boundary by padding
@@ -76,9 +76,9 @@ EXPORT_SYMBOL(xdr_encode_opaque_fixed);
 
 /**
  * xdr_encode_opaque - Encode variable length opaque data
- * @p - pointer to current position in XDR buffer.
- * @ptr - pointer to data to encode (or NULL)
- * @nbytes - size of data.
+ * @p: pointer to current position in XDR buffer.
+ * @ptr: pointer to data to encode (or NULL)
+ * @nbytes: size of data.
  *
  * Returns the updated current XDR buffer position
  */
diff --git a/scripts/kernel-doc b/scripts/kernel-doc
index 8b1dab63f11c..3cc333070e59 100755
--- a/scripts/kernel-doc
+++ b/scripts/kernel-doc
@@ -1465,6 +1465,8 @@ sub dump_function($$) {
 
     $prototype =~ s/^static +//;
     $prototype =~ s/^extern +//;
+    $prototype =~ s/^fastcall +//;
+    $prototype =~ s/^asmlinkage +//;
     $prototype =~ s/^inline +//;
     $prototype =~ s/^__inline__ +//;
     $prototype =~ s/^#define +//; #ak added
-- 
cgit v1.2.3


From 67be2dd1bace0ec7ce2dbc1bba3f8df3d7be597e Mon Sep 17 00:00:00 2001
From: Martin Waitz <tali@admingilde.org>
Date: Sun, 1 May 2005 08:59:26 -0700
Subject: [PATCH] DocBook: fix some descriptions

Some KernelDoc descriptions are updated to match the current code.
No code changes.

Signed-off-by: Martin Waitz <tali@admingilde.org>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 drivers/acpi/scan.c     |   4 +-
 drivers/base/platform.c |   4 +-
 drivers/pci/hotplug.c   |   4 ++
 drivers/pci/rom.c       |  14 +++----
 drivers/pnp/manager.c   |   2 +-
 fs/bio.c                |   2 +
 fs/buffer.c             |  11 +++---
 fs/fs-writeback.c       |   4 +-
 fs/mpage.c              |  92 ++++++++++++++++++++++----------------------
 fs/proc/base.c          |   2 +-
 fs/seq_file.c           |   9 ++++-
 fs/sysfs/file.c         |   4 +-
 include/linux/fs.h      | 100 +++++++++++++++++++++++++-----------------------
 include/linux/skbuff.h  |   5 ++-
 include/net/sock.h      |   1 +
 kernel/sched.c          |   3 +-
 kernel/sysctl.c         |   2 +
 lib/kobject.c           |   3 +-
 mm/filemap.c            |  17 ++++----
 mm/page-writeback.c     |   6 +--
 mm/truncate.c           |   4 +-
 net/core/datagram.c     |   4 +-
 22 files changed, 160 insertions(+), 137 deletions(-)

(limited to 'fs')

diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c
index e7ca06626566..119c94093a13 100644
--- a/drivers/acpi/scan.c
+++ b/drivers/acpi/scan.c
@@ -379,8 +379,8 @@ ACPI_DEVICE_ATTR(eject, 0200, NULL, acpi_eject_store);
 
 /**
  * setup_sys_fs_device_files - sets up the device files under device namespace
- * @@dev:	acpi_device object
- * @@func:	function pointer to create or destroy the device file
+ * @dev:	acpi_device object
+ * @func:	function pointer to create or destroy the device file
  */
 static void
 setup_sys_fs_device_files (
diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index cd6453905a9b..3a5f4c991797 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -115,7 +115,7 @@ int platform_add_devices(struct platform_device **devs, int num)
 
 /**
  *	platform_device_register - add a platform-level device
- *	@dev:	platform device we're adding
+ *	@pdev:	platform device we're adding
  *
  */
 int platform_device_register(struct platform_device * pdev)
@@ -174,7 +174,7 @@ int platform_device_register(struct platform_device * pdev)
 
 /**
  *	platform_device_unregister - remove a platform-level device
- *	@dev:	platform device we're removing
+ *	@pdev:	platform device we're removing
  *
  *	Note that this function will also release all memory- and port-based
  *	resources owned by the device (@dev->resource).
diff --git a/drivers/pci/hotplug.c b/drivers/pci/hotplug.c
index d471b3ea5d12..021d0f76bc4c 100644
--- a/drivers/pci/hotplug.c
+++ b/drivers/pci/hotplug.c
@@ -120,6 +120,10 @@ static int pci_visit_bridge (struct pci_visit * fn,
 
 /**
  * pci_visit_dev - scans the pci buses.
+ * @fn: callback functions that are called while visiting
+ * @wrapped_dev: the device to scan
+ * @wrapped_parent: the bus where @wrapped_dev is connected to
+ *
  * Every bus and every function is presented to a custom
  * function that can act upon it.
  */
diff --git a/drivers/pci/rom.c b/drivers/pci/rom.c
index 3e64ff64b38c..838575e3fac6 100644
--- a/drivers/pci/rom.c
+++ b/drivers/pci/rom.c
@@ -14,7 +14,7 @@
 
 /**
  * pci_enable_rom - enable ROM decoding for a PCI device
- * @dev: PCI device to enable
+ * @pdev: PCI device to enable
  *
  * Enable ROM decoding on @dev.  This involves simply turning on the last
  * bit of the PCI ROM BAR.  Note that some cards may share address decoders
@@ -32,7 +32,7 @@ static void pci_enable_rom(struct pci_dev *pdev)
 
 /**
  * pci_disable_rom - disable ROM decoding for a PCI device
- * @dev: PCI device to disable
+ * @pdev: PCI device to disable
  *
  * Disable ROM decoding on a PCI device by turning off the last bit in the
  * ROM BAR.
@@ -47,7 +47,7 @@ static void pci_disable_rom(struct pci_dev *pdev)
 
 /**
  * pci_map_rom - map a PCI ROM to kernel space
- * @dev: pointer to pci device struct
+ * @pdev: pointer to pci device struct
  * @size: pointer to receive size of pci window over ROM
  * @return: kernel virtual pointer to image of ROM
  *
@@ -132,7 +132,7 @@ void __iomem *pci_map_rom(struct pci_dev *pdev, size_t *size)
 
 /**
  * pci_map_rom_copy - map a PCI ROM to kernel space, create a copy
- * @dev: pointer to pci device struct
+ * @pdev: pointer to pci device struct
  * @size: pointer to receive size of pci window over ROM
  * @return: kernel virtual pointer to image of ROM
  *
@@ -166,7 +166,7 @@ void __iomem *pci_map_rom_copy(struct pci_dev *pdev, size_t *size)
 
 /**
  * pci_unmap_rom - unmap the ROM from kernel space
- * @dev: pointer to pci device struct
+ * @pdev: pointer to pci device struct
  * @rom: virtual address of the previous mapping
  *
  * Remove a mapping of a previously mapped ROM
@@ -187,7 +187,7 @@ void pci_unmap_rom(struct pci_dev *pdev, void __iomem *rom)
 
 /**
  * pci_remove_rom - disable the ROM and remove its sysfs attribute
- * @dev: pointer to pci device struct
+ * @pdev: pointer to pci device struct
  *
  * Remove the rom file in sysfs and disable ROM decoding.
  */
@@ -206,7 +206,7 @@ void pci_remove_rom(struct pci_dev *pdev)
 /**
  * pci_cleanup_rom - internal routine for freeing the ROM copy created
  * by pci_map_rom_copy called from remove.c
- * @dev: pointer to pci device struct
+ * @pdev: pointer to pci device struct
  *
  * Free the copied ROM if we allocated one.
  */
diff --git a/drivers/pnp/manager.c b/drivers/pnp/manager.c
index 639e04253482..65ecef738537 100644
--- a/drivers/pnp/manager.c
+++ b/drivers/pnp/manager.c
@@ -253,7 +253,7 @@ void pnp_init_resource_table(struct pnp_resource_table *table)
 
 /**
  * pnp_clean_resources - clears resources that were not manually set
- * @res - the resources to clean
+ * @res: the resources to clean
  *
  */
 static void pnp_clean_resource_table(struct pnp_resource_table * res)
diff --git a/fs/bio.c b/fs/bio.c
index e5349e834563..3a1472acc361 100644
--- a/fs/bio.c
+++ b/fs/bio.c
@@ -140,6 +140,7 @@ inline void bio_init(struct bio *bio)
  * bio_alloc_bioset - allocate a bio for I/O
  * @gfp_mask:   the GFP_ mask given to the slab allocator
  * @nr_iovecs:	number of iovecs to pre-allocate
+ * @bs:		the bio_set to allocate from
  *
  * Description:
  *   bio_alloc_bioset will first try it's on mempool to satisfy the allocation.
@@ -629,6 +630,7 @@ out:
 
 /**
  *	bio_map_user	-	map user address into bio
+ *	@q: the request_queue_t for the bio
  *	@bdev: destination block device
  *	@uaddr: start of user address
  *	@len: length in bytes
diff --git a/fs/buffer.c b/fs/buffer.c
index 792cbacbbf41..5f525b3c6d9f 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -774,15 +774,14 @@ repeat:
 /**
  * sync_mapping_buffers - write out and wait upon a mapping's "associated"
  *                        buffers
- * @buffer_mapping - the mapping which backs the buffers' data
- * @mapping - the mapping which wants those buffers written
+ * @mapping: the mapping which wants those buffers written
  *
  * Starts I/O against the buffers at mapping->private_list, and waits upon
  * that I/O.
  *
- * Basically, this is a convenience function for fsync().  @buffer_mapping is
- * the blockdev which "owns" the buffers and @mapping is a file or directory
- * which needs those buffers to be written for a successful fsync().
+ * Basically, this is a convenience function for fsync().
+ * @mapping is a file or directory which needs those buffers to be written for
+ * a successful fsync().
  */
 int sync_mapping_buffers(struct address_space *mapping)
 {
@@ -1263,6 +1262,7 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size)
 
 /**
  * mark_buffer_dirty - mark a buffer_head as needing writeout
+ * @bh: the buffer_head to mark dirty
  *
  * mark_buffer_dirty() will set the dirty bit against the buffer, then set its
  * backing page dirty, then tag the page as dirty in its address_space's radix
@@ -1501,6 +1501,7 @@ EXPORT_SYMBOL(__breadahead);
 
 /**
  *  __bread() - reads a specified block and returns the bh
+ *  @bdev: the block_device to read from
  *  @block: number of block
  *  @size: size (in bytes) to read
  * 
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index d6efb36cab2a..8e050fa58218 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -512,7 +512,8 @@ restart:
 }
 
 /**
- * sync_inodes
+ * sync_inodes - writes all inodes to disk
+ * @wait: wait for completion
  *
  * sync_inodes() goes through each super block's dirty inode list, writes the
  * inodes out, waits on the writeout and puts the inodes back on the normal
@@ -604,6 +605,7 @@ EXPORT_SYMBOL(sync_inode);
 /**
  * generic_osync_inode - flush all dirty data for a given inode to disk
  * @inode: inode to write
+ * @mapping: the address_space that should be flushed
  * @what:  what to write and wait upon
  *
  * This can be called by file_write functions for files which have the
diff --git a/fs/mpage.c b/fs/mpage.c
index 3923facf94eb..32c7c8fcfce7 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -160,52 +160,6 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
 	} while (page_bh != head);
 }
 
-/**
- * mpage_readpages - populate an address space with some pages, and
- *                       start reads against them.
- *
- * @mapping: the address_space
- * @pages: The address of a list_head which contains the target pages.  These
- *   pages have their ->index populated and are otherwise uninitialised.
- *
- *   The page at @pages->prev has the lowest file offset, and reads should be
- *   issued in @pages->prev to @pages->next order.
- *
- * @nr_pages: The number of pages at *@pages
- * @get_block: The filesystem's block mapper function.
- *
- * This function walks the pages and the blocks within each page, building and
- * emitting large BIOs.
- *
- * If anything unusual happens, such as:
- *
- * - encountering a page which has buffers
- * - encountering a page which has a non-hole after a hole
- * - encountering a page with non-contiguous blocks
- *
- * then this code just gives up and calls the buffer_head-based read function.
- * It does handle a page which has holes at the end - that is a common case:
- * the end-of-file on blocksize < PAGE_CACHE_SIZE setups.
- *
- * BH_Boundary explanation:
- *
- * There is a problem.  The mpage read code assembles several pages, gets all
- * their disk mappings, and then submits them all.  That's fine, but obtaining
- * the disk mappings may require I/O.  Reads of indirect blocks, for example.
- *
- * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be
- * submitted in the following order:
- * 	12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16
- * because the indirect block has to be read to get the mappings of blocks
- * 13,14,15,16.  Obviously, this impacts performance.
- * 
- * So what we do it to allow the filesystem's get_block() function to set
- * BH_Boundary when it maps block 11.  BH_Boundary says: mapping of the block
- * after this one will require I/O against a block which is probably close to
- * this one.  So you should push what I/O you have currently accumulated.
- *
- * This all causes the disk requests to be issued in the correct order.
- */
 static struct bio *
 do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
 			sector_t *last_block_in_bio, get_block_t get_block)
@@ -320,6 +274,52 @@ confused:
 	goto out;
 }
 
+/**
+ * mpage_readpages - populate an address space with some pages, and
+ *                       start reads against them.
+ *
+ * @mapping: the address_space
+ * @pages: The address of a list_head which contains the target pages.  These
+ *   pages have their ->index populated and are otherwise uninitialised.
+ *
+ *   The page at @pages->prev has the lowest file offset, and reads should be
+ *   issued in @pages->prev to @pages->next order.
+ *
+ * @nr_pages: The number of pages at *@pages
+ * @get_block: The filesystem's block mapper function.
+ *
+ * This function walks the pages and the blocks within each page, building and
+ * emitting large BIOs.
+ *
+ * If anything unusual happens, such as:
+ *
+ * - encountering a page which has buffers
+ * - encountering a page which has a non-hole after a hole
+ * - encountering a page with non-contiguous blocks
+ *
+ * then this code just gives up and calls the buffer_head-based read function.
+ * It does handle a page which has holes at the end - that is a common case:
+ * the end-of-file on blocksize < PAGE_CACHE_SIZE setups.
+ *
+ * BH_Boundary explanation:
+ *
+ * There is a problem.  The mpage read code assembles several pages, gets all
+ * their disk mappings, and then submits them all.  That's fine, but obtaining
+ * the disk mappings may require I/O.  Reads of indirect blocks, for example.
+ *
+ * So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be
+ * submitted in the following order:
+ * 	12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16
+ * because the indirect block has to be read to get the mappings of blocks
+ * 13,14,15,16.  Obviously, this impacts performance.
+ *
+ * So what we do it to allow the filesystem's get_block() function to set
+ * BH_Boundary when it maps block 11.  BH_Boundary says: mapping of the block
+ * after this one will require I/O against a block which is probably close to
+ * this one.  So you should push what I/O you have currently accumulated.
+ *
+ * This all causes the disk requests to be issued in the correct order.
+ */
 int
 mpage_readpages(struct address_space *mapping, struct list_head *pages,
 				unsigned nr_pages, get_block_t get_block)
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 2b8cd045111c..07cafdf74ef2 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1742,7 +1742,7 @@ struct dentry *proc_pid_unhash(struct task_struct *p)
 
 /**
  * proc_pid_flush - recover memory used by stale /proc/@pid/x entries
- * @proc_entry: directoy to prune.
+ * @proc_dentry: directoy to prune.
  *
  * Shrink the /proc directory that was used by the just killed thread.
  */
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 650c43ba86c4..38ef913767ff 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -51,7 +51,10 @@ EXPORT_SYMBOL(seq_open);
 
 /**
  *	seq_read -	->read() method for sequential files.
- *	@file, @buf, @size, @ppos: see file_operations method
+ *	@file: the file to read from
+ *	@buf: the buffer to read to
+ *	@size: the maximum number of bytes to read
+ *	@ppos: the current position in the file
  *
  *	Ready-made ->f_op->read()
  */
@@ -219,7 +222,9 @@ Eoverflow:
 
 /**
  *	seq_lseek -	->llseek() method for sequential files.
- *	@file, @offset, @origin: see file_operations method
+ *	@file: the file in question
+ *	@offset: new position
+ *	@origin: 0 for absolute, 1 for relative position
  *
  *	Ready-made ->f_op->llseek()
  */
diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
index da25aeb0e062..364208071e17 100644
--- a/fs/sysfs/file.c
+++ b/fs/sysfs/file.c
@@ -96,7 +96,7 @@ static int fill_read_buffer(struct dentry * dentry, struct sysfs_buffer * buffer
 /**
  *	flush_read_buffer - push buffer to userspace.
  *	@buffer:	data buffer for file.
- *	@userbuf:	user-passed buffer.
+ *	@buf:		user-passed buffer.
  *	@count:		number of bytes requested.
  *	@ppos:		file position.
  *
@@ -164,7 +164,7 @@ out:
 /**
  *	fill_write_buffer - copy buffer from userspace.
  *	@buffer:	data buffer for file.
- *	@userbuf:	data from user.
+ *	@buf:		data from user.
  *	@count:		number of bytes in @userbuf.
  *
  *	Allocate @buffer->page if it hasn't been already, then
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 3f825b085c8d..4edba067a717 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1065,71 +1065,75 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc);
  *    with a particular exported file system  - particularly enabling nfsd and
  *    the filesystem to co-operate when dealing with file handles.
  *
- *    export_operations contains two basic operation for dealing with file handles,
- *    decode_fh() and encode_fh(), and allows for some other operations to be defined
- *    which standard helper routines use to get specific information from the
- *    filesystem.
+ *    export_operations contains two basic operation for dealing with file
+ *    handles, decode_fh() and encode_fh(), and allows for some other
+ *    operations to be defined which standard helper routines use to get
+ *    specific information from the filesystem.
  *
  *    nfsd encodes information use to determine which filesystem a filehandle
- *    applies to in the initial part of the file handle.  The remainder, termed a
- *    file handle fragment, is controlled completely by the filesystem.
- *    The standard helper routines assume that this fragment will contain one or two
- *    sub-fragments, one which identifies the file, and one which may be used to
- *    identify the (a) directory containing the file.
+ *    applies to in the initial part of the file handle.  The remainder, termed
+ *    a file handle fragment, is controlled completely by the filesystem.  The
+ *    standard helper routines assume that this fragment will contain one or
+ *    two sub-fragments, one which identifies the file, and one which may be
+ *    used to identify the (a) directory containing the file.
  *
  *    In some situations, nfsd needs to get a dentry which is connected into a
- *    specific part of the file tree.  To allow for this, it passes the function
- *    acceptable() together with a @context which can be used to see if the dentry
- *    is acceptable.  As there can be multiple dentrys for a given file, the filesystem
- *    should check each one for acceptability before looking for the next.  As soon
- *    as an acceptable one is found, it should be returned.
+ *    specific part of the file tree.  To allow for this, it passes the
+ *    function acceptable() together with a @context which can be used to see
+ *    if the dentry is acceptable.  As there can be multiple dentrys for a
+ *    given file, the filesystem should check each one for acceptability before
+ *    looking for the next.  As soon as an acceptable one is found, it should
+ *    be returned.
  *
  * decode_fh:
- *    @decode_fh is given a &struct super_block (@sb), a file handle fragment (@fh, @fh_len)
- *    and an acceptability testing function (@acceptable, @context).  It should return
- *    a &struct dentry which refers to the same file that the file handle fragment refers
- *    to,  and which passes the acceptability test.  If it cannot, it should return
- *    a %NULL pointer if the file was found but no acceptable &dentries were available, or
- *    a %ERR_PTR error code indicating why it couldn't be found (e.g. %ENOENT or %ENOMEM).
+ *    @decode_fh is given a &struct super_block (@sb), a file handle fragment
+ *    (@fh, @fh_len) and an acceptability testing function (@acceptable,
+ *    @context).  It should return a &struct dentry which refers to the same
+ *    file that the file handle fragment refers to,  and which passes the
+ *    acceptability test.  If it cannot, it should return a %NULL pointer if
+ *    the file was found but no acceptable &dentries were available, or a
+ *    %ERR_PTR error code indicating why it couldn't be found (e.g. %ENOENT or
+ *    %ENOMEM).
  *
  * encode_fh:
- *    @encode_fh should store in the file handle fragment @fh (using at most @max_len bytes)
- *    information that can be used by @decode_fh to recover the file refered to by the
- *    &struct dentry @de.  If the @connectable flag is set, the encode_fh() should store
- *    sufficient information so that a good attempt can be made to find not only
- *    the file but also it's place in the filesystem.   This typically means storing
- *    a reference to de->d_parent in the filehandle fragment.
- *    encode_fh() should return the number of bytes stored or a negative error code
- *    such as %-ENOSPC
+ *    @encode_fh should store in the file handle fragment @fh (using at most
+ *    @max_len bytes) information that can be used by @decode_fh to recover the
+ *    file refered to by the &struct dentry @de.  If the @connectable flag is
+ *    set, the encode_fh() should store sufficient information so that a good
+ *    attempt can be made to find not only the file but also it's place in the
+ *    filesystem.   This typically means storing a reference to de->d_parent in
+ *    the filehandle fragment.  encode_fh() should return the number of bytes
+ *    stored or a negative error code such as %-ENOSPC
  *
  * get_name:
- *    @get_name should find a name for the given @child in the given @parent directory.
- *    The name should be stored in the @name (with the understanding that it is already
- *    pointing to a a %NAME_MAX+1 sized buffer.   get_name() should return %0 on success,
- *    a negative error code or error.
- *    @get_name will be called without @parent->i_sem held.
+ *    @get_name should find a name for the given @child in the given @parent
+ *    directory.  The name should be stored in the @name (with the
+ *    understanding that it is already pointing to a a %NAME_MAX+1 sized
+ *    buffer.   get_name() should return %0 on success, a negative error code
+ *    or error.  @get_name will be called without @parent->i_sem held.
  *
  * get_parent:
- *    @get_parent should find the parent directory for the given @child which is also
- *    a directory.  In the event that it cannot be found, or storage space cannot be
- *    allocated, a %ERR_PTR should be returned.
+ *    @get_parent should find the parent directory for the given @child which
+ *    is also a directory.  In the event that it cannot be found, or storage
+ *    space cannot be allocated, a %ERR_PTR should be returned.
  *
  * get_dentry:
- *    Given a &super_block (@sb) and a pointer to a file-system specific inode identifier,
- *    possibly an inode number, (@inump) get_dentry() should find the identified inode and
- *    return a dentry for that inode.
- *    Any suitable dentry can be returned including, if necessary, a new dentry created
- *    with d_alloc_root.  The caller can then find any other extant dentrys by following the
- *    d_alias links.  If a new dentry was created using d_alloc_root, DCACHE_NFSD_DISCONNECTED
- *    should be set, and the dentry should be d_rehash()ed.
+ *    Given a &super_block (@sb) and a pointer to a file-system specific inode
+ *    identifier, possibly an inode number, (@inump) get_dentry() should find
+ *    the identified inode and return a dentry for that inode.  Any suitable
+ *    dentry can be returned including, if necessary, a new dentry created with
+ *    d_alloc_root.  The caller can then find any other extant dentrys by
+ *    following the d_alias links.  If a new dentry was created using
+ *    d_alloc_root, DCACHE_NFSD_DISCONNECTED should be set, and the dentry
+ *    should be d_rehash()ed.
  *
- *    If the inode cannot be found, either a %NULL pointer or an %ERR_PTR code can be returned.
- *    The @inump will be whatever was passed to nfsd_find_fh_dentry() in either the
- *    @obj or @parent parameters.
+ *    If the inode cannot be found, either a %NULL pointer or an %ERR_PTR code
+ *    can be returned.  The @inump will be whatever was passed to
+ *    nfsd_find_fh_dentry() in either the @obj or @parent parameters.
  *
  * Locking rules:
- *  get_parent is called with child->d_inode->i_sem down
- *  get_name is not (which is possibly inconsistent)
+ *    get_parent is called with child->d_inode->i_sem down
+ *    get_name is not (which is possibly inconsistent)
  */
 
 struct export_operations {
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index c77d745cbd3f..cc04f5cd2286 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -167,13 +167,14 @@ struct skb_shared_info {
  *	@h: Transport layer header
  *	@nh: Network layer header
  *	@mac: Link layer header
- *	@dst: FIXME: Describe this field
+ *	@dst: destination entry
+ *	@sp: the security path, used for xfrm
  *	@cb: Control buffer. Free for use by every layer. Put private vars here
  *	@len: Length of actual data
  *	@data_len: Data length
  *	@mac_len: Length of link layer header
  *	@csum: Checksum
- *	@__unused: Dead field, may be reused
+ *	@local_df: allow local fragmentation
  *	@cloned: Head may be cloned (check refcnt to be sure)
  *	@nohdr: Payload reference only, must not modify header
  *	@pkt_type: Packet class
diff --git a/include/net/sock.h b/include/net/sock.h
index 5bc180adfb14..cc4c9190b7fd 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -161,6 +161,7 @@ struct sock_common {
   *	@sk_sndmsg_page: cached page for sendmsg
   *	@sk_sndmsg_off: cached offset for sendmsg
   *	@sk_send_head: front of stuff to transmit
+  *	@sk_security: used by security modules
   *	@sk_write_pending: a write to stream socket waits to start
   *	@sk_state_change: callback to indicate change in the state of the sock
   *	@sk_data_ready: callback to indicate there is data to be processed
diff --git a/kernel/sched.c b/kernel/sched.c
index 5dadcc6df7dd..0dc3158667a2 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2906,6 +2906,7 @@ static void __wake_up_common(wait_queue_head_t *q, unsigned int mode,
  * @q: the waitqueue
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
+ * @key: is directly passed to the wakeup function
  */
 void fastcall __wake_up(wait_queue_head_t *q, unsigned int mode,
 				int nr_exclusive, void *key)
@@ -2928,7 +2929,7 @@ void fastcall __wake_up_locked(wait_queue_head_t *q, unsigned int mode)
 }
 
 /**
- * __wake_up - sync- wake up threads blocked on a waitqueue.
+ * __wake_up_sync - wake up threads blocked on a waitqueue.
  * @q: the waitqueue
  * @mode: which threads
  * @nr_exclusive: how many wake-one or wake-many threads to wake up
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 79dbd93bd697..701d12c63068 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1991,6 +1991,8 @@ int proc_dointvec_userhz_jiffies(ctl_table *table, int write, struct file *filp,
  * @filp: the file structure
  * @buffer: the user buffer
  * @lenp: the size of the user buffer
+ * @ppos: file position
+ * @ppos: the current position in the file
  *
  * Reads/writes up to table->maxlen/sizeof(unsigned int) integer
  * values from/to the user buffer, treated as an ASCII string. 
diff --git a/lib/kobject.c b/lib/kobject.c
index 5df8441c44e7..94048826624c 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -216,13 +216,12 @@ int kobject_register(struct kobject * kobj)
 /**
  *	kobject_set_name - Set the name of an object
  *	@kobj:	object.
- *	@name:	name. 
+ *	@fmt:	format string used to build the name
  *
  *	If strlen(name) >= KOBJ_NAME_LEN, then use a dynamically allocated
  *	string that @kobj->k_name points to. Otherwise, use the static 
  *	@kobj->name array.
  */
-
 int kobject_set_name(struct kobject * kobj, const char * fmt, ...)
 {
 	int error = 0;
diff --git a/mm/filemap.c b/mm/filemap.c
index c085af2332d8..d5fdae2eb183 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -169,9 +169,10 @@ static int sync_page(void *word)
 /**
  * filemap_fdatawrite_range - start writeback against all of a mapping's
  * dirty pages that lie within the byte offsets <start, end>
- * @mapping: address space structure to write
- * @start: offset in bytes where the range starts
- * @end : offset in bytes where the range ends
+ * @mapping:	address space structure to write
+ * @start:	offset in bytes where the range starts
+ * @end:	offset in bytes where the range ends
+ * @sync_mode:	enable synchronous operation
  *
  * If sync_mode is WB_SYNC_ALL then this is a "data integrity" operation, as
  * opposed to a regular memory * cleansing writeback.  The difference between
@@ -535,8 +536,8 @@ EXPORT_SYMBOL(find_trylock_page);
 /**
  * find_lock_page - locate, pin and lock a pagecache page
  *
- * @mapping - the address_space to search
- * @offset - the page index
+ * @mapping: the address_space to search
+ * @offset: the page index
  *
  * Locates the desired pagecache page, locks it, increments its reference
  * count and returns its address.
@@ -575,9 +576,9 @@ EXPORT_SYMBOL(find_lock_page);
 /**
  * find_or_create_page - locate or add a pagecache page
  *
- * @mapping - the page's address_space
- * @index - the page's index into the mapping
- * @gfp_mask - page allocation mode
+ * @mapping: the page's address_space
+ * @index: the page's index into the mapping
+ * @gfp_mask: page allocation mode
  *
  * Locates a page in the pagecache.  If the page is not present, a new page
  * is allocated using @gfp_mask and is added to the pagecache and to the VM's
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 6ddd6a29c73b..613b99a55917 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -255,7 +255,7 @@ static void balance_dirty_pages(struct address_space *mapping)
 
 /**
  * balance_dirty_pages_ratelimited - balance dirty memory state
- * @mapping - address_space which was dirtied
+ * @mapping: address_space which was dirtied
  *
  * Processes which are dirtying memory should call in here once for each page
  * which was newly dirtied.  The function will periodically check the system's
@@ -562,8 +562,8 @@ int do_writepages(struct address_space *mapping, struct writeback_control *wbc)
 /**
  * write_one_page - write out a single page and optionally wait on I/O
  *
- * @page - the page to write
- * @wait - if true, wait on writeout
+ * @page: the page to write
+ * @wait: if true, wait on writeout
  *
  * The page must be locked by the caller and will be unlocked upon return.
  *
diff --git a/mm/truncate.c b/mm/truncate.c
index c9a63f0b69a2..60c8764bfac2 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -242,7 +242,7 @@ EXPORT_SYMBOL(invalidate_inode_pages);
 
 /**
  * invalidate_inode_pages2_range - remove range of pages from an address_space
- * @mapping - the address_space
+ * @mapping: the address_space
  * @start: the page offset 'from' which to invalidate
  * @end: the page offset 'to' which to invalidate (inclusive)
  *
@@ -322,7 +322,7 @@ EXPORT_SYMBOL_GPL(invalidate_inode_pages2_range);
 
 /**
  * invalidate_inode_pages2 - remove all pages from an address_space
- * @mapping - the address_space
+ * @mapping: the address_space
  *
  * Any pages which are found to be mapped into pagetables are unmapped prior to
  * invalidation.
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 27c5cd942820..fcee054b6f75 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -203,7 +203,7 @@ void skb_free_datagram(struct sock *sk, struct sk_buff *skb)
  *	skb_copy_datagram_iovec - Copy a datagram to an iovec.
  *	@skb: buffer to copy
  *	@offset: offset in the buffer to start copying from
- *	@iovec: io vector to copy to
+ *	@to: io vector to copy to
  *	@len: amount of data to copy from buffer to iovec
  *
  *	Note: the iovec is modified during the copy.
@@ -379,7 +379,7 @@ fault:
  *	skb_copy_and_csum_datagram_iovec - Copy and checkum skb to user iovec.
  *	@skb: skbuff
  *	@hlen: hardware length
- *	@iovec: io vector
+ *	@iov: io vector
  * 
  *	Caller _must_ check that skb will fit to this iovec.
  *
-- 
cgit v1.2.3


From 66f3131f547ef3cc864810974e0757617e60a837 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Mon, 2 May 2005 12:24:46 -0600
Subject: [PATCH] JFS: reduce number of synchronous transactions

Use an inline pxd list rather than an xad list in the xadlock.
When the number of extents being modified can fit with the xadlock,
a transaction can be committed asynchronously.  Using a list of
pxd's instead of xad's allows us to fit 4 extents, rather than 2.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jfs/jfs_txnmgr.c | 63 ++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 40 insertions(+), 23 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index f40301d93f74..98e16d93e146 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1712,7 +1712,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	struct maplock *maplock;
 	struct xdlistlock *xadlock;
 	struct pxd_lock *pxdlock;
-	pxd_t *pxd;
+	pxd_t *page_pxd;
 	int next, lwm, hwm;
 
 	ip = tlck->ip;
@@ -1722,7 +1722,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	lrd->log.redopage.type = cpu_to_le16(LOG_XTREE);
 	lrd->log.redopage.l2linesize = cpu_to_le16(L2XTSLOTSIZE);
 
-	pxd = &lrd->log.redopage.pxd;
+	page_pxd = &lrd->log.redopage.pxd;
 
 	if (tlck->type & tlckBTROOT) {
 		lrd->log.redopage.type |= cpu_to_le16(LOG_BTROOT);
@@ -1752,9 +1752,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		 * applying the after-image to the meta-data page.
 		 */
 		lrd->type = cpu_to_le16(LOG_REDOPAGE);
-//              *pxd = mp->cm_pxd;
-		PXDaddress(pxd, mp->index);
-		PXDlength(pxd,
+//              *page_pxd = mp->cm_pxd;
+		PXDaddress(page_pxd, mp->index);
+		PXDlength(page_pxd,
 			  mp->logical_size >> tblk->sb->s_blocksize_bits);
 		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
 
@@ -1776,25 +1776,31 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		tlck->flag |= tlckUPDATEMAP;
 		xadlock->flag = mlckALLOCXADLIST;
 		xadlock->count = next - lwm;
-		if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) {
+		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
 			int i;
+			pxd_t *pxd;
 			/*
 			 * Lazy commit may allow xtree to be modified before
 			 * txUpdateMap runs.  Copy xad into linelock to
 			 * preserve correct data.
+			 *
+			 * We can fit twice as may pxd's as xads in the lock
 			 */
-			xadlock->xdlist = &xtlck->pxdlock;
-			memcpy(xadlock->xdlist, &p->xad[lwm],
-			       sizeof(xad_t) * xadlock->count);
-
-			for (i = 0; i < xadlock->count; i++)
+			xadlock->flag = mlckALLOCPXDLIST;
+			pxd = xadlock->xdlist = &xtlck->pxdlock;
+			for (i = 0; i < xadlock->count; i++) {
+				PXDaddress(pxd, addressXAD(&p->xad[lwm + i]));
+				PXDlength(pxd, lengthXAD(&p->xad[lwm + i]));
 				p->xad[lwm + i].flag &=
 				    ~(XAD_NEW | XAD_EXTENDED);
+				pxd++;
+			}
 		} else {
 			/*
 			 * xdlist will point to into inode's xtree, ensure
 			 * that transaction is not committed lazily.
 			 */
+			xadlock->flag = mlckALLOCXADLIST;
 			xadlock->xdlist = &p->xad[lwm];
 			tblk->xflag &= ~COMMIT_LAZY;
 		}
@@ -1836,8 +1842,8 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		if (tblk->xflag & COMMIT_TRUNCATE) {
 			/* write NOREDOPAGE for the page */
 			lrd->type = cpu_to_le16(LOG_NOREDOPAGE);
-			PXDaddress(pxd, mp->index);
-			PXDlength(pxd,
+			PXDaddress(page_pxd, mp->index);
+			PXDlength(page_pxd,
 				  mp->logical_size >> tblk->sb->
 				  s_blocksize_bits);
 			lrd->backchain =
@@ -1872,22 +1878,32 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		 * deleted page itself;
 		 */
 		tlck->flag |= tlckUPDATEMAP;
-		xadlock->flag = mlckFREEXADLIST;
 		xadlock->count = hwm - XTENTRYSTART + 1;
-		if ((xadlock->count <= 2) && (tblk->xflag & COMMIT_LAZY)) {
+		if ((xadlock->count <= 4) && (tblk->xflag & COMMIT_LAZY)) {
+			int i;
+			pxd_t *pxd;
 			/*
 			 * Lazy commit may allow xtree to be modified before
 			 * txUpdateMap runs.  Copy xad into linelock to
 			 * preserve correct data.
+			 *
+			 * We can fit twice as may pxd's as xads in the lock
 			 */
-			xadlock->xdlist = &xtlck->pxdlock;
-			memcpy(xadlock->xdlist, &p->xad[XTENTRYSTART],
-			       sizeof(xad_t) * xadlock->count);
+			xadlock->flag = mlckFREEPXDLIST;
+			pxd = xadlock->xdlist = &xtlck->pxdlock;
+			for (i = 0; i < xadlock->count; i++) {
+				PXDaddress(pxd,
+					addressXAD(&p->xad[XTENTRYSTART + i]));
+				PXDlength(pxd,
+					lengthXAD(&p->xad[XTENTRYSTART + i]));
+				pxd++;
+			}
 		} else {
 			/*
 			 * xdlist will point to into inode's xtree, ensure
 			 * that transaction is not committed lazily.
 			 */
+			xadlock->flag = mlckFREEXADLIST;
 			xadlock->xdlist = &p->xad[XTENTRYSTART];
 			tblk->xflag &= ~COMMIT_LAZY;
 		}
@@ -1918,7 +1934,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	 * header ?
 	 */
 	if (tlck->type & tlckTRUNCATE) {
-		pxd_t tpxd;	/* truncated extent of xad */
+		pxd_t pxd;	/* truncated extent of xad */
 		int twm;
 
 		/*
@@ -1947,8 +1963,9 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		 * applying the after-image to the meta-data page.
 		 */
 		lrd->type = cpu_to_le16(LOG_REDOPAGE);
-		PXDaddress(pxd, mp->index);
-		PXDlength(pxd, mp->logical_size >> tblk->sb->s_blocksize_bits);
+		PXDaddress(page_pxd, mp->index);
+		PXDlength(page_pxd,
+			  mp->logical_size >> tblk->sb->s_blocksize_bits);
 		lrd->backchain = cpu_to_le32(lmLog(log, tblk, lrd, tlck));
 
 		/*
@@ -1966,7 +1983,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 			lrd->log.updatemap.type = cpu_to_le16(LOG_FREEPXD);
 			lrd->log.updatemap.nxd = cpu_to_le16(1);
 			lrd->log.updatemap.pxd = pxdlock->pxd;
-			tpxd = pxdlock->pxd;	/* save to format maplock */
+			pxd = pxdlock->pxd;	/* save to format maplock */
 			lrd->backchain =
 			    cpu_to_le32(lmLog(log, tblk, lrd, NULL));
 		}
@@ -2035,7 +2052,7 @@ static void xtLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 			pxdlock = (struct pxd_lock *) xadlock;
 			pxdlock->flag = mlckFREEPXD;
 			pxdlock->count = 1;
-			pxdlock->pxd = tpxd;
+			pxdlock->pxd = pxd;
 
 			jfs_info("xtLog: truncate ip:0x%p mp:0x%p count:%d "
 				 "hwm:%d", ip, mp, pxdlock->count, hwm);
-- 
cgit v1.2.3


From d2e83707edbe6a2520591141421d26a87414a1b9 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Mon, 2 May 2005 12:24:51 -0600
Subject: [PATCH] JFS: Simplify creation of new iag

JFS was creating a new IAG (inode aggregate group) in one address
space, and afterwards, accessing it from another.  This could lead to
complications when cache pages contain more than one page of jfs
metadata.  This patch causes the IAG to be initialized in the same
address space that it is subsequently accessed with.

This also elimitates an I/O, but IAG's aren't created too often.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jfs/jfs_imap.c | 70 ++++++++++++++++++++++++++++---------------------------
 1 file changed, 36 insertions(+), 34 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 783831301625..6a0aa7e2cbef 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -2573,9 +2573,18 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 			goto out;
 		}
 
-		/* assign a buffer for the page */
-		mp = get_metapage(ipimap, xaddr, PSIZE, 1);
-		if (!mp) {
+		/*
+		 * start transaction of update of the inode map
+		 * addressing structure pointing to the new iag page;
+		 */
+		tid = txBegin(sb, COMMIT_FORCE);
+		down(&JFS_IP(ipimap)->commit_sem);
+
+		/* update the inode map addressing structure to point to it */
+		if ((rc =
+		     xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
+			txEnd(tid);
+			up(&JFS_IP(ipimap)->commit_sem);
 			/* Free the blocks allocated for the iag since it was
 			 * not successfully added to the inode map
 			 */
@@ -2584,6 +2593,29 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 			/* release the inode map lock */
 			IWRITE_UNLOCK(ipimap);
 
+			goto out;
+		}
+
+		/* update the inode map's inode to reflect the extension */
+		ipimap->i_size += PSIZE;
+		inode_add_bytes(ipimap, PSIZE);
+
+		/* assign a buffer for the page */
+		mp = get_metapage(ipimap, blkno, PSIZE, 0);
+		if (!mp) {
+			/*
+			 * This is very unlikely since we just created the
+			 * extent, but let's try to handle it correctly
+			 */
+			xtTruncate(tid, ipimap, ipimap->i_size - PSIZE,
+				   COMMIT_PWMAP);
+
+			txAbort(tid, 0);
+			txEnd(tid);
+
+			/* release the inode map lock */
+			IWRITE_UNLOCK(ipimap);
+
 			rc = -EIO;
 			goto out;
 		}
@@ -2605,40 +2637,10 @@ diNewIAG(struct inomap * imap, int *iagnop, int agno, struct metapage ** mpp)
 			iagp->inosmap[i] = cpu_to_le32(ONES);
 
 		/*
-		 * Invalidate the page after writing and syncing it.
-		 * After it's initialized, we access it in a different
-		 * address space
+		 * Write and sync the metapage
 		 */
-		set_bit(META_discard, &mp->flag);
 		flush_metapage(mp);
 
-		/*
-		 * start tyransaction of update of the inode map
-		 * addressing structure pointing to the new iag page;
-		 */
-		tid = txBegin(sb, COMMIT_FORCE);
-		down(&JFS_IP(ipimap)->commit_sem);
-
-		/* update the inode map addressing structure to point to it */
-		if ((rc =
-		     xtInsert(tid, ipimap, 0, blkno, xlen, &xaddr, 0))) {
-			txEnd(tid);
-			up(&JFS_IP(ipimap)->commit_sem);
-			/* Free the blocks allocated for the iag since it was
-			 * not successfully added to the inode map
-			 */
-			dbFree(ipimap, xaddr, (s64) xlen);
-
-			/* release the inode map lock */
-			IWRITE_UNLOCK(ipimap);
-
-			goto out;
-		}
-
-		/* update the inode map's inode to reflect the extension */
-		ipimap->i_size += PSIZE;
-		inode_add_bytes(ipimap, PSIZE);
-
 		/*
 		 * txCommit(COMMIT_FORCE) will synchronously write address 
 		 * index pages and inode after commit in careful update order 
-- 
cgit v1.2.3


From dc5798d9a7b656550533a5c0177dba17d4ef4990 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Mon, 2 May 2005 12:24:57 -0600
Subject: [PATCH] JFS: Changes for larger page size

JFS code has always assumed a page size of 4K.  This patch fixes the
non-pagecache uses of pages to deal with larger pages.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jfs/jfs_dtree.c  |  2 +-
 fs/jfs/jfs_logmgr.c | 62 +++++++++++++++++++++++++++++++----------------------
 fs/jfs/jfs_logmgr.h |  3 ++-
 3 files changed, 39 insertions(+), 28 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index e357890adfb2..453bace608d1 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -3181,7 +3181,7 @@ int jfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
 			d = (struct ldtentry *) & p->slot[stbl[i]];
 
 			if (((long) jfs_dirent + d->namlen + 1) >
-			    (dirent_buf + PSIZE)) {
+			    (dirent_buf + PAGE_SIZE)) {
 				/* DBCS codepages could overrun dirent_buf */
 				index = i;
 				overflow = 1;
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index b6a6869ebb4f..e0f867ddfd10 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -1669,6 +1669,7 @@ int lmLogShutdown(struct jfs_log * log)
 	lp->h.eor = lp->t.eor = cpu_to_le16(bp->l_eor);
 	lbmWrite(log, log->bp, lbmWRITE | lbmRELEASE | lbmSYNC, 0);
 	lbmIOWait(log->bp, lbmFREE);
+	log->bp = NULL;
 
 	/*
 	 * synchronous update log superblock
@@ -1819,20 +1820,34 @@ static int lbmLogInit(struct jfs_log * log)
 
 	log->lbuf_free = NULL;
 
-	for (i = 0; i < LOGPAGES; i++) {
-		lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
-		if (lbuf == 0)
-			goto error;
-		lbuf->l_ldata = (char *) get_zeroed_page(GFP_KERNEL);
-		if (lbuf->l_ldata == 0) {
-			kfree(lbuf);
+	for (i = 0; i < LOGPAGES;) {
+		char *buffer;
+		uint offset;
+		struct page *page;
+
+		buffer = (char *) get_zeroed_page(GFP_KERNEL);
+		if (buffer == NULL)
 			goto error;
+		page = virt_to_page(buffer);
+		for (offset = 0; offset < PAGE_SIZE; offset += LOGPSIZE) {
+			lbuf = kmalloc(sizeof(struct lbuf), GFP_KERNEL);
+			if (lbuf == NULL) {
+				if (offset == 0)
+					free_page((unsigned long) buffer);
+				goto error;
+			}
+			if (offset) /* we already have one reference */
+				get_page(page);
+			lbuf->l_offset = offset;
+			lbuf->l_ldata = buffer + offset;
+			lbuf->l_page = page;
+			lbuf->l_log = log;
+			init_waitqueue_head(&lbuf->l_ioevent);
+
+			lbuf->l_freelist = log->lbuf_free;
+			log->lbuf_free = lbuf;
+			i++;
 		}
-		lbuf->l_log = log;
-		init_waitqueue_head(&lbuf->l_ioevent);
-
-		lbuf->l_freelist = log->lbuf_free;
-		log->lbuf_free = lbuf;
 	}
 
 	return (0);
@@ -1857,12 +1872,10 @@ static void lbmLogShutdown(struct jfs_log * log)
 	lbuf = log->lbuf_free;
 	while (lbuf) {
 		struct lbuf *next = lbuf->l_freelist;
-		free_page((unsigned long) lbuf->l_ldata);
+		__free_page(lbuf->l_page);
 		kfree(lbuf);
 		lbuf = next;
 	}
-
-	log->bp = NULL;
 }
 
 
@@ -1974,9 +1987,9 @@ static int lbmRead(struct jfs_log * log, int pn, struct lbuf ** bpp)
 
 	bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
 	bio->bi_bdev = log->bdev;
-	bio->bi_io_vec[0].bv_page = virt_to_page(bp->l_ldata);
+	bio->bi_io_vec[0].bv_page = bp->l_page;
 	bio->bi_io_vec[0].bv_len = LOGPSIZE;
-	bio->bi_io_vec[0].bv_offset = 0;
+	bio->bi_io_vec[0].bv_offset = bp->l_offset;
 
 	bio->bi_vcnt = 1;
 	bio->bi_idx = 0;
@@ -2115,9 +2128,9 @@ static void lbmStartIO(struct lbuf * bp)
 	bio = bio_alloc(GFP_NOFS, 1);
 	bio->bi_sector = bp->l_blkno << (log->l2bsize - 9);
 	bio->bi_bdev = log->bdev;
-	bio->bi_io_vec[0].bv_page = virt_to_page(bp->l_ldata);
+	bio->bi_io_vec[0].bv_page = bp->l_page;
 	bio->bi_io_vec[0].bv_len = LOGPSIZE;
-	bio->bi_io_vec[0].bv_offset = 0;
+	bio->bi_io_vec[0].bv_offset = bp->l_offset;
 
 	bio->bi_vcnt = 1;
 	bio->bi_idx = 0;
@@ -2127,16 +2140,13 @@ static void lbmStartIO(struct lbuf * bp)
 	bio->bi_private = bp;
 
 	/* check if journaling to disk has been disabled */
-	if (!log->no_integrity) {
+	if (log->no_integrity) {
+		bio->bi_size = 0;
+		lbmIODone(bio, 0, 0);
+	} else {
 		submit_bio(WRITE_SYNC, bio);
 		INCREMENT(lmStat.submitted);
 	}
-	else {
-		bio->bi_size = 0;
-		lbmIODone(bio, 0, 0); /* 2nd argument appears to not be used => 0
-				       *  3rd argument appears to not be used => 0
-				       */
-	}
 }
 
 
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index 141ad74010c9..f67146684b7f 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -463,9 +463,10 @@ struct lbuf {
 
 	s64 l_blkno;		/* 8: log page block number */
 	caddr_t l_ldata;	/* 4: data page */
+	struct page *l_page;	/* The page itself */
+	uint l_offset;		/* Offset of l_ldata within the page */	
 
 	wait_queue_head_t l_ioevent;	/* 4: i/o done event */
-	struct page *l_page;	/* The page itself */
 };
 
 /* Reuse l_freelist for redrive list */
-- 
cgit v1.2.3


From 7fab479bebb96b1b4888bdae9b42e1fa9c5d3f38 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Mon, 2 May 2005 12:25:02 -0600
Subject: [PATCH] JFS: Support page sizes greater than 4K

jfs has never worked on architecutures where the page size was not 4K.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jfs/inode.c        |  31 +-
 fs/jfs/jfs_dmap.c     |  12 +-
 fs/jfs/jfs_imap.c     |  14 +-
 fs/jfs/jfs_incore.h   |   1 +
 fs/jfs/jfs_logmgr.c   |  71 ++--
 fs/jfs/jfs_logmgr.h   |   5 +-
 fs/jfs/jfs_metapage.c | 908 +++++++++++++++++++++++++++++++++-----------------
 fs/jfs/jfs_metapage.h |  80 +++--
 fs/jfs/jfs_mount.c    |   5 -
 fs/jfs/jfs_txnmgr.c   |  89 +++--
 fs/jfs/jfs_umount.c   |  16 +-
 fs/jfs/resize.c       |   3 +
 fs/jfs/super.c        |  33 +-
 13 files changed, 806 insertions(+), 462 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 7bc906677b0d..6c04f5eda135 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -175,31 +175,22 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
 {
 	s64 lblock64 = lblock;
 	int rc = 0;
-	int take_locks;
 	xad_t xad;
 	s64 xaddr;
 	int xflag;
 	s32 xlen;
 
-	/*
-	 * If this is a special inode (imap, dmap)
-	 * the lock should already be taken
-	 */
-	take_locks = (JFS_IP(ip)->fileset != AGGREGATE_I);
-
 	/*
 	 * Take appropriate lock on inode
 	 */
-	if (take_locks) {
-		if (create)
-			IWRITE_LOCK(ip);
-		else
-			IREAD_LOCK(ip);
-	}
+	if (create)
+		IWRITE_LOCK(ip);
+	else
+		IREAD_LOCK(ip);
 
 	if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) &&
-	    (xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)
-	     == 0) && xlen) {
+	    (!xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)) &&
+	    xlen) {
 		if (xflag & XAD_NOTRECORDED) {
 			if (!create)
 				/*
@@ -258,12 +249,10 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
 	/*
 	 * Release lock on inode
 	 */
-	if (take_locks) {
-		if (create)
-			IWRITE_UNLOCK(ip);
-		else
-			IREAD_UNLOCK(ip);
-	}
+	if (create)
+		IWRITE_UNLOCK(ip);
+	else
+		IREAD_UNLOCK(ip);
 	return rc;
 }
 
diff --git a/fs/jfs/jfs_dmap.c b/fs/jfs/jfs_dmap.c
index d86e467c6e42..69007fd546ef 100644
--- a/fs/jfs/jfs_dmap.c
+++ b/fs/jfs/jfs_dmap.c
@@ -471,6 +471,7 @@ dbUpdatePMap(struct inode *ipbmap,
 	struct metapage *mp;
 	struct jfs_log *log;
 	int lsn, difft, diffp;
+	unsigned long flags;
 
 	/* the blocks better be within the mapsize. */
 	if (blkno + nblocks > bmp->db_mapsize) {
@@ -504,6 +505,7 @@ dbUpdatePMap(struct inode *ipbmap,
 					   0);
 			if (mp == NULL)
 				return -EIO;
+			metapage_wait_for_io(mp);
 		}
 		dp = (struct dmap *) mp->data;
 
@@ -578,34 +580,32 @@ dbUpdatePMap(struct inode *ipbmap,
 		if (mp->lsn != 0) {
 			/* inherit older/smaller lsn */
 			logdiff(diffp, mp->lsn, log);
+			LOGSYNC_LOCK(log, flags);
 			if (difft < diffp) {
 				mp->lsn = lsn;
 
 				/* move bp after tblock in logsync list */
-				LOGSYNC_LOCK(log);
 				list_move(&mp->synclist, &tblk->synclist);
-				LOGSYNC_UNLOCK(log);
 			}
 
 			/* inherit younger/larger clsn */
-			LOGSYNC_LOCK(log);
 			logdiff(difft, tblk->clsn, log);
 			logdiff(diffp, mp->clsn, log);
 			if (difft > diffp)
 				mp->clsn = tblk->clsn;
-			LOGSYNC_UNLOCK(log);
+			LOGSYNC_UNLOCK(log, flags);
 		} else {
 			mp->log = log;
 			mp->lsn = lsn;
 
 			/* insert bp after tblock in logsync list */
-			LOGSYNC_LOCK(log);
+			LOGSYNC_LOCK(log, flags);
 
 			log->count++;
 			list_add(&mp->synclist, &tblk->synclist);
 
 			mp->clsn = tblk->clsn;
-			LOGSYNC_UNLOCK(log);
+			LOGSYNC_UNLOCK(log, flags);
 		}
 	}
 
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index 6a0aa7e2cbef..7acff2ce3c80 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -502,7 +502,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
 
 	}
 
-	ip->i_mapping->a_ops = &jfs_aops;
+	ip->i_mapping->a_ops = &jfs_metapage_aops;
 	mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS);
 
 	/* Allocations to metadata inodes should not affect quotas */
@@ -2791,6 +2791,7 @@ diUpdatePMap(struct inode *ipimap,
 	u32 mask;
 	struct jfs_log *log;
 	int lsn, difft, diffp;
+	unsigned long flags;
 
 	imap = JFS_IP(ipimap)->i_imap;
 	/* get the iag number containing the inode */
@@ -2807,6 +2808,7 @@ diUpdatePMap(struct inode *ipimap,
 	IREAD_UNLOCK(ipimap);
 	if (rc)
 		return (rc);
+	metapage_wait_for_io(mp);
 	iagp = (struct iag *) mp->data;
 	/* get the inode number and extent number of the inode within
 	 * the iag and the inode number within the extent.
@@ -2870,30 +2872,28 @@ diUpdatePMap(struct inode *ipimap,
 		/* inherit older/smaller lsn */
 		logdiff(difft, lsn, log);
 		logdiff(diffp, mp->lsn, log);
+		LOGSYNC_LOCK(log, flags);
 		if (difft < diffp) {
 			mp->lsn = lsn;
 			/* move mp after tblock in logsync list */
-			LOGSYNC_LOCK(log);
 			list_move(&mp->synclist, &tblk->synclist);
-			LOGSYNC_UNLOCK(log);
 		}
 		/* inherit younger/larger clsn */
-		LOGSYNC_LOCK(log);
 		assert(mp->clsn);
 		logdiff(difft, tblk->clsn, log);
 		logdiff(diffp, mp->clsn, log);
 		if (difft > diffp)
 			mp->clsn = tblk->clsn;
-		LOGSYNC_UNLOCK(log);
+		LOGSYNC_UNLOCK(log, flags);
 	} else {
 		mp->log = log;
 		mp->lsn = lsn;
 		/* insert mp after tblock in logsync list */
-		LOGSYNC_LOCK(log);
+		LOGSYNC_LOCK(log, flags);
 		log->count++;
 		list_add(&mp->synclist, &tblk->synclist);
 		mp->clsn = tblk->clsn;
-		LOGSYNC_UNLOCK(log);
+		LOGSYNC_UNLOCK(log, flags);
 	}
 	write_metapage(mp);
 	return (0);
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index ebd77c1bed66..c0fd7b3eadc6 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -165,6 +165,7 @@ struct jfs_sb_info {
         /* Formerly in ipbmap */
 	struct bmap	*bmap;		/* incore bmap descriptor	*/
 	struct nls_table *nls_tab;	/* current codepage		*/
+	struct inode *direct_inode;	/* metadata inode */
 	uint		state;		/* mount/recovery state	*/
 	unsigned long	flag;		/* mount time flags */
 	uint		p_state;	/* state prior to going no integrity */
diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index e0f867ddfd10..cfcdad3459dd 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -234,6 +234,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	int lsn;
 	int diffp, difft;
 	struct metapage *mp = NULL;
+	unsigned long flags;
 
 	jfs_info("lmLog: log:0x%p tblk:0x%p, lrd:0x%p tlck:0x%p",
 		 log, tblk, lrd, tlck);
@@ -254,7 +255,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	 */
 	lsn = log->lsn;
 
-	LOGSYNC_LOCK(log);
+	LOGSYNC_LOCK(log, flags);
 
 	/*
 	 * initialize page lsn if first log write of the page
@@ -310,7 +311,7 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		}
 	}
 
-	LOGSYNC_UNLOCK(log);
+	LOGSYNC_UNLOCK(log, flags);
 
 	/*
 	 *      write the log record
@@ -334,7 +335,6 @@ int lmLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 	return lsn;
 }
 
-
 /*
  * NAME:	lmWriteRecord()
  *
@@ -945,6 +945,15 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
 	struct lrd lrd;
 	int lsn;
 	struct logsyncblk *lp;
+	struct jfs_sb_info *sbi;
+	unsigned long flags;
+
+	/* push dirty metapages out to disk */
+	list_for_each_entry(sbi, &log->sb_list, log_list) {
+		filemap_flush(sbi->ipbmap->i_mapping);
+		filemap_flush(sbi->ipimap->i_mapping);
+		filemap_flush(sbi->direct_inode->i_mapping);
+	}
 
 	/*
 	 *      forward syncpt
@@ -954,10 +963,7 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
 	 */
 
 	if (log->sync == log->syncpt) {
-		LOGSYNC_LOCK(log);
-		/* ToDo: push dirty metapages out to disk */
-//              bmLogSync(log);
-
+		LOGSYNC_LOCK(log, flags);
 		if (list_empty(&log->synclist))
 			log->sync = log->lsn;
 		else {
@@ -965,7 +971,7 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
 					struct logsyncblk, synclist);
 			log->sync = lp->lsn;
 		}
-		LOGSYNC_UNLOCK(log);
+		LOGSYNC_UNLOCK(log, flags);
 
 	}
 
@@ -974,27 +980,6 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
 	 * reset syncpt = sync
 	 */
 	if (log->sync != log->syncpt) {
-		struct jfs_sb_info *sbi;
-
-		/*
-		 * We need to make sure all of the "written" metapages
-		 * actually make it to disk
-		 */
-		list_for_each_entry(sbi, &log->sb_list, log_list) {
-			if (sbi->flag & JFS_NOINTEGRITY)
-				continue;
-			filemap_fdatawrite(sbi->ipbmap->i_mapping);
-			filemap_fdatawrite(sbi->ipimap->i_mapping);
-			filemap_fdatawrite(sbi->sb->s_bdev->bd_inode->i_mapping);
-		}
-		list_for_each_entry(sbi, &log->sb_list, log_list) {
-			if (sbi->flag & JFS_NOINTEGRITY)
-				continue;
-			filemap_fdatawait(sbi->ipbmap->i_mapping);
-			filemap_fdatawait(sbi->ipimap->i_mapping);
-			filemap_fdatawait(sbi->sb->s_bdev->bd_inode->i_mapping);
-		}
-
 		lrd.logtid = 0;
 		lrd.backchain = 0;
 		lrd.type = cpu_to_le16(LOG_SYNCPT);
@@ -1547,6 +1532,7 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
 {
 	int i;
 	struct tblock *target = NULL;
+	struct jfs_sb_info *sbi;
 
 	/* jfs_write_inode may call us during read-only mount */
 	if (!log)
@@ -1608,12 +1594,18 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
 	if (wait < 2)
 		return;
 
+	list_for_each_entry(sbi, &log->sb_list, log_list) {
+		filemap_fdatawrite(sbi->ipbmap->i_mapping);
+		filemap_fdatawrite(sbi->ipimap->i_mapping);
+		filemap_fdatawrite(sbi->direct_inode->i_mapping);
+	}
+
 	/*
 	 * If there was recent activity, we may need to wait
 	 * for the lazycommit thread to catch up
 	 */
 	if ((!list_empty(&log->cqueue)) || !list_empty(&log->synclist)) {
-		for (i = 0; i < 800; i++) {	/* Too much? */
+		for (i = 0; i < 200; i++) {	/* Too much? */
 			msleep(250);
 			if (list_empty(&log->cqueue) &&
 			    list_empty(&log->synclist))
@@ -1621,7 +1613,24 @@ void jfs_flush_journal(struct jfs_log *log, int wait)
 		}
 	}
 	assert(list_empty(&log->cqueue));
-	assert(list_empty(&log->synclist));
+	if (!list_empty(&log->synclist)) {
+		struct logsyncblk *lp;
+
+		list_for_each_entry(lp, &log->synclist, synclist) {
+			if (lp->xflag & COMMIT_PAGE) {
+				struct metapage *mp = (struct metapage *)lp;
+				dump_mem("orphan metapage", lp,
+					 sizeof(struct metapage));
+				dump_mem("page", mp->page, sizeof(struct page));
+			}
+			else
+				dump_mem("orphan tblock", lp,
+					 sizeof(struct tblock));
+		}
+//		current->state = TASK_INTERRUPTIBLE;
+//		schedule();
+	}
+	//assert(list_empty(&log->synclist));
 	clear_bit(log_FLUSH, &log->flag);
 }
 
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index f67146684b7f..f4c121098d4f 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -490,8 +490,9 @@ struct logsyncblk {
  */
 
 #define LOGSYNC_LOCK_INIT(log) spin_lock_init(&(log)->synclock)
-#define LOGSYNC_LOCK(log) spin_lock(&(log)->synclock)
-#define LOGSYNC_UNLOCK(log) spin_unlock(&(log)->synclock)
+#define LOGSYNC_LOCK(log, flags) spin_lock_irqsave(&(log)->synclock, flags)
+#define LOGSYNC_UNLOCK(log, flags) \
+	spin_unlock_irqrestore(&(log)->synclock, flags)
 
 /* compute the difference in bytes of lsn from sync point */
 #define logdiff(diff, lsn, log)\
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index 4c0a3ac75c08..41bf078dce05 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -1,5 +1,5 @@
 /*
- *   Copyright (C) International Business Machines Corp., 2000-2003
+ *   Copyright (C) International Business Machines Corp., 2000-2005
  *   Portions Copyright (C) Christoph Hellwig, 2001-2002
  *
  *   This program is free software;  you can redistribute it and/or modify
@@ -18,10 +18,11 @@
  */
 
 #include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/bio.h>
 #include <linux/init.h>
 #include <linux/buffer_head.h>
 #include <linux/mempool.h>
-#include <linux/delay.h>
 #include "jfs_incore.h"
 #include "jfs_superblock.h"
 #include "jfs_filsys.h"
@@ -29,8 +30,6 @@
 #include "jfs_txnmgr.h"
 #include "jfs_debug.h"
 
-static DEFINE_SPINLOCK(meta_lock);
-
 #ifdef CONFIG_JFS_STATISTICS
 static struct {
 	uint	pagealloc;	/* # of page allocations */
@@ -39,22 +38,8 @@ static struct {
 } mpStat;
 #endif
 
-
-#define HASH_BITS 10		/* This makes hash_table 1 4K page */
-#define HASH_SIZE (1 << HASH_BITS)
-static struct metapage **hash_table = NULL;
-static unsigned long hash_order;
-
-
-static inline int metapage_locked(struct metapage *mp)
-{
-	return test_bit(META_locked, &mp->flag);
-}
-
-static inline int trylock_metapage(struct metapage *mp)
-{
-	return test_and_set_bit(META_locked, &mp->flag);
-}
+#define metapage_locked(mp) test_bit(META_locked, &(mp)->flag)
+#define trylock_metapage(mp) test_and_set_bit(META_locked, &(mp)->flag)
 
 static inline void unlock_metapage(struct metapage *mp)
 {
@@ -62,26 +47,26 @@ static inline void unlock_metapage(struct metapage *mp)
 	wake_up(&mp->wait);
 }
 
-static void __lock_metapage(struct metapage *mp)
+static inline void __lock_metapage(struct metapage *mp)
 {
 	DECLARE_WAITQUEUE(wait, current);
-
 	INCREMENT(mpStat.lockwait);
-
 	add_wait_queue_exclusive(&mp->wait, &wait);
 	do {
 		set_current_state(TASK_UNINTERRUPTIBLE);
 		if (metapage_locked(mp)) {
-			spin_unlock(&meta_lock);
+			unlock_page(mp->page);
 			schedule();
-			spin_lock(&meta_lock);
+			lock_page(mp->page);
 		}
 	} while (trylock_metapage(mp));
 	__set_current_state(TASK_RUNNING);
 	remove_wait_queue(&mp->wait, &wait);
 }
 
-/* needs meta_lock */
+/*
+ * Must have mp->page locked
+ */
 static inline void lock_metapage(struct metapage *mp)
 {
 	if (trylock_metapage(mp))
@@ -92,6 +77,110 @@ static inline void lock_metapage(struct metapage *mp)
 static kmem_cache_t *metapage_cache;
 static mempool_t *metapage_mempool;
 
+#define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE)
+
+#if MPS_PER_PAGE > 1
+
+struct meta_anchor {
+	int mp_count;
+	atomic_t io_count;
+	struct metapage *mp[MPS_PER_PAGE];
+};
+#define mp_anchor(page) ((struct meta_anchor *)page->private)
+
+static inline struct metapage *page_to_mp(struct page *page, uint offset)
+{
+	if (!PagePrivate(page))
+		return NULL;
+	return mp_anchor(page)->mp[offset >> L2PSIZE];
+}
+
+static inline int insert_metapage(struct page *page, struct metapage *mp)
+{
+	struct meta_anchor *a;
+	int index;
+	int l2mp_blocks;	/* log2 blocks per metapage */
+
+	if (PagePrivate(page))
+		a = mp_anchor(page);
+	else {
+		a = kmalloc(sizeof(struct meta_anchor), GFP_NOFS);
+		if (!a)
+			return -ENOMEM;
+		memset(a, 0, sizeof(struct meta_anchor));
+		page->private = (unsigned long)a;
+		SetPagePrivate(page);
+		kmap(page);
+	}
+
+	if (mp) {
+		l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
+		index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
+		a->mp_count++;
+		a->mp[index] = mp;
+	}
+
+	return 0;
+}
+
+static inline void remove_metapage(struct page *page, struct metapage *mp)
+{
+	struct meta_anchor *a = mp_anchor(page);
+	int l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
+	int index;
+
+	index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
+
+	BUG_ON(a->mp[index] != mp);
+
+	a->mp[index] = NULL;
+	if (--a->mp_count == 0) {
+		kfree(a);
+		page->private = 0;
+		ClearPagePrivate(page);
+		kunmap(page);
+	}
+}
+
+static inline void inc_io(struct page *page)
+{
+	atomic_inc(&mp_anchor(page)->io_count);
+}
+
+static inline void dec_io(struct page *page, void (*handler) (struct page *))
+{
+	if (atomic_dec_and_test(&mp_anchor(page)->io_count))
+		handler(page);
+}
+
+#else
+static inline struct metapage *page_to_mp(struct page *page, uint offset)
+{
+	return PagePrivate(page) ? (struct metapage *)page->private : NULL;
+}
+
+static inline int insert_metapage(struct page *page, struct metapage *mp)
+{
+	if (mp) {
+		page->private = (unsigned long)mp;
+		SetPagePrivate(page);
+		kmap(page);
+	}
+	return 0;
+}
+
+static inline void remove_metapage(struct page *page, struct metapage *mp)
+{
+	page->private = 0;
+	ClearPagePrivate(page);
+	kunmap(page);
+}
+
+#define inc_io(page) do {} while(0)
+#define dec_io(page, handler) handler(page)
+
+#endif
+
 static void init_once(void *foo, kmem_cache_t *cachep, unsigned long flags)
 {
 	struct metapage *mp = (struct metapage *)foo;
@@ -139,16 +228,6 @@ int __init metapage_init(void)
 		kmem_cache_destroy(metapage_cache);
 		return -ENOMEM;
 	}
-	/*
-	 * Now the hash list
-	 */
-	for (hash_order = 0;
-	     ((PAGE_SIZE << hash_order) / sizeof(void *)) < HASH_SIZE;
-	     hash_order++);
-	hash_table =
-	    (struct metapage **) __get_free_pages(GFP_KERNEL, hash_order);
-	assert(hash_table);
-	memset(hash_table, 0, PAGE_SIZE << hash_order);
 
 	return 0;
 }
@@ -159,73 +238,388 @@ void metapage_exit(void)
 	kmem_cache_destroy(metapage_cache);
 }
 
+static inline void drop_metapage(struct page *page, struct metapage *mp)
+{
+	if (mp->count || mp->nohomeok || test_bit(META_dirty, &mp->flag) ||
+	    test_bit(META_io, &mp->flag))
+		return;
+	remove_metapage(page, mp);
+	INCREMENT(mpStat.pagefree);
+	free_metapage(mp);
+}
+
 /*
- * Basically same hash as in pagemap.h, but using our hash table
+ * Metapage address space operations
  */
-static struct metapage **meta_hash(struct address_space *mapping,
-				   unsigned long index)
+
+static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock,
+				    unsigned int *len)
 {
-#define i (((unsigned long)mapping)/ \
-	   (sizeof(struct inode) & ~(sizeof(struct inode) -1 )))
-#define s(x) ((x) + ((x) >> HASH_BITS))
-	return hash_table + (s(i + index) & (HASH_SIZE - 1));
-#undef i
-#undef s
+	int rc = 0;
+	int xflag;
+	s64 xaddr;
+	sector_t file_blocks = (inode->i_size + inode->i_blksize - 1) >>
+			       inode->i_blkbits;
+
+	if (lblock >= file_blocks)
+		return 0;
+	if (lblock + *len > file_blocks)
+		*len = file_blocks - lblock;
+
+	if (inode->i_ino) {
+		rc = xtLookup(inode, (s64)lblock, *len, &xflag, &xaddr, len, 0);
+		if ((rc == 0) && *len)
+			lblock = (sector_t)xaddr;
+		else
+			lblock = 0;
+	} /* else no mapping */
+
+	return lblock;
 }
 
-static struct metapage *search_hash(struct metapage ** hash_ptr,
-				    struct address_space *mapping,
-			       unsigned long index)
+static void last_read_complete(struct page *page)
 {
-	struct metapage *ptr;
+	if (!PageError(page))
+		SetPageUptodate(page);
+	unlock_page(page);
+}
+
+static int metapage_read_end_io(struct bio *bio, unsigned int bytes_done,
+				int err)
+{
+	struct page *page = bio->bi_private;
+
+	if (bio->bi_size)
+		return 1;
 
-	for (ptr = *hash_ptr; ptr; ptr = ptr->hash_next) {
-		if ((ptr->mapping == mapping) && (ptr->index == index))
-			return ptr;
+	if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+		printk(KERN_ERR "metapage_read_end_io: I/O error\n");
+		SetPageError(page);
 	}
 
-	return NULL;
+	dec_io(page, last_read_complete);
+	bio_put(bio);
+
+	return 0;
 }
 
-static void add_to_hash(struct metapage * mp, struct metapage ** hash_ptr)
+static void remove_from_logsync(struct metapage *mp)
 {
-	if (*hash_ptr)
-		(*hash_ptr)->hash_prev = mp;
+	struct jfs_log *log = mp->log;
+	unsigned long flags;
+/*
+ * This can race.  Recheck that log hasn't been set to null, and after
+ * acquiring logsync lock, recheck lsn
+ */
+	if (!log)
+		return;
+
+	LOGSYNC_LOCK(log, flags);
+	if (mp->lsn) {
+		mp->log = NULL;
+		mp->lsn = 0;
+		mp->clsn = 0;
+		log->count--;
+		list_del(&mp->synclist);
+	}
+	LOGSYNC_UNLOCK(log, flags);
+}
 
-	mp->hash_prev = NULL;
-	mp->hash_next = *hash_ptr;
-	*hash_ptr = mp;
+static void last_write_complete(struct page *page)
+{
+	struct metapage *mp;
+	unsigned int offset;
+
+	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+		mp = page_to_mp(page, offset);
+		if (mp && test_bit(META_io, &mp->flag)) {
+			if (mp->lsn)
+				remove_from_logsync(mp);
+			clear_bit(META_io, &mp->flag);
+		}
+		/*
+		 * I'd like to call drop_metapage here, but I don't think it's
+		 * safe unless I have the page locked
+		 */
+	}
+	end_page_writeback(page);
 }
 
-static void remove_from_hash(struct metapage * mp, struct metapage ** hash_ptr)
+static int metapage_write_end_io(struct bio *bio, unsigned int bytes_done,
+				 int err)
 {
-	if (mp->hash_prev)
-		mp->hash_prev->hash_next = mp->hash_next;
-	else {
-		assert(*hash_ptr == mp);
-		*hash_ptr = mp->hash_next;
+	struct page *page = bio->bi_private;
+
+	BUG_ON(!PagePrivate(page));
+
+	if (bio->bi_size)
+		return 1;
+
+	if (! test_bit(BIO_UPTODATE, &bio->bi_flags)) {
+		printk(KERN_ERR "metapage_write_end_io: I/O error\n");
+		SetPageError(page);
+	}
+	dec_io(page, last_write_complete);
+	bio_put(bio);
+	return 0;
+}
+
+static int metapage_writepage(struct page *page, struct writeback_control *wbc)
+{
+	struct bio *bio = NULL;
+	unsigned int block_offset;	/* block offset of mp within page */
+	struct inode *inode = page->mapping->host;
+	unsigned int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage;
+	unsigned int len;
+	unsigned int xlen;
+	struct metapage *mp;
+	int redirty = 0;
+	sector_t lblock;
+	sector_t pblock;
+	sector_t next_block = 0;
+	sector_t page_start;
+	unsigned long bio_bytes = 0;
+	unsigned long bio_offset = 0;
+	unsigned int offset;
+
+	page_start = (sector_t)page->index <<
+		     (PAGE_CACHE_SHIFT - inode->i_blkbits);
+	BUG_ON(!PageLocked(page));
+	BUG_ON(PageWriteback(page));
+
+	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+		mp = page_to_mp(page, offset);
+
+		if (!mp || !test_bit(META_dirty, &mp->flag))
+			continue;
+
+		if (mp->nohomeok && !test_bit(META_forcewrite, &mp->flag)) {
+			redirty = 1;
+			continue;
+		}
+
+		clear_bit(META_dirty, &mp->flag);
+		block_offset = offset >> inode->i_blkbits;
+		lblock = page_start + block_offset;
+		if (bio) {
+			if (xlen && lblock == next_block) {
+				/* Contiguous, in memory & on disk */
+				len = min(xlen, blocks_per_mp);
+				xlen -= len;
+				bio_bytes += len << inode->i_blkbits;
+				set_bit(META_io, &mp->flag);
+				continue;
+			}
+			/* Not contiguous */
+			if (bio_add_page(bio, page, bio_bytes, bio_offset) <
+			    bio_bytes)
+				goto add_failed;
+			/*
+			 * Increment counter before submitting i/o to keep
+			 * count from hitting zero before we're through
+			 */
+			inc_io(page);
+			if (!bio->bi_size)
+				goto dump_bio;
+			submit_bio(WRITE, bio);
+			bio = NULL;
+		} else {
+			set_page_writeback(page);
+			inc_io(page);
+		}
+		xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits;
+		pblock = metapage_get_blocks(inode, lblock, &xlen);
+		if (!pblock) {
+			/* Need better error handling */
+			printk(KERN_ERR "JFS: metapage_get_blocks failed\n");
+			dec_io(page, last_write_complete);
+			continue;
+		}
+		set_bit(META_io, &mp->flag);
+		len = min(xlen, (uint) JFS_SBI(inode->i_sb)->nbperpage);
+
+		bio = bio_alloc(GFP_NOFS, 1);
+		bio->bi_bdev = inode->i_sb->s_bdev;
+		bio->bi_sector = pblock << (inode->i_blkbits - 9);
+		bio->bi_end_io = metapage_write_end_io;
+		bio->bi_private = page;
+
+		/* Don't call bio_add_page yet, we may add to this vec */
+		bio_offset = offset;
+		bio_bytes = len << inode->i_blkbits;
+
+		xlen -= len;
+		next_block = lblock + len;
+	}
+	if (bio) {
+		if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes)
+				goto add_failed;
+		if (!bio->bi_size)
+			goto dump_bio;
+		
+		submit_bio(WRITE, bio);
+	}
+	if (redirty)
+		redirty_page_for_writepage(wbc, page);
+
+	unlock_page(page);
+
+	return 0;
+add_failed:
+	/* We should never reach here, since we're only adding one vec */
+	printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
+	goto skip;
+dump_bio:
+	dump_mem("bio", bio, sizeof(*bio));
+skip:
+	bio_put(bio);
+	unlock_page(page);
+	dec_io(page, last_write_complete);
+
+	return -EIO;
+}
+
+static int metapage_readpage(struct file *fp, struct page *page)
+{
+	struct inode *inode = page->mapping->host;
+	struct bio *bio = NULL;
+	unsigned int block_offset;
+	unsigned int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
+	sector_t page_start;	/* address of page in fs blocks */
+	sector_t pblock;
+	unsigned int xlen;
+	unsigned int len;
+	unsigned int offset;
+
+	BUG_ON(!PageLocked(page));
+	page_start = (sector_t)page->index <<
+		     (PAGE_CACHE_SHIFT - inode->i_blkbits);
+
+	block_offset = 0;
+	while (block_offset < blocks_per_page) {
+		xlen = blocks_per_page - block_offset;
+		pblock = metapage_get_blocks(inode, page_start + block_offset,
+					     &xlen);
+		if (pblock) {
+			if (!PagePrivate(page))
+				insert_metapage(page, NULL);
+			inc_io(page);
+			if (bio)
+				submit_bio(READ, bio);
+
+			bio = bio_alloc(GFP_NOFS, 1);
+			bio->bi_bdev = inode->i_sb->s_bdev;
+			bio->bi_sector = pblock << (inode->i_blkbits - 9);
+			bio->bi_end_io = metapage_read_end_io;
+			bio->bi_private = page;
+			len = xlen << inode->i_blkbits;
+			offset = block_offset << inode->i_blkbits;
+			if (bio_add_page(bio, page, len, offset) < len)
+				goto add_failed;
+			block_offset += xlen;
+		} else
+			block_offset++;
 	}
+	if (bio)
+		submit_bio(READ, bio);
+	else
+		unlock_page(page);
+
+	return 0;
 
-	if (mp->hash_next)
-		mp->hash_next->hash_prev = mp->hash_prev;
+add_failed:
+	printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
+	bio_put(bio);
+	dec_io(page, last_read_complete);
+	return -EIO;
 }
 
+static int metapage_releasepage(struct page *page, int gfp_mask)
+{
+	struct metapage *mp;
+	int busy = 0;
+	unsigned int offset;
+
+	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+		mp = page_to_mp(page, offset);
+
+		if (!mp)
+			continue;
+
+		jfs_info("metapage_releasepage: mp = 0x%p", mp);
+		if (mp->count || mp->nohomeok) {
+			jfs_info("count = %ld, nohomeok = %d", mp->count,
+				 mp->nohomeok);
+			busy = 1;
+			continue;
+		}
+		wait_on_page_writeback(page);
+		//WARN_ON(test_bit(META_dirty, &mp->flag));
+		if (test_bit(META_dirty, &mp->flag)) {
+			dump_mem("dirty mp in metapage_releasepage", mp,
+				 sizeof(struct metapage));
+			dump_mem("page", page, sizeof(struct page));
+			dump_stack();
+		}
+		WARN_ON(mp->lsn);
+		if (mp->lsn)
+			remove_from_logsync(mp);
+		remove_metapage(page, mp);
+		INCREMENT(mpStat.pagefree);
+		free_metapage(mp);
+	}
+	if (busy)
+		return -1;
+
+	return 0;
+}
+
+static int metapage_invalidatepage(struct page *page, unsigned long offset)
+{
+	BUG_ON(offset);
+
+	if (PageWriteback(page))
+		return 0;
+
+	return metapage_releasepage(page, 0);
+}
+
+struct address_space_operations jfs_metapage_aops = {
+	.readpage	= metapage_readpage,
+	.writepage	= metapage_writepage,
+	.sync_page	= block_sync_page,
+	.releasepage	= metapage_releasepage,
+	.invalidatepage	= metapage_invalidatepage,
+	.set_page_dirty	= __set_page_dirty_nobuffers,
+};
+
 struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
 				unsigned int size, int absolute,
 				unsigned long new)
 {
-	struct metapage **hash_ptr;
 	int l2BlocksPerPage;
 	int l2bsize;
 	struct address_space *mapping;
-	struct metapage *mp;
+	struct metapage *mp = NULL;
+	struct page *page;
 	unsigned long page_index;
 	unsigned long page_offset;
 
-	jfs_info("__get_metapage: inode = 0x%p, lblock = 0x%lx", inode, lblock);
-
+	jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d",
+		 inode->i_ino, lblock, absolute);
+
+	l2bsize = inode->i_blkbits;
+	l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
+	page_index = lblock >> l2BlocksPerPage;
+	page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize;
+	if ((page_offset + size) > PAGE_CACHE_SIZE) {
+		jfs_err("MetaData crosses page boundary!!");
+		jfs_err("lblock = %lx, size  = %d", lblock, size);
+		dump_stack();
+		return NULL;
+	}
 	if (absolute)
-		mapping = inode->i_sb->s_bdev->bd_inode->i_mapping;
+		mapping = JFS_SBI(inode->i_sb)->direct_inode->i_mapping;
 	else {
 		/*
 		 * If an nfs client tries to read an inode that is larger
@@ -237,312 +631,212 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
 		mapping = inode->i_mapping;
 	}
 
-	hash_ptr = meta_hash(mapping, lblock);
-again:
-	spin_lock(&meta_lock);
-	mp = search_hash(hash_ptr, mapping, lblock);
+	if (new && (PSIZE == PAGE_CACHE_SIZE)) {
+		page = grab_cache_page(mapping, page_index);
+		if (!page) {
+			jfs_err("grab_cache_page failed!");
+			return NULL;
+		}
+		SetPageUptodate(page);
+	} else {
+		page = read_cache_page(mapping, page_index,
+			    (filler_t *)mapping->a_ops->readpage, NULL);
+		if (IS_ERR(page)) {
+			jfs_err("read_cache_page failed!");
+			return NULL;
+		}
+		lock_page(page);
+	}
+
+	mp = page_to_mp(page, page_offset);
 	if (mp) {
-	      page_found:
-		if (test_bit(META_stale, &mp->flag)) {
-			spin_unlock(&meta_lock);
-			msleep(1);
-			goto again;
+		if (mp->logical_size != size) {
+			jfs_error(inode->i_sb,
+				  "__get_metapage: mp->logical_size != size");
+			jfs_err("logical_size = %d, size = %d",
+				mp->logical_size, size);
+			dump_stack();
+			goto unlock; 
 		}
 		mp->count++;
 		lock_metapage(mp);
-		spin_unlock(&meta_lock);
 		if (test_bit(META_discard, &mp->flag)) {
 			if (!new) {
 				jfs_error(inode->i_sb,
 					  "__get_metapage: using a "
 					  "discarded metapage");
-				release_metapage(mp);
-				return NULL;
+				discard_metapage(mp);
+				goto unlock; 
 			}
 			clear_bit(META_discard, &mp->flag);
 		}
-		jfs_info("__get_metapage: found 0x%p, in hash", mp);
-		if (mp->logical_size != size) {
-			jfs_error(inode->i_sb,
-				  "__get_metapage: mp->logical_size != size");
-			release_metapage(mp);
-			return NULL;
-		}
 	} else {
-		l2bsize = inode->i_blkbits;
-		l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
-		page_index = lblock >> l2BlocksPerPage;
-		page_offset = (lblock - (page_index << l2BlocksPerPage)) <<
-		    l2bsize;
-		if ((page_offset + size) > PAGE_CACHE_SIZE) {
-			spin_unlock(&meta_lock);
-			jfs_err("MetaData crosses page boundary!!");
-			return NULL;
-		}
-		
-		/*
-		 * Locks held on aggregate inode pages are usually
-		 * not held long, and they are taken in critical code
-		 * paths (committing dirty inodes, txCommit thread) 
-		 * 
-		 * Attempt to get metapage without blocking, tapping into
-		 * reserves if necessary.
-		 */
-		mp = NULL;
-		if (JFS_IP(inode)->fileset == AGGREGATE_I) {
-			mp = alloc_metapage(GFP_ATOMIC);
-			if (!mp) {
-				/*
-				 * mempool is supposed to protect us from
-				 * failing here.  We will try a blocking
-				 * call, but a deadlock is possible here
-				 */
-				printk(KERN_WARNING
-				       "__get_metapage: atomic call to mempool_alloc failed.\n");
-				printk(KERN_WARNING
-				       "Will attempt blocking call\n");
-			}
-		}
-		if (!mp) {
-			struct metapage *mp2;
-
-			spin_unlock(&meta_lock);
-			mp = alloc_metapage(GFP_NOFS);
-			spin_lock(&meta_lock);
-
-			/* we dropped the meta_lock, we need to search the
-			 * hash again.
-			 */
-			mp2 = search_hash(hash_ptr, mapping, lblock);
-			if (mp2) {
-				free_metapage(mp);
-				mp = mp2;
-				goto page_found;
-			}
-		}
+		INCREMENT(mpStat.pagealloc);
+		mp = alloc_metapage(GFP_NOFS);
+		mp->page = page;
 		mp->flag = 0;
-		lock_metapage(mp);
-		if (absolute)
-			set_bit(META_absolute, &mp->flag);
 		mp->xflag = COMMIT_PAGE;
 		mp->count = 1;
-		atomic_set(&mp->nohomeok,0);
-		mp->mapping = mapping;
-		mp->index = lblock;
-		mp->page = NULL;
+		mp->nohomeok = 0;
 		mp->logical_size = size;
-		add_to_hash(mp, hash_ptr);
-		spin_unlock(&meta_lock);
-
-		if (new) {
-			jfs_info("__get_metapage: Calling grab_cache_page");
-			mp->page = grab_cache_page(mapping, page_index);
-			if (!mp->page) {
-				jfs_err("grab_cache_page failed!");
-				goto freeit;
-			} else {
-				INCREMENT(mpStat.pagealloc);
-				unlock_page(mp->page);
-			}
-		} else {
-			jfs_info("__get_metapage: Calling read_cache_page");
-			mp->page = read_cache_page(mapping, lblock,
-				    (filler_t *)mapping->a_ops->readpage, NULL);
-			if (IS_ERR(mp->page)) {
-				jfs_err("read_cache_page failed!");
-				goto freeit;
-			} else
-				INCREMENT(mpStat.pagealloc);
+		mp->data = page_address(page) + page_offset;
+		mp->index = lblock;
+		if (unlikely(insert_metapage(page, mp))) {
+			free_metapage(mp);
+			goto unlock;
 		}
-		mp->data = kmap(mp->page) + page_offset;
+		lock_metapage(mp);
 	}
 
-	if (new)
+	if (new) {
+		jfs_info("zeroing mp = 0x%p", mp);
 		memset(mp->data, 0, PSIZE);
+	}
 
-	jfs_info("__get_metapage: returning = 0x%p", mp);
+	unlock_page(page);
+	jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp, mp->data);
 	return mp;
 
-freeit:
-	spin_lock(&meta_lock);
-	remove_from_hash(mp, hash_ptr);
-	free_metapage(mp);
-	spin_unlock(&meta_lock);
+unlock:
+	unlock_page(page);
 	return NULL;
 }
 
-void hold_metapage(struct metapage * mp, int force)
+void grab_metapage(struct metapage * mp)
 {
-	spin_lock(&meta_lock);
-
+	jfs_info("grab_metapage: mp = 0x%p", mp);
+	page_cache_get(mp->page);
+	lock_page(mp->page);
 	mp->count++;
-
-	if (force) {
-		ASSERT (!(test_bit(META_forced, &mp->flag)));
-		if (trylock_metapage(mp))
-			set_bit(META_forced, &mp->flag);
-	} else
-		lock_metapage(mp);
-
-	spin_unlock(&meta_lock);
+	lock_metapage(mp);
+	unlock_page(mp->page);
 }
 
-static void __write_metapage(struct metapage * mp)
+void force_metapage(struct metapage *mp)
 {
-	int l2bsize = mp->mapping->host->i_blkbits;
-	int l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
-	unsigned long page_index;
-	unsigned long page_offset;
-	int rc;
-
-	jfs_info("__write_metapage: mp = 0x%p", mp);
-
-	page_index = mp->page->index;
-	page_offset =
-	    (mp->index - (page_index << l2BlocksPerPage)) << l2bsize;
+	struct page *page = mp->page;
+	jfs_info("force_metapage: mp = 0x%p", mp);
+	set_bit(META_forcewrite, &mp->flag);
+	clear_bit(META_sync, &mp->flag);
+	page_cache_get(page);
+	lock_page(page);
+	set_page_dirty(page);
+	write_one_page(page, 1);
+	clear_bit(META_forcewrite, &mp->flag);
+	page_cache_release(page);
+}
 
+extern void hold_metapage(struct metapage *mp)
+{
 	lock_page(mp->page);
-	rc = mp->mapping->a_ops->prepare_write(NULL, mp->page, page_offset,
-					       page_offset +
-					       mp->logical_size);
-	if (rc) {
-		jfs_err("prepare_write return %d!", rc);
-		ClearPageUptodate(mp->page);
+}
+
+extern void put_metapage(struct metapage *mp)
+{
+	if (mp->count || mp->nohomeok) {
+		/* Someone else will release this */
 		unlock_page(mp->page);
-		clear_bit(META_dirty, &mp->flag);
 		return;
 	}
-	rc = mp->mapping->a_ops->commit_write(NULL, mp->page, page_offset,
-					      page_offset +
-					      mp->logical_size);
-	if (rc) {
-		jfs_err("commit_write returned %d", rc);
-	}
-
+	page_cache_get(mp->page);
+	mp->count++;
+	lock_metapage(mp);
 	unlock_page(mp->page);
-	clear_bit(META_dirty, &mp->flag);
-
-	jfs_info("__write_metapage done");
-}
-
-static inline void sync_metapage(struct metapage *mp)
-{
-	struct page *page = mp->page;
-
-	page_cache_get(page);
-	lock_page(page);
-
-	/* we're done with this page - no need to check for errors */
-	if (page_has_buffers(page))
-		write_one_page(page, 1);
-	else
-		unlock_page(page);
-	page_cache_release(page);
+	release_metapage(mp);
 }
 
 void release_metapage(struct metapage * mp)
 {
-	struct jfs_log *log;
-
+	struct page *page = mp->page;
 	jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);
 
-	spin_lock(&meta_lock);
-	if (test_bit(META_forced, &mp->flag)) {
-		clear_bit(META_forced, &mp->flag);
-		mp->count--;
-		spin_unlock(&meta_lock);
-		return;
-	}
+	BUG_ON(!page);
+
+	lock_page(page);
+	unlock_metapage(mp);
 
 	assert(mp->count);
-	if (--mp->count || atomic_read(&mp->nohomeok)) {
-		unlock_metapage(mp);
-		spin_unlock(&meta_lock);
+	if (--mp->count || mp->nohomeok) {
+		unlock_page(page);
+		page_cache_release(page);
 		return;
 	}
 
-	if (mp->page) {
-		set_bit(META_stale, &mp->flag);
-		spin_unlock(&meta_lock);
-		kunmap(mp->page);
-		mp->data = NULL;
-		if (test_bit(META_dirty, &mp->flag))
-			__write_metapage(mp);
+	if (test_bit(META_dirty, &mp->flag)) {
+		set_page_dirty(page);
 		if (test_bit(META_sync, &mp->flag)) {
-			sync_metapage(mp);
 			clear_bit(META_sync, &mp->flag);
+			write_one_page(page, 1);
+			lock_page(page); /* write_one_page unlocks the page */
 		}
+	} else if (mp->lsn)	/* discard_metapage doesn't remove it */
+		remove_from_logsync(mp);
 
-		if (test_bit(META_discard, &mp->flag)) {
-			lock_page(mp->page);
-			block_invalidatepage(mp->page, 0);
-			unlock_page(mp->page);
-		}
-
-		page_cache_release(mp->page);
-		mp->page = NULL;
-		INCREMENT(mpStat.pagefree);
-		spin_lock(&meta_lock);
-	}
+#if MPS_PER_PAGE == 1
+	/*
+	 * If we know this is the only thing in the page, we can throw
+	 * the page out of the page cache.  If pages are larger, we
+	 * don't want to do this.
+	 */
 
-	if (mp->lsn) {
-		/*
-		 * Remove metapage from logsynclist.
-		 */
-		log = mp->log;
-		LOGSYNC_LOCK(log);
-		mp->log = NULL;
-		mp->lsn = 0;
-		mp->clsn = 0;
-		log->count--;
-		list_del(&mp->synclist);
-		LOGSYNC_UNLOCK(log);
+	/* Retest mp->count since we may have released page lock */
+	if (test_bit(META_discard, &mp->flag) && !mp->count) {
+		clear_page_dirty(page);
+		ClearPageUptodate(page);
+#ifdef _NOT_YET
+		if (page->mapping) {
+		/* Remove from page cache and page cache reference */
+			remove_from_page_cache(page);
+			page_cache_release(page);
+			metapage_releasepage(page, 0);
+		}
+#endif
 	}
-	remove_from_hash(mp, meta_hash(mp->mapping, mp->index));
-	spin_unlock(&meta_lock);
-
-	free_metapage(mp);
+#else
+	/* Try to keep metapages from using up too much memory */
+	drop_metapage(page, mp);
+#endif
+	unlock_page(page);
+	page_cache_release(page);
 }
 
 void __invalidate_metapages(struct inode *ip, s64 addr, int len)
 {
-	struct metapage **hash_ptr;
-	unsigned long lblock;
+	sector_t lblock;
 	int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
+	int BlocksPerPage = 1 << l2BlocksPerPage;
 	/* All callers are interested in block device's mapping */
-	struct address_space *mapping = ip->i_sb->s_bdev->bd_inode->i_mapping;
+	struct address_space *mapping =
+		JFS_SBI(ip->i_sb)->direct_inode->i_mapping;
 	struct metapage *mp;
 	struct page *page;
+	unsigned int offset;
 
 	/*
-	 * First, mark metapages to discard.  They will eventually be
+	 * Mark metapages to discard.  They will eventually be
 	 * released, but should not be written.
 	 */
-	for (lblock = addr; lblock < addr + len;
-	     lblock += 1 << l2BlocksPerPage) {
-		hash_ptr = meta_hash(mapping, lblock);
-again:
-		spin_lock(&meta_lock);
-		mp = search_hash(hash_ptr, mapping, lblock);
-		if (mp) {
-			if (test_bit(META_stale, &mp->flag)) {
-				spin_unlock(&meta_lock);
-				msleep(1);
-				goto again;
-			}
+	for (lblock = addr & ~(BlocksPerPage - 1); lblock < addr + len;
+	     lblock += BlocksPerPage) {
+		page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
+		if (!page)
+			continue;
+		for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+			mp = page_to_mp(page, offset);
+			if (!mp)
+				continue;
+			if (mp->index < addr)
+				continue;
+			if (mp->index >= addr + len)
+				break;
 
 			clear_bit(META_dirty, &mp->flag);
 			set_bit(META_discard, &mp->flag);
-			spin_unlock(&meta_lock);
-		} else {
-			spin_unlock(&meta_lock);
-			page = find_lock_page(mapping, lblock>>l2BlocksPerPage);
-			if (page) {
-				block_invalidatepage(page, 0);
-				unlock_page(page);
-				page_cache_release(page);
-			}
+			if (mp->lsn)
+				remove_from_logsync(mp);
 		}
+		unlock_page(page);
+		page_cache_release(page);
 	}
 }
 
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index 0e58aba58c37..991e9fb84c75 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -33,38 +33,27 @@ struct metapage {
 	unsigned long flag;	/* See Below */
 	unsigned long count;	/* Reference count */
 	void *data;		/* Data pointer */
-
-	/* list management stuff */
-	struct metapage *hash_prev;
-	struct metapage *hash_next;	/* Also used for free list */
-
-	/*
-	 * mapping & index become redundant, but we need these here to
-	 * add the metapage to the hash before we have the real page
-	 */
-	struct address_space *mapping;
-	unsigned long index;
+	sector_t index; 	/* block address of page */
 	wait_queue_head_t wait;
 
 	/* implementation */
 	struct page *page;
-	unsigned long logical_size;
+	unsigned int logical_size;
 
 	/* Journal management */
 	int clsn;
-	atomic_t nohomeok;
+	int nohomeok;
 	struct jfs_log *log;
 };
 
 /* metapage flag */
 #define META_locked	0
-#define META_absolute	1
-#define META_free	2
-#define META_dirty	3
-#define META_sync	4
-#define META_discard	5
-#define META_forced	6
-#define META_stale	7
+#define META_free	1
+#define META_dirty	2
+#define META_sync	3
+#define META_discard	4
+#define META_forcewrite	5
+#define META_io		6
 
 #define mark_metapage_dirty(mp) set_bit(META_dirty, &(mp)->flag)
 
@@ -80,7 +69,16 @@ extern struct metapage *__get_metapage(struct inode *inode,
 	 __get_metapage(inode, lblock, size, absolute, TRUE)
 
 extern void release_metapage(struct metapage *);
-extern void hold_metapage(struct metapage *, int);
+extern void grab_metapage(struct metapage *);
+extern void force_metapage(struct metapage *);
+
+/*
+ * hold_metapage and put_metapage are used in conjuction.  The page lock
+ * is not dropped between the two, so no other threads can get or release
+ * the metapage
+ */
+extern void hold_metapage(struct metapage *);
+extern void put_metapage(struct metapage *);
 
 static inline void write_metapage(struct metapage *mp)
 {
@@ -101,6 +99,46 @@ static inline void discard_metapage(struct metapage *mp)
 	release_metapage(mp);
 }
 
+static inline void metapage_nohomeok(struct metapage *mp)
+{
+	struct page *page = mp->page;
+	lock_page(page);
+	if (!mp->nohomeok++) {
+		mark_metapage_dirty(mp);
+		page_cache_get(page);
+		wait_on_page_writeback(page);
+	}
+	unlock_page(page);
+}
+
+/*
+ * This serializes access to mp->lsn when metapages are added to logsynclist
+ * without setting nohomeok.  i.e. updating imap & dmap
+ */
+static inline void metapage_wait_for_io(struct metapage *mp)
+{
+	if (test_bit(META_io, &mp->flag))
+		wait_on_page_writeback(mp->page);
+}
+
+/*
+ * This is called when already holding the metapage
+ */
+static inline void _metapage_homeok(struct metapage *mp)
+{
+	if (!--mp->nohomeok)
+		page_cache_release(mp->page);
+}
+
+static inline void metapage_homeok(struct metapage *mp)
+{
+	hold_metapage(mp);
+	_metapage_homeok(mp);
+	put_metapage(mp);
+}
+
+extern struct address_space_operations jfs_metapage_aops;
+
 /*
  * This routines invalidate all pages for an extent.
  */
diff --git a/fs/jfs/jfs_mount.c b/fs/jfs/jfs_mount.c
index c535ffd638e8..032d111bc330 100644
--- a/fs/jfs/jfs_mount.c
+++ b/fs/jfs/jfs_mount.c
@@ -285,11 +285,6 @@ int jfs_mount_rw(struct super_block *sb, int remount)
 	 */
 	logMOUNT(sb);
 
-	/*
-	 * Set page cache allocation policy
-	 */
-	mapping_set_gfp_mask(sb->s_bdev->bd_inode->i_mapping, GFP_NOFS);
-
 	return rc;
 }
 
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index 98e16d93e146..bbc9c1407b55 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -227,6 +227,7 @@ static lid_t txLockAlloc(void)
 
 static void txLockFree(lid_t lid)
 {
+	TxLock[lid].tid = 0;
 	TxLock[lid].next = TxAnchor.freelock;
 	TxAnchor.freelock = lid;
 	TxAnchor.tlocksInUse--;
@@ -633,8 +634,10 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 
 	/* is page locked by the requester transaction ? */
 	tlck = lid_to_tlock(lid);
-	if ((xtid = tlck->tid) == tid)
+	if ((xtid = tlck->tid) == tid) {
+		TXN_UNLOCK();
 		goto grantLock;
+	}
 
 	/*
 	 * is page locked by anonymous transaction/lock ?
@@ -649,6 +652,7 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 	 */
 	if (xtid == 0) {
 		tlck->tid = tid;
+		TXN_UNLOCK();
 		tblk = tid_to_tblock(tid);
 		/*
 		 * The order of the tlocks in the transaction is important
@@ -706,17 +710,18 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 	 */
 	tlck->tid = tid;
 
+	TXN_UNLOCK();
+
 	/* mark tlock for meta-data page */
 	if (mp->xflag & COMMIT_PAGE) {
 
 		tlck->flag = tlckPAGELOCK;
 
 		/* mark the page dirty and nohomeok */
-		mark_metapage_dirty(mp);
-		atomic_inc(&mp->nohomeok);
+		metapage_nohomeok(mp);
 
 		jfs_info("locking mp = 0x%p, nohomeok = %d tid = %d tlck = 0x%p",
-			 mp, atomic_read(&mp->nohomeok), tid, tlck);
+			 mp, mp->nohomeok, tid, tlck);
 
 		/* if anonymous transaction, and buffer is on the group
 		 * commit synclist, mark inode to show this.  This will
@@ -762,8 +767,10 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 		if (tlck->next == 0) {
 			/* This inode's first anonymous transaction */
 			jfs_ip->atltail = lid;
+			TXN_LOCK();
 			list_add_tail(&jfs_ip->anon_inode_list,
 				      &TxAnchor.anon_list);
+			TXN_UNLOCK();
 		}
 	}
 
@@ -821,8 +828,6 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
       grantLock:
 	tlck->type |= type;
 
-	TXN_UNLOCK();
-
 	return tlck;
 
 	/*
@@ -841,11 +846,19 @@ struct tlock *txLock(tid_t tid, struct inode *ip, struct metapage * mp,
 		BUG();
 	}
 	INCREMENT(stattx.waitlock);	/* statistics */
+	TXN_UNLOCK();
 	release_metapage(mp);
+	TXN_LOCK();
+	xtid = tlck->tid;	/* reaquire after dropping TXN_LOCK */
 
 	jfs_info("txLock: in waitLock, tid = %d, xtid = %d, lid = %d",
 		 tid, xtid, lid);
-	TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
+
+	/* Recheck everything since dropping TXN_LOCK */
+	if (xtid && (tlck->mp == mp) && (mp->lid == lid))
+		TXN_SLEEP_DROP_LOCK(&tid_to_tblock(xtid)->waitor);
+	else
+		TXN_UNLOCK();
 	jfs_info("txLock: awakened     tid = %d, lid = %d", tid, lid);
 
 	return NULL;
@@ -906,6 +919,7 @@ static void txUnlock(struct tblock * tblk)
 	struct metapage *mp;
 	struct jfs_log *log;
 	int difft, diffp;
+	unsigned long flags;
 
 	jfs_info("txUnlock: tblk = 0x%p", tblk);
 	log = JFS_SBI(tblk->sb)->log;
@@ -925,19 +939,14 @@ static void txUnlock(struct tblock * tblk)
 			assert(mp->xflag & COMMIT_PAGE);
 
 			/* hold buffer
-			 *
-			 * It's possible that someone else has the metapage.
-			 * The only things were changing are nohomeok, which
-			 * is handled atomically, and clsn which is protected
-			 * by the LOGSYNC_LOCK.
 			 */
-			hold_metapage(mp, 1);
+			hold_metapage(mp);
 
-			assert(atomic_read(&mp->nohomeok) > 0);
-			atomic_dec(&mp->nohomeok);
+			assert(mp->nohomeok > 0);
+			_metapage_homeok(mp);
 
 			/* inherit younger/larger clsn */
-			LOGSYNC_LOCK(log);
+			LOGSYNC_LOCK(log, flags);
 			if (mp->clsn) {
 				logdiff(difft, tblk->clsn, log);
 				logdiff(diffp, mp->clsn, log);
@@ -945,16 +954,11 @@ static void txUnlock(struct tblock * tblk)
 					mp->clsn = tblk->clsn;
 			} else
 				mp->clsn = tblk->clsn;
-			LOGSYNC_UNLOCK(log);
+			LOGSYNC_UNLOCK(log, flags);
 
 			assert(!(tlck->flag & tlckFREEPAGE));
 
-			if (tlck->flag & tlckWRITEPAGE) {
-				write_metapage(mp);
-			} else {
-				/* release page which has been forced */
-				release_metapage(mp);
-			}
+			put_metapage(mp);
 		}
 
 		/* insert tlock, and linelock(s) of the tlock if any,
@@ -981,10 +985,10 @@ static void txUnlock(struct tblock * tblk)
 	 * has been inserted in logsync list at txUpdateMap())
 	 */
 	if (tblk->lsn) {
-		LOGSYNC_LOCK(log);
+		LOGSYNC_LOCK(log, flags);
 		log->count--;
 		list_del(&tblk->synclist);
-		LOGSYNC_UNLOCK(log);
+		LOGSYNC_UNLOCK(log, flags);
 	}
 }
 
@@ -1573,8 +1577,8 @@ static int dataLog(struct jfs_log * log, struct tblock * tblk, struct lrd * lrd,
 		 * the last entry, so don't bother logging this
 		 */
 		mp->lid = 0;
-		hold_metapage(mp, 0);
-		atomic_dec(&mp->nohomeok);
+		grab_metapage(mp);
+		metapage_homeok(mp);
 		discard_metapage(mp);
 		tlck->mp = NULL;
 		return 0;
@@ -2270,7 +2274,8 @@ void txForce(struct tblock * tblk)
 				tlck->flag &= ~tlckWRITEPAGE;
 
 				/* do not release page to freelist */
-
+				force_metapage(mp);
+#if 0
 				/*
 				 * The "right" thing to do here is to
 				 * synchronously write the metadata.
@@ -2282,9 +2287,10 @@ void txForce(struct tblock * tblk)
 				 * we can get by with synchronously writing
 				 * the pages when they are released.
 				 */
-				assert(atomic_read(&mp->nohomeok));
+				assert(mp->nohomeok);
 				set_bit(META_dirty, &mp->flag);
 				set_bit(META_sync, &mp->flag);
+#endif
 			}
 		}
 	}
@@ -2344,7 +2350,7 @@ static void txUpdateMap(struct tblock * tblk)
 			 */
 			mp = tlck->mp;
 			ASSERT(mp->xflag & COMMIT_PAGE);
-			hold_metapage(mp, 0);
+			grab_metapage(mp);
 		}
 
 		/*
@@ -2394,8 +2400,8 @@ static void txUpdateMap(struct tblock * tblk)
 				ASSERT(mp->lid == lid);
 				tlck->mp->lid = 0;
 			}
-			assert(atomic_read(&mp->nohomeok) == 1);
-			atomic_dec(&mp->nohomeok);
+			assert(mp->nohomeok == 1);
+			metapage_homeok(mp);
 			discard_metapage(mp);
 			tlck->mp = NULL;
 		}
@@ -2861,24 +2867,9 @@ static void LogSyncRelease(struct metapage * mp)
 {
 	struct jfs_log *log = mp->log;
 
-	assert(atomic_read(&mp->nohomeok));
+	assert(mp->nohomeok);
 	assert(log);
-	atomic_dec(&mp->nohomeok);
-
-	if (atomic_read(&mp->nohomeok))
-		return;
-
-	hold_metapage(mp, 0);
-
-	LOGSYNC_LOCK(log);
-	mp->log = NULL;
-	mp->lsn = 0;
-	mp->clsn = 0;
-	log->count--;
-	list_del_init(&mp->synclist);
-	LOGSYNC_UNLOCK(log);
-
-	release_metapage(mp);
+	metapage_homeok(mp);
 }
 
 /*
diff --git a/fs/jfs/jfs_umount.c b/fs/jfs/jfs_umount.c
index f31a9e3f3fec..5cf91785b541 100644
--- a/fs/jfs/jfs_umount.c
+++ b/fs/jfs/jfs_umount.c
@@ -49,7 +49,6 @@
  */
 int jfs_umount(struct super_block *sb)
 {
-	struct address_space *bdev_mapping = sb->s_bdev->bd_inode->i_mapping;
 	struct jfs_sb_info *sbi = JFS_SBI(sb);
 	struct inode *ipbmap = sbi->ipbmap;
 	struct inode *ipimap = sbi->ipimap;
@@ -109,8 +108,8 @@ int jfs_umount(struct super_block *sb)
 	 * Make sure all metadata makes it to disk before we mark
 	 * the superblock as clean
 	 */
-	filemap_fdatawrite(bdev_mapping);
-	filemap_fdatawait(bdev_mapping);
+	filemap_fdatawrite(sbi->direct_inode->i_mapping);
+	filemap_fdatawait(sbi->direct_inode->i_mapping);
 
 	/*
 	 * ensure all file system file pages are propagated to their
@@ -123,9 +122,6 @@ int jfs_umount(struct super_block *sb)
 	if (log) {		/* log = NULL if read-only mount */
 		updateSuper(sb, FM_CLEAN);
 
-		/* Restore default gfp_mask for bdev */
-		mapping_set_gfp_mask(bdev_mapping, GFP_USER);
-
 		/*
 		 * close log: 
 		 *
@@ -140,7 +136,6 @@ int jfs_umount(struct super_block *sb)
 
 int jfs_umount_rw(struct super_block *sb)
 {
-	struct address_space *bdev_mapping = sb->s_bdev->bd_inode->i_mapping;
 	struct jfs_sb_info *sbi = JFS_SBI(sb);
 	struct jfs_log *log = sbi->log;
 
@@ -166,13 +161,10 @@ int jfs_umount_rw(struct super_block *sb)
 	 * mark the superblock clean before everything is flushed to
 	 * disk.
 	 */
-	filemap_fdatawrite(bdev_mapping);
-	filemap_fdatawait(bdev_mapping);
+	filemap_fdatawrite(sbi->direct_inode->i_mapping);
+	filemap_fdatawait(sbi->direct_inode->i_mapping);
 
 	updateSuper(sb, FM_CLEAN);
 
-	/* Restore default gfp_mask for bdev */
-	mapping_set_gfp_mask(bdev_mapping, GFP_USER);
-
 	return lmLogClose(sb);
 }
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 2eb6869b6e72..c6dc254d3253 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -209,6 +209,9 @@ int jfs_extendfs(struct super_block *sb, s64 newLVSize, int newLogSize)
 	 */
 	txQuiesce(sb);
 
+	/* Reset size of direct inode */
+	sbi->direct_inode->i_size =  sb->s_bdev->bd_inode->i_size;
+
 	if (sbi->mntflag & JFS_INLINELOG) {
 		/*
 		 * deactivate old inline log
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 5856866e24fc..0812005364a1 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -210,6 +210,10 @@ static void jfs_put_super(struct super_block *sb)
 		unload_nls(sbi->nls_tab);
 	sbi->nls_tab = NULL;
 
+	truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
+	iput(sbi->direct_inode);
+	sbi->direct_inode = NULL;
+
 	kfree(sbi);
 }
 
@@ -358,6 +362,12 @@ static int jfs_remount(struct super_block *sb, int *flags, char *data)
 	}
 
 	if ((sb->s_flags & MS_RDONLY) && !(*flags & MS_RDONLY)) {
+		/*
+		 * Invalidate any previously read metadata.  fsck may have
+		 * changed the on-disk data since we mounted r/o
+		 */
+		truncate_inode_pages(JFS_SBI(sb)->direct_inode->i_mapping, 0);
+
 		JFS_SBI(sb)->flag = flag;
 		return jfs_mount_rw(sb, 1);
 	}
@@ -428,12 +438,26 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
 	sb->s_op = &jfs_super_operations;
 	sb->s_export_op = &jfs_export_operations;
 
+	/*
+	 * Initialize direct-mapping inode/address-space
+	 */
+	inode = new_inode(sb);
+	if (inode == NULL)
+		goto out_kfree;
+	inode->i_ino = 0;
+	inode->i_nlink = 1;
+	inode->i_size = sb->s_bdev->bd_inode->i_size;
+	inode->i_mapping->a_ops = &jfs_metapage_aops;
+	mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
+
+	sbi->direct_inode = inode;
+
 	rc = jfs_mount(sb);
 	if (rc) {
 		if (!silent) {
 			jfs_err("jfs_mount failed w/return code = %d", rc);
 		}
-		goto out_kfree;
+		goto out_mount_failed;
 	}
 	if (sb->s_flags & MS_RDONLY)
 		sbi->log = NULL;
@@ -482,6 +506,13 @@ out_no_rw:
 	if (rc) {
 		jfs_err("jfs_umount failed with return code %d", rc);
 	}
+out_mount_failed:
+	filemap_fdatawrite(sbi->direct_inode->i_mapping);
+	filemap_fdatawait(sbi->direct_inode->i_mapping);
+	truncate_inode_pages(sbi->direct_inode->i_mapping, 0);
+	make_bad_inode(sbi->direct_inode);
+	iput(sbi->direct_inode);
+	sbi->direct_inode = NULL;
 out_kfree:
 	if (sbi->nls_tab)
 		unload_nls(sbi->nls_tab);
-- 
cgit v1.2.3


From 1c6278295d6482edaaaef5faa64b18f17b3319b7 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Mon, 2 May 2005 12:25:08 -0600
Subject: [PATCH] JFS: Write journal sync points more often

This patch adds jfs_syncpt, which calls lmLogSync to write sync points
to the journal both in jfs_sync_fs and when sync barrier processing
completes.

lmLogSync accomplishes two things:  1) it pushes logged-but-dirty
metadata pages to disk, and 2) it writes a sync record to the journal
so that jfs_fsck doesn't need to replay more transactions than is
necessary.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jfs/jfs_logmgr.c | 17 ++++++++++++++---
 fs/jfs/jfs_logmgr.h |  1 +
 fs/jfs/jfs_txnmgr.c | 14 +++++++++-----
 fs/jfs/super.c      |  4 +++-
 4 files changed, 27 insertions(+), 9 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/jfs_logmgr.c b/fs/jfs/jfs_logmgr.c
index cfcdad3459dd..dfa1200daa61 100644
--- a/fs/jfs/jfs_logmgr.c
+++ b/fs/jfs/jfs_logmgr.c
@@ -927,9 +927,8 @@ static void lmPostGC(struct lbuf * bp)
  *	calculate new value of i_nextsync which determines when
  *	this code is called again.
  *
- *	this is called only from lmLog().
- *
- * PARAMETER:	ip	- pointer to logs inode.
+ * PARAMETERS:	log	- log structure
+ * 		nosyncwait - 1 if called asynchronously
  *
  * RETURN:	0
  *			
@@ -1051,6 +1050,18 @@ static int lmLogSync(struct jfs_log * log, int nosyncwait)
 	return lsn;
 }
 
+/*
+ * NAME:	jfs_syncpt
+ *
+ * FUNCTION:	write log SYNCPT record for specified log
+ *
+ * PARAMETERS:	log	- log structure
+ */
+void jfs_syncpt(struct jfs_log *log)
+{	LOG_LOCK(log);
+	lmLogSync(log, 1);
+	LOG_UNLOCK(log);
+}
 
 /*
  * NAME:	lmLogOpen()
diff --git a/fs/jfs/jfs_logmgr.h b/fs/jfs/jfs_logmgr.h
index f4c121098d4f..51291fbc420c 100644
--- a/fs/jfs/jfs_logmgr.h
+++ b/fs/jfs/jfs_logmgr.h
@@ -508,5 +508,6 @@ extern int lmLogShutdown(struct jfs_log * log);
 extern int lmLogInit(struct jfs_log * log);
 extern int lmLogFormat(struct jfs_log *log, s64 logAddress, int logSize);
 extern void jfs_flush_journal(struct jfs_log * log, int wait);
+extern void jfs_syncpt(struct jfs_log *log);
 
 #endif				/* _H_JFS_LOGMGR */
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c
index bbc9c1407b55..e93d01aa12c4 100644
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -567,9 +567,6 @@ void txEnd(tid_t tid)
 		 * synchronize with logsync barrier
 		 */
 		if (test_bit(log_SYNCBARRIER, &log->flag)) {
-			/* forward log syncpt */
-			/* lmSync(log); */
-
 			jfs_info("log barrier off: 0x%x", log->lsn);
 
 			/* enable new transactions start */
@@ -577,15 +574,22 @@ void txEnd(tid_t tid)
 
 			/* wakeup all waitors for logsync barrier */
 			TXN_WAKEUP(&log->syncwait);
+
+			TXN_UNLOCK();
+
+			/* forward log syncpt */
+			jfs_syncpt(log);
+
+			goto wakeup;
 		}
 	}
 
+	TXN_UNLOCK();
+wakeup:
 	/*
 	 * wakeup all waitors for a free tblock
 	 */
 	TXN_WAKEUP(&TxAnchor.freewait);
-
-	TXN_UNLOCK();
 }
 
 
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 0812005364a1..5e774ed7fb64 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -558,8 +558,10 @@ static int jfs_sync_fs(struct super_block *sb, int wait)
 	struct jfs_log *log = JFS_SBI(sb)->log;
 
 	/* log == NULL indicates read-only mount */
-	if (log)
+	if (log) {
 		jfs_flush_journal(log, wait);
+		jfs_syncpt(log);
+	}
 
 	return 0;
 }
-- 
cgit v1.2.3


From 6628465e33ca694bd8fd5c3cf4eb7ff9177bc694 Mon Sep 17 00:00:00 2001
From: Dave Kleikamp <shaggy@austin.ibm.com>
Date: Mon, 2 May 2005 12:25:13 -0600
Subject: [PATCH] JFS: Don't allocate extents that overlap existing extents

Modify xtSearch so that it returns the next allocated block when the
requested block is unmapped.  This can be used to make sure we don't
create a new extent that overlaps the next one.

Signed-off-by: Dave Kleikamp <shaggy@austin.ibm.com>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
---
 fs/jfs/inode.c     |  6 +++---
 fs/jfs/jfs_dtree.c |  4 ++--
 fs/jfs/jfs_xtree.c | 61 ++++++++++++++++++++++++++++++++++++------------------
 3 files changed, 46 insertions(+), 25 deletions(-)

(limited to 'fs')

diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c
index 6c04f5eda135..24a689179af2 100644
--- a/fs/jfs/inode.c
+++ b/fs/jfs/inode.c
@@ -178,7 +178,7 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
 	xad_t xad;
 	s64 xaddr;
 	int xflag;
-	s32 xlen;
+	s32 xlen = max_blocks;
 
 	/*
 	 * Take appropriate lock on inode
@@ -190,7 +190,7 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
 
 	if (((lblock64 << ip->i_sb->s_blocksize_bits) < ip->i_size) &&
 	    (!xtLookup(ip, lblock64, max_blocks, &xflag, &xaddr, &xlen, 0)) &&
-	    xlen) {
+	    xaddr) {
 		if (xflag & XAD_NOTRECORDED) {
 			if (!create)
 				/*
@@ -229,7 +229,7 @@ jfs_get_blocks(struct inode *ip, sector_t lblock, unsigned long max_blocks,
 #ifdef _JFS_4K
 	if ((rc = extHint(ip, lblock64 << ip->i_sb->s_blocksize_bits, &xad)))
 		goto unlock;
-	rc = extAlloc(ip, max_blocks, lblock64, &xad, FALSE);
+	rc = extAlloc(ip, xlen, lblock64, &xad, FALSE);
 	if (rc)
 		goto unlock;
 
diff --git a/fs/jfs/jfs_dtree.c b/fs/jfs/jfs_dtree.c
index 453bace608d1..ac41f72d6d50 100644
--- a/fs/jfs/jfs_dtree.c
+++ b/fs/jfs/jfs_dtree.c
@@ -212,7 +212,7 @@ static struct metapage *read_index_page(struct inode *inode, s64 blkno)
 	s32 xlen;
 
 	rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
-	if (rc || (xlen == 0))
+	if (rc || (xaddr == 0))
 		return NULL;
 
 	return read_metapage(inode, xaddr, PSIZE, 1);
@@ -231,7 +231,7 @@ static struct metapage *get_index_page(struct inode *inode, s64 blkno)
 	s32 xlen;
 
 	rc = xtLookup(inode, blkno, 1, &xflag, &xaddr, &xlen, 1);
-	if (rc || (xlen == 0))
+	if (rc || (xaddr == 0))
 		return NULL;
 
 	return get_metapage(inode, xaddr, PSIZE, 1);
diff --git a/fs/jfs/jfs_xtree.c b/fs/jfs/jfs_xtree.c
index 11c58c54b818..2c1f311914a1 100644
--- a/fs/jfs/jfs_xtree.c
+++ b/fs/jfs/jfs_xtree.c
@@ -111,8 +111,8 @@ static struct {
 /*
  * forward references
  */
-static int xtSearch(struct inode *ip,
-		    s64 xoff, int *cmpp, struct btstack * btstack, int flag);
+static int xtSearch(struct inode *ip, s64 xoff, s64 *next, int *cmpp,
+		    struct btstack * btstack, int flag);
 
 static int xtSplitUp(tid_t tid,
 		     struct inode *ip,
@@ -159,11 +159,12 @@ int xtLookup(struct inode *ip, s64 lstart,
 	xtpage_t *p;
 	int index;
 	xad_t *xad;
-	s64 size, xoff, xend;
+	s64 next, size, xoff, xend;
 	int xlen;
 	s64 xaddr;
 
-	*plen = 0;
+	*paddr = 0;
+	*plen = llen;
 
 	if (!no_check) {
 		/* is lookup offset beyond eof ? */
@@ -180,7 +181,7 @@ int xtLookup(struct inode *ip, s64 lstart,
 	 * search for the xad entry covering the logical extent
 	 */
 //search:
-	if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) {
+	if ((rc = xtSearch(ip, lstart, &next, &cmp, &btstack, 0))) {
 		jfs_err("xtLookup: xtSearch returned %d", rc);
 		return rc;
 	}
@@ -198,8 +199,11 @@ int xtLookup(struct inode *ip, s64 lstart,
 	 * lstart is a page start address,
 	 * i.e., lstart cannot start in a hole;
 	 */
-	if (cmp)
+	if (cmp) {
+		if (next)
+			*plen = min(next - lstart, llen);
 		goto out;
+	}
 
 	/*
 	 * lxd covered by xad
@@ -284,7 +288,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
 	if (lstart >= size)
 		return 0;
 
-	if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0)))
+	if ((rc = xtSearch(ip, lstart, NULL, &cmp, &btstack, 0)))
 		return rc;
 
 	/*
@@ -488,6 +492,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
  * parameters:
  *      ip      - file object;
  *      xoff    - extent offset;
+ *      nextp	- address of next extent (if any) for search miss
  *      cmpp    - comparison result:
  *      btstack - traverse stack;
  *      flag    - search process flag (XT_INSERT);
@@ -497,7 +502,7 @@ int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
  *      *cmpp is set to result of comparison with the entry returned.
  *      the page containing the entry is pinned at exit.
  */
-static int xtSearch(struct inode *ip, s64 xoff,	/* offset of extent */
+static int xtSearch(struct inode *ip, s64 xoff,	s64 *nextp,
 		    int *cmpp, struct btstack * btstack, int flag)
 {
 	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
@@ -511,6 +516,7 @@ static int xtSearch(struct inode *ip, s64 xoff,	/* offset of extent */
 	struct btframe *btsp;
 	int nsplit = 0;		/* number of pages to split */
 	s64 t64;
+	s64 next = 0;
 
 	INCREMENT(xtStat.search);
 
@@ -579,6 +585,7 @@ static int xtSearch(struct inode *ip, s64 xoff,	/* offset of extent */
 						 * previous and this entry
 						 */
 						*cmpp = 1;
+						next = t64;
 						goto out;
 					}
 
@@ -623,6 +630,9 @@ static int xtSearch(struct inode *ip, s64 xoff,	/* offset of extent */
 			/* update sequential access heuristics */
 			jfs_ip->btindex = index;
 
+			if (nextp)
+				*nextp = next;
+
 			INCREMENT(xtStat.fastSearch);
 			return 0;
 		}
@@ -675,10 +685,11 @@ static int xtSearch(struct inode *ip, s64 xoff,	/* offset of extent */
 
 					return 0;
 				}
-
 				/* search hit - internal page:
 				 * descend/search its child page
 				 */
+				if (index < p->header.nextindex - 1)
+					next = offsetXAD(&p->xad[index + 1]);
 				goto next;
 			}
 
@@ -694,6 +705,8 @@ static int xtSearch(struct inode *ip, s64 xoff,	/* offset of extent */
 		 * base is the smallest index with key (Kj) greater than
 		 * search key (K) and may be zero or maxentry index.
 		 */
+		if (base < p->header.nextindex)
+			next = offsetXAD(&p->xad[base]);
 		/*
 		 * search miss - leaf page:
 		 *
@@ -727,6 +740,9 @@ static int xtSearch(struct inode *ip, s64 xoff,	/* offset of extent */
 				jfs_ip->btorder = BT_RANDOM;
 			jfs_ip->btindex = base;
 
+			if (nextp)
+				*nextp = next;
+
 			return 0;
 		}
 
@@ -793,6 +809,7 @@ int xtInsert(tid_t tid,		/* transaction id */
 	struct xtsplit split;	/* split information */
 	xad_t *xad;
 	int cmp;
+	s64 next;
 	struct tlock *tlck;
 	struct xtlock *xtlck;
 
@@ -806,7 +823,7 @@ int xtInsert(tid_t tid,		/* transaction id */
 	 * n.b. xtSearch() may return index of maxentry of
 	 * the full page.
 	 */
-	if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT)))
+	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
 		return rc;
 
 	/* retrieve search result */
@@ -814,7 +831,7 @@ int xtInsert(tid_t tid,		/* transaction id */
 
 	/* This test must follow XT_GETSEARCH since mp must be valid if
 	 * we branch to out: */
-	if (cmp == 0) {
+	if ((cmp == 0) || (next && (xlen > next - xoff))) {
 		rc = -EEXIST;
 		goto out;
 	}
@@ -1626,7 +1643,7 @@ int xtExtend(tid_t tid,		/* transaction id */
 	jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
 
 	/* there must exist extent to be extended */
-	if ((rc = xtSearch(ip, xoff - 1, &cmp, &btstack, XT_INSERT)))
+	if ((rc = xtSearch(ip, xoff - 1, NULL, &cmp, &btstack, XT_INSERT)))
 		return rc;
 
 	/* retrieve search result */
@@ -1794,7 +1811,7 @@ printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
 */
 
 	/* there must exist extent to be tailgated */
-	if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT)))
+	if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, XT_INSERT)))
 		return rc;
 
 	/* retrieve search result */
@@ -1977,7 +1994,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
 	nxlen = lengthXAD(nxad);
 	nxaddr = addressXAD(nxad);
 
-	if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT)))
+	if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
 		return rc;
 
 	/* retrieve search result */
@@ -2291,7 +2308,7 @@ int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
 	if (nextindex == le16_to_cpu(p->header.maxentry)) {
 		XT_PUTPAGE(mp);
 
-		if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT)))
+		if ((rc = xtSearch(ip, nxoff, NULL, &cmp, &btstack, XT_INSERT)))
 			return rc;
 
 		/* retrieve search result */
@@ -2438,6 +2455,7 @@ int xtAppend(tid_t tid,		/* transaction id */
 	int nsplit, nblocks, xlen;
 	struct pxdlist pxdlist;
 	pxd_t *pxd;
+	s64 next;
 
 	xaddr = *xaddrp;
 	xlen = *xlenp;
@@ -2452,7 +2470,7 @@ int xtAppend(tid_t tid,		/* transaction id */
 	 * n.b. xtSearch() may return index of maxentry of
 	 * the full page.
 	 */
-	if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT)))
+	if ((rc = xtSearch(ip, xoff, &next, &cmp, &btstack, XT_INSERT)))
 		return rc;
 
 	/* retrieve search result */
@@ -2462,6 +2480,9 @@ int xtAppend(tid_t tid,		/* transaction id */
 		rc = -EEXIST;
 		goto out;
 	}
+
+	if (next)
+		xlen = min(xlen, (int)(next - xoff));
 //insert:
 	/*
 	 *      insert entry for new extent
@@ -2600,7 +2621,7 @@ int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
 	/*
 	 * find the matching entry; xtSearch() pins the page
 	 */
-	if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0)))
+	if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
 		return rc;
 
 	XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
@@ -2852,7 +2873,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
 	 */
 	if (xtype == DATAEXT) {
 		/* search in leaf entry */
-		rc = xtSearch(ip, xoff, &cmp, &btstack, 0);
+		rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
 		if (rc)
 			return rc;
 
@@ -2958,7 +2979,7 @@ xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,	/* old XAD */
 		}
 
 		/* get back parent page */
-		if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0)))
+		if ((rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0)))
 			return rc;
 
 		XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
@@ -3991,7 +4012,7 @@ s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
 
 	if (committed_size) {
 		xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
-		rc = xtSearch(ip, xoff, &cmp, &btstack, 0);
+		rc = xtSearch(ip, xoff, NULL, &cmp, &btstack, 0);
 		if (rc)
 			return rc;
 
-- 
cgit v1.2.3