From dca3c33652e437ed02c30ed3eca3cecd0cc00838 Mon Sep 17 00:00:00 2001
From: Roman Zippel <zippel@linux-m68k.org>
Date: Tue, 29 Apr 2008 17:02:20 +0200
Subject: [PATCH] fix reservation discarding in affs

- remove affs_put_inode, so preallocations aren't discared unnecessarily
  often.
- remove affs_drop_inode, it's called with a spinlock held, so it can't
  use a mutex.
- make i_opencnt atomic
- avoid direct b_count manipulations
- a few allocation failure fixes, so that these are more gracefully
  handled now.
Signed-off-by: Roman Zippel <zippel@linux-m68k.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/affs/affs.h  |  4 +---
 fs/affs/file.c  | 25 +++++++++++++++++--------
 fs/affs/inode.c | 34 ++++++++--------------------------
 fs/affs/namei.c |  6 ++++--
 fs/affs/super.c | 18 +++++++++++-------
 5 files changed, 41 insertions(+), 46 deletions(-)

diff --git a/fs/affs/affs.h b/fs/affs/affs.h
index d5bd497ab9cb..223b1917093e 100644
--- a/fs/affs/affs.h
+++ b/fs/affs/affs.h
@@ -48,7 +48,7 @@ struct affs_ext_key {
  * affs fs inode data in memory
  */
 struct affs_inode_info {
-	u32	 i_opencnt;
+	atomic_t i_opencnt;
 	struct semaphore i_link_lock;		/* Protects internal inode access. */
 	struct semaphore i_ext_lock;		/* Protects internal inode access. */
 #define i_hash_lock i_ext_lock
@@ -170,8 +170,6 @@ extern int	affs_rename(struct inode *old_dir, struct dentry *old_dentry,
 extern unsigned long		 affs_parent_ino(struct inode *dir);
 extern struct inode		*affs_new_inode(struct inode *dir);
 extern int			 affs_notify_change(struct dentry *dentry, struct iattr *attr);
-extern void			 affs_put_inode(struct inode *inode);
-extern void			 affs_drop_inode(struct inode *inode);
 extern void			 affs_delete_inode(struct inode *inode);
 extern void			 affs_clear_inode(struct inode *inode);
 extern struct inode		*affs_iget(struct super_block *sb,
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 1a4f092f24ef..6eac7bdeec94 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -48,8 +48,9 @@ affs_file_open(struct inode *inode, struct file *filp)
 {
 	if (atomic_read(&filp->f_count) != 1)
 		return 0;
-	pr_debug("AFFS: open(%d)\n", AFFS_I(inode)->i_opencnt);
-	AFFS_I(inode)->i_opencnt++;
+	pr_debug("AFFS: open(%lu,%d)\n",
+		 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
+	atomic_inc(&AFFS_I(inode)->i_opencnt);
 	return 0;
 }
 
@@ -58,10 +59,16 @@ affs_file_release(struct inode *inode, struct file *filp)
 {
 	if (atomic_read(&filp->f_count) != 0)
 		return 0;
-	pr_debug("AFFS: release(%d)\n", AFFS_I(inode)->i_opencnt);
-	AFFS_I(inode)->i_opencnt--;
-	if (!AFFS_I(inode)->i_opencnt)
+	pr_debug("AFFS: release(%lu, %d)\n",
+		 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
+
+	if (atomic_dec_and_test(&AFFS_I(inode)->i_opencnt)) {
+		mutex_lock(&inode->i_mutex);
+		if (inode->i_size != AFFS_I(inode)->mmu_private)
+			affs_truncate(inode);
 		affs_free_prealloc(inode);
+		mutex_unlock(&inode->i_mutex);
+	}
 
 	return 0;
 }
@@ -180,7 +187,7 @@ affs_get_extblock(struct inode *inode, u32 ext)
 	/* inline the simplest case: same extended block as last time */
 	struct buffer_head *bh = AFFS_I(inode)->i_ext_bh;
 	if (ext == AFFS_I(inode)->i_ext_last)
-		atomic_inc(&bh->b_count);
+		get_bh(bh);
 	else
 		/* we have to do more (not inlined) */
 		bh = affs_get_extblock_slow(inode, ext);
@@ -306,7 +313,7 @@ store_ext:
 	affs_brelse(AFFS_I(inode)->i_ext_bh);
 	AFFS_I(inode)->i_ext_last = ext;
 	AFFS_I(inode)->i_ext_bh = bh;
-	atomic_inc(&bh->b_count);
+	get_bh(bh);
 
 	return bh;
 
@@ -324,7 +331,6 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul
 
 	pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block);
 
-
 	BUG_ON(block > (sector_t)0x7fffffffUL);
 
 	if (block >= AFFS_I(inode)->i_blkcnt) {
@@ -827,6 +833,8 @@ affs_truncate(struct inode *inode)
 		res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata);
 		if (!res)
 			res = mapping->a_ops->write_end(NULL, mapping, size, 0, 0, page, fsdata);
+		else
+			inode->i_size = AFFS_I(inode)->mmu_private;
 		mark_inode_dirty(inode);
 		return;
 	} else if (inode->i_size == AFFS_I(inode)->mmu_private)
@@ -862,6 +870,7 @@ affs_truncate(struct inode *inode)
 		blk++;
 	} else
 		AFFS_HEAD(ext_bh)->first_data = 0;
+	AFFS_HEAD(ext_bh)->block_count = cpu_to_be32(i);
 	size = AFFS_SB(sb)->s_hashsize;
 	if (size > blkcnt - blk + i)
 		size = blkcnt - blk + i;
diff --git a/fs/affs/inode.c b/fs/affs/inode.c
index 27fe6cbe43ae..a13b334a3910 100644
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -58,7 +58,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
 	AFFS_I(inode)->i_extcnt = 1;
 	AFFS_I(inode)->i_ext_last = ~1;
 	AFFS_I(inode)->i_protect = prot;
-	AFFS_I(inode)->i_opencnt = 0;
+	atomic_set(&AFFS_I(inode)->i_opencnt, 0);
 	AFFS_I(inode)->i_blkcnt = 0;
 	AFFS_I(inode)->i_lc = NULL;
 	AFFS_I(inode)->i_lc_size = 0;
@@ -108,8 +108,6 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
 			inode->i_mode |= S_IFDIR;
 		} else
 			inode->i_mode = S_IRUGO | S_IXUGO | S_IWUSR | S_IFDIR;
-		if (tail->link_chain)
-			inode->i_nlink = 2;
 		/* Maybe it should be controlled by mount parameter? */
 		//inode->i_mode |= S_ISVTX;
 		inode->i_op = &affs_dir_inode_operations;
@@ -244,32 +242,13 @@ out:
 	return error;
 }
 
-void
-affs_put_inode(struct inode *inode)
-{
-	pr_debug("AFFS: put_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
-	affs_free_prealloc(inode);
-}
-
-void
-affs_drop_inode(struct inode *inode)
-{
-	mutex_lock(&inode->i_mutex);
-	if (inode->i_size != AFFS_I(inode)->mmu_private)
-		affs_truncate(inode);
-	mutex_unlock(&inode->i_mutex);
-
-	generic_drop_inode(inode);
-}
-
 void
 affs_delete_inode(struct inode *inode)
 {
 	pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
 	truncate_inode_pages(&inode->i_data, 0);
 	inode->i_size = 0;
-	if (S_ISREG(inode->i_mode))
-		affs_truncate(inode);
+	affs_truncate(inode);
 	clear_inode(inode);
 	affs_free_block(inode->i_sb, inode->i_ino);
 }
@@ -277,9 +256,12 @@ affs_delete_inode(struct inode *inode)
 void
 affs_clear_inode(struct inode *inode)
 {
-	unsigned long cache_page = (unsigned long) AFFS_I(inode)->i_lc;
+	unsigned long cache_page;
 
 	pr_debug("AFFS: clear_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
+
+	affs_free_prealloc(inode);
+	cache_page = (unsigned long)AFFS_I(inode)->i_lc;
 	if (cache_page) {
 		pr_debug("AFFS: freeing ext cache\n");
 		AFFS_I(inode)->i_lc = NULL;
@@ -316,7 +298,7 @@ affs_new_inode(struct inode *dir)
 	inode->i_ino     = block;
 	inode->i_nlink   = 1;
 	inode->i_mtime   = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
-	AFFS_I(inode)->i_opencnt = 0;
+	atomic_set(&AFFS_I(inode)->i_opencnt, 0);
 	AFFS_I(inode)->i_blkcnt = 0;
 	AFFS_I(inode)->i_lc = NULL;
 	AFFS_I(inode)->i_lc_size = 0;
@@ -369,12 +351,12 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
 	switch (type) {
 	case ST_LINKFILE:
 	case ST_LINKDIR:
-		inode_bh = bh;
 		retval = -ENOSPC;
 		block = affs_alloc_block(dir, dir->i_ino);
 		if (!block)
 			goto err;
 		retval = -EIO;
+		inode_bh = bh;
 		bh = affs_getzeroblk(sb, block);
 		if (!bh)
 			goto err;
diff --git a/fs/affs/namei.c b/fs/affs/namei.c
index 2218f1ee71ce..cfcf1b6cf82b 100644
--- a/fs/affs/namei.c
+++ b/fs/affs/namei.c
@@ -234,7 +234,8 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 int
 affs_unlink(struct inode *dir, struct dentry *dentry)
 {
-	pr_debug("AFFS: unlink(dir=%d, \"%.*s\")\n", (u32)dir->i_ino,
+	pr_debug("AFFS: unlink(dir=%d, %lu \"%.*s\")\n", (u32)dir->i_ino,
+		 dentry->d_inode->i_ino,
 		 (int)dentry->d_name.len, dentry->d_name.name);
 
 	return affs_remove_header(dentry);
@@ -302,7 +303,8 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 int
 affs_rmdir(struct inode *dir, struct dentry *dentry)
 {
-	pr_debug("AFFS: rmdir(dir=%u, \"%.*s\")\n", (u32)dir->i_ino,
+	pr_debug("AFFS: rmdir(dir=%u, %lu \"%.*s\")\n", (u32)dir->i_ino,
+		 dentry->d_inode->i_ino,
 		 (int)dentry->d_name.len, dentry->d_name.name);
 
 	return affs_remove_header(dentry);
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 01d25d532541..d214837d5e42 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -71,12 +71,18 @@ static struct kmem_cache * affs_inode_cachep;
 
 static struct inode *affs_alloc_inode(struct super_block *sb)
 {
-	struct affs_inode_info *ei;
-	ei = (struct affs_inode_info *)kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL);
-	if (!ei)
+	struct affs_inode_info *i;
+
+	i = kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL);
+	if (!i)
 		return NULL;
-	ei->vfs_inode.i_version = 1;
-	return &ei->vfs_inode;
+
+	i->vfs_inode.i_version = 1;
+	i->i_lc = NULL;
+	i->i_ext_bh = NULL;
+	i->i_pa_cnt = 0;
+
+	return &i->vfs_inode;
 }
 
 static void affs_destroy_inode(struct inode *inode)
@@ -114,8 +120,6 @@ static const struct super_operations affs_sops = {
 	.alloc_inode	= affs_alloc_inode,
 	.destroy_inode	= affs_destroy_inode,
 	.write_inode	= affs_write_inode,
-	.put_inode	= affs_put_inode,
-	.drop_inode	= affs_drop_inode,
 	.delete_inode	= affs_delete_inode,
 	.clear_inode	= affs_clear_inode,
 	.put_super	= affs_put_super,
-- 
cgit v1.2.3


From 33dcdac2df54e66c447ae03f58c95c7251aa5649 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 29 Apr 2008 17:46:26 +0200
Subject: [PATCH] kill ->put_inode

And with that last patch to affs killing the last put_inode instance we
can finally, after many years of transition kill this racy and awkward
interface.

(It's kinda funny that even the description in
Documentation/filesystems/vfs.txt was entirely wrong..)

Also remove a very misleading comment above the defintion of
struct super_operations.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 Documentation/filesystems/Locking | 2 --
 Documentation/filesystems/vfs.txt | 4 ----
 fs/inode.c                        | 3 ---
 include/linux/fs.h                | 5 -----
 4 files changed, 14 deletions(-)

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index c2992bc54f2f..8b22d7d8b991 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -92,7 +92,6 @@ prototypes:
 	void (*destroy_inode)(struct inode *);
 	void (*dirty_inode) (struct inode *);
 	int (*write_inode) (struct inode *, int);
-	void (*put_inode) (struct inode *);
 	void (*drop_inode) (struct inode *);
 	void (*delete_inode) (struct inode *);
 	void (*put_super) (struct super_block *);
@@ -115,7 +114,6 @@ alloc_inode:		no	no	no
 destroy_inode:		no
 dirty_inode:		no				(must not sleep)
 write_inode:		no
-put_inode:		no
 drop_inode:		no				!!!inode_lock!!!
 delete_inode:		no
 put_super:		yes	yes	no
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 81e5be6e6e35..b7522c6cbae3 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -205,7 +205,6 @@ struct super_operations {
 
         void (*dirty_inode) (struct inode *);
         int (*write_inode) (struct inode *, int);
-        void (*put_inode) (struct inode *);
         void (*drop_inode) (struct inode *);
         void (*delete_inode) (struct inode *);
         void (*put_super) (struct super_block *);
@@ -246,9 +245,6 @@ or bottom half).
 	inode to disc.  The second parameter indicates whether the write
 	should be synchronous or not, not all filesystems check this flag.
 
-  put_inode: called when the VFS inode is removed from the inode
-	cache.
-
   drop_inode: called when the last access to the inode is dropped,
 	with the inode_lock spinlock held.
 
diff --git a/fs/inode.c b/fs/inode.c
index bf6478130424..18bdce14b70c 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -1153,9 +1153,6 @@ void iput(struct inode *inode)
 
 		BUG_ON(inode->i_state == I_CLEAR);
 
-		if (op && op->put_inode)
-			op->put_inode(inode);
-
 		if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
 			iput_final(inode);
 	}
diff --git a/include/linux/fs.h b/include/linux/fs.h
index a1ba005d08e7..7e0fa9e64479 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1289,17 +1289,12 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
 extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
 		unsigned long, loff_t *);
 
-/*
- * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called
- * without the big kernel lock held in all filesystems.
- */
 struct super_operations {
    	struct inode *(*alloc_inode)(struct super_block *sb);
 	void (*destroy_inode)(struct inode *);
 
    	void (*dirty_inode) (struct inode *);
 	int (*write_inode) (struct inode *, int);
-	void (*put_inode) (struct inode *);
 	void (*drop_inode) (struct inode *);
 	void (*delete_inode) (struct inode *);
 	void (*put_super) (struct super_block *);
-- 
cgit v1.2.3


From 0b2bac2f1ea0d33a3621b27ca68b9ae760fca2e9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Tue, 6 May 2008 13:58:34 -0400
Subject: [PATCH] fix SMP ordering hole in fcntl_setlk()

fcntl_setlk()/close() race prevention has a subtle hole - we need to
make sure that if we *do* have an fcntl/close race on SMP box, the
access to descriptor table and inode->i_flock won't get reordered.

As it is, we get STORE inode->i_flock, LOAD descriptor table entry vs.
STORE descriptor table entry, LOAD inode->i_flock with not a single
lock in common on both sides.  We do have BKL around the first STORE,
but check in locks_remove_posix() is outside of BKL and for a good
reason - we don't want BKL on common path of close(2).

Solution is to hold ->file_lock around fcheck() in there; that orders
us wrt removal from descriptor table that preceded locks_remove_posix()
on close path and we either come first (in which case eviction will be
handled by the close side) or we'll see the effect of close and do
eviction ourselves.  Note that even though it's read-only access,
we do need ->file_lock here - rcu_read_lock() won't be enough to
order the things.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/locks.c | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/fs/locks.c b/fs/locks.c
index 663c069b59b3..0ac6b92cb0b6 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -1753,6 +1753,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
 	struct file_lock *file_lock = locks_alloc_lock();
 	struct flock flock;
 	struct inode *inode;
+	struct file *f;
 	int error;
 
 	if (file_lock == NULL)
@@ -1825,7 +1826,15 @@ again:
 	 * Attempt to detect a close/fcntl race and recover by
 	 * releasing the lock that was just acquired.
 	 */
-	if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) {
+	/*
+	 * we need that spin_lock here - it prevents reordering between
+	 * update of inode->i_flock and check for it done in close().
+	 * rcu_read_lock() wouldn't do.
+	 */
+	spin_lock(&current->files->file_lock);
+	f = fcheck(fd);
+	spin_unlock(&current->files->file_lock);
+	if (!error && f != filp && flock.l_type != F_UNLCK) {
 		flock.l_type = F_UNLCK;
 		goto again;
 	}
@@ -1881,6 +1890,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
 	struct file_lock *file_lock = locks_alloc_lock();
 	struct flock64 flock;
 	struct inode *inode;
+	struct file *f;
 	int error;
 
 	if (file_lock == NULL)
@@ -1953,7 +1963,10 @@ again:
 	 * Attempt to detect a close/fcntl race and recover by
 	 * releasing the lock that was just acquired.
 	 */
-	if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) {
+	spin_lock(&current->files->file_lock);
+	f = fcheck(fd);
+	spin_unlock(&current->files->file_lock);
+	if (!error && f != filp && flock.l_type != F_UNLCK) {
 		flock.l_type = F_UNLCK;
 		goto again;
 	}
-- 
cgit v1.2.3