From 56b0dacfa2b8416815a2f2a5f4f51e46be4cf14c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 6 Oct 2010 10:48:55 +0200 Subject: fs: mark destroy_inode static Hugetlbfs used to need it, but after the destroy_inode and evict_inode changes it's not required anymore. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 86464332e590..3368abd64bb5 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -235,7 +235,7 @@ void __destroy_inode(struct inode *inode) } EXPORT_SYMBOL(__destroy_inode); -void destroy_inode(struct inode *inode) +static void destroy_inode(struct inode *inode) { __destroy_inode(inode); if (inode->i_sb->s_op->destroy_inode) -- cgit v1.2.3 From a3314a0ed389f51a51695120b429eccd45b3a165 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Mon, 11 Oct 2010 22:38:00 +0900 Subject: lockdep: fixup checking of dir inode annotation Since inode->i_mode shares its bits for S_IFMT, S_ISDIR should be used to distinguish whether it is a dir or not. Signed-off-by: Namhyung Kim Signed-off-by: Al Viro --- fs/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 3368abd64bb5..4f0a67c54f89 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -663,7 +663,7 @@ EXPORT_SYMBOL(new_inode); void unlock_new_inode(struct inode *inode) { #ifdef CONFIG_DEBUG_LOCK_ALLOC - if (inode->i_mode & S_IFDIR) { + if (S_ISDIR(inode->i_mode)) { struct file_system_type *type = inode->i_sb->s_type; /* Set new key only if filesystem hasn't already changed it */ -- cgit v1.2.3 From a8dade34e3df581bc36ca2afe6e27055e178801c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Oct 2010 11:13:10 -0400 Subject: unexport invalidate_inodes Signed-off-by: Al Viro --- fs/inode.c | 1 - fs/internal.h | 5 +++++ include/linux/fs.h | 1 - 3 files changed, 5 insertions(+), 2 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 4f0a67c54f89..db7c74c7dd80 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -417,7 +417,6 @@ int invalidate_inodes(struct super_block *sb) return busy; } -EXPORT_SYMBOL(invalidate_inodes); static int can_unuse(struct inode *inode) { diff --git a/fs/internal.h b/fs/internal.h index a6910e91cee8..f6dce46d80dc 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -101,3 +101,8 @@ extern void put_super(struct super_block *sb); struct nameidata; extern struct file *nameidata_to_filp(struct nameidata *); extern void release_open_intent(struct nameidata *); + +/* + * inode.c + */ +extern int invalidate_inodes(struct super_block *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 7fc126df1c42..c3f6daf749cc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2082,7 +2082,6 @@ extern int check_disk_change(struct block_device *); extern int __invalidate_device(struct block_device *); extern int invalidate_partition(struct gendisk *, int); #endif -extern int invalidate_inodes(struct super_block *); unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); -- cgit v1.2.3 From 1d3382cbf02986e4833849f528d451367ea0b4cb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Oct 2010 15:19:20 -0400 Subject: new helper: inode_unhashed() note: for race-free uses you inode_lock held Signed-off-by: Al Viro --- fs/btrfs/inode.c | 2 +- fs/fs-writeback.c | 2 +- fs/inode.c | 6 +++--- fs/reiserfs/xattr.c | 2 +- include/linux/fs.h | 5 +++++ mm/shmem.c | 4 ++-- 6 files changed, 13 insertions(+), 8 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index c03864406af3..f6f2a0da2695 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3849,7 +3849,7 @@ again: p = &root->inode_tree.rb_node; parent = NULL; - if (hlist_unhashed(&inode->i_hash)) + if (inode_unhashed(inode)) return; spin_lock(&root->inode_lock); diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 29e3f409bbd0..39f44f2e709a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -962,7 +962,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) * dirty list. Add blockdev inodes as well. */ if (!S_ISBLK(inode->i_mode)) { - if (hlist_unhashed(&inode->i_hash)) + if (inode_unhashed(inode)) goto out; } if (inode->i_state & I_FREEING) diff --git a/fs/inode.c b/fs/inode.c index db7c74c7dd80..4440cf1034ec 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -1094,7 +1094,7 @@ int insert_inode_locked(struct inode *inode) __iget(old); spin_unlock(&inode_lock); wait_on_inode(old); - if (unlikely(!hlist_unhashed(&old->i_hash))) { + if (unlikely(!inode_unhashed(old))) { iput(old); return -EBUSY; } @@ -1133,7 +1133,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, __iget(old); spin_unlock(&inode_lock); wait_on_inode(old); - if (unlikely(!hlist_unhashed(&old->i_hash))) { + if (unlikely(!inode_unhashed(old))) { iput(old); return -EBUSY; } @@ -1186,7 +1186,7 @@ EXPORT_SYMBOL(generic_delete_inode); */ int generic_drop_inode(struct inode *inode) { - return !inode->i_nlink || hlist_unhashed(&inode->i_hash); + return !inode->i_nlink || inode_unhashed(inode); } EXPORT_SYMBOL_GPL(generic_drop_inode); diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index f7415de13878..5d04a7828e7a 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -422,7 +422,7 @@ int reiserfs_commit_write(struct file *f, struct page *page, static void update_ctime(struct inode *inode) { struct timespec now = current_fs_time(inode->i_sb); - if (hlist_unhashed(&inode->i_hash) || !inode->i_nlink || + if (inode_unhashed(inode) || !inode->i_nlink || timespec_equal(&inode->i_ctime, &now)) return; diff --git a/include/linux/fs.h b/include/linux/fs.h index c3f6daf749cc..78043da85e1f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -786,6 +786,11 @@ struct inode { void *i_private; /* fs or device private pointer */ }; +static inline int inode_unhashed(struct inode *inode) +{ + return hlist_unhashed(&inode->i_hash); +} + /* * inode->i_mutex nesting subclasses for the lock validator: * diff --git a/mm/shmem.c b/mm/shmem.c index 080b09a57a8f..27a58120dbd5 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2146,7 +2146,7 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, if (*len < 3) return 255; - if (hlist_unhashed(&inode->i_hash)) { + if (inode_unhashed(inode)) { /* Unfortunately insert_inode_hash is not idempotent, * so as we hash inodes here rather than at creation * time, we need a lock to ensure we only try @@ -2154,7 +2154,7 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len, */ static DEFINE_SPINLOCK(lock); spin_lock(&lock); - if (hlist_unhashed(&inode->i_hash)) + if (inode_unhashed(inode)) __insert_inode_hash(inode, inode->i_ino + inode->i_generation); spin_unlock(&lock); -- cgit v1.2.3 From cffbc8aa334f55c9ed42d25202eb3ebf3a97c195 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Sat, 23 Oct 2010 05:03:02 -0400 Subject: fs: Convert nr_inodes and nr_unused to per-cpu counters The number of inodes allocated does not need to be tied to the addition or removal of an inode to/from a list. If we are not tied to a list lock, we could update the counters when inodes are initialised or destroyed, but to do that we need to convert the counters to be per-cpu (i.e. independent of a lock). This means that we have the freedom to change the list/locking implementation without needing to care about the counters. Based on a patch originally from Eric Dumazet. [AV: cleaned up a bit, fixed build breakage on weird configs Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/fs-writeback.c | 5 ++--- fs/inode.c | 64 ++++++++++++++++++++++++++++++++++++++---------------- fs/internal.h | 1 + include/linux/fs.h | 3 ++- kernel/sysctl.c | 4 ++-- 5 files changed, 52 insertions(+), 25 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 39f44f2e709a..f04d04af84f2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -723,7 +723,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb) wb->last_old_flush = jiffies; nr_pages = global_page_state(NR_FILE_DIRTY) + global_page_state(NR_UNSTABLE_NFS) + - (inodes_stat.nr_inodes - inodes_stat.nr_unused); + get_nr_dirty_inodes(); if (nr_pages) { struct wb_writeback_work work = { @@ -1090,8 +1090,7 @@ void writeback_inodes_sb(struct super_block *sb) WARN_ON(!rwsem_is_locked(&sb->s_umount)); - work.nr_pages = nr_dirty + nr_unstable + - (inodes_stat.nr_inodes - inodes_stat.nr_unused); + work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes(); bdi_queue_work(sb->s_bdi, &work); wait_for_completion(&done); diff --git a/fs/inode.c b/fs/inode.c index 4440cf1034ec..0d5aeccbdd90 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -103,8 +103,41 @@ static DECLARE_RWSEM(iprune_sem); */ struct inodes_stat_t inodes_stat; +static struct percpu_counter nr_inodes __cacheline_aligned_in_smp; +static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp; + static struct kmem_cache *inode_cachep __read_mostly; +static inline int get_nr_inodes(void) +{ + return percpu_counter_sum_positive(&nr_inodes); +} + +static inline int get_nr_inodes_unused(void) +{ + return percpu_counter_sum_positive(&nr_inodes_unused); +} + +int get_nr_dirty_inodes(void) +{ + int nr_dirty = get_nr_inodes() - get_nr_inodes_unused(); + return nr_dirty > 0 ? nr_dirty : 0; + +} + +/* + * Handle nr_inode sysctl + */ +#ifdef CONFIG_SYSCTL +int proc_nr_inodes(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + inodes_stat.nr_inodes = get_nr_inodes(); + inodes_stat.nr_unused = get_nr_inodes_unused(); + return proc_dointvec(table, write, buffer, lenp, ppos); +} +#endif + static void wake_up_inode(struct inode *inode) { /* @@ -192,6 +225,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode) inode->i_fsnotify_mask = 0; #endif + percpu_counter_inc(&nr_inodes); + return 0; out: return -ENOMEM; @@ -232,6 +267,7 @@ void __destroy_inode(struct inode *inode) if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED) posix_acl_release(inode->i_default_acl); #endif + percpu_counter_dec(&nr_inodes); } EXPORT_SYMBOL(__destroy_inode); @@ -286,7 +322,7 @@ void __iget(struct inode *inode) if (!(inode->i_state & (I_DIRTY|I_SYNC))) list_move(&inode->i_list, &inode_in_use); - inodes_stat.nr_unused--; + percpu_counter_dec(&nr_inodes_unused); } void end_writeback(struct inode *inode) @@ -327,8 +363,6 @@ static void evict(struct inode *inode) */ static void dispose_list(struct list_head *head) { - int nr_disposed = 0; - while (!list_empty(head)) { struct inode *inode; @@ -344,11 +378,7 @@ static void dispose_list(struct list_head *head) wake_up_inode(inode); destroy_inode(inode); - nr_disposed++; } - spin_lock(&inode_lock); - inodes_stat.nr_inodes -= nr_disposed; - spin_unlock(&inode_lock); } /* @@ -357,7 +387,7 @@ static void dispose_list(struct list_head *head) static int invalidate_list(struct list_head *head, struct list_head *dispose) { struct list_head *next; - int busy = 0, count = 0; + int busy = 0; next = head->next; for (;;) { @@ -383,13 +413,11 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) list_move(&inode->i_list, dispose); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - count++; + percpu_counter_dec(&nr_inodes_unused); continue; } busy = 1; } - /* only unused inodes may be cached with i_count zero */ - inodes_stat.nr_unused -= count; return busy; } @@ -447,7 +475,6 @@ static int can_unuse(struct inode *inode) static void prune_icache(int nr_to_scan) { LIST_HEAD(freeable); - int nr_pruned = 0; int nr_scanned; unsigned long reap = 0; @@ -483,9 +510,8 @@ static void prune_icache(int nr_to_scan) list_move(&inode->i_list, &freeable); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - nr_pruned++; + percpu_counter_dec(&nr_inodes_unused); } - inodes_stat.nr_unused -= nr_pruned; if (current_is_kswapd()) __count_vm_events(KSWAPD_INODESTEAL, reap); else @@ -517,7 +543,7 @@ static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask) return -1; prune_icache(nr); } - return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure; + return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure; } static struct shrinker icache_shrinker = { @@ -594,7 +620,6 @@ static inline void __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, struct inode *inode) { - inodes_stat.nr_inodes++; list_add(&inode->i_list, &inode_in_use); list_add(&inode->i_sb_list, &sb->s_inodes); if (head) @@ -1214,7 +1239,7 @@ static void iput_final(struct inode *inode) if (!drop) { if (!(inode->i_state & (I_DIRTY|I_SYNC))) list_move(&inode->i_list, &inode_unused); - inodes_stat.nr_unused++; + percpu_counter_inc(&nr_inodes_unused); if (sb->s_flags & MS_ACTIVE) { spin_unlock(&inode_lock); return; @@ -1226,14 +1251,13 @@ static void iput_final(struct inode *inode) spin_lock(&inode_lock); WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_WILL_FREE; - inodes_stat.nr_unused--; + percpu_counter_dec(&nr_inodes_unused); hlist_del_init(&inode->i_hash); } list_del_init(&inode->i_list); list_del_init(&inode->i_sb_list); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - inodes_stat.nr_inodes--; spin_unlock(&inode_lock); evict(inode); spin_lock(&inode_lock); @@ -1502,6 +1526,8 @@ void __init inode_init(void) SLAB_MEM_SPREAD), init_once); register_shrinker(&icache_shrinker); + percpu_counter_init(&nr_inodes, 0); + percpu_counter_init(&nr_inodes_unused, 0); /* Hash may have been set up in inode_init_early */ if (!hashdist) diff --git a/fs/internal.h b/fs/internal.h index f6dce46d80dc..4cc67eb6ed56 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -105,4 +105,5 @@ extern void release_open_intent(struct nameidata *); /* * inode.c */ +extern int get_nr_dirty_inodes(void); extern int invalidate_inodes(struct super_block *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 78043da85e1f..a3937a8ee95e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2486,7 +2486,8 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, struct ctl_table; int proc_nr_files(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); - +int proc_nr_inodes(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); int __init get_filesystem_list(char *buf); #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 694b140852c2..99a510cbfbb3 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1340,14 +1340,14 @@ static struct ctl_table fs_table[] = { .data = &inodes_stat, .maxlen = 2*sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = proc_nr_inodes, }, { .procname = "inode-state", .data = &inodes_stat, .maxlen = 7*sizeof(int), .mode = 0444, - .proc_handler = proc_dointvec, + .proc_handler = proc_nr_inodes, }, { .procname = "file-nr", -- cgit v1.2.3 From 9e38d86ff2d8a8db99570e982230861046df32b5 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Sat, 23 Oct 2010 06:55:17 -0400 Subject: fs: Implement lazy LRU updates for inodes Convert the inode LRU to use lazy updates to reduce lock and cacheline traffic. We avoid moving inodes around in the LRU list during iget/iput operations so these frequent operations don't need to access the LRUs. Instead, we defer the refcount checks to reclaim-time and use a per-inode state flag, I_REFERENCED, to tell reclaim that iget has touched the inode in the past. This means that only reclaim should be touching the LRU with any frequency, hence significantly reducing lock acquisitions and the amount contention on LRU updates. This also removes the inode_in_use list, which means we now only have one list for tracking the inode LRU status. This makes it much simpler to split out the LRU list operations under it's own lock. Signed-off-by: Nick Piggin Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/fs-writeback.c | 11 +++--- fs/inode.c | 86 +++++++++++++++++++++++++++++++++-------------- include/linux/fs.h | 13 +++---- include/linux/writeback.h | 2 -- 4 files changed, 71 insertions(+), 41 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index f04d04af84f2..e8f65290e836 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -408,16 +408,13 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * completion. */ redirty_tail(inode); - } else if (atomic_read(&inode->i_count)) { - /* - * The inode is clean, inuse - */ - list_move(&inode->i_list, &inode_in_use); } else { /* - * The inode is clean, unused + * The inode is clean. At this point we either have + * a reference to the inode or it's on it's way out. + * No need to add it back to the LRU. */ - list_move(&inode->i_list, &inode_unused); + list_del_init(&inode->i_list); } } inode_sync_complete(inode); diff --git a/fs/inode.c b/fs/inode.c index 0d5aeccbdd90..3bdc76f1653a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -72,8 +72,7 @@ static unsigned int i_hash_shift __read_mostly; * allowing for low-overhead inode sync() operations. */ -LIST_HEAD(inode_in_use); -LIST_HEAD(inode_unused); +static LIST_HEAD(inode_unused); static struct hlist_head *inode_hashtable __read_mostly; /* @@ -291,6 +290,7 @@ void inode_init_once(struct inode *inode) INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_dentry); INIT_LIST_HEAD(&inode->i_devices); + INIT_LIST_HEAD(&inode->i_list); INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); spin_lock_init(&inode->i_data.tree_lock); spin_lock_init(&inode->i_data.i_mmap_lock); @@ -317,12 +317,23 @@ static void init_once(void *foo) */ void __iget(struct inode *inode) { - if (atomic_inc_return(&inode->i_count) != 1) - return; + atomic_inc(&inode->i_count); +} - if (!(inode->i_state & (I_DIRTY|I_SYNC))) - list_move(&inode->i_list, &inode_in_use); - percpu_counter_dec(&nr_inodes_unused); +static void inode_lru_list_add(struct inode *inode) +{ + if (list_empty(&inode->i_list)) { + list_add(&inode->i_list, &inode_unused); + percpu_counter_inc(&nr_inodes_unused); + } +} + +static void inode_lru_list_del(struct inode *inode) +{ + if (!list_empty(&inode->i_list)) { + list_del_init(&inode->i_list); + percpu_counter_dec(&nr_inodes_unused); + } } void end_writeback(struct inode *inode) @@ -367,7 +378,7 @@ static void dispose_list(struct list_head *head) struct inode *inode; inode = list_first_entry(head, struct inode, i_list); - list_del(&inode->i_list); + list_del_init(&inode->i_list); evict(inode); @@ -413,7 +424,8 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) list_move(&inode->i_list, dispose); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; - percpu_counter_dec(&nr_inodes_unused); + if (!(inode->i_state & (I_DIRTY | I_SYNC))) + percpu_counter_dec(&nr_inodes_unused); continue; } busy = 1; @@ -448,7 +460,7 @@ int invalidate_inodes(struct super_block *sb) static int can_unuse(struct inode *inode) { - if (inode->i_state) + if (inode->i_state & ~I_REFERENCED) return 0; if (inode_has_buffers(inode)) return 0; @@ -460,17 +472,20 @@ static int can_unuse(struct inode *inode) } /* - * Scan `goal' inodes on the unused list for freeable ones. They are moved to - * a temporary list and then are freed outside inode_lock by dispose_list(). + * Scan `goal' inodes on the unused list for freeable ones. They are moved to a + * temporary list and then are freed outside inode_lock by dispose_list(). * * Any inodes which are pinned purely because of attached pagecache have their - * pagecache removed. We expect the final iput() on that inode to add it to - * the front of the inode_unused list. So look for it there and if the - * inode is still freeable, proceed. The right inode is found 99.9% of the - * time in testing on a 4-way. + * pagecache removed. If the inode has metadata buffers attached to + * mapping->private_list then try to remove them. * - * If the inode has metadata buffers attached to mapping->private_list then - * try to remove them. + * If the inode has the I_REFERENCED flag set, then it means that it has been + * used recently - the flag is set in iput_final(). When we encounter such an + * inode, clear the flag and move it to the back of the LRU so it gets another + * pass through the LRU before it gets reclaimed. This is necessary because of + * the fact we are doing lazy LRU updates to minimise lock contention so the + * LRU does not have strict ordering. Hence we don't want to reclaim inodes + * with this flag set because they are the inodes that are out of order. */ static void prune_icache(int nr_to_scan) { @@ -488,8 +503,21 @@ static void prune_icache(int nr_to_scan) inode = list_entry(inode_unused.prev, struct inode, i_list); - if (inode->i_state || atomic_read(&inode->i_count)) { + /* + * Referenced or dirty inodes are still in use. Give them + * another pass through the LRU as we canot reclaim them now. + */ + if (atomic_read(&inode->i_count) || + (inode->i_state & ~I_REFERENCED)) { + list_del_init(&inode->i_list); + percpu_counter_dec(&nr_inodes_unused); + continue; + } + + /* recently referenced inodes get one more pass */ + if (inode->i_state & I_REFERENCED) { list_move(&inode->i_list, &inode_unused); + inode->i_state &= ~I_REFERENCED; continue; } if (inode_has_buffers(inode) || inode->i_data.nrpages) { @@ -620,7 +648,6 @@ static inline void __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, struct inode *inode) { - list_add(&inode->i_list, &inode_in_use); list_add(&inode->i_sb_list, &sb->s_inodes); if (head) hlist_add_head(&inode->i_hash, head); @@ -1237,10 +1264,11 @@ static void iput_final(struct inode *inode) drop = generic_drop_inode(inode); if (!drop) { - if (!(inode->i_state & (I_DIRTY|I_SYNC))) - list_move(&inode->i_list, &inode_unused); - percpu_counter_inc(&nr_inodes_unused); if (sb->s_flags & MS_ACTIVE) { + inode->i_state |= I_REFERENCED; + if (!(inode->i_state & (I_DIRTY|I_SYNC))) { + inode_lru_list_add(inode); + } spin_unlock(&inode_lock); return; } @@ -1251,13 +1279,19 @@ static void iput_final(struct inode *inode) spin_lock(&inode_lock); WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_WILL_FREE; - percpu_counter_dec(&nr_inodes_unused); hlist_del_init(&inode->i_hash); } - list_del_init(&inode->i_list); - list_del_init(&inode->i_sb_list); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; + + /* + * After we delete the inode from the LRU here, we avoid moving dirty + * inodes back onto the LRU now because I_FREEING is set and hence + * writeback_single_inode() won't move the inode around. + */ + inode_lru_list_del(inode); + + list_del_init(&inode->i_sb_list); spin_unlock(&inode_lock); evict(inode); spin_lock(&inode_lock); diff --git a/include/linux/fs.h b/include/linux/fs.h index a3937a8ee95e..876275fc0638 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1641,16 +1641,17 @@ struct super_operations { * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ -#define I_DIRTY_SYNC 1 -#define I_DIRTY_DATASYNC 2 -#define I_DIRTY_PAGES 4 +#define I_DIRTY_SYNC (1 << 0) +#define I_DIRTY_DATASYNC (1 << 1) +#define I_DIRTY_PAGES (1 << 2) #define __I_NEW 3 #define I_NEW (1 << __I_NEW) -#define I_WILL_FREE 16 -#define I_FREEING 32 -#define I_CLEAR 64 +#define I_WILL_FREE (1 << 4) +#define I_FREEING (1 << 5) +#define I_CLEAR (1 << 6) #define __I_SYNC 7 #define I_SYNC (1 << __I_SYNC) +#define I_REFERENCED (1 << 8) #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 72a5d647a5f2..242b6f812ba6 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -10,8 +10,6 @@ struct backing_dev_info; extern spinlock_t inode_lock; -extern struct list_head inode_in_use; -extern struct list_head inode_unused; /* * fs/fs-writeback.c -- cgit v1.2.3 From 4c51acbc66f754e536e1c9e3331656b69bce86d0 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Sat, 23 Oct 2010 06:58:09 -0400 Subject: fs: Factor inode hash operations into functions Before replacing the inode hash locking with a more scalable mechanism, factor the removal of the inode from the hashes rather than open coding it in several places. Based on a patch originally from Nick Piggin. Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 100 +++++++++++++++++++++++++++++++++---------------------------- 1 file changed, 55 insertions(+), 45 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 3bdc76f1653a..5e5bafe70ceb 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -336,6 +336,58 @@ static void inode_lru_list_del(struct inode *inode) } } +static unsigned long hash(struct super_block *sb, unsigned long hashval) +{ + unsigned long tmp; + + tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / + L1_CACHE_BYTES; + tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); + return tmp & I_HASHMASK; +} + +/** + * __insert_inode_hash - hash an inode + * @inode: unhashed inode + * @hashval: unsigned long value used to locate this object in the + * inode_hashtable. + * + * Add an inode to the inode hash for this superblock. + */ +void __insert_inode_hash(struct inode *inode, unsigned long hashval) +{ + struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); + spin_lock(&inode_lock); + hlist_add_head(&inode->i_hash, head); + spin_unlock(&inode_lock); +} +EXPORT_SYMBOL(__insert_inode_hash); + +/** + * __remove_inode_hash - remove an inode from the hash + * @inode: inode to unhash + * + * Remove an inode from the superblock. + */ +static void __remove_inode_hash(struct inode *inode) +{ + hlist_del_init(&inode->i_hash); +} + +/** + * remove_inode_hash - remove an inode from the hash + * @inode: inode to unhash + * + * Remove an inode from the superblock. + */ +void remove_inode_hash(struct inode *inode) +{ + spin_lock(&inode_lock); + hlist_del_init(&inode->i_hash); + spin_unlock(&inode_lock); +} +EXPORT_SYMBOL(remove_inode_hash); + void end_writeback(struct inode *inode) { might_sleep(); @@ -383,7 +435,7 @@ static void dispose_list(struct list_head *head) evict(inode); spin_lock(&inode_lock); - hlist_del_init(&inode->i_hash); + __remove_inode_hash(inode); list_del_init(&inode->i_sb_list); spin_unlock(&inode_lock); @@ -634,16 +686,6 @@ repeat: return node ? inode : NULL; } -static unsigned long hash(struct super_block *sb, unsigned long hashval) -{ - unsigned long tmp; - - tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) / - L1_CACHE_BYTES; - tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS); - return tmp & I_HASHMASK; -} - static inline void __inode_add_to_lists(struct super_block *sb, struct hlist_head *head, struct inode *inode) @@ -1194,36 +1236,6 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval, } EXPORT_SYMBOL(insert_inode_locked4); -/** - * __insert_inode_hash - hash an inode - * @inode: unhashed inode - * @hashval: unsigned long value used to locate this object in the - * inode_hashtable. - * - * Add an inode to the inode hash for this superblock. - */ -void __insert_inode_hash(struct inode *inode, unsigned long hashval) -{ - struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); - spin_lock(&inode_lock); - hlist_add_head(&inode->i_hash, head); - spin_unlock(&inode_lock); -} -EXPORT_SYMBOL(__insert_inode_hash); - -/** - * remove_inode_hash - remove an inode from the hash - * @inode: inode to unhash - * - * Remove an inode from the superblock. - */ -void remove_inode_hash(struct inode *inode) -{ - spin_lock(&inode_lock); - hlist_del_init(&inode->i_hash); - spin_unlock(&inode_lock); -} -EXPORT_SYMBOL(remove_inode_hash); int generic_delete_inode(struct inode *inode) { @@ -1279,7 +1291,7 @@ static void iput_final(struct inode *inode) spin_lock(&inode_lock); WARN_ON(inode->i_state & I_NEW); inode->i_state &= ~I_WILL_FREE; - hlist_del_init(&inode->i_hash); + __remove_inode_hash(inode); } WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; @@ -1294,9 +1306,7 @@ static void iput_final(struct inode *inode) list_del_init(&inode->i_sb_list); spin_unlock(&inode_lock); evict(inode); - spin_lock(&inode_lock); - hlist_del_init(&inode->i_hash); - spin_unlock(&inode_lock); + remove_inode_hash(inode); wake_up_inode(inode); BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); destroy_inode(inode); -- cgit v1.2.3 From ad5e195ac9fdf4e2b28b8cf14937e5b9384dac2e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 Oct 2010 07:00:16 -0400 Subject: fs: Stop abusing find_inode_fast in iunique Stop abusing find_inode_fast for iunique and opencode the inode hash walk. Introduce a new iunique_lock to protect the iunique counters once inode_lock is removed. Based on a patch originally from Nick Piggin. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 5e5bafe70ceb..a8035e8576df 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -884,6 +884,27 @@ static struct inode *get_new_inode_fast(struct super_block *sb, return inode; } +/* + * search the inode cache for a matching inode number. + * If we find one, then the inode number we are trying to + * allocate is not unique and so we should not use it. + * + * Returns 1 if the inode number is unique, 0 if it is not. + */ +static int test_inode_iunique(struct super_block *sb, unsigned long ino) +{ + struct hlist_head *b = inode_hashtable + hash(sb, ino); + struct hlist_node *node; + struct inode *inode; + + hlist_for_each_entry(inode, node, b, i_hash) { + if (inode->i_ino == ino && inode->i_sb == sb) + return 0; + } + + return 1; +} + /** * iunique - get a unique inode number * @sb: superblock @@ -905,19 +926,18 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved) * error if st_ino won't fit in target struct field. Use 32bit counter * here to attempt to avoid that. */ + static DEFINE_SPINLOCK(iunique_lock); static unsigned int counter; - struct inode *inode; - struct hlist_head *head; ino_t res; spin_lock(&inode_lock); + spin_lock(&iunique_lock); do { if (counter <= max_reserved) counter = max_reserved + 1; res = counter++; - head = inode_hashtable + hash(sb, res); - inode = find_inode_fast(sb, head, res); - } while (inode != NULL); + } while (!test_inode_iunique(sb, res)); + spin_unlock(&iunique_lock); spin_unlock(&inode_lock); return res; -- cgit v1.2.3 From f7899bd5472e8e99741369b4a32eca44e5282a85 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 Oct 2010 07:09:06 -0400 Subject: fs: move i_count increments into find_inode/find_inode_fast Now that iunique is not abusing find_inode anymore we can move the i_ref increment back to where it belongs. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index a8035e8576df..78c41c626cdc 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -634,9 +634,6 @@ static struct shrinker icache_shrinker = { static void __wait_on_freeing_inode(struct inode *inode); /* * Called with the inode lock held. - * NOTE: we are not increasing the inode-refcount, you must call __iget() - * by hand after calling find_inode now! This simplifies iunique and won't - * add any additional branch in the common code. */ static struct inode *find_inode(struct super_block *sb, struct hlist_head *head, @@ -656,9 +653,10 @@ repeat: __wait_on_freeing_inode(inode); goto repeat; } - break; + __iget(inode); + return inode; } - return node ? inode : NULL; + return NULL; } /* @@ -681,9 +679,10 @@ repeat: __wait_on_freeing_inode(inode); goto repeat; } - break; + __iget(inode); + return inode; } - return node ? inode : NULL; + return NULL; } static inline void @@ -828,7 +827,6 @@ static struct inode *get_new_inode(struct super_block *sb, * us. Use the old inode instead of the one we just * allocated. */ - __iget(old); spin_unlock(&inode_lock); destroy_inode(inode); inode = old; @@ -875,7 +873,6 @@ static struct inode *get_new_inode_fast(struct super_block *sb, * us. Use the old inode instead of the one we just * allocated. */ - __iget(old); spin_unlock(&inode_lock); destroy_inode(inode); inode = old; @@ -989,7 +986,6 @@ static struct inode *ifind(struct super_block *sb, spin_lock(&inode_lock); inode = find_inode(sb, head, test, data); if (inode) { - __iget(inode); spin_unlock(&inode_lock); if (likely(wait)) wait_on_inode(inode); @@ -1022,7 +1018,6 @@ static struct inode *ifind_fast(struct super_block *sb, spin_lock(&inode_lock); inode = find_inode_fast(sb, head, ino); if (inode) { - __iget(inode); spin_unlock(&inode_lock); wait_on_inode(inode); return inode; -- cgit v1.2.3 From 646ec4615cd05972581c9c5342ed7a1e77df17bb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 Oct 2010 07:15:32 -0400 Subject: fs: remove inode_add_to_list/__inode_add_to_list Split up inode_add_to_list/__inode_add_to_list. Locking for the two lists will be split soon so these helpers really don't buy us much anymore. The __ prefixes for the sb list helpers will go away soon, but until inode_lock is gone we'll need them to distinguish between the locked and unlocked variants. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 70 +++++++++++++++++++++------------------------ fs/xfs/linux-2.6/xfs_iops.c | 4 ++- include/linux/fs.h | 5 ++-- 3 files changed, 38 insertions(+), 41 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 78c41c626cdc..430d70f2abe7 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -336,6 +336,28 @@ static void inode_lru_list_del(struct inode *inode) } } +static inline void __inode_sb_list_add(struct inode *inode) +{ + list_add(&inode->i_sb_list, &inode->i_sb->s_inodes); +} + +/** + * inode_sb_list_add - add inode to the superblock list of inodes + * @inode: inode to add + */ +void inode_sb_list_add(struct inode *inode) +{ + spin_lock(&inode_lock); + __inode_sb_list_add(inode); + spin_unlock(&inode_lock); +} +EXPORT_SYMBOL_GPL(inode_sb_list_add); + +static inline void __inode_sb_list_del(struct inode *inode) +{ + list_del_init(&inode->i_sb_list); +} + static unsigned long hash(struct super_block *sb, unsigned long hashval) { unsigned long tmp; @@ -356,9 +378,10 @@ static unsigned long hash(struct super_block *sb, unsigned long hashval) */ void __insert_inode_hash(struct inode *inode, unsigned long hashval) { - struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval); + struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval); + spin_lock(&inode_lock); - hlist_add_head(&inode->i_hash, head); + hlist_add_head(&inode->i_hash, b); spin_unlock(&inode_lock); } EXPORT_SYMBOL(__insert_inode_hash); @@ -436,7 +459,7 @@ static void dispose_list(struct list_head *head) spin_lock(&inode_lock); __remove_inode_hash(inode); - list_del_init(&inode->i_sb_list); + __inode_sb_list_del(inode); spin_unlock(&inode_lock); wake_up_inode(inode); @@ -685,37 +708,6 @@ repeat: return NULL; } -static inline void -__inode_add_to_lists(struct super_block *sb, struct hlist_head *head, - struct inode *inode) -{ - list_add(&inode->i_sb_list, &sb->s_inodes); - if (head) - hlist_add_head(&inode->i_hash, head); -} - -/** - * inode_add_to_lists - add a new inode to relevant lists - * @sb: superblock inode belongs to - * @inode: inode to mark in use - * - * When an inode is allocated it needs to be accounted for, added to the in use - * list, the owning superblock and the inode hash. This needs to be done under - * the inode_lock, so export a function to do this rather than the inode lock - * itself. We calculate the hash list to add to here so it is all internal - * which requires the caller to have already set up the inode number in the - * inode to add. - */ -void inode_add_to_lists(struct super_block *sb, struct inode *inode) -{ - struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino); - - spin_lock(&inode_lock); - __inode_add_to_lists(sb, head, inode); - spin_unlock(&inode_lock); -} -EXPORT_SYMBOL_GPL(inode_add_to_lists); - /** * new_inode - obtain an inode * @sb: superblock @@ -743,7 +735,7 @@ struct inode *new_inode(struct super_block *sb) inode = alloc_inode(sb); if (inode) { spin_lock(&inode_lock); - __inode_add_to_lists(sb, NULL, inode); + __inode_sb_list_add(inode); inode->i_ino = ++last_ino; inode->i_state = 0; spin_unlock(&inode_lock); @@ -812,7 +804,8 @@ static struct inode *get_new_inode(struct super_block *sb, if (set(inode, data)) goto set_failed; - __inode_add_to_lists(sb, head, inode); + hlist_add_head(&inode->i_hash, head); + __inode_sb_list_add(inode); inode->i_state = I_NEW; spin_unlock(&inode_lock); @@ -858,7 +851,8 @@ static struct inode *get_new_inode_fast(struct super_block *sb, old = find_inode_fast(sb, head, ino); if (!old) { inode->i_ino = ino; - __inode_add_to_lists(sb, head, inode); + hlist_add_head(&inode->i_hash, head); + __inode_sb_list_add(inode); inode->i_state = I_NEW; spin_unlock(&inode_lock); @@ -1318,7 +1312,7 @@ static void iput_final(struct inode *inode) */ inode_lru_list_del(inode); - list_del_init(&inode->i_sb_list); + __inode_sb_list_del(inode); spin_unlock(&inode_lock); evict(inode); remove_inode_hash(inode); diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index ec858e09d546..71d83c93621c 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -760,7 +760,9 @@ xfs_setup_inode( inode->i_ino = ip->i_ino; inode->i_state = I_NEW; - inode_add_to_lists(ip->i_mount->m_super, inode); + + inode_sb_list_add(inode); + insert_inode_hash(inode); inode->i_mode = ip->i_d.di_mode; inode->i_nlink = ip->i_d.di_nlink; diff --git a/include/linux/fs.h b/include/linux/fs.h index 876275fc0638..d43e8b6685a2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2171,7 +2171,6 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); extern int inode_init_always(struct super_block *, struct inode *); extern void inode_init_once(struct inode *); -extern void inode_add_to_lists(struct super_block *, struct inode *); extern void iput(struct inode *); extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); @@ -2202,9 +2201,11 @@ extern int file_remove_suid(struct file *); extern void __insert_inode_hash(struct inode *, unsigned long hashval); extern void remove_inode_hash(struct inode *); -static inline void insert_inode_hash(struct inode *inode) { +static inline void insert_inode_hash(struct inode *inode) +{ __insert_inode_hash(inode, inode->i_ino); } +extern void inode_sb_list_add(struct inode *inode); #ifdef CONFIG_BLOCK extern void submit_bio(int, struct bio *); -- cgit v1.2.3 From 7de9c6ee3ecffd99e1628e81a5ea5468f7581a1f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 23 Oct 2010 11:11:40 -0400 Subject: new helper: ihold() Clones an existing reference to inode; caller must already hold one. Signed-off-by: Al Viro --- fs/9p/vfs_inode.c | 5 +++-- fs/affs/inode.c | 2 +- fs/afs/dir.c | 2 +- fs/aio.c | 5 ++--- fs/anon_inodes.c | 5 ++--- fs/bfs/dir.c | 2 +- fs/block_dev.c | 8 ++++---- fs/btrfs/inode.c | 2 +- fs/coda/dir.c | 2 +- fs/exofs/namei.c | 2 +- fs/ext2/namei.c | 2 +- fs/ext3/namei.c | 2 +- fs/ext4/namei.c | 2 +- fs/gfs2/ops_inode.c | 2 +- fs/hfsplus/dir.c | 2 +- fs/inode.c | 9 +++++++++ fs/jffs2/dir.c | 4 ++-- fs/jfs/jfs_txnmgr.c | 2 +- fs/jfs/namei.c | 2 +- fs/libfs.c | 2 +- fs/logfs/dir.c | 2 +- fs/minix/namei.c | 2 +- fs/namei.c | 2 +- fs/nfs/dir.c | 2 +- fs/nfs/getroot.c | 3 +-- fs/nilfs2/namei.c | 2 +- fs/ntfs/super.c | 4 ++-- fs/ocfs2/namei.c | 2 +- fs/reiserfs/namei.c | 2 +- fs/sysv/namei.c | 2 +- fs/ubifs/dir.c | 2 +- fs/udf/namei.c | 2 +- fs/ufs/namei.c | 2 +- fs/xfs/linux-2.6/xfs_iops.c | 2 +- fs/xfs/xfs_inode.h | 2 +- include/linux/fs.h | 1 + ipc/mqueue.c | 2 +- kernel/futex.c | 2 +- mm/shmem.c | 2 +- net/socket.c | 2 +- 40 files changed, 57 insertions(+), 49 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 9e670d527646..ef5905f7c8a3 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1789,9 +1789,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir, kfree(st); } else { /* Caching disabled. No need to get upto date stat info. - * This dentry will be released immediately. So, just i_count++ + * This dentry will be released immediately. So, just hold the + * inode */ - atomic_inc(&old_dentry->d_inode->i_count); + ihold(old_dentry->d_inode); } dentry->d_op = old_dentry->d_op; diff --git a/fs/affs/inode.c b/fs/affs/inode.c index 3a0fdec175ba..5d828903ac69 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -388,7 +388,7 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3 affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain)); mark_buffer_dirty_inode(inode_bh, inode); inode->i_nlink = 2; - atomic_inc(&inode->i_count); + ihold(inode); } affs_fix_checksum(sb, bh); mark_buffer_dirty_inode(bh, inode); diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 0d38c09bd55e..5439e1bc9a86 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -1045,7 +1045,7 @@ static int afs_link(struct dentry *from, struct inode *dir, if (ret < 0) goto link_error; - atomic_inc(&vnode->vfs_inode.i_count); + ihold(&vnode->vfs_inode); d_instantiate(dentry, &vnode->vfs_inode); key_put(key); _leave(" = 0"); diff --git a/fs/aio.c b/fs/aio.c index 9e319a04780e..8c8f6c5b6d79 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1553,10 +1553,9 @@ static void aio_batch_add(struct address_space *mapping, * * When we're called, we always have a reference * on the file, so we must always have a reference - * on the inode, so igrab must always just - * bump the count and move on. + * on the inode, so ihold() is safe here. */ - atomic_inc(&mapping->host->i_count); + ihold(mapping->host); abe->mapping = mapping; hlist_add_head(&abe->list, &batch_hash[bucket]); return; diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index e4b75d6eda83..9c8e87b0361f 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -111,10 +111,9 @@ struct file *anon_inode_getfile(const char *name, path.mnt = mntget(anon_inode_mnt); /* * We know the anon_inode inode count is always greater than zero, - * so we can avoid doing an igrab() and we can use an open-coded - * atomic_inc(). + * so ihold() is safe. */ - atomic_inc(&anon_inode_inode->i_count); + ihold(anon_inode_inode); path.dentry->d_op = &anon_inodefs_dentry_operations; d_instantiate(path.dentry, anon_inode_inode); diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index d967e052b779..685ecff3ab31 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -176,7 +176,7 @@ static int bfs_link(struct dentry *old, struct inode *dir, inc_nlink(inode); inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); - atomic_inc(&inode->i_count); + ihold(inode); d_instantiate(new, inode); mutex_unlock(&info->bfs_lock); return 0; diff --git a/fs/block_dev.c b/fs/block_dev.c index b737451e2e9d..81972eb34b39 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -550,7 +550,7 @@ EXPORT_SYMBOL(bdget); */ struct block_device *bdgrab(struct block_device *bdev) { - atomic_inc(&bdev->bd_inode->i_count); + ihold(bdev->bd_inode); return bdev; } @@ -580,7 +580,7 @@ static struct block_device *bd_acquire(struct inode *inode) spin_lock(&bdev_lock); bdev = inode->i_bdev; if (bdev) { - atomic_inc(&bdev->bd_inode->i_count); + ihold(bdev->bd_inode); spin_unlock(&bdev_lock); return bdev; } @@ -591,12 +591,12 @@ static struct block_device *bd_acquire(struct inode *inode) spin_lock(&bdev_lock); if (!inode->i_bdev) { /* - * We take an additional bd_inode->i_count for inode, + * We take an additional reference to bd_inode, * and it's released in clear_inode() of inode. * So, we can access it via ->i_mapping always * without igrab(). */ - atomic_inc(&bdev->bd_inode->i_count); + ihold(bdev->bd_inode); inode->i_bdev = bdev; inode->i_mapping = bdev->bd_inode->i_mapping; list_add(&inode->i_devices, &bdev->bd_inodes); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f6f2a0da2695..64f99cf69ce0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4758,7 +4758,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, } btrfs_set_trans_block_group(trans, dir); - atomic_inc(&inode->i_count); + ihold(inode); err = btrfs_add_nondir(trans, dentry, inode, 1, index); diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 96fbeab77f2f..5d8b35539601 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -276,7 +276,7 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode, } coda_dir_update_mtime(dir_inode); - atomic_inc(&inode->i_count); + ihold(inode); d_instantiate(de, inode); inc_nlink(inode); return 0; diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c index b7dd0c236863..264e95d02830 100644 --- a/fs/exofs/namei.c +++ b/fs/exofs/namei.c @@ -153,7 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir, inode->i_ctime = CURRENT_TIME; inode_inc_link_count(inode); - atomic_inc(&inode->i_count); + ihold(inode); return exofs_add_nondir(dentry, inode); } diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c index 71efb0e9a3f2..f8aecd2e3297 100644 --- a/fs/ext2/namei.c +++ b/fs/ext2/namei.c @@ -206,7 +206,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir, inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); - atomic_inc(&inode->i_count); + ihold(inode); err = ext2_add_link(dentry, inode); if (!err) { diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c index 2b35ddb70d65..bce9dce639b8 100644 --- a/fs/ext3/namei.c +++ b/fs/ext3/namei.c @@ -2260,7 +2260,7 @@ retry: inode->i_ctime = CURRENT_TIME_SEC; inc_nlink(inode); - atomic_inc(&inode->i_count); + ihold(inode); err = ext3_add_entry(handle, dentry, inode); if (!err) { diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 314c0d3b3fa9..bd39885b5998 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2312,7 +2312,7 @@ retry: inode->i_ctime = ext4_current_time(inode); ext4_inc_count(handle, inode); - atomic_inc(&inode->i_count); + ihold(inode); err = ext4_add_entry(handle, dentry, inode); if (!err) { diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c index 48a274f1674c..12cbea7502c2 100644 --- a/fs/gfs2/ops_inode.c +++ b/fs/gfs2/ops_inode.c @@ -255,7 +255,7 @@ out_parent: gfs2_holder_uninit(ghs); gfs2_holder_uninit(ghs + 1); if (!error) { - atomic_inc(&inode->i_count); + ihold(inode); d_instantiate(dentry, inode); mark_inode_dirty(inode); } diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index d236d85ec9d7..e318bbc0daf6 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -286,7 +286,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir, inc_nlink(inode); hfsplus_instantiate(dst_dentry, inode, cnid); - atomic_inc(&inode->i_count); + ihold(inode); inode->i_ctime = CURRENT_TIME_SEC; mark_inode_dirty(inode); sbi->file_count++; diff --git a/fs/inode.c b/fs/inode.c index 430d70f2abe7..05ea293d5f32 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -320,6 +320,15 @@ void __iget(struct inode *inode) atomic_inc(&inode->i_count); } +/* + * get additional reference to inode; caller must already hold one. + */ +void ihold(struct inode *inode) +{ + WARN_ON(atomic_inc_return(&inode->i_count) < 2); +} +EXPORT_SYMBOL(ihold); + static void inode_lru_list_add(struct inode *inode) { if (list_empty(&inode->i_list)) { diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c index ed78a3cf3cb0..79121aa5858b 100644 --- a/fs/jffs2/dir.c +++ b/fs/jffs2/dir.c @@ -289,7 +289,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de mutex_unlock(&f->sem); d_instantiate(dentry, old_dentry->d_inode); dir_i->i_mtime = dir_i->i_ctime = ITIME(now); - atomic_inc(&old_dentry->d_inode->i_count); + ihold(old_dentry->d_inode); } return ret; } @@ -864,7 +864,7 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry, printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret); /* Might as well let the VFS know */ d_instantiate(new_dentry, old_dentry->d_inode); - atomic_inc(&old_dentry->d_inode->i_count); + ihold(old_dentry->d_inode); new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now); return ret; } diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c index d945ea76b445..9466957ec841 100644 --- a/fs/jfs/jfs_txnmgr.c +++ b/fs/jfs/jfs_txnmgr.c @@ -1279,7 +1279,7 @@ int txCommit(tid_t tid, /* transaction identifier */ * lazy commit thread finishes processing */ if (tblk->xflag & COMMIT_DELETE) { - atomic_inc(&tblk->u.ip->i_count); + ihold(tblk->u.ip); /* * Avoid a rare deadlock * diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c index a9cf8e8675be..231ca4af9bce 100644 --- a/fs/jfs/namei.c +++ b/fs/jfs/namei.c @@ -839,7 +839,7 @@ static int jfs_link(struct dentry *old_dentry, ip->i_ctime = CURRENT_TIME; dir->i_ctime = dir->i_mtime = CURRENT_TIME; mark_inode_dirty(dir); - atomic_inc(&ip->i_count); + ihold(ip); iplist[0] = ip; iplist[1] = dir; diff --git a/fs/libfs.c b/fs/libfs.c index 2dbf4877d7ef..304a5132ca27 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -255,7 +255,7 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; inc_nlink(inode); - atomic_inc(&inode->i_count); + ihold(inode); dget(dentry); d_instantiate(dentry, inode); return 0; diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index 1eb4e89e045b..409dfd65e9a1 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -569,7 +569,7 @@ static int logfs_link(struct dentry *old_dentry, struct inode *dir, return -EMLINK; inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - atomic_inc(&inode->i_count); + ihold(inode); inode->i_nlink++; mark_inode_dirty_sync(inode); diff --git a/fs/minix/namei.c b/fs/minix/namei.c index f3f3578393a4..c0d35a3accef 100644 --- a/fs/minix/namei.c +++ b/fs/minix/namei.c @@ -101,7 +101,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir, inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); - atomic_inc(&inode->i_count); + ihold(inode); return add_nondir(dentry, inode); } diff --git a/fs/namei.c b/fs/namei.c index f1ef97dbc6c4..f7dbc06857ab 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2285,7 +2285,7 @@ static long do_unlinkat(int dfd, const char __user *pathname) goto slashes; inode = dentry->d_inode; if (inode) - atomic_inc(&inode->i_count); + ihold(inode); error = mnt_want_write(nd.path.mnt); if (error) goto exit2; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index e257172d438c..0fac7fea18ef 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1580,7 +1580,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry) d_drop(dentry); error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); if (error == 0) { - atomic_inc(&inode->i_count); + ihold(inode); d_add(dentry, inode); } return error; diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c index a70e446e1605..ac7b814ce162 100644 --- a/fs/nfs/getroot.c +++ b/fs/nfs/getroot.c @@ -54,8 +54,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i iput(inode); return -ENOMEM; } - /* Circumvent igrab(): we know the inode is not being freed */ - atomic_inc(&inode->i_count); + ihold(inode); /* * Ensure that this dentry is invisible to d_find_alias(). * Otherwise, it may be spliced into the tree by diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c index 185d1607cb00..6e9557ecf161 100644 --- a/fs/nilfs2/namei.c +++ b/fs/nilfs2/namei.c @@ -207,7 +207,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir, inode->i_ctime = CURRENT_TIME; inode_inc_link_count(inode); - atomic_inc(&inode->i_count); + ihold(inode); err = nilfs_add_nondir(dentry, inode); if (!err) diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c index d4dec2d32117..d3fbe5730bfc 100644 --- a/fs/ntfs/super.c +++ b/fs/ntfs/super.c @@ -2911,8 +2911,8 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent) goto unl_upcase_iput_tmp_ino_err_out_now; } if ((sb->s_root = d_alloc_root(vol->root_ino))) { - /* We increment i_count simulating an ntfs_iget(). */ - atomic_inc(&vol->root_ino->i_count); + /* We grab a reference, simulating an ntfs_iget(). */ + ihold(vol->root_ino); ntfs_debug("Exiting, status successful."); /* Release the default upcase if it has no users. */ mutex_lock(&ntfs_lock); diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index e7bde21149ae..ff5744e1e36f 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -742,7 +742,7 @@ static int ocfs2_link(struct dentry *old_dentry, goto out_commit; } - atomic_inc(&inode->i_count); + ihold(inode); dentry->d_op = &ocfs2_dentry_ops; d_instantiate(dentry, inode); diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c index ee78d4a0086a..ba5f51ec3458 100644 --- a/fs/reiserfs/namei.c +++ b/fs/reiserfs/namei.c @@ -1156,7 +1156,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir, inode->i_ctime = CURRENT_TIME_SEC; reiserfs_update_sd(&th, inode); - atomic_inc(&inode->i_count); + ihold(inode); d_instantiate(dentry, inode); retval = journal_end(&th, dir->i_sb, jbegin_count); reiserfs_write_unlock(dir->i_sb); diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c index 33e047b59b8d..11e7f7d11cd0 100644 --- a/fs/sysv/namei.c +++ b/fs/sysv/namei.c @@ -126,7 +126,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir, inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); - atomic_inc(&inode->i_count); + ihold(inode); return add_nondir(dentry, inode); } diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 87ebcce72213..14f64b689d7f 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -550,7 +550,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir, lock_2_inodes(dir, inode); inc_nlink(inode); - atomic_inc(&inode->i_count); + ihold(inode); inode->i_ctime = ubifs_current_time(inode); dir->i_size += sz_change; dir_ui->ui_size = dir->i_size; diff --git a/fs/udf/namei.c b/fs/udf/namei.c index bf5fc674193c..6d8dc02baebb 100644 --- a/fs/udf/namei.c +++ b/fs/udf/namei.c @@ -1101,7 +1101,7 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir, inc_nlink(inode); inode->i_ctime = current_fs_time(inode->i_sb); mark_inode_dirty(inode); - atomic_inc(&inode->i_count); + ihold(inode); d_instantiate(dentry, inode); unlock_kernel(); diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c index b056f02b1fb3..12f39b9e4437 100644 --- a/fs/ufs/namei.c +++ b/fs/ufs/namei.c @@ -180,7 +180,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir, inode->i_ctime = CURRENT_TIME_SEC; inode_inc_link_count(inode); - atomic_inc(&inode->i_count); + ihold(inode); error = ufs_add_nondir(dentry, inode); unlock_kernel(); diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c index 71d83c93621c..96107efc0c61 100644 --- a/fs/xfs/linux-2.6/xfs_iops.c +++ b/fs/xfs/linux-2.6/xfs_iops.c @@ -317,7 +317,7 @@ xfs_vn_link( if (unlikely(error)) return -error; - atomic_inc(&inode->i_count); + ihold(inode); d_instantiate(dentry, inode); return 0; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index fac52290de90..fb2ca2e4cdc9 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -500,7 +500,7 @@ void xfs_mark_inode_dirty_sync(xfs_inode_t *); #define IHOLD(ip) \ do { \ ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \ - atomic_inc(&(VFS_I(ip)->i_count)); \ + ihold(VFS_I(ip)); \ trace_xfs_ihold(ip, _THIS_IP_); \ } while (0) diff --git a/include/linux/fs.h b/include/linux/fs.h index d43e8b6685a2..bd6ae6c71fc8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2171,6 +2171,7 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); extern int inode_init_always(struct super_block *, struct inode *); extern void inode_init_once(struct inode *); +extern void ihold(struct inode * inode); extern void iput(struct inode *); extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index e1e7b9635f5d..80b35ffca25d 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -769,7 +769,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) inode = dentry->d_inode; if (inode) - atomic_inc(&inode->i_count); + ihold(inode); err = mnt_want_write(ipc_ns->mq_mnt); if (err) goto out_err; diff --git a/kernel/futex.c b/kernel/futex.c index a118bf160e0b..6c683b37f2ce 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -169,7 +169,7 @@ static void get_futex_key_refs(union futex_key *key) switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: - atomic_inc(&key->shared.inode->i_count); + ihold(key->shared.inode); break; case FUT_OFF_MMSHARED: atomic_inc(&key->private.mm->mm_count); diff --git a/mm/shmem.c b/mm/shmem.c index 27a58120dbd5..d4e2852526e6 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1903,7 +1903,7 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr dir->i_size += BOGO_DIRENT_SIZE; inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; inc_nlink(inode); - atomic_inc(&inode->i_count); /* New dentry reference */ + ihold(inode); /* New dentry reference */ dget(dentry); /* Extra pinning count for the created dentry */ d_instantiate(dentry, inode); out: diff --git a/net/socket.c b/net/socket.c index abf3e2561521..d223725f99e5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -377,7 +377,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) &socket_file_ops); if (unlikely(!file)) { /* drop dentry, keep inode */ - atomic_inc(&path.dentry->d_inode->i_count); + ihold(path.dentry->d_inode); path_put(&path); put_unused_fd(fd); return -ENFILE; -- cgit v1.2.3 From f991bd2e14210fb93d722cb23e54991de20e8a3d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 23 Oct 2010 11:18:01 -0400 Subject: fs: introduce a per-cpu last_ino allocator new_inode() dirties a contended cache line to get increasing inode numbers. This limits performance on workloads that cause significant parallel inode allocation. Solve this problem by using a per_cpu variable fed by the shared last_ino in batches of 1024 allocations. This reduces contention on the shared last_ino, and give same spreading ino numbers than before (i.e. same wraparound after 2^32 allocations). Signed-off-by: Eric Dumazet Signed-off-by: Nick Piggin Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 45 ++++++++++++++++++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 7 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 05ea293d5f32..46a3e120b196 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -717,6 +717,43 @@ repeat: return NULL; } +/* + * Each cpu owns a range of LAST_INO_BATCH numbers. + * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations, + * to renew the exhausted range. + * + * This does not significantly increase overflow rate because every CPU can + * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is + * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the + * 2^32 range, and is a worst-case. Even a 50% wastage would only increase + * overflow rate by 2x, which does not seem too significant. + * + * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW + * error if st_ino won't fit in target struct field. Use 32bit counter + * here to attempt to avoid that. + */ +#define LAST_INO_BATCH 1024 +static DEFINE_PER_CPU(unsigned int, last_ino); + +static unsigned int get_next_ino(void) +{ + unsigned int *p = &get_cpu_var(last_ino); + unsigned int res = *p; + +#ifdef CONFIG_SMP + if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) { + static atomic_t shared_last_ino; + int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino); + + res = next - LAST_INO_BATCH; + } +#endif + + *p = ++res; + put_cpu_var(last_ino); + return res; +} + /** * new_inode - obtain an inode * @sb: superblock @@ -731,12 +768,6 @@ repeat: */ struct inode *new_inode(struct super_block *sb) { - /* - * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW - * error if st_ino won't fit in target struct field. Use 32bit counter - * here to attempt to avoid that. - */ - static unsigned int last_ino; struct inode *inode; spin_lock_prefetch(&inode_lock); @@ -745,7 +776,7 @@ struct inode *new_inode(struct super_block *sb) if (inode) { spin_lock(&inode_lock); __inode_sb_list_add(inode); - inode->i_ino = ++last_ino; + inode->i_ino = get_next_ino(); inode->i_state = 0; spin_unlock(&inode_lock); } -- cgit v1.2.3 From 85fe4025c616a7c0ed07bc2fc8c5371b07f3888c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 Oct 2010 11:19:54 -0400 Subject: fs: do not assign default i_ino in new_inode Instead of always assigning an increasing inode number in new_inode move the call to assign it into those callers that actually need it. For now callers that need it is estimated conservatively, that is the call is added to all filesystems that do not assign an i_ino by themselves. For a few more filesystems we can avoid assigning any inode number given that they aren't user visible, and for others it could be done lazily when an inode number is actually needed, but that's left for later patches. Signed-off-by: Christoph Hellwig Signed-off-by: Dave Chinner Signed-off-by: Al Viro --- drivers/infiniband/hw/ipath/ipath_fs.c | 1 + drivers/infiniband/hw/qib/qib_fs.c | 1 + drivers/misc/ibmasm/ibmasmfs.c | 1 + drivers/oprofile/oprofilefs.c | 1 + drivers/usb/core/inode.c | 1 + drivers/usb/gadget/f_fs.c | 1 + drivers/usb/gadget/inode.c | 1 + fs/anon_inodes.c | 1 + fs/autofs4/inode.c | 1 + fs/binfmt_misc.c | 1 + fs/configfs/inode.c | 1 + fs/debugfs/inode.c | 1 + fs/ext4/mballoc.c | 1 + fs/freevxfs/vxfs_inode.c | 1 + fs/fuse/control.c | 1 + fs/hugetlbfs/inode.c | 1 + fs/inode.c | 4 ++-- fs/ocfs2/dlmfs/dlmfs.c | 2 ++ fs/pipe.c | 2 ++ fs/proc/base.c | 2 ++ fs/proc/proc_sysctl.c | 2 ++ fs/ramfs/inode.c | 1 + fs/xfs/linux-2.6/xfs_buf.c | 1 + include/linux/fs.h | 1 + ipc/mqueue.c | 1 + kernel/cgroup.c | 1 + mm/shmem.c | 1 + net/socket.c | 1 + net/sunrpc/rpc_pipe.c | 1 + security/inode.c | 1 + security/selinux/selinuxfs.c | 1 + 31 files changed, 36 insertions(+), 2 deletions(-) (limited to 'fs/inode.c') diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c index d13e72685dcf..12d5bf76302c 100644 --- a/drivers/infiniband/hw/ipath/ipath_fs.c +++ b/drivers/infiniband/hw/ipath/ipath_fs.c @@ -57,6 +57,7 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry, goto bail; } + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_private = data; diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c index a0e6613e8be6..7e433d75c775 100644 --- a/drivers/infiniband/hw/qib/qib_fs.c +++ b/drivers/infiniband/hw/qib/qib_fs.c @@ -58,6 +58,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry, goto bail; } + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = 0; inode->i_gid = 0; diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c index af2497ae5fe3..0a53500636c9 100644 --- a/drivers/misc/ibmasm/ibmasmfs.c +++ b/drivers/misc/ibmasm/ibmasmfs.c @@ -146,6 +146,7 @@ static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode) struct inode *ret = new_inode(sb); if (ret) { + ret->i_ino = get_next_ino(); ret->i_mode = mode; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; } diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c index 95f711b251ad..449de59bf35b 100644 --- a/drivers/oprofile/oprofilefs.c +++ b/drivers/oprofile/oprofilefs.c @@ -28,6 +28,7 @@ static struct inode *oprofilefs_get_inode(struct super_block *sb, int mode) struct inode *inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; } diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 095fa5366690..e2f63c0ea09d 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -276,6 +276,7 @@ static struct inode *usbfs_get_inode (struct super_block *sb, int mode, dev_t de struct inode *inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c index e4f595055208..e093fd8d04d3 100644 --- a/drivers/usb/gadget/f_fs.c +++ b/drivers/usb/gadget/f_fs.c @@ -980,6 +980,7 @@ ffs_sb_make_inode(struct super_block *sb, void *data, if (likely(inode)) { struct timespec current_time = CURRENT_TIME; + inode->i_ino = usbfs_get_inode(); inode->i_mode = perms->mode; inode->i_uid = perms->uid; inode->i_gid = perms->gid; diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index d1d72d946b04..ba145e7fbe03 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -1991,6 +1991,7 @@ gadgetfs_make_inode (struct super_block *sb, struct inode *inode = new_inode (sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = default_uid; inode->i_gid = default_gid; diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 9c8e87b0361f..5365527ca43f 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -193,6 +193,7 @@ static struct inode *anon_inode_mkinode(void) if (!inode) return ERR_PTR(-ENOMEM); + inode->i_ino = get_next_ino(); inode->i_fop = &anon_inode_fops; inode->i_mapping->a_ops = &anon_aops; diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 821b2b955dac..ac87e49fa706 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -398,6 +398,7 @@ struct inode *autofs4_get_inode(struct super_block *sb, inode->i_gid = sb->s_root->d_inode->i_gid; } inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; + inode->i_ino = get_next_ino(); if (S_ISDIR(inf->mode)) { inode->i_nlink = 2; diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 139fc8083f53..29990f0eee0c 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -495,6 +495,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode) struct inode * inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb); diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index cf78d44a8d6a..253476d78ed8 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -135,6 +135,7 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd) { struct inode * inode = new_inode(configfs_sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mapping->a_ops = &configfs_aops; inode->i_mapping->backing_dev_info = &configfs_backing_dev_info; inode->i_op = &configfs_inode_operations; diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 30a87b3dbcac..a4ed8380e98a 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -40,6 +40,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d struct inode *inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 19aa0d44d822..42f77b1dc72d 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -2373,6 +2373,7 @@ static int ext4_mb_init_backend(struct super_block *sb) printk(KERN_ERR "EXT4-fs: can't get new inode\n"); goto err_freesgi; } + sbi->s_buddy_cache->i_ino = get_next_ino(); EXT4_I(sbi->s_buddy_cache)->i_disksize = 0; for (i = 0; i < ngroups; i++) { desc = ext4_get_group_desc(sb, i, NULL); diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c index 79d1b4ea13e7..8c04eac5079d 100644 --- a/fs/freevxfs/vxfs_inode.c +++ b/fs/freevxfs/vxfs_inode.c @@ -260,6 +260,7 @@ vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip) struct inode *ip = NULL; if ((ip = new_inode(sbp))) { + ip->i_ino = get_next_ino(); vxfs_iinit(ip, vip); ip->i_mapping->a_ops = &vxfs_aops; } diff --git a/fs/fuse/control.c b/fs/fuse/control.c index 7367e177186f..4eba07661e5c 100644 --- a/fs/fuse/control.c +++ b/fs/fuse/control.c @@ -222,6 +222,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent, if (!inode) return NULL; + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = fc->user_id; inode->i_gid = fc->group_id; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 113eba3d3c38..8d0607b37266 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -455,6 +455,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid, inode = new_inode(sb); if (inode) { struct hugetlbfs_inode_info *info; + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = uid; inode->i_gid = gid; diff --git a/fs/inode.c b/fs/inode.c index 46a3e120b196..2cd2e48f7a20 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -735,7 +735,7 @@ repeat: #define LAST_INO_BATCH 1024 static DEFINE_PER_CPU(unsigned int, last_ino); -static unsigned int get_next_ino(void) +unsigned int get_next_ino(void) { unsigned int *p = &get_cpu_var(last_ino); unsigned int res = *p; @@ -753,6 +753,7 @@ static unsigned int get_next_ino(void) put_cpu_var(last_ino); return res; } +EXPORT_SYMBOL(get_next_ino); /** * new_inode - obtain an inode @@ -776,7 +777,6 @@ struct inode *new_inode(struct super_block *sb) if (inode) { spin_lock(&inode_lock); __inode_sb_list_add(inode); - inode->i_ino = get_next_ino(); inode->i_state = 0; spin_unlock(&inode_lock); } diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c index a7ebd9d42dc8..75e115f1bd73 100644 --- a/fs/ocfs2/dlmfs/dlmfs.c +++ b/fs/ocfs2/dlmfs/dlmfs.c @@ -400,6 +400,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb) if (inode) { ip = DLMFS_I(inode); + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); @@ -425,6 +426,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent, if (!inode) return NULL; + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); diff --git a/fs/pipe.c b/fs/pipe.c index 37eb1ebeaa90..d2d7566ce68e 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -954,6 +954,8 @@ static struct inode * get_pipe_inode(void) if (!inode) goto fail_inode; + inode->i_ino = get_next_ino(); + pipe = alloc_pipe_info(inode); if (!pipe) goto fail_iput; diff --git a/fs/proc/base.c b/fs/proc/base.c index fb2a5abd4e4f..9883f1e18332 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1603,6 +1603,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st /* Common stuff */ ei = PROC_I(inode); + inode->i_ino = get_next_ino(); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; inode->i_op = &proc_def_inode_operations; @@ -2549,6 +2550,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir, /* Initialize the inode */ ei = PROC_I(inode); + inode->i_ino = get_next_ino(); inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; /* diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 2fc52552271d..b652cb00906b 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -23,6 +23,8 @@ static struct inode *proc_sys_make_inode(struct super_block *sb, if (!inode) goto out; + inode->i_ino = get_next_ino(); + sysctl_head_get(head); ei = PROC_I(inode); ei->sysctl = head; diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c index a5ebae70dc6d..67fadb1ad2c1 100644 --- a/fs/ramfs/inode.c +++ b/fs/ramfs/inode.c @@ -58,6 +58,7 @@ struct inode *ramfs_get_inode(struct super_block *sb, struct inode * inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode_init_owner(inode, dir, mode); inode->i_mapping->a_ops = &ramfs_aops; inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info; diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c index ba5312802aa9..63fd2c07cb57 100644 --- a/fs/xfs/linux-2.6/xfs_buf.c +++ b/fs/xfs/linux-2.6/xfs_buf.c @@ -1580,6 +1580,7 @@ xfs_mapping_buftarg( XFS_BUFTARG_NAME(btp)); return ENOMEM; } + inode->i_ino = get_next_ino(); inode->i_mode = S_IFBLK; inode->i_bdev = bdev; inode->i_rdev = bdev->bd_dev; diff --git a/include/linux/fs.h b/include/linux/fs.h index bd6ae6c71fc8..4a573cf13f51 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2191,6 +2191,7 @@ extern struct inode * iget_locked(struct super_block *, unsigned long); extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); extern int insert_inode_locked(struct inode *); extern void unlock_new_inode(struct inode *); +extern unsigned int get_next_ino(void); extern void __iget(struct inode * inode); extern void iget_failed(struct inode *); diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 80b35ffca25d..3a61ffefe884 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -116,6 +116,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7b69b8d0313d..9270d532ec3c 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -777,6 +777,7 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) struct inode *inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); diff --git a/mm/shmem.c b/mm/shmem.c index d4e2852526e6..f6d350e8adc5 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1586,6 +1586,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode_init_owner(inode, dir, mode); inode->i_blocks = 0; inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; diff --git a/net/socket.c b/net/socket.c index d223725f99e5..5cac1c707755 100644 --- a/net/socket.c +++ b/net/socket.c @@ -480,6 +480,7 @@ static struct socket *sock_alloc(void) sock = SOCKET_I(inode); kmemcheck_annotate_bitfield(sock, type); + inode->i_ino = get_next_ino(); inode->i_mode = S_IFSOCK | S_IRWXUGO; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 52f252432144..7df92d237cb8 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -445,6 +445,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode) struct inode *inode = new_inode(sb); if (!inode) return NULL; + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch(mode & S_IFMT) { diff --git a/security/inode.c b/security/inode.c index 88839866cbcd..cb8f47c66a58 100644 --- a/security/inode.c +++ b/security/inode.c @@ -61,6 +61,7 @@ static struct inode *get_inode(struct super_block *sb, int mode, dev_t dev) struct inode *inode = new_inode(sb); if (inode) { + inode->i_ino = get_next_ino(); inode->i_mode = mode; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; switch (mode & S_IFMT) { diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 87e0556bae70..55a755c1a1bd 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -978,6 +978,7 @@ static struct inode *sel_make_inode(struct super_block *sb, int mode) struct inode *ret = new_inode(sb); if (ret) { + ret->i_ino = get_next_ino(); ret->i_mode = mode; ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME; } -- cgit v1.2.3 From 99a38919241fd051b8d93b2e4d0c05ef0556d795 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 23 Oct 2010 19:07:20 +0200 Subject: fs: fix buffer invalidation in invalidate_list We must not call invalidate_inode_buffers in invalidate_list unless the inode can be reclaimed. If we remove the buffer association of a busy inode fsync won't find the buffers anymore. As invalidate_inode_buffers is called from various others sources than umount this actually does matter in practice. While at it change the loop to a more natural form and remove the WARN_ON for I_NEW, wich we already tested a few lines above. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 2cd2e48f7a20..4bedac32154f 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -28,7 +28,6 @@ /* * This is needed for the following functions: * - inode_has_buffers - * - invalidate_inode_buffers * - invalidate_bdev * * FIXME: remove all knowledge of the buffer layer from this file @@ -503,16 +502,15 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) inode = list_entry(tmp, struct inode, i_sb_list); if (inode->i_state & I_NEW) continue; - invalidate_inode_buffers(inode); - if (!atomic_read(&inode->i_count)) { - list_move(&inode->i_list, dispose); - WARN_ON(inode->i_state & I_NEW); - inode->i_state |= I_FREEING; - if (!(inode->i_state & (I_DIRTY | I_SYNC))) - percpu_counter_dec(&nr_inodes_unused); + if (atomic_read(&inode->i_count)) { + busy = 1; continue; } - busy = 1; + + list_move(&inode->i_list, dispose); + inode->i_state |= I_FREEING; + if (!(inode->i_state & (I_DIRTY | I_SYNC))) + percpu_counter_dec(&nr_inodes_unused); } return busy; } -- cgit v1.2.3 From 7ccf19a8042e343f8159f8a5fdd6a9422aa90c78 Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Thu, 21 Oct 2010 11:49:30 +1100 Subject: fs: inode split IO and LRU lists The use of the same inode list structure (inode->i_list) for two different list constructs with different lifecycles and purposes makes it impossible to separate the locking of the different operations. Therefore, to enable the separation of the locking of the writeback and reclaim lists, split the inode->i_list into two separate lists dedicated to their specific tracking functions. Signed-off-by: Nick Piggin Signed-off-by: Dave Chinner Reviewed-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/block_dev.c | 2 +- fs/fs-writeback.c | 35 ++++++++++++++++++----------------- fs/inode.c | 53 ++++++++++++++++++++++++++++++++++------------------- include/linux/fs.h | 3 ++- mm/backing-dev.c | 6 +++--- 5 files changed, 58 insertions(+), 41 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/block_dev.c b/fs/block_dev.c index 4e847e53051f..dea3b628a6ce 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -59,7 +59,7 @@ static void bdev_inode_switch_bdi(struct inode *inode, spin_lock(&inode_lock); inode->i_data.backing_dev_info = dst; if (inode->i_state & I_DIRTY) - list_move(&inode->i_list, &dst->wb.b_dirty); + list_move(&inode->i_wb_list, &dst->wb.b_dirty); spin_unlock(&inode_lock); } diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e8f65290e836..7a24cc957f05 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -79,6 +79,11 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) return sb->s_bdi; } +static inline struct inode *wb_inode(struct list_head *head) +{ + return list_entry(head, struct inode, i_wb_list); +} + static void bdi_queue_work(struct backing_dev_info *bdi, struct wb_writeback_work *work) { @@ -172,11 +177,11 @@ static void redirty_tail(struct inode *inode) if (!list_empty(&wb->b_dirty)) { struct inode *tail; - tail = list_entry(wb->b_dirty.next, struct inode, i_list); + tail = wb_inode(wb->b_dirty.next); if (time_before(inode->dirtied_when, tail->dirtied_when)) inode->dirtied_when = jiffies; } - list_move(&inode->i_list, &wb->b_dirty); + list_move(&inode->i_wb_list, &wb->b_dirty); } /* @@ -186,7 +191,7 @@ static void requeue_io(struct inode *inode) { struct bdi_writeback *wb = &inode_to_bdi(inode)->wb; - list_move(&inode->i_list, &wb->b_more_io); + list_move(&inode->i_wb_list, &wb->b_more_io); } static void inode_sync_complete(struct inode *inode) @@ -227,14 +232,14 @@ static void move_expired_inodes(struct list_head *delaying_queue, int do_sb_sort = 0; while (!list_empty(delaying_queue)) { - inode = list_entry(delaying_queue->prev, struct inode, i_list); + inode = wb_inode(delaying_queue->prev); if (older_than_this && inode_dirtied_after(inode, *older_than_this)) break; if (sb && sb != inode->i_sb) do_sb_sort = 1; sb = inode->i_sb; - list_move(&inode->i_list, &tmp); + list_move(&inode->i_wb_list, &tmp); } /* just one sb in list, splice to dispatch_queue and we're done */ @@ -245,12 +250,11 @@ static void move_expired_inodes(struct list_head *delaying_queue, /* Move inodes from one superblock together */ while (!list_empty(&tmp)) { - inode = list_entry(tmp.prev, struct inode, i_list); - sb = inode->i_sb; + sb = wb_inode(tmp.prev)->i_sb; list_for_each_prev_safe(pos, node, &tmp) { - inode = list_entry(pos, struct inode, i_list); + inode = wb_inode(pos); if (inode->i_sb == sb) - list_move(&inode->i_list, dispatch_queue); + list_move(&inode->i_wb_list, dispatch_queue); } } } @@ -414,7 +418,7 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc) * a reference to the inode or it's on it's way out. * No need to add it back to the LRU. */ - list_del_init(&inode->i_list); + list_del_init(&inode->i_wb_list); } } inode_sync_complete(inode); @@ -462,8 +466,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, { while (!list_empty(&wb->b_io)) { long pages_skipped; - struct inode *inode = list_entry(wb->b_io.prev, - struct inode, i_list); + struct inode *inode = wb_inode(wb->b_io.prev); if (inode->i_sb != sb) { if (only_this_sb) { @@ -533,8 +536,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb, queue_io(wb, wbc->older_than_this); while (!list_empty(&wb->b_io)) { - struct inode *inode = list_entry(wb->b_io.prev, - struct inode, i_list); + struct inode *inode = wb_inode(wb->b_io.prev); struct super_block *sb = inode->i_sb; if (!pin_sb_for_writeback(sb)) { @@ -672,8 +674,7 @@ static long wb_writeback(struct bdi_writeback *wb, */ spin_lock(&inode_lock); if (!list_empty(&wb->b_more_io)) { - inode = list_entry(wb->b_more_io.prev, - struct inode, i_list); + inode = wb_inode(wb->b_more_io.prev); trace_wbc_writeback_wait(&wbc, wb->bdi); inode_wait_for_writeback(inode); } @@ -987,7 +988,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) } inode->dirtied_when = jiffies; - list_move(&inode->i_list, &bdi->wb.b_dirty); + list_move(&inode->i_wb_list, &bdi->wb.b_dirty); } } out: diff --git a/fs/inode.c b/fs/inode.c index 4bedac32154f..09e2d7a5f1d2 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -71,7 +71,7 @@ static unsigned int i_hash_shift __read_mostly; * allowing for low-overhead inode sync() operations. */ -static LIST_HEAD(inode_unused); +static LIST_HEAD(inode_lru); static struct hlist_head *inode_hashtable __read_mostly; /* @@ -271,6 +271,7 @@ EXPORT_SYMBOL(__destroy_inode); static void destroy_inode(struct inode *inode) { + BUG_ON(!list_empty(&inode->i_lru)); __destroy_inode(inode); if (inode->i_sb->s_op->destroy_inode) inode->i_sb->s_op->destroy_inode(inode); @@ -289,7 +290,8 @@ void inode_init_once(struct inode *inode) INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_dentry); INIT_LIST_HEAD(&inode->i_devices); - INIT_LIST_HEAD(&inode->i_list); + INIT_LIST_HEAD(&inode->i_wb_list); + INIT_LIST_HEAD(&inode->i_lru); INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC); spin_lock_init(&inode->i_data.tree_lock); spin_lock_init(&inode->i_data.i_mmap_lock); @@ -330,16 +332,16 @@ EXPORT_SYMBOL(ihold); static void inode_lru_list_add(struct inode *inode) { - if (list_empty(&inode->i_list)) { - list_add(&inode->i_list, &inode_unused); + if (list_empty(&inode->i_lru)) { + list_add(&inode->i_lru, &inode_lru); percpu_counter_inc(&nr_inodes_unused); } } static void inode_lru_list_del(struct inode *inode) { - if (!list_empty(&inode->i_list)) { - list_del_init(&inode->i_list); + if (!list_empty(&inode->i_lru)) { + list_del_init(&inode->i_lru); percpu_counter_dec(&nr_inodes_unused); } } @@ -460,8 +462,8 @@ static void dispose_list(struct list_head *head) while (!list_empty(head)) { struct inode *inode; - inode = list_first_entry(head, struct inode, i_list); - list_del_init(&inode->i_list); + inode = list_first_entry(head, struct inode, i_lru); + list_del_init(&inode->i_lru); evict(inode); @@ -507,8 +509,14 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) continue; } - list_move(&inode->i_list, dispose); inode->i_state |= I_FREEING; + + /* + * Move the inode off the IO lists and LRU once I_FREEING is + * set so that it won't get moved back on there if it is dirty. + */ + list_move(&inode->i_lru, dispose); + list_del_init(&inode->i_wb_list); if (!(inode->i_state & (I_DIRTY | I_SYNC))) percpu_counter_dec(&nr_inodes_unused); } @@ -580,10 +588,10 @@ static void prune_icache(int nr_to_scan) for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) { struct inode *inode; - if (list_empty(&inode_unused)) + if (list_empty(&inode_lru)) break; - inode = list_entry(inode_unused.prev, struct inode, i_list); + inode = list_entry(inode_lru.prev, struct inode, i_lru); /* * Referenced or dirty inodes are still in use. Give them @@ -591,14 +599,14 @@ static void prune_icache(int nr_to_scan) */ if (atomic_read(&inode->i_count) || (inode->i_state & ~I_REFERENCED)) { - list_del_init(&inode->i_list); + list_del_init(&inode->i_lru); percpu_counter_dec(&nr_inodes_unused); continue; } /* recently referenced inodes get one more pass */ if (inode->i_state & I_REFERENCED) { - list_move(&inode->i_list, &inode_unused); + list_move(&inode->i_lru, &inode_lru); inode->i_state &= ~I_REFERENCED; continue; } @@ -611,15 +619,21 @@ static void prune_icache(int nr_to_scan) iput(inode); spin_lock(&inode_lock); - if (inode != list_entry(inode_unused.next, - struct inode, i_list)) + if (inode != list_entry(inode_lru.next, + struct inode, i_lru)) continue; /* wrong inode or list_empty */ if (!can_unuse(inode)) continue; } - list_move(&inode->i_list, &freeable); WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; + + /* + * Move the inode off the IO lists and LRU once I_FREEING is + * set so that it won't get moved back on there if it is dirty. + */ + list_move(&inode->i_lru, &freeable); + list_del_init(&inode->i_wb_list); percpu_counter_dec(&nr_inodes_unused); } if (current_is_kswapd()) @@ -1340,15 +1354,16 @@ static void iput_final(struct inode *inode) inode->i_state &= ~I_WILL_FREE; __remove_inode_hash(inode); } + WARN_ON(inode->i_state & I_NEW); inode->i_state |= I_FREEING; /* - * After we delete the inode from the LRU here, we avoid moving dirty - * inodes back onto the LRU now because I_FREEING is set and hence - * writeback_single_inode() won't move the inode around. + * Move the inode off the IO lists and LRU once I_FREEING is + * set so that it won't get moved back on there if it is dirty. */ inode_lru_list_del(inode); + list_del_init(&inode->i_wb_list); __inode_sb_list_del(inode); spin_unlock(&inode_lock); diff --git a/include/linux/fs.h b/include/linux/fs.h index d58059944801..f300a6508818 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -723,7 +723,8 @@ struct posix_acl; struct inode { struct hlist_node i_hash; - struct list_head i_list; /* backing dev IO list */ + struct list_head i_wb_list; /* backing dev IO list */ + struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; struct list_head i_dentry; unsigned long i_ino; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 65d420499a61..15d5097de821 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -74,11 +74,11 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) nr_wb = nr_dirty = nr_io = nr_more_io = 0; spin_lock(&inode_lock); - list_for_each_entry(inode, &wb->b_dirty, i_list) + list_for_each_entry(inode, &wb->b_dirty, i_wb_list) nr_dirty++; - list_for_each_entry(inode, &wb->b_io, i_list) + list_for_each_entry(inode, &wb->b_io, i_wb_list) nr_io++; - list_for_each_entry(inode, &wb->b_more_io, i_list) + list_for_each_entry(inode, &wb->b_more_io, i_wb_list) nr_more_io++; spin_unlock(&inode_lock); -- cgit v1.2.3 From d895a1c96af8c2a0f6a5e0119695a7c6b92df8db Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 24 Oct 2010 19:40:24 +0200 Subject: fs: do not drop inode_lock in dispose_list Despite the comment above it we can not safely drop the lock here. invalidate_list is called from many other places that just umount. Also switch to proper list macros now that we never drop the lock. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 09e2d7a5f1d2..1bf2be41257a 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -482,26 +482,10 @@ static void dispose_list(struct list_head *head) */ static int invalidate_list(struct list_head *head, struct list_head *dispose) { - struct list_head *next; + struct inode *inode, *next; int busy = 0; - next = head->next; - for (;;) { - struct list_head *tmp = next; - struct inode *inode; - - /* - * We can reschedule here without worrying about the list's - * consistency because the per-sb list of inodes must not - * change during umount anymore, and because iprune_sem keeps - * shrink_icache_memory() away. - */ - cond_resched_lock(&inode_lock); - - next = next->next; - if (tmp == head) - break; - inode = list_entry(tmp, struct inode, i_sb_list); + list_for_each_entry_safe(inode, next, head, i_sb_list) { if (inode->i_state & I_NEW) continue; if (atomic_read(&inode->i_count)) { -- cgit v1.2.3 From a031878670ac8fe466859d4c1506bd91ae48678c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 24 Oct 2010 19:40:33 +0200 Subject: fs: fold invalidate_list into invalidate_inodes Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- fs/inode.c | 43 ++++++++++++++++--------------------------- 1 file changed, 16 insertions(+), 27 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index 1bf2be41257a..f6b3b52f1278 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -477,15 +477,24 @@ static void dispose_list(struct list_head *head) } } -/* - * Invalidate all inodes for a device. +/** + * invalidate_inodes - attempt to free all inodes on a superblock + * @sb: superblock to operate on + * + * Attempts to free all inodes for a given superblock. If there were any + * busy inodes return a non-zero value, else zero. */ -static int invalidate_list(struct list_head *head, struct list_head *dispose) +int invalidate_inodes(struct super_block *sb) { - struct inode *inode, *next; int busy = 0; + struct inode *inode, *next; + LIST_HEAD(dispose); - list_for_each_entry_safe(inode, next, head, i_sb_list) { + down_write(&iprune_sem); + + spin_lock(&inode_lock); + fsnotify_unmount_inodes(&sb->s_inodes); + list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { if (inode->i_state & I_NEW) continue; if (atomic_read(&inode->i_count)) { @@ -499,34 +508,14 @@ static int invalidate_list(struct list_head *head, struct list_head *dispose) * Move the inode off the IO lists and LRU once I_FREEING is * set so that it won't get moved back on there if it is dirty. */ - list_move(&inode->i_lru, dispose); + list_move(&inode->i_lru, &dispose); list_del_init(&inode->i_wb_list); if (!(inode->i_state & (I_DIRTY | I_SYNC))) percpu_counter_dec(&nr_inodes_unused); } - return busy; -} - -/** - * invalidate_inodes - discard the inodes on a device - * @sb: superblock - * - * Discard all of the inodes for a given superblock. If the discard - * fails because there are busy inodes then a non zero value is returned. - * If the discard is successful all the inodes have been discarded. - */ -int invalidate_inodes(struct super_block *sb) -{ - int busy; - LIST_HEAD(throw_away); - - down_write(&iprune_sem); - spin_lock(&inode_lock); - fsnotify_unmount_inodes(&sb->s_inodes); - busy = invalidate_list(&sb->s_inodes, &throw_away); spin_unlock(&inode_lock); - dispose_list(&throw_away); + dispose_list(&dispose); up_write(&iprune_sem); return busy; -- cgit v1.2.3 From 63997e98a3be68d7cec806d22bf9b02b2e1daabb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 25 Oct 2010 20:49:35 -0400 Subject: split invalidate_inodes() Pull removal of fsnotify marks into generic_shutdown_super(). Split umount-time work into a new function - evict_inodes(). Make sure that invalidate_inodes() will be able to cope with I_FREEING once we change locking in iput(). Signed-off-by: Al Viro --- fs/inode.c | 46 ++++++++++++++++++++++++++++++++++++++++++++-- fs/internal.h | 1 + fs/notify/inode_mark.c | 2 ++ fs/super.c | 8 ++++---- 4 files changed, 51 insertions(+), 6 deletions(-) (limited to 'fs/inode.c') diff --git a/fs/inode.c b/fs/inode.c index f6b3b52f1278..a6d60682f0fd 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -477,6 +477,49 @@ static void dispose_list(struct list_head *head) } } +/** + * evict_inodes - evict all evictable inodes for a superblock + * @sb: superblock to operate on + * + * Make sure that no inodes with zero refcount are retained. This is + * called by superblock shutdown after having MS_ACTIVE flag removed, + * so any inode reaching zero refcount during or after that call will + * be immediately evicted. + */ +void evict_inodes(struct super_block *sb) +{ + struct inode *inode, *next; + LIST_HEAD(dispose); + + down_write(&iprune_sem); + + spin_lock(&inode_lock); + list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { + if (atomic_read(&inode->i_count)) + continue; + + if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { + WARN_ON(1); + continue; + } + + inode->i_state |= I_FREEING; + + /* + * Move the inode off the IO lists and LRU once I_FREEING is + * set so that it won't get moved back on there if it is dirty. + */ + list_move(&inode->i_lru, &dispose); + list_del_init(&inode->i_wb_list); + if (!(inode->i_state & (I_DIRTY | I_SYNC))) + percpu_counter_dec(&nr_inodes_unused); + } + spin_unlock(&inode_lock); + + dispose_list(&dispose); + up_write(&iprune_sem); +} + /** * invalidate_inodes - attempt to free all inodes on a superblock * @sb: superblock to operate on @@ -493,9 +536,8 @@ int invalidate_inodes(struct super_block *sb) down_write(&iprune_sem); spin_lock(&inode_lock); - fsnotify_unmount_inodes(&sb->s_inodes); list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) { - if (inode->i_state & I_NEW) + if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) continue; if (atomic_read(&inode->i_count)) { busy = 1; diff --git a/fs/internal.h b/fs/internal.h index 4cc67eb6ed56..ebad3b90752d 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -106,4 +106,5 @@ extern void release_open_intent(struct nameidata *); * inode.c */ extern int get_nr_dirty_inodes(void); +extern int evict_inodes(struct super_block *); extern int invalidate_inodes(struct super_block *); diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c index 33297c005060..21ed10660b80 100644 --- a/fs/notify/inode_mark.c +++ b/fs/notify/inode_mark.c @@ -240,6 +240,7 @@ void fsnotify_unmount_inodes(struct list_head *list) { struct inode *inode, *next_i, *need_iput = NULL; + spin_lock(&inode_lock); list_for_each_entry_safe(inode, next_i, list, i_sb_list) { struct inode *need_iput_tmp; @@ -297,4 +298,5 @@ void fsnotify_unmount_inodes(struct list_head *list) spin_lock(&inode_lock); } + spin_unlock(&inode_lock); } diff --git a/fs/super.c b/fs/super.c index 8819e3a7ff20..b9c9869165db 100644 --- a/fs/super.c +++ b/fs/super.c @@ -273,14 +273,14 @@ void generic_shutdown_super(struct super_block *sb) get_fs_excl(); sb->s_flags &= ~MS_ACTIVE; - /* bad name - it should be evict_inodes() */ - invalidate_inodes(sb); + fsnotify_unmount_inodes(&sb->s_inodes); + + evict_inodes(sb); if (sop->put_super) sop->put_super(sb); - /* Forget any remaining inodes */ - if (invalidate_inodes(sb)) { + if (!list_empty(&sb->s_inodes)) { printk("VFS: Busy inodes after unmount of %s. " "Self-destruct in 5 seconds. Have a nice day...\n", sb->s_id); -- cgit v1.2.3