diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/fs-writeback.c | 106 | ||||
-rw-r--r-- | fs/inode.c | 2 | ||||
-rw-r--r-- | fs/super.c | 2 |
3 files changed, 85 insertions, 25 deletions
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index fe7e83a45eff..1fcce8345da3 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -981,6 +981,37 @@ void inode_io_list_del(struct inode *inode) } /* + * mark an inode as under writeback on the sb + */ +void sb_mark_inode_writeback(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + unsigned long flags; + + if (list_empty(&inode->i_wb_list)) { + spin_lock_irqsave(&sb->s_inode_wblist_lock, flags); + if (list_empty(&inode->i_wb_list)) + list_add_tail(&inode->i_wb_list, &sb->s_inodes_wb); + spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags); + } +} + +/* + * clear an inode as under writeback on the sb + */ +void sb_clear_inode_writeback(struct inode *inode) +{ + struct super_block *sb = inode->i_sb; + unsigned long flags; + + if (!list_empty(&inode->i_wb_list)) { + spin_lock_irqsave(&sb->s_inode_wblist_lock, flags); + list_del_init(&inode->i_wb_list); + spin_unlock_irqrestore(&sb->s_inode_wblist_lock, flags); + } +} + +/* * Redirty an inode: set its when-it-was dirtied timestamp and move it to the * furthest end of its superblock's dirty-inode list. * @@ -2154,7 +2185,7 @@ EXPORT_SYMBOL(__mark_inode_dirty); */ static void wait_sb_inodes(struct super_block *sb) { - struct inode *inode, *old_inode = NULL; + LIST_HEAD(sync_list); /* * We need to be protected against the filesystem going from @@ -2163,38 +2194,60 @@ static void wait_sb_inodes(struct super_block *sb) WARN_ON(!rwsem_is_locked(&sb->s_umount)); mutex_lock(&sb->s_sync_lock); - spin_lock(&sb->s_inode_list_lock); /* - * Data integrity sync. Must wait for all pages under writeback, - * because there may have been pages dirtied before our sync - * call, but which had writeout started before we write it out. - * In which case, the inode may not be on the dirty list, but - * we still have to wait for that writeout. + * Splice the writeback list onto a temporary list to avoid waiting on + * inodes that have started writeback after this point. + * + * Use rcu_read_lock() to keep the inodes around until we have a + * reference. s_inode_wblist_lock protects sb->s_inodes_wb as well as + * the local list because inodes can be dropped from either by writeback + * completion. + */ + rcu_read_lock(); + spin_lock_irq(&sb->s_inode_wblist_lock); + list_splice_init(&sb->s_inodes_wb, &sync_list); + + /* + * Data integrity sync. Must wait for all pages under writeback, because + * there may have been pages dirtied before our sync call, but which had + * writeout started before we write it out. In which case, the inode + * may not be on the dirty list, but we still have to wait for that + * writeout. */ - list_for_each_entry(inode, &sb->s_inodes, i_sb_list) { + while (!list_empty(&sync_list)) { + struct inode *inode = list_first_entry(&sync_list, struct inode, + i_wb_list); struct address_space *mapping = inode->i_mapping; + /* + * Move each inode back to the wb list before we drop the lock + * to preserve consistency between i_wb_list and the mapping + * writeback tag. Writeback completion is responsible to remove + * the inode from either list once the writeback tag is cleared. + */ + list_move_tail(&inode->i_wb_list, &sb->s_inodes_wb); + + /* + * The mapping can appear untagged while still on-list since we + * do not have the mapping lock. Skip it here, wb completion + * will remove it. + */ + if (!mapping_tagged(mapping, PAGECACHE_TAG_WRITEBACK)) + continue; + + spin_unlock_irq(&sb->s_inode_wblist_lock); + spin_lock(&inode->i_lock); - if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) || - (mapping->nrpages == 0)) { + if (inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) { spin_unlock(&inode->i_lock); + + spin_lock_irq(&sb->s_inode_wblist_lock); continue; } __iget(inode); spin_unlock(&inode->i_lock); - spin_unlock(&sb->s_inode_list_lock); - - /* - * We hold a reference to 'inode' so it couldn't have been - * removed from s_inodes list while we dropped the - * s_inode_list_lock. We cannot iput the inode now as we can - * be holding the last reference and we cannot iput it under - * s_inode_list_lock. So we keep the reference and iput it - * later. - */ - iput(old_inode); - old_inode = inode; + rcu_read_unlock(); /* * We keep the error status of individual mapping so that @@ -2205,10 +2258,13 @@ static void wait_sb_inodes(struct super_block *sb) cond_resched(); - spin_lock(&sb->s_inode_list_lock); + iput(inode); + + rcu_read_lock(); + spin_lock_irq(&sb->s_inode_wblist_lock); } - spin_unlock(&sb->s_inode_list_lock); - iput(old_inode); + spin_unlock_irq(&sb->s_inode_wblist_lock); + rcu_read_unlock(); mutex_unlock(&sb->s_sync_lock); } diff --git a/fs/inode.c b/fs/inode.c index 4ccbc21b30ce..e171f7b5f9e4 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -365,6 +365,7 @@ void inode_init_once(struct inode *inode) INIT_HLIST_NODE(&inode->i_hash); INIT_LIST_HEAD(&inode->i_devices); INIT_LIST_HEAD(&inode->i_io_list); + INIT_LIST_HEAD(&inode->i_wb_list); INIT_LIST_HEAD(&inode->i_lru); address_space_init_once(&inode->i_data); i_size_ordered_init(inode); @@ -507,6 +508,7 @@ void clear_inode(struct inode *inode) BUG_ON(!list_empty(&inode->i_data.private_list)); BUG_ON(!(inode->i_state & I_FREEING)); BUG_ON(inode->i_state & I_CLEAR); + BUG_ON(!list_empty(&inode->i_wb_list)); /* don't need i_lock here, no concurrent mods to i_state */ inode->i_state = I_FREEING | I_CLEAR; } diff --git a/fs/super.c b/fs/super.c index d78b9847e6cb..5806ffd45563 100644 --- a/fs/super.c +++ b/fs/super.c @@ -206,6 +206,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) mutex_init(&s->s_sync_lock); INIT_LIST_HEAD(&s->s_inodes); spin_lock_init(&s->s_inode_list_lock); + INIT_LIST_HEAD(&s->s_inodes_wb); + spin_lock_init(&s->s_inode_wblist_lock); if (list_lru_init_memcg(&s->s_dentry_lru)) goto fail; |