diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-17 16:12:34 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-02-17 16:12:34 -0800 |
commit | 038911597e17017cee55fe93d521164a27056866 (patch) | |
tree | 8f279a91de8237ce370a14d745940cccfd78ea07 | |
parent | 66dc830d14a222c9214a8557e9feb1e4a67a3857 (diff) | |
parent | a26f49926da938f47561f386be56a83dd37a496d (diff) | |
download | linux-038911597e17017cee55fe93d521164a27056866.tar.bz2 |
Merge branch 'lazytime' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull lazytime mount option support from Al Viro:
"Lazytime stuff from tytso"
* 'lazytime' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
ext4: add optimization for the lazytime mount option
vfs: add find_inode_nowait() function
vfs: add support for a lazytime mount option
-rw-r--r-- | fs/ext4/inode.c | 70 | ||||
-rw-r--r-- | fs/ext4/super.c | 10 | ||||
-rw-r--r-- | fs/fs-writeback.c | 62 | ||||
-rw-r--r-- | fs/gfs2/file.c | 4 | ||||
-rw-r--r-- | fs/inode.c | 106 | ||||
-rw-r--r-- | fs/jfs/file.c | 2 | ||||
-rw-r--r-- | fs/libfs.c | 2 | ||||
-rw-r--r-- | fs/proc_namespace.c | 1 | ||||
-rw-r--r-- | fs/sync.c | 8 | ||||
-rw-r--r-- | include/linux/backing-dev.h | 1 | ||||
-rw-r--r-- | include/linux/fs.h | 10 | ||||
-rw-r--r-- | include/trace/events/ext4.h | 30 | ||||
-rw-r--r-- | include/trace/events/writeback.h | 60 | ||||
-rw-r--r-- | include/uapi/linux/fs.h | 4 | ||||
-rw-r--r-- | mm/backing-dev.c | 10 |
15 files changed, 343 insertions, 37 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 28555f191b62..85404f15e53a 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -4174,6 +4174,65 @@ static int ext4_inode_blocks_set(handle_t *handle, return 0; } +struct other_inode { + unsigned long orig_ino; + struct ext4_inode *raw_inode; +}; + +static int other_inode_match(struct inode * inode, unsigned long ino, + void *data) +{ + struct other_inode *oi = (struct other_inode *) data; + + if ((inode->i_ino != ino) || + (inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | + I_DIRTY_SYNC | I_DIRTY_DATASYNC)) || + ((inode->i_state & I_DIRTY_TIME) == 0)) + return 0; + spin_lock(&inode->i_lock); + if (((inode->i_state & (I_FREEING | I_WILL_FREE | I_NEW | + I_DIRTY_SYNC | I_DIRTY_DATASYNC)) == 0) && + (inode->i_state & I_DIRTY_TIME)) { + struct ext4_inode_info *ei = EXT4_I(inode); + + inode->i_state &= ~(I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED); + spin_unlock(&inode->i_lock); + + spin_lock(&ei->i_raw_lock); + EXT4_INODE_SET_XTIME(i_ctime, inode, oi->raw_inode); + EXT4_INODE_SET_XTIME(i_mtime, inode, oi->raw_inode); + EXT4_INODE_SET_XTIME(i_atime, inode, oi->raw_inode); + ext4_inode_csum_set(inode, oi->raw_inode, ei); + spin_unlock(&ei->i_raw_lock); + trace_ext4_other_inode_update_time(inode, oi->orig_ino); + return -1; + } + spin_unlock(&inode->i_lock); + return -1; +} + +/* + * Opportunistically update the other time fields for other inodes in + * the same inode table block. + */ +static void ext4_update_other_inodes_time(struct super_block *sb, + unsigned long orig_ino, char *buf) +{ + struct other_inode oi; + unsigned long ino; + int i, inodes_per_block = EXT4_SB(sb)->s_inodes_per_block; + int inode_size = EXT4_INODE_SIZE(sb); + + oi.orig_ino = orig_ino; + ino = orig_ino & ~(inodes_per_block - 1); + for (i = 0; i < inodes_per_block; i++, ino++, buf += inode_size) { + if (ino == orig_ino) + continue; + oi.raw_inode = (struct ext4_inode *) buf; + (void) find_inode_nowait(sb, ino, other_inode_match, &oi); + } +} + /* * Post the struct inode info into an on-disk inode location in the * buffer-cache. This gobbles the caller's reference to the @@ -4283,10 +4342,11 @@ static int ext4_do_update_inode(handle_t *handle, cpu_to_le16(ei->i_extra_isize); } } - ext4_inode_csum_set(inode, raw_inode, ei); - spin_unlock(&ei->i_raw_lock); + if (inode->i_sb->s_flags & MS_LAZYTIME) + ext4_update_other_inodes_time(inode->i_sb, inode->i_ino, + bh->b_data); BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); rc = ext4_handle_dirty_metadata(handle, NULL, bh); @@ -4875,11 +4935,17 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode) * If the inode is marked synchronous, we don't honour that here - doing * so would cause a commit on atime updates, which we don't bother doing. * We handle synchronous inodes at the highest possible level. + * + * If only the I_DIRTY_TIME flag is set, we can skip everything. If + * I_DIRTY_TIME and I_DIRTY_SYNC is set, the only inode fields we need + * to copy into the on-disk inode structure are the timestamp files. */ void ext4_dirty_inode(struct inode *inode, int flags) { handle_t *handle; + if (flags == I_DIRTY_TIME) + return; handle = ext4_journal_start(inode, EXT4_HT_INODE, 2); if (IS_ERR(handle)) goto out; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 10e8c6b7ca08..1adac6868e6f 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1126,6 +1126,7 @@ enum { Opt_noquota, Opt_barrier, Opt_nobarrier, Opt_err, Opt_usrquota, Opt_grpquota, Opt_i_version, Opt_dax, Opt_stripe, Opt_delalloc, Opt_nodelalloc, Opt_mblk_io_submit, + Opt_lazytime, Opt_nolazytime, Opt_nomblk_io_submit, Opt_block_validity, Opt_noblock_validity, Opt_inode_readahead_blks, Opt_journal_ioprio, Opt_dioread_nolock, Opt_dioread_lock, @@ -1190,6 +1191,8 @@ static const match_table_t tokens = { {Opt_dax, "dax"}, {Opt_stripe, "stripe=%u"}, {Opt_delalloc, "delalloc"}, + {Opt_lazytime, "lazytime"}, + {Opt_nolazytime, "nolazytime"}, {Opt_nodelalloc, "nodelalloc"}, {Opt_removed, "mblk_io_submit"}, {Opt_removed, "nomblk_io_submit"}, @@ -1448,6 +1451,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, case Opt_i_version: sb->s_flags |= MS_I_VERSION; return 1; + case Opt_lazytime: + sb->s_flags |= MS_LAZYTIME; + return 1; + case Opt_nolazytime: + sb->s_flags &= ~MS_LAZYTIME; + return 1; } for (m = ext4_mount_opts; m->token != Opt_err; m++) @@ -5044,6 +5053,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data) } #endif + *flags = (*flags & ~MS_LAZYTIME) | (sb->s_flags & MS_LAZYTIME); ext4_msg(sb, KERN_INFO, "re-mounted. Opts: %s", orig_data); kfree(orig_data); return 0; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index c399152de397..073657f755d4 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -253,14 +253,19 @@ static bool inode_dirtied_after(struct inode *inode, unsigned long t) return ret; } +#define EXPIRE_DIRTY_ATIME 0x0001 + /* * Move expired (dirtied before work->older_than_this) dirty inodes from * @delaying_queue to @dispatch_queue. */ static int move_expired_inodes(struct list_head *delaying_queue, struct list_head *dispatch_queue, + int flags, struct wb_writeback_work *work) { + unsigned long *older_than_this = NULL; + unsigned long expire_time; LIST_HEAD(tmp); struct list_head *pos, *node; struct super_block *sb = NULL; @@ -268,13 +273,21 @@ static int move_expired_inodes(struct list_head *delaying_queue, int do_sb_sort = 0; int moved = 0; + if ((flags & EXPIRE_DIRTY_ATIME) == 0) + older_than_this = work->older_than_this; + else if ((work->reason == WB_REASON_SYNC) == 0) { + expire_time = jiffies - (HZ * 86400); + older_than_this = &expire_time; + } while (!list_empty(delaying_queue)) { inode = wb_inode(delaying_queue->prev); - if (work->older_than_this && - inode_dirtied_after(inode, *work->older_than_this)) + if (older_than_this && + inode_dirtied_after(inode, *older_than_this)) break; list_move(&inode->i_wb_list, &tmp); moved++; + if (flags & EXPIRE_DIRTY_ATIME) + set_bit(__I_DIRTY_TIME_EXPIRED, &inode->i_state); if (sb_is_blkdev_sb(inode->i_sb)) continue; if (sb && sb != inode->i_sb) @@ -315,9 +328,12 @@ out: static void queue_io(struct bdi_writeback *wb, struct wb_writeback_work *work) { int moved; + assert_spin_locked(&wb->list_lock); list_splice_init(&wb->b_more_io, &wb->b_io); - moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, work); + moved = move_expired_inodes(&wb->b_dirty, &wb->b_io, 0, work); + moved += move_expired_inodes(&wb->b_dirty_time, &wb->b_io, + EXPIRE_DIRTY_ATIME, work); trace_writeback_queue_io(wb, work, moved); } @@ -441,6 +457,8 @@ static void requeue_inode(struct inode *inode, struct bdi_writeback *wb, * updates after data IO completion. */ redirty_tail(inode, wb); + } else if (inode->i_state & I_DIRTY_TIME) { + list_move(&inode->i_wb_list, &wb->b_dirty_time); } else { /* The inode is clean. Remove from writeback lists. */ list_del_init(&inode->i_wb_list); @@ -487,7 +505,13 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) spin_lock(&inode->i_lock); dirty = inode->i_state & I_DIRTY; - inode->i_state &= ~I_DIRTY; + if (((dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) && + (inode->i_state & I_DIRTY_TIME)) || + (inode->i_state & I_DIRTY_TIME_EXPIRED)) { + dirty |= I_DIRTY_TIME | I_DIRTY_TIME_EXPIRED; + trace_writeback_lazytime(inode); + } + inode->i_state &= ~dirty; /* * Paired with smp_mb() in __mark_inode_dirty(). This allows @@ -507,8 +531,10 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) spin_unlock(&inode->i_lock); + if (dirty & I_DIRTY_TIME) + mark_inode_dirty_sync(inode); /* Don't write the inode if only I_DIRTY_PAGES was set */ - if (dirty & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { + if (dirty & ~I_DIRTY_PAGES) { int err = write_inode(inode, wbc); if (ret == 0) ret = err; @@ -556,7 +582,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, * make sure inode is on some writeback list and leave it there unless * we have completely cleaned the inode. */ - if (!(inode->i_state & I_DIRTY) && + if (!(inode->i_state & I_DIRTY_ALL) && (wbc->sync_mode != WB_SYNC_ALL || !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))) goto out; @@ -571,7 +597,7 @@ writeback_single_inode(struct inode *inode, struct bdi_writeback *wb, * If inode is clean, remove it from writeback lists. Otherwise don't * touch it. See comment above for explanation. */ - if (!(inode->i_state & I_DIRTY)) + if (!(inode->i_state & I_DIRTY_ALL)) list_del_init(&inode->i_wb_list); spin_unlock(&wb->list_lock); inode_sync_complete(inode); @@ -713,7 +739,7 @@ static long writeback_sb_inodes(struct super_block *sb, wrote += write_chunk - wbc.nr_to_write; spin_lock(&wb->list_lock); spin_lock(&inode->i_lock); - if (!(inode->i_state & I_DIRTY)) + if (!(inode->i_state & I_DIRTY_ALL)) wrote++; requeue_inode(inode, wb, &wbc); inode_sync_complete(inode); @@ -1151,16 +1177,20 @@ static noinline void block_dump___mark_inode_dirty(struct inode *inode) * page->mapping->host, so the page-dirtying time is recorded in the internal * blockdev inode. */ +#define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) void __mark_inode_dirty(struct inode *inode, int flags) { struct super_block *sb = inode->i_sb; struct backing_dev_info *bdi = NULL; + int dirtytime; + + trace_writeback_mark_inode_dirty(inode, flags); /* * Don't do this for I_DIRTY_PAGES - that doesn't actually * dirty the inode itself */ - if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC)) { + if (flags & (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_TIME)) { trace_writeback_dirty_inode_start(inode, flags); if (sb->s_op->dirty_inode) @@ -1168,6 +1198,9 @@ void __mark_inode_dirty(struct inode *inode, int flags) trace_writeback_dirty_inode(inode, flags); } + if (flags & I_DIRTY_INODE) + flags &= ~I_DIRTY_TIME; + dirtytime = flags & I_DIRTY_TIME; /* * Paired with smp_mb() in __writeback_single_inode() for the @@ -1175,16 +1208,21 @@ void __mark_inode_dirty(struct inode *inode, int flags) */ smp_mb(); - if ((inode->i_state & flags) == flags) + if (((inode->i_state & flags) == flags) || + (dirtytime && (inode->i_state & I_DIRTY_INODE))) return; if (unlikely(block_dump)) block_dump___mark_inode_dirty(inode); spin_lock(&inode->i_lock); + if (dirtytime && (inode->i_state & I_DIRTY_INODE)) + goto out_unlock_inode; if ((inode->i_state & flags) != flags) { const int was_dirty = inode->i_state & I_DIRTY; + if (flags & I_DIRTY_INODE) + inode->i_state &= ~I_DIRTY_TIME; inode->i_state |= flags; /* @@ -1231,8 +1269,10 @@ void __mark_inode_dirty(struct inode *inode, int flags) } inode->dirtied_when = jiffies; - list_move(&inode->i_wb_list, &bdi->wb.b_dirty); + list_move(&inode->i_wb_list, dirtytime ? + &bdi->wb.b_dirty_time : &bdi->wb.b_dirty); spin_unlock(&bdi->wb.list_lock); + trace_writeback_dirty_inode_enqueue(inode); if (wakeup_bdi) bdi_wakeup_thread_delayed(bdi); diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index ec9c2d33477a..3e32bb8e2d7e 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -654,7 +654,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, { struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; - int sync_state = inode->i_state & I_DIRTY; + int sync_state = inode->i_state & I_DIRTY_ALL; struct gfs2_inode *ip = GFS2_I(inode); int ret = 0, ret1 = 0; @@ -667,7 +667,7 @@ static int gfs2_fsync(struct file *file, loff_t start, loff_t end, if (!gfs2_is_jdata(ip)) sync_state &= ~I_DIRTY_PAGES; if (datasync) - sync_state &= ~I_DIRTY_SYNC; + sync_state &= ~(I_DIRTY_SYNC | I_DIRTY_TIME); if (sync_state) { ret = sync_inode_metadata(inode, 1); diff --git a/fs/inode.c b/fs/inode.c index 86bfaca724db..f00b16f45507 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -18,6 +18,7 @@ #include <linux/buffer_head.h> /* for inode_has_buffers */ #include <linux/ratelimit.h> #include <linux/list_lru.h> +#include <trace/events/writeback.h> #include "internal.h" /* @@ -30,7 +31,7 @@ * inode_sb_list_lock protects: * sb->s_inodes, inode->i_sb_list * bdi->wb.list_lock protects: - * bdi->wb.b_{dirty,io,more_io}, inode->i_wb_list + * bdi->wb.b_{dirty,io,more_io,dirty_time}, inode->i_wb_list * inode_hash_lock protects: * inode_hashtable, inode->i_hash * @@ -403,7 +404,8 @@ static void inode_lru_list_add(struct inode *inode) */ void inode_add_lru(struct inode *inode) { - if (!(inode->i_state & (I_DIRTY | I_SYNC | I_FREEING | I_WILL_FREE)) && + if (!(inode->i_state & (I_DIRTY_ALL | I_SYNC | + I_FREEING | I_WILL_FREE)) && !atomic_read(&inode->i_count) && inode->i_sb->s_flags & MS_ACTIVE) inode_lru_list_add(inode); } @@ -634,7 +636,7 @@ int invalidate_inodes(struct super_block *sb, bool kill_dirty) spin_unlock(&inode->i_lock); continue; } - if (inode->i_state & I_DIRTY && !kill_dirty) { + if (inode->i_state & I_DIRTY_ALL && !kill_dirty) { spin_unlock(&inode->i_lock); busy = 1; continue; @@ -1268,6 +1270,56 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino) } EXPORT_SYMBOL(ilookup); +/** + * find_inode_nowait - find an inode in the inode cache + * @sb: super block of file system to search + * @hashval: hash value (usually inode number) to search for + * @match: callback used for comparisons between inodes + * @data: opaque data pointer to pass to @match + * + * Search for the inode specified by @hashval and @data in the inode + * cache, where the helper function @match will return 0 if the inode + * does not match, 1 if the inode does match, and -1 if the search + * should be stopped. The @match function must be responsible for + * taking the i_lock spin_lock and checking i_state for an inode being + * freed or being initialized, and incrementing the reference count + * before returning 1. It also must not sleep, since it is called with + * the inode_hash_lock spinlock held. + * + * This is a even more generalized version of ilookup5() when the + * function must never block --- find_inode() can block in + * __wait_on_freeing_inode() --- or when the caller can not increment + * the reference count because the resulting iput() might cause an + * inode eviction. The tradeoff is that the @match funtion must be + * very carefully implemented. + */ +struct inode *find_inode_nowait(struct super_block *sb, + unsigned long hashval, + int (*match)(struct inode *, unsigned long, + void *), + void *data) +{ + struct hlist_head *head = inode_hashtable + hash(sb, hashval); + struct inode *inode, *ret_inode = NULL; + int mval; + + spin_lock(&inode_hash_lock); + hlist_for_each_entry(inode, head, i_hash) { + if (inode->i_sb != sb) + continue; + mval = match(inode, hashval, data); + if (mval == 0) + continue; + if (mval == 1) + ret_inode = inode; + goto out; + } +out: + spin_unlock(&inode_hash_lock); + return ret_inode; +} +EXPORT_SYMBOL(find_inode_nowait); + int insert_inode_locked(struct inode *inode) { struct super_block *sb = inode->i_sb; @@ -1418,11 +1470,20 @@ static void iput_final(struct inode *inode) */ void iput(struct inode *inode) { - if (inode) { - BUG_ON(inode->i_state & I_CLEAR); - - if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) - iput_final(inode); + if (!inode) + return; + BUG_ON(inode->i_state & I_CLEAR); +retry: + if (atomic_dec_and_lock(&inode->i_count, &inode->i_lock)) { + if (inode->i_nlink && (inode->i_state & I_DIRTY_TIME)) { + atomic_inc(&inode->i_count); + inode->i_state &= ~I_DIRTY_TIME; + spin_unlock(&inode->i_lock); + trace_writeback_lazytime_iput(inode); + mark_inode_dirty_sync(inode); + goto retry; + } + iput_final(inode); } } EXPORT_SYMBOL(iput); @@ -1481,14 +1542,9 @@ static int relatime_need_update(struct vfsmount *mnt, struct inode *inode, return 0; } -/* - * This does the actual work of updating an inodes time or version. Must have - * had called mnt_want_write() before calling this. - */ -static int update_time(struct inode *inode, struct timespec *time, int flags) +int generic_update_time(struct inode *inode, struct timespec *time, int flags) { - if (inode->i_op->update_time) - return inode->i_op->update_time(inode, time, flags); + int iflags = I_DIRTY_TIME; if (flags & S_ATIME) inode->i_atime = *time; @@ -1498,9 +1554,27 @@ static int update_time(struct inode *inode, struct timespec *time, int flags) inode->i_ctime = *time; if (flags & S_MTIME) inode->i_mtime = *time; - mark_inode_dirty_sync(inode); + + if (!(inode->i_sb->s_flags & MS_LAZYTIME) || (flags & S_VERSION)) + iflags |= I_DIRTY_SYNC; + __mark_inode_dirty(inode, iflags); return 0; } +EXPORT_SYMBOL(generic_update_time); + +/* + * This does the actual work of updating an inodes time or version. Must have + * had called mnt_want_write() before calling this. + */ +static int update_time(struct inode *inode, struct timespec *time, int flags) +{ + int (*update_time)(struct inode *, struct timespec *, int); + + update_time = inode->i_op->update_time ? inode->i_op->update_time : + generic_update_time; + + return update_time(inode, time, flags); +} /** * touch_atime - update the access time diff --git a/fs/jfs/file.c b/fs/jfs/file.c index 33aa0cc1f8b8..10815f8dfd8b 100644 --- a/fs/jfs/file.c +++ b/fs/jfs/file.c @@ -39,7 +39,7 @@ int jfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) return rc; mutex_lock(&inode->i_mutex); - if (!(inode->i_state & I_DIRTY) || + if (!(inode->i_state & I_DIRTY_ALL) || (datasync && !(inode->i_state & I_DIRTY_DATASYNC))) { /* Make sure committed changes hit the disk */ jfs_flush_journal(JFS_SBI(inode->i_sb)->log, 1); diff --git a/fs/libfs.c b/fs/libfs.c index 005843ce5dbd..b2ffdb045be4 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -948,7 +948,7 @@ int __generic_file_fsync(struct file *file, loff_t start, loff_t end, mutex_lock(&inode->i_mutex); ret = sync_mapping_buffers(inode->i_mapping); - if (!(inode->i_state & I_DIRTY)) + if (!(inode->i_state & I_DIRTY_ALL)) goto out; if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) goto out; diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c index 0f96f71ab32b..8db932da4009 100644 --- a/fs/proc_namespace.c +++ b/fs/proc_namespace.c @@ -44,6 +44,7 @@ static int show_sb_opts(struct seq_file *m, struct super_block *sb) { MS_SYNCHRONOUS, ",sync" }, { MS_DIRSYNC, ",dirsync" }, { MS_MANDLOCK, ",mand" }, + { MS_LAZYTIME, ",lazytime" }, { 0, NULL } }; const struct proc_fs_info *fs_infop; diff --git a/fs/sync.c b/fs/sync.c index 01d9f18a70b5..fbc98ee62044 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -177,8 +177,16 @@ SYSCALL_DEFINE1(syncfs, int, fd) */ int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync) { + struct inode *inode = file->f_mapping->host; + if (!file->f_op->fsync) return -EINVAL; + if (!datasync && (inode->i_state & I_DIRTY_TIME)) { + spin_lock(&inode->i_lock); + inode->i_state &= ~I_DIRTY_TIME; + spin_unlock(&inode->i_lock); + mark_inode_dirty_sync(inode); + } return file->f_op->fsync(file, start, end, datasync); } EXPORT_SYMBOL(vfs_fsync_range); diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index d94077fea1f8..aff923ae8c4b 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -55,6 +55,7 @@ struct bdi_writeback { struct list_head b_dirty; /* dirty inodes */ struct list_head b_io; /* parked for writeback */ struct list_head b_more_io; /* parked for more writeback */ + struct list_head b_dirty_time; /* time stamps are dirty */ spinlock_t list_lock; /* protects the b_* lists */ }; diff --git a/include/linux/fs.h b/include/linux/fs.h index 0b5b146d0490..447932aed1e1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1790,8 +1790,12 @@ struct super_operations { #define __I_DIO_WAKEUP 9 #define I_DIO_WAKEUP (1 << I_DIO_WAKEUP) #define I_LINKABLE (1 << 10) +#define I_DIRTY_TIME (1 << 11) +#define __I_DIRTY_TIME_EXPIRED 12 +#define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED) #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) +#define I_DIRTY_ALL (I_DIRTY | I_DIRTY_TIME) extern void __mark_inode_dirty(struct inode *, int); static inline void mark_inode_dirty(struct inode *inode) @@ -1954,6 +1958,7 @@ extern int current_umask(void); extern void ihold(struct inode * inode); extern void iput(struct inode *); +extern int generic_update_time(struct inode *, struct timespec *, int); static inline struct inode *file_inode(const struct file *f) { @@ -2492,6 +2497,11 @@ extern struct inode *ilookup(struct super_block *sb, unsigned long ino); extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); extern struct inode * iget_locked(struct super_block *, unsigned long); +extern struct inode *find_inode_nowait(struct super_block *, + unsigned long, + int (*match)(struct inode *, + unsigned long, void *), + void *data); extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); extern int insert_inode_locked(struct inode *); #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h index 6cfb841fea7c..6e5abd6d38a2 100644 --- a/include/trace/events/ext4.h +++ b/include/trace/events/ext4.h @@ -73,6 +73,36 @@ struct extent_status; { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}) +TRACE_EVENT(ext4_other_inode_update_time, + TP_PROTO(struct inode *inode, ino_t orig_ino), + + TP_ARGS(inode, orig_ino), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field( ino_t, ino ) + __field( ino_t, orig_ino ) + __field( uid_t, uid ) + __field( gid_t, gid ) + __field( __u16, mode ) + ), + + TP_fast_assign( + __entry->orig_ino = orig_ino; + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->uid = i_uid_read(inode); + __entry->gid = i_gid_read(inode); + __entry->mode = inode->i_mode; + ), + + TP_printk("dev %d,%d orig_ino %lu ino %lu mode 0%o uid %u gid %u", + MAJOR(__entry->dev), MINOR(__entry->dev), + (unsigned long) __entry->orig_ino, + (unsigned long) __entry->ino, __entry->mode, + __entry->uid, __entry->gid) +); + TRACE_EVENT(ext4_free_inode, TP_PROTO(struct inode *inode), diff --git a/include/trace/events/writeback.h b/include/trace/events/writeback.h index 0e9310905413..5a14ead59696 100644 --- a/include/trace/events/writeback.h +++ b/include/trace/events/writeback.h @@ -18,6 +18,8 @@ {I_FREEING, "I_FREEING"}, \ {I_CLEAR, "I_CLEAR"}, \ {I_SYNC, "I_SYNC"}, \ + {I_DIRTY_TIME, "I_DIRTY_TIME"}, \ + {I_DIRTY_TIME_EXPIRED, "I_DIRTY_TIME_EXPIRED"}, \ {I_REFERENCED, "I_REFERENCED"} \ ) @@ -68,6 +70,7 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, TP_STRUCT__entry ( __array(char, name, 32) __field(unsigned long, ino) + __field(unsigned long, state) __field(unsigned long, flags) ), @@ -78,16 +81,25 @@ DECLARE_EVENT_CLASS(writeback_dirty_inode_template, strncpy(__entry->name, bdi->dev ? dev_name(bdi->dev) : "(unknown)", 32); __entry->ino = inode->i_ino; + __entry->state = inode->i_state; __entry->flags = flags; ), - TP_printk("bdi %s: ino=%lu flags=%s", + TP_printk("bdi %s: ino=%lu state=%s flags=%s", __entry->name, __entry->ino, + show_inode_state(__entry->state), show_inode_state(__entry->flags) ) ); +DEFINE_EVENT(writeback_dirty_inode_template, writeback_mark_inode_dirty, + + TP_PROTO(struct inode *inode, int flags), + + TP_ARGS(inode, flags) +); + DEFINE_EVENT(writeback_dirty_inode_template, writeback_dirty_inode_start, TP_PROTO(struct inode *inode, int flags), @@ -596,6 +608,52 @@ DEFINE_EVENT(writeback_single_inode_template, writeback_single_inode, TP_ARGS(inode, wbc, nr_to_write) ); +DECLARE_EVENT_CLASS(writeback_lazytime_template, + TP_PROTO(struct inode *inode), + + TP_ARGS(inode), + + TP_STRUCT__entry( + __field( dev_t, dev ) + __field(unsigned long, ino ) + __field(unsigned long, state ) + __field( __u16, mode ) + __field(unsigned long, dirtied_when ) + ), + + TP_fast_assign( + __entry->dev = inode->i_sb->s_dev; + __entry->ino = inode->i_ino; + __entry->state = inode->i_state; + __entry->mode = inode->i_mode; + __entry->dirtied_when = inode->dirtied_when; + ), + + TP_printk("dev %d,%d ino %lu dirtied %lu state %s mode 0%o", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->ino, __entry->dirtied_when, + show_inode_state(__entry->state), __entry->mode) +); + +DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime, + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +DEFINE_EVENT(writeback_lazytime_template, writeback_lazytime_iput, + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + +DEFINE_EVENT(writeback_lazytime_template, writeback_dirty_inode_enqueue, + + TP_PROTO(struct inode *inode), + + TP_ARGS(inode) +); + #endif /* _TRACE_WRITEBACK_H */ /* This part must be outside protection */ diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 3735fa0a6784..9b964a5920af 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -90,6 +90,7 @@ struct inodes_stat_t { #define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ #define MS_I_VERSION (1<<23) /* Update inode I_version field */ #define MS_STRICTATIME (1<<24) /* Always perform atime updates */ +#define MS_LAZYTIME (1<<25) /* Update the on-disk [acm]times lazily */ /* These sb flags are internal to the kernel */ #define MS_NOSEC (1<<28) @@ -100,7 +101,8 @@ struct inodes_stat_t { /* * Superblock flags that can be altered by MS_REMOUNT */ -#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION) +#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION|\ + MS_LAZYTIME) /* * Old magic mount flag and mask diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 7690ec77c722..6dc4580df2af 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -49,10 +49,10 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) unsigned long background_thresh; unsigned long dirty_thresh; unsigned long bdi_thresh; - unsigned long nr_dirty, nr_io, nr_more_io; + unsigned long nr_dirty, nr_io, nr_more_io, nr_dirty_time; struct inode *inode; - nr_dirty = nr_io = nr_more_io = 0; + nr_dirty = nr_io = nr_more_io = nr_dirty_time = 0; spin_lock(&wb->list_lock); list_for_each_entry(inode, &wb->b_dirty, i_wb_list) nr_dirty++; @@ -60,6 +60,9 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) nr_io++; list_for_each_entry(inode, &wb->b_more_io, i_wb_list) nr_more_io++; + list_for_each_entry(inode, &wb->b_dirty_time, i_wb_list) + if (inode->i_state & I_DIRTY_TIME) + nr_dirty_time++; spin_unlock(&wb->list_lock); global_dirty_limits(&background_thresh, &dirty_thresh); @@ -78,6 +81,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) "b_dirty: %10lu\n" "b_io: %10lu\n" "b_more_io: %10lu\n" + "b_dirty_time: %10lu\n" "bdi_list: %10u\n" "state: %10lx\n", (unsigned long) K(bdi_stat(bdi, BDI_WRITEBACK)), @@ -91,6 +95,7 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v) nr_dirty, nr_io, nr_more_io, + nr_dirty_time, !list_empty(&bdi->bdi_list), bdi->state); #undef K @@ -380,6 +385,7 @@ static void bdi_wb_init(struct bdi_writeback *wb, struct backing_dev_info *bdi) INIT_LIST_HEAD(&wb->b_dirty); INIT_LIST_HEAD(&wb->b_io); INIT_LIST_HEAD(&wb->b_more_io); + INIT_LIST_HEAD(&wb->b_dirty_time); spin_lock_init(&wb->list_lock); INIT_DELAYED_WORK(&wb->dwork, bdi_writeback_workfn); } |