From 0ba13fd19d39b7cb672bcec052bc813389c079a4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 11 Sep 2015 13:26:39 -0700 Subject: Revert "writeback: plug writeback at a high level" This reverts commit d353d7587d02116b9732d5c06615aed75a4d3a47. Doing the block layer plug/unplug inside writeback_sb_inodes() is broken, because that function is actually called with a spinlock held: wb->list_lock, as pointed out by Chris Mason. Chris suggested just dropping and re-taking the spinlock around the blk_finish_plug() call (the plugging itself can happen under the spinlock), and that would technically work, but is just disgusting. We do something fairly similar - but not quite as disgusting because we at least have a better reason for it - in writeback_single_inode(), so it's not like the caller can depend on the lock being held over the call, but in this case there just isn't any good reason for that "release and re-take the lock" pattern. [ In general, we should really strive to avoid the "release and retake" pattern for locks, because in the general case it can easily cause subtle bugs when the caller caches any state around the call that might be invalidated by dropping the lock even just temporarily. ] But in this case, the plugging should be easy to just move up to the callers before the spinlock is taken, which should even improve the effectiveness of the plug. So there is really no good reason to play games with locking here. I'll send off a test-patch so that Dave Chinner can verify that that plug movement works. In the meantime this just reverts the problematic commit and adds a comment to the function so that we hopefully don't make this mistake again. 
Reported-by: Chris Mason Cc: Josef Bacik Cc: Dave Chinner Cc: Neil Brown Cc: Jan Kara Cc: Christoph Hellwig Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 24489126f8ca..d8ea7ed411b2 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1380,6 +1380,10 @@ static long writeback_chunk_size(struct bdi_writeback *wb, * Write a portion of b_io inodes which belong to @sb. * * Return the number of pages and/or inodes written. + * + * NOTE! This is called with wb->list_lock held, and will + * unlock and relock that for each inode it ends up doing + * IO for. */ static long writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb, @@ -1398,9 +1402,7 @@ static long writeback_sb_inodes(struct super_block *sb, unsigned long start_time = jiffies; long write_chunk; long wrote = 0; /* count both pages and inodes */ - struct blk_plug plug; - blk_start_plug(&plug); while (!list_empty(&wb->b_io)) { struct inode *inode = wb_inode(wb->b_io.prev); @@ -1498,7 +1500,6 @@ static long writeback_sb_inodes(struct super_block *sb, break; } } - blk_finish_plug(&plug); return wrote; } -- cgit v1.2.3 From 505a666ee3fc611518e85df203eb8c707995ceaa Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 11 Sep 2015 13:37:19 -0700 Subject: writeback: plug writeback in wb_writeback() and writeback_inodes_wb() We had to revert the plugging in writeback_sb_inodes() because the wb->list_lock is held, but we could easily plug at a higher level before taking that lock, and unplug after releasing it. This does that. Chris will run performance numbers, just to verify that this approach is comparable to the alternative (we could just drop and re-take the lock around the blk_finish_plug() rather than these two commits). 
I'd have preferred waiting for actual performance numbers before picking one approach over the other, but I don't want to release rc1 with the known "sleeping function called from invalid context" issue, so I'll pick this cleanup version for now. But if the numbers show that we really want to plug just at the writeback_sb_inodes() level, and we should just play ugly games with the spinlock, we'll switch to that. Cc: Chris Mason Cc: Josef Bacik Cc: Dave Chinner Cc: Neil Brown Cc: Jan Kara Cc: Christoph Hellwig Signed-off-by: Linus Torvalds --- fs/fs-writeback.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index d8ea7ed411b2..587ac08eabb6 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1546,12 +1546,15 @@ static long writeback_inodes_wb(struct bdi_writeback *wb, long nr_pages, .range_cyclic = 1, .reason = reason, }; + struct blk_plug plug; + blk_start_plug(&plug); spin_lock(&wb->list_lock); if (list_empty(&wb->b_io)) queue_io(wb, &work); __writeback_inodes_wb(wb, &work); spin_unlock(&wb->list_lock); + blk_finish_plug(&plug); return nr_pages - work.nr_pages; } @@ -1579,10 +1582,12 @@ static long wb_writeback(struct bdi_writeback *wb, unsigned long oldest_jif; struct inode *inode; long progress; + struct blk_plug plug; oldest_jif = jiffies; work->older_than_this = &oldest_jif; + blk_start_plug(&plug); spin_lock(&wb->list_lock); for (;;) { /* @@ -1662,6 +1667,7 @@ static long wb_writeback(struct bdi_writeback *wb, } } spin_unlock(&wb->list_lock); + blk_finish_plug(&plug); return nr_pages - work->nr_pages; } -- cgit v1.2.3