From 96150606e2fb82d242c9e4a414e4e922849f7bf7 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Sat, 26 Nov 2011 11:00:47 +0530 Subject: logfs: update page reference count for pinned pages LogFS sets PG_private flag to indicate a pinned page. We assumed that marking a page as private is enough to ensure its existence. But instead it is necessary to hold a reference count to the page. The change resolves the following BUG BUG: Bad page state in process flush-253:16 pfn:6a6d0 page flags: 0x100000000000808(uptodate|private) Suggested-and-Acked-by: Joern Engel Signed-off-by: Prasad Joshi --- fs/logfs/readwrite.c | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'fs/logfs/readwrite.c') diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 2ac4217b7901..6d663e8ea6da 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -560,8 +560,13 @@ static void inode_free_block(struct super_block *sb, struct logfs_block *block) static void indirect_free_block(struct super_block *sb, struct logfs_block *block) { - ClearPagePrivate(block->page); - block->page->private = 0; + struct page *page = block->page; + + if (PagePrivate(page)) { + ClearPagePrivate(page); + page_cache_release(page); + set_page_private(page, 0); + } __free_block(sb, block); } @@ -650,8 +655,11 @@ static void alloc_data_block(struct inode *inode, struct page *page) logfs_unpack_index(page->index, &bix, &level); block = __alloc_block(inode->i_sb, inode->i_ino, bix, level); block->page = page; + SetPagePrivate(page); - page->private = (unsigned long)block; + page_cache_get(page); + set_page_private(page, (unsigned long) block); + block->ops = &indirect_block_ops; } @@ -1901,8 +1909,11 @@ static void move_page_to_inode(struct inode *inode, struct page *page) li->li_block = block; block->page = NULL; - page->private = 0; - ClearPagePrivate(page); + if (PagePrivate(page)) { + ClearPagePrivate(page); + page_cache_release(page); + set_page_private(page, 0); + 
} } static void move_inode_to_page(struct page *page, struct inode *inode) @@ -1918,8 +1929,12 @@ static void move_inode_to_page(struct page *page, struct inode *inode) BUG_ON(PagePrivate(page)); block->ops = &indirect_block_ops; block->page = page; - page->private = (unsigned long)block; - SetPagePrivate(page); + + if (!PagePrivate(page)) { + SetPagePrivate(page); + page_cache_get(page); + set_page_private(page, (unsigned long) block); + } block->inode = NULL; li->li_block = NULL; -- cgit v1.2.3 From 13ced29cb28996a9bc4f68e43ff0c57eafdb1e21 Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Sat, 28 Jan 2012 11:36:06 +0530 Subject: logfs: take write mutex lock during fsync and sync LogFS uses super->s_write_mutex while writing data to disk. Taking the same mutex lock in sync and fsync code path solves the following BUG: ------------[ cut here ]------------ kernel BUG at /home/prasad/logfs/dev_bdev.c:134! Pid: 2387, comm: flush-253:16 Not tainted 3.0.0+ #4 Bochs Bochs RIP: 0010:[] [] bdev_writeseg+0x25d/0x270 [logfs] Call Trace: [] logfs_open_area+0x91/0x150 [logfs] [] ? find_level.clone.9+0x62/0x100 [] __logfs_segment_write.clone.20+0x5c/0x190 [logfs] [] ? mempool_kmalloc+0x15/0x20 [] ? mempool_alloc+0x53/0x130 [] logfs_segment_write+0x1d4/0x230 [logfs] [] logfs_write_i0+0x12e/0x190 [logfs] [] __logfs_write_rec+0x140/0x220 [logfs] [] logfs_write_rec+0x64/0xd0 [logfs] [] __logfs_write_buf+0x106/0x110 [logfs] [] logfs_write_buf+0x4e/0x80 [logfs] [] __logfs_writepage+0x23/0x80 [logfs] [] logfs_writepage+0xdc/0x110 [logfs] [] __writepage+0x17/0x40 [] write_cache_pages+0x208/0x4f0 [] ? set_page_dirty+0x70/0x70 [] generic_writepages+0x4a/0x70 [] do_writepages+0x21/0x40 [] writeback_single_inode+0x101/0x250 [] writeback_sb_inodes+0xed/0x1c0 [] writeback_inodes_wb+0x7b/0x1e0 [] wb_writeback+0x4c3/0x530 [] ? sub_preempt_count+0x9d/0xd0 [] wb_do_writeback+0xdb/0x290 [] ? sub_preempt_count+0x9d/0xd0 [] ? _raw_spin_unlock_irqrestore+0x18/0x40 [] ? 
del_timer+0x8a/0x120 [] bdi_writeback_thread+0x8c/0x2e0 [] ? wb_do_writeback+0x290/0x290 [] kthread+0x96/0xa0 [] kernel_thread_helper+0x4/0x10 [] ? kthread_worker_fn+0x190/0x190 [] ? gs_change+0xb/0xb RIP [] bdev_writeseg+0x25d/0x270 [logfs] ---[ end trace 0211ad60a57657c4 ]--- Reviewed-by: Joern Engel Signed-off-by: Prasad Joshi --- fs/logfs/file.c | 2 ++ fs/logfs/inode.c | 2 ++ fs/logfs/logfs.h | 2 ++ fs/logfs/readwrite.c | 6 ++---- 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'fs/logfs/readwrite.c') diff --git a/fs/logfs/file.c b/fs/logfs/file.c index b548c87a86f1..3886cded283c 100644 --- a/fs/logfs/file.c +++ b/fs/logfs/file.c @@ -230,7 +230,9 @@ int logfs_fsync(struct file *file, loff_t start, loff_t end, int datasync) return ret; mutex_lock(&inode->i_mutex); + logfs_get_wblocks(sb, NULL, WF_LOCK); logfs_write_anchor(sb); + logfs_put_wblocks(sb, NULL, WF_LOCK); mutex_unlock(&inode->i_mutex); return 0; diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 7e441ad5f792..388d7c5a7bed 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -364,7 +364,9 @@ static void logfs_init_once(void *_li) static int logfs_sync_fs(struct super_block *sb, int wait) { + logfs_get_wblocks(sb, NULL, WF_LOCK); logfs_write_anchor(sb); + logfs_put_wblocks(sb, NULL, WF_LOCK); return 0; } diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index 398ecff6e548..bb4340850c1b 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h @@ -577,6 +577,8 @@ void initialize_block_counters(struct page *page, struct logfs_block *block, __be64 *array, int page_is_empty); int logfs_exist_block(struct inode *inode, u64 bix); int get_page_reserve(struct inode *inode, struct page *page); +void logfs_get_wblocks(struct super_block *sb, struct page *page, int lock); +void logfs_put_wblocks(struct super_block *sb, struct page *page, int lock); extern struct logfs_block_ops indirect_block_ops; /* segment.c */ diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 
6d663e8ea6da..7b10e8aecced 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -244,8 +244,7 @@ static void preunlock_page(struct super_block *sb, struct page *page, int lock) * is waiting for s_write_mutex. We annotate this fact by setting PG_pre_locked * in addition to PG_locked. */ -static void logfs_get_wblocks(struct super_block *sb, struct page *page, - int lock) +void logfs_get_wblocks(struct super_block *sb, struct page *page, int lock) { struct logfs_super *super = logfs_super(sb); @@ -260,8 +259,7 @@ static void logfs_get_wblocks(struct super_block *sb, struct page *page, } } -static void logfs_put_wblocks(struct super_block *sb, struct page *page, - int lock) +void logfs_put_wblocks(struct super_block *sb, struct page *page, int lock) { struct logfs_super *super = logfs_super(sb); -- cgit v1.2.3 From 0bd90387ed5a8abbcf43391b480efdc211721cfe Mon Sep 17 00:00:00 2001 From: Prasad Joshi Date: Sun, 2 Oct 2011 23:46:51 +0530 Subject: logfs: Propagate page parameter to __logfs_write_inode During GC LogFS has to rewrite each valid block to a separate segment. Rewrite operation reads data from an old segment and writes it to a newly allocated segment. Since every write operation changes data block pointers maintained in inode, inode should also be rewritten. In the GC path, to avoid an AB-BA deadlock, LogFS marks a page with PG_pre_locked in addition to locking the page (PG_locked). The page lock is ignored iff the page is pre-locked. LogFS uses a special file called segment file. The segment file maintains an 8-byte entry for every segment. It keeps track of erase count, level etc. for every segment. Bad things happen when a segment belonging to the segment file is GCed: ------------[ cut here ]------------ kernel BUG at /home/prasad/logfs/readwrite.c:297! 
invalid opcode: 0000 [#1] SMP Modules linked in: logfs joydev usbhid hid psmouse e1000 i2c_piix4 serio_raw [last unloaded: logfs] Pid: 20161, comm: mount Not tainted 3.1.0-rc3+ #3 innotek GmbH VirtualBox EIP: 0060:[] EFLAGS: 00010292 CPU: 0 EIP is at logfs_lock_write_page+0x6a/0x70 [logfs] EAX: 00000027 EBX: f73f5b20 ECX: c16007c8 EDX: 00000094 ESI: 00000000 EDI: e59be6e4 EBP: c7337b28 ESP: c7337b18 DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process mount (pid: 20161, ti=c7336000 task=eb323f70 task.ti=c7336000) Stack: f8099a3d c7337b24 f73f5b20 00001002 c7337b50 f8091f6d f8099a4d f80994e4 00000003 00000000 c7337b68 00000000 c67e4400 00001000 c7337b80 f80935e5 00000000 00000000 00000000 00000000 e1fcf000 0000000f e59be618 c70bf900 Call Trace: [] logfs_get_write_page.clone.16+0xdd/0x100 [logfs] [] logfs_mod_segment_entry+0x55/0x110 [logfs] [] logfs_get_segment_entry+0x1d/0x20 [logfs] [] ? logfs_cleanup_journal+0x50/0x50 [logfs] [] ostore_get_erase_count+0x1b/0x40 [logfs] [] logfs_open_area+0xc8/0x150 [logfs] [] ? kmemleak_alloc+0x2c/0x60 [] __logfs_segment_write.clone.16+0x4e/0x1b0 [logfs] [] ? mempool_kmalloc+0x13/0x20 [] ? mempool_kmalloc+0x13/0x20 [] logfs_segment_write+0x17f/0x1d0 [logfs] [] logfs_write_i0+0x11c/0x180 [logfs] [] logfs_write_direct+0x45/0x90 [logfs] [] __logfs_write_buf+0xbd/0xf0 [logfs] [] ? kmap_atomic_prot+0x4e/0xe0 [] logfs_write_buf+0x3b/0x60 [logfs] [] __logfs_write_inode+0xa9/0x110 [logfs] [] logfs_rewrite_block+0xc0/0x110 [logfs] [] ? get_mapping_page+0x10/0x60 [logfs] [] ? logfs_load_object_aliases+0x2e0/0x2f0 [logfs] [] logfs_gc_segment+0x2ad/0x310 [logfs] [] __logfs_gc_once+0x4a/0x80 [logfs] [] logfs_gc_pass+0x683/0x6a0 [logfs] [] logfs_mount+0x5a9/0x680 [logfs] [] mount_fs+0x21/0xd0 [] ? __alloc_percpu+0xf/0x20 [] ? alloc_vfsmnt+0xb1/0x130 [] vfs_kern_mount+0x4b/0xa0 [] do_kern_mount+0x3e/0xe0 [] do_mount+0x34d/0x670 [] ? 
strndup_user+0x49/0x70 [] sys_mount+0x6b/0xa0 [] syscall_call+0x7/0xb Code: f8 e8 8b 93 39 c9 8b 45 f8 3e 0f ba 28 00 19 d2 85 d2 74 ca eb d0 0f 0b 8d 45 fc 89 44 24 04 c7 04 24 3d 9a 09 f8 e8 09 92 39 c9 <0f> 0b 8d 74 26 00 55 89 e5 3e 8d 74 26 00 8b 10 80 e6 01 74 09 EIP: [] logfs_lock_write_page+0x6a/0x70 [logfs] SS:ESP 0068:c7337b18 ---[ end trace 96e67d5b3aa3d6ca ]--- The patch passes locked page to __logfs_write_inode. It calls function logfs_get_wblocks() to pre-lock the page. This ensures any further attempts to lock the page are ignored (esp from get_erase_count). Acked-by: Joern Engel Signed-off-by: Prasad Joshi --- fs/logfs/dir.c | 2 +- fs/logfs/inode.c | 2 +- fs/logfs/logfs.h | 2 +- fs/logfs/readwrite.c | 12 ++++++------ 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs/logfs/readwrite.c') diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c index b7d7f67cee5a..b6404898da83 100644 --- a/fs/logfs/dir.c +++ b/fs/logfs/dir.c @@ -71,7 +71,7 @@ static int write_dir(struct inode *dir, struct logfs_disk_dentry *dd, static int write_inode(struct inode *inode) { - return __logfs_write_inode(inode, WF_LOCK); + return __logfs_write_inode(inode, NULL, WF_LOCK); } static s64 dir_seek_data(struct inode *inode, s64 pos) diff --git a/fs/logfs/inode.c b/fs/logfs/inode.c index 388d7c5a7bed..7c42c132c177 100644 --- a/fs/logfs/inode.c +++ b/fs/logfs/inode.c @@ -287,7 +287,7 @@ static int logfs_write_inode(struct inode *inode, struct writeback_control *wbc) if (logfs_inode(inode)->li_flags & LOGFS_IF_STILLBORN) return 0; - ret = __logfs_write_inode(inode, flags); + ret = __logfs_write_inode(inode, NULL, flags); LOGFS_BUG_ON(ret, inode->i_sb); return ret; } diff --git a/fs/logfs/logfs.h b/fs/logfs/logfs.h index bb4340850c1b..0dec29887a8a 100644 --- a/fs/logfs/logfs.h +++ b/fs/logfs/logfs.h @@ -528,7 +528,7 @@ void logfs_destroy_inode_cache(void); void logfs_set_blocks(struct inode *inode, u64 no); /* these logically belong into inode.c but actually reside in 
readwrite.c */ int logfs_read_inode(struct inode *inode); -int __logfs_write_inode(struct inode *inode, long flags); +int __logfs_write_inode(struct inode *inode, struct page *, long flags); void logfs_evict_inode(struct inode *inode); /* journal.c */ diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 7b10e8aecced..88284c67ba97 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -422,7 +422,7 @@ static void inode_write_block(struct logfs_block *block) if (inode->i_ino == LOGFS_INO_MASTER) logfs_write_anchor(inode->i_sb); else { - ret = __logfs_write_inode(inode, 0); + ret = __logfs_write_inode(inode, NULL, 0); /* see indirect_write_block comment */ BUG_ON(ret); } @@ -1629,7 +1629,7 @@ int logfs_rewrite_block(struct inode *inode, u64 bix, u64 ofs, if (inode->i_ino == LOGFS_INO_MASTER) logfs_write_anchor(inode->i_sb); else { - err = __logfs_write_inode(inode, flags); + err = __logfs_write_inode(inode, page, flags); } } } @@ -1879,7 +1879,7 @@ int logfs_truncate(struct inode *inode, u64 target) logfs_get_wblocks(sb, NULL, 1); err = __logfs_truncate(inode, size); if (!err) - err = __logfs_write_inode(inode, 0); + err = __logfs_write_inode(inode, NULL, 0); logfs_put_wblocks(sb, NULL, 1); } @@ -2119,14 +2119,14 @@ void logfs_set_segment_unreserved(struct super_block *sb, u32 segno, u32 ec) ec_level); } -int __logfs_write_inode(struct inode *inode, long flags) +int __logfs_write_inode(struct inode *inode, struct page *page, long flags) { struct super_block *sb = inode->i_sb; int ret; - logfs_get_wblocks(sb, NULL, flags & WF_LOCK); + logfs_get_wblocks(sb, page, flags & WF_LOCK); ret = do_write_inode(inode); - logfs_put_wblocks(sb, NULL, flags & WF_LOCK); + logfs_put_wblocks(sb, page, flags & WF_LOCK); return ret; } -- cgit v1.2.3 From bbe01387129f76fa4bec17904eb14c4bdc3c179f Mon Sep 17 00:00:00 2001 From: Joern Engel Date: Fri, 5 Aug 2011 11:13:30 +0200 Subject: logfs: Grow inode in delete path Can be necessary if an inode gets deleted (through 
-ENOSPC) before being written. Might be better to move this into logfs_write_rec(), but for now go with the stupid&safe patch. Signed-off-by: Joern Engel --- fs/logfs/readwrite.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/logfs/readwrite.c') diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c index 88284c67ba97..4153e65b0148 100644 --- a/fs/logfs/readwrite.c +++ b/fs/logfs/readwrite.c @@ -1576,11 +1576,15 @@ int logfs_write_buf(struct inode *inode, struct page *page, long flags) static int __logfs_delete(struct inode *inode, struct page *page) { long flags = WF_DELETE; + int err; inode->i_ctime = inode->i_mtime = CURRENT_TIME; if (page->index < I0_BLOCKS) return logfs_write_direct(inode, page, flags); + err = grow_inode(inode, page->index, 0); + if (err) + return err; return logfs_write_rec(inode, page, page->index, 0, flags); } -- cgit v1.2.3