diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-08 15:11:48 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-05-08 15:11:48 -0700 |
commit | 942d33da999b86821c9aee9615fcb81207ee04c7 (patch) | |
tree | db14ab92982f936c0a2ea2202f5e301310f33bdd /fs/f2fs/node.c | |
parent | 246e6a0d781091c4657890ffa497c2576bd99095 (diff) | |
parent | 59bbd474abb9dd6a0c1a74df758ec29c7a8b150f (diff) | |
download | linux-942d33da999b86821c9aee9615fcb81207ee04c7.tar.bz2 |
Merge tag 'f2fs-for-v3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs
Pull f2fs updates from Jaegeuk Kim:
"This patch-set includes the following major enhancement patches.
- introduce a new gloabl lock scheme
- add tracepoints on several major functions
- fix the overall cleaning process focused on victim selection
- apply the block plugging to merge IOs as much as possible
- enhance management of free nids and its list
- enhance the readahead mode for node pages
- address several cretical deadlock conditions
- reduce lock_page calls
The other minor bug fixes and enhancements are as follows.
- calculation mistakes: overflow
- bio types: READ, READA, and READ_SYNC
- fix the recovery flow, data races, and null pointer errors"
* tag 'f2fs-for-v3.10' of git://git.kernel.org/pub/scm/linux/kernel/git/jaegeuk/f2fs: (68 commits)
f2fs: cover free_nid management with spin_lock
f2fs: optimize scan_nat_page()
f2fs: code cleanup for scan_nat_page() and build_free_nids()
f2fs: bugfix for alloc_nid_failed()
f2fs: recover when journal contains deleted files
f2fs: continue to mount after failing recovery
f2fs: avoid deadlock during evict after f2fs_gc
f2fs: modify the number of issued pages to merge IOs
f2fs: remove useless #include <linux/proc_fs.h> as we're now using sysfs as debug entry.
f2fs: fix inconsistent using of NM_WOUT_THRESHOLD
f2fs: check truncation of mapping after lock_page
f2fs: enhance alloc_nid and build_free_nids flows
f2fs: add a tracepoint on f2fs_new_inode
f2fs: check nid == 0 in add_free_nid
f2fs: add REQ_META about metadata requests for submit
f2fs: give a chance to merge IOs by IO scheduler
f2fs: avoid frequent background GC
f2fs: add tracepoints to debug checkpoint request
f2fs: add tracepoints for write page operations
f2fs: add tracepoints to debug the block allocation
...
Diffstat (limited to 'fs/f2fs/node.c')
-rw-r--r-- | fs/f2fs/node.c | 411 |
1 files changed, 226 insertions, 185 deletions
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index e275218904ed..3df43b4efd89 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -19,6 +19,7 @@ #include "f2fs.h" #include "node.h" #include "segment.h" +#include <trace/events/f2fs.h> static struct kmem_cache *nat_entry_slab; static struct kmem_cache *free_nid_slab; @@ -88,10 +89,13 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) { struct address_space *mapping = sbi->meta_inode->i_mapping; struct f2fs_nm_info *nm_i = NM_I(sbi); + struct blk_plug plug; struct page *page; pgoff_t index; int i; + blk_start_plug(&plug); + for (i = 0; i < FREE_NID_PAGES; i++, nid += NAT_ENTRY_PER_BLOCK) { if (nid >= nm_i->max_nid) nid = 0; @@ -100,12 +104,16 @@ static void ra_nat_pages(struct f2fs_sb_info *sbi, int nid) page = grab_cache_page(mapping, index); if (!page) continue; - if (f2fs_readpage(sbi, page, index, READ)) { + if (PageUptodate(page)) { f2fs_put_page(page, 1); continue; } + if (f2fs_readpage(sbi, page, index, READ)) + continue; + f2fs_put_page(page, 0); } + blk_finish_plug(&plug); } static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n) @@ -236,7 +244,7 @@ static int try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink) { struct f2fs_nm_info *nm_i = NM_I(sbi); - if (nm_i->nat_cnt < 2 * NM_WOUT_THRESHOLD) + if (nm_i->nat_cnt <= NM_WOUT_THRESHOLD) return 0; write_lock(&nm_i->nat_tree_lock); @@ -320,15 +328,14 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4]) noffset[0] = 0; if (block < direct_index) { - offset[n++] = block; - level = 0; + offset[n] = block; goto got; } block -= direct_index; if (block < direct_blks) { offset[n++] = NODE_DIR1_BLOCK; noffset[n] = 1; - offset[n++] = block; + offset[n] = block; level = 1; goto got; } @@ -336,7 +343,7 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4]) if (block < direct_blks) { offset[n++] = NODE_DIR2_BLOCK; noffset[n] = 2; - offset[n++] = block; + offset[n] = block; level = 1; goto got; } @@ -346,7 +353,7 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4]) noffset[n] = 3; offset[n++] = block / direct_blks; noffset[n] = 4 + offset[n - 1]; - offset[n++] = block % direct_blks; + offset[n] = block % direct_blks; level = 2; goto got; } @@ -356,7 +363,7 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4]) noffset[n] = 4 + dptrs_per_blk; offset[n++] = block / direct_blks; noffset[n] = 5 + dptrs_per_blk + offset[n - 1]; - offset[n++] = block % direct_blks; + offset[n] = block % direct_blks; level = 2; goto got; } @@ -371,7 +378,7 @@ static int get_node_path(long block, int offset[4], unsigned int noffset[4]) noffset[n] = 7 + (dptrs_per_blk * 2) + offset[n - 2] * (dptrs_per_blk + 1) + offset[n - 1]; - offset[n++] = block % direct_blks; + offset[n] = block % direct_blks; level = 3; goto got; } else { @@ -383,8 +390,11 @@ got: /* * Caller should call f2fs_put_dnode(dn). + * Also, it should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op() only if ro is not set RDONLY_NODE. + * In the case of RDONLY_NODE, we don't need to care about mutex. */ -int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro) +int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode) { struct f2fs_sb_info *sbi = F2FS_SB(dn->inode->i_sb); struct page *npage[4]; @@ -403,7 +413,8 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro) return PTR_ERR(npage[0]); parent = npage[0]; - nids[1] = get_nid(parent, offset[0], true); + if (level != 0) + nids[1] = get_nid(parent, offset[0], true); dn->inode_page = npage[0]; dn->inode_page_locked = true; @@ -411,12 +422,9 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro) for (i = 1; i <= level; i++) { bool done = false; - if (!nids[i] && !ro) { - mutex_lock_op(sbi, NODE_NEW); - + if (!nids[i] && mode == ALLOC_NODE) { /* alloc new node */ if (!alloc_nid(sbi, &(nids[i]))) { - mutex_unlock_op(sbi, NODE_NEW); err = -ENOSPC; goto release_pages; } @@ -425,16 +433,14 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int ro) npage[i] = new_node_page(dn, noffset[i]); if (IS_ERR(npage[i])) { alloc_nid_failed(sbi, nids[i]); - mutex_unlock_op(sbi, NODE_NEW); err = PTR_ERR(npage[i]); goto release_pages; } set_nid(parent, offset[i - 1], nids[i], i == 1); alloc_nid_done(sbi, nids[i]); - mutex_unlock_op(sbi, NODE_NEW); done = true; - } else if (ro && i == level && level > 1) { + } else if (mode == LOOKUP_NODE_RA && i == level && level > 1) { npage[i] = get_node_page_ra(parent, offset[i - 1]); if (IS_ERR(npage[i])) { err = PTR_ERR(npage[i]); @@ -507,6 +513,7 @@ invalidate: f2fs_put_page(dn->node_page, 1); dn->node_page = NULL; + trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr); } static int truncate_dnode(struct dnode_of_data *dn) @@ -547,9 +554,13 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, if (dn->nid == 0) return NIDS_PER_BLOCK + 1; + trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr); + page = get_node_page(sbi, dn->nid); - if (IS_ERR(page)) + if (IS_ERR(page)) { + trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page)); return PTR_ERR(page); + } rn = (struct f2fs_node *)page_address(page); if (depth < 3) { @@ -591,10 +602,12 @@ static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs, } else { f2fs_put_page(page, 1); } + trace_f2fs_truncate_nodes_exit(dn->inode, freed); return freed; out_err: f2fs_put_page(page, 1); + trace_f2fs_truncate_nodes_exit(dn->inode, ret); return ret; } @@ -649,6 +662,9 @@ static int truncate_partial_nodes(struct dnode_of_data *dn, fail: for (i = depth - 3; i >= 0; i--) f2fs_put_page(pages[i], 1); + + trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err); + return err; } @@ -658,6 +674,7 @@ fail: int truncate_inode_blocks(struct inode *inode, pgoff_t from) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); + struct address_space *node_mapping = sbi->node_inode->i_mapping; int err = 0, cont = 1; int level, offset[4], noffset[4]; unsigned int nofs = 0; @@ -665,11 +682,15 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from) struct dnode_of_data dn; struct page *page; - level = get_node_path(from, offset, noffset); + trace_f2fs_truncate_inode_blocks_enter(inode, from); + level = get_node_path(from, offset, noffset); +restart: page = get_node_page(sbi, inode->i_ino); - if (IS_ERR(page)) + if (IS_ERR(page)) { + trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page)); return PTR_ERR(page); + } set_new_dnode(&dn, inode, page, NULL, 0); unlock_page(page); @@ -728,6 +749,10 @@ skip_partial: if (offset[1] == 0 && rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK]) { lock_page(page); + if (page->mapping != node_mapping) { + f2fs_put_page(page, 1); + goto restart; + } wait_on_page_writeback(page); rn->i.i_nid[offset[0] - NODE_DIR1_BLOCK] = 0; set_page_dirty(page); @@ -739,9 +764,14 @@ skip_partial: } fail: f2fs_put_page(page, 0); + trace_f2fs_truncate_inode_blocks_exit(inode, err); return err > 0 ? 0 : err; } +/* + * Caller should grab and release a mutex by calling mutex_lock_op() and + * mutex_unlock_op(). + */ int remove_inode_page(struct inode *inode) { struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); @@ -749,21 +779,16 @@ int remove_inode_page(struct inode *inode) nid_t ino = inode->i_ino; struct dnode_of_data dn; - mutex_lock_op(sbi, NODE_TRUNC); page = get_node_page(sbi, ino); - if (IS_ERR(page)) { - mutex_unlock_op(sbi, NODE_TRUNC); + if (IS_ERR(page)) return PTR_ERR(page); - } if (F2FS_I(inode)->i_xattr_nid) { nid_t nid = F2FS_I(inode)->i_xattr_nid; struct page *npage = get_node_page(sbi, nid); - if (IS_ERR(npage)) { - mutex_unlock_op(sbi, NODE_TRUNC); + if (IS_ERR(npage)) return PTR_ERR(npage); - } F2FS_I(inode)->i_xattr_nid = 0; set_new_dnode(&dn, inode, page, npage, nid); @@ -775,23 +800,18 @@ int remove_inode_page(struct inode *inode) BUG_ON(inode->i_blocks != 0 && inode->i_blocks != 1); set_new_dnode(&dn, inode, page, page, ino); truncate_node(&dn); - - mutex_unlock_op(sbi, NODE_TRUNC); return 0; } int new_inode_page(struct inode *inode, const struct qstr *name) { - struct f2fs_sb_info *sbi = F2FS_SB(inode->i_sb); struct page *page; struct dnode_of_data dn; /* allocate inode page for new inode */ set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino); - mutex_lock_op(sbi, NODE_NEW); page = new_node_page(&dn, 0); init_dent_inode(name, page); - mutex_unlock_op(sbi, NODE_NEW); if (IS_ERR(page)) return PTR_ERR(page); f2fs_put_page(page, 1); @@ -844,6 +864,12 @@ fail: return ERR_PTR(err); } +/* + * Caller should do after getting the following values. + * 0: f2fs_put_page(page, 0) + * LOCKED_PAGE: f2fs_put_page(page, 1) + * error: nothing + */ static int read_node_page(struct page *page, int type) { struct f2fs_sb_info *sbi = F2FS_SB(page->mapping->host->i_sb); @@ -851,8 +877,14 @@ static int read_node_page(struct page *page, int type) get_node_info(sbi, page->index, &ni); - if (ni.blk_addr == NULL_ADDR) + if (ni.blk_addr == NULL_ADDR) { + f2fs_put_page(page, 1); return -ENOENT; + } + + if (PageUptodate(page)) + return LOCKED_PAGE; + return f2fs_readpage(sbi, page, ni.blk_addr, type); } @@ -863,40 +895,53 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid) { struct address_space *mapping = sbi->node_inode->i_mapping; struct page *apage; + int err; apage = find_get_page(mapping, nid); - if (apage && PageUptodate(apage)) - goto release_out; + if (apage && PageUptodate(apage)) { + f2fs_put_page(apage, 0); + return; + } f2fs_put_page(apage, 0); apage = grab_cache_page(mapping, nid); if (!apage) return; - if (read_node_page(apage, READA)) - unlock_page(apage); - -release_out: - f2fs_put_page(apage, 0); + err = read_node_page(apage, READA); + if (err == 0) + f2fs_put_page(apage, 0); + else if (err == LOCKED_PAGE) + f2fs_put_page(apage, 1); return; } struct page *get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid) { - int err; - struct page *page; struct address_space *mapping = sbi->node_inode->i_mapping; - + struct page *page; + int err; +repeat: page = grab_cache_page(mapping, nid); if (!page) return ERR_PTR(-ENOMEM); err = read_node_page(page, READ_SYNC); - if (err) { - f2fs_put_page(page, 1); + if (err < 0) return ERR_PTR(err); - } + else if (err == LOCKED_PAGE) + goto got_it; + lock_page(page); + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + if (page->mapping != mapping) { + f2fs_put_page(page, 1); + goto repeat; + } +got_it: BUG_ON(nid != nid_of_node(page)); mark_page_accessed(page); return page; @@ -910,31 +955,27 @@ struct page *get_node_page_ra(struct page *parent, int start) { struct f2fs_sb_info *sbi = F2FS_SB(parent->mapping->host->i_sb); struct address_space *mapping = sbi->node_inode->i_mapping; - int i, end; - int err = 0; - nid_t nid; + struct blk_plug plug; struct page *page; + int err, i, end; + nid_t nid; /* First, try getting the desired direct node. */ nid = get_nid(parent, start, false); if (!nid) return ERR_PTR(-ENOENT); - - page = find_get_page(mapping, nid); - if (page && PageUptodate(page)) - goto page_hit; - f2fs_put_page(page, 0); - repeat: page = grab_cache_page(mapping, nid); if (!page) return ERR_PTR(-ENOMEM); - err = read_node_page(page, READA); - if (err) { - f2fs_put_page(page, 1); + err = read_node_page(page, READ_SYNC); + if (err < 0) return ERR_PTR(err); - } + else if (err == LOCKED_PAGE) + goto page_hit; + + blk_start_plug(&plug); /* Then, try readahead for siblings of the desired node */ end = start + MAX_RA_NODE; @@ -946,18 +987,19 @@ repeat: ra_node_page(sbi, nid); } -page_hit: - lock_page(page); - if (PageError(page)) { - f2fs_put_page(page, 1); - return ERR_PTR(-EIO); - } + blk_finish_plug(&plug); - /* Has the page been truncated? */ + lock_page(page); if (page->mapping != mapping) { f2fs_put_page(page, 1); goto repeat; } +page_hit: + if (!PageUptodate(page)) { + f2fs_put_page(page, 1); + return ERR_PTR(-EIO); + } + mark_page_accessed(page); return page; } @@ -972,7 +1014,7 @@ void sync_inode_page(struct dnode_of_data *dn) if (!dn->inode_page_locked) unlock_page(dn->inode_page); } else { - f2fs_write_inode(dn->inode, NULL); + update_inode_page(dn->inode); } } @@ -1087,17 +1129,8 @@ static int f2fs_write_node_page(struct page *page, block_t new_addr; struct node_info ni; - if (wbc->for_reclaim) { - dec_page_count(sbi, F2FS_DIRTY_NODES); - wbc->pages_skipped++; - set_page_dirty(page); - return AOP_WRITEPAGE_ACTIVATE; - } - wait_on_page_writeback(page); - mutex_lock_op(sbi, NODE_WRITE); - /* get old block addr of this node page */ nid = nid_of_node(page); BUG_ON(page->index != nid); @@ -1105,17 +1138,25 @@ static int f2fs_write_node_page(struct page *page, get_node_info(sbi, nid, &ni); /* This page is already truncated */ - if (ni.blk_addr == NULL_ADDR) + if (ni.blk_addr == NULL_ADDR) { + dec_page_count(sbi, F2FS_DIRTY_NODES); + unlock_page(page); return 0; + } - set_page_writeback(page); + if (wbc->for_reclaim) { + dec_page_count(sbi, F2FS_DIRTY_NODES); + wbc->pages_skipped++; + set_page_dirty(page); + return AOP_WRITEPAGE_ACTIVATE; + } - /* insert node offset */ + mutex_lock(&sbi->node_write); + set_page_writeback(page); write_node_page(sbi, page, nid, ni.blk_addr, &new_addr); set_node_addr(sbi, &ni, new_addr); dec_page_count(sbi, F2FS_DIRTY_NODES); - - mutex_unlock_op(sbi, NODE_WRITE); + mutex_unlock(&sbi->node_write); unlock_page(page); return 0; } @@ -1130,12 +1171,11 @@ static int f2fs_write_node_pages(struct address_space *mapping, struct writeback_control *wbc) { struct f2fs_sb_info *sbi = F2FS_SB(mapping->host->i_sb); - struct block_device *bdev = sbi->sb->s_bdev; long nr_to_write = wbc->nr_to_write; /* First check balancing cached NAT entries */ if (try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK)) { - write_checkpoint(sbi, false); + f2fs_sync_fs(sbi->sb, true); return 0; } @@ -1144,10 +1184,9 @@ static int f2fs_write_node_pages(struct address_space *mapping, return 0; /* if mounting is failed, skip writing node pages */ - wbc->nr_to_write = bio_get_nr_vecs(bdev); + wbc->nr_to_write = max_hw_blocks(sbi); sync_node_pages(sbi, 0, wbc); - wbc->nr_to_write = nr_to_write - - (bio_get_nr_vecs(bdev) - wbc->nr_to_write); + wbc->nr_to_write = nr_to_write - (max_hw_blocks(sbi) - wbc->nr_to_write); return 0; } @@ -1178,7 +1217,7 @@ static void f2fs_invalidate_node_page(struct page *page, unsigned long offset) static int f2fs_release_node_page(struct page *page, gfp_t wait) { ClearPagePrivate(page); - return 0; + return 1; } /* @@ -1195,14 +1234,13 @@ const struct address_space_operations f2fs_node_aops = { static struct free_nid *__lookup_free_nid_list(nid_t n, struct list_head *head) { struct list_head *this; - struct free_nid *i = NULL; + struct free_nid *i; list_for_each(this, head) { i = list_entry(this, struct free_nid, list); if (i->nid == n) - break; - i = NULL; + return i; } - return i; + return NULL; } static void __del_from_free_nid_list(struct free_nid *i) @@ -1211,11 +1249,29 @@ static void __del_from_free_nid_list(struct free_nid *i) kmem_cache_free(free_nid_slab, i); } -static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) +static int add_free_nid(struct f2fs_nm_info *nm_i, nid_t nid, bool build) { struct free_nid *i; + struct nat_entry *ne; + bool allocated = false; if (nm_i->fcnt > 2 * MAX_FREE_NIDS) + return -1; + + /* 0 nid should not be used */ + if (nid == 0) + return 0; + + if (!build) + goto retry; + + /* do not add allocated nids */ + read_lock(&nm_i->nat_tree_lock); + ne = __lookup_nat_cache(nm_i, nid); + if (ne && nat_get_blkaddr(ne) != NULL_ADDR) + allocated = true; + read_unlock(&nm_i->nat_tree_lock); + if (allocated) return 0; retry: i = kmem_cache_alloc(free_nid_slab, GFP_NOFS); @@ -1250,63 +1306,59 @@ static void remove_free_nid(struct f2fs_nm_info *nm_i, nid_t nid) spin_unlock(&nm_i->free_nid_list_lock); } -static int scan_nat_page(struct f2fs_nm_info *nm_i, +static void scan_nat_page(struct f2fs_nm_info *nm_i, struct page *nat_page, nid_t start_nid) { struct f2fs_nat_block *nat_blk = page_address(nat_page); block_t blk_addr; - int fcnt = 0; int i; - /* 0 nid should not be used */ - if (start_nid == 0) - ++start_nid; - i = start_nid % NAT_ENTRY_PER_BLOCK; for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) { - blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); + + if (start_nid >= nm_i->max_nid) + break; + + blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr); BUG_ON(blk_addr == NEW_ADDR); - if (blk_addr == NULL_ADDR) - fcnt += add_free_nid(nm_i, start_nid); + if (blk_addr == NULL_ADDR) { + if (add_free_nid(nm_i, start_nid, true) < 0) + break; + } } - return fcnt; } static void build_free_nids(struct f2fs_sb_info *sbi) { - struct free_nid *fnid, *next_fnid; struct f2fs_nm_info *nm_i = NM_I(sbi); struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA); struct f2fs_summary_block *sum = curseg->sum_blk; - nid_t nid = 0; - bool is_cycled = false; - int fcnt = 0; - int i; + int i = 0; + nid_t nid = nm_i->next_scan_nid; - nid = nm_i->next_scan_nid; - nm_i->init_scan_nid = nid; + /* Enough entries */ + if (nm_i->fcnt > NAT_ENTRY_PER_BLOCK) + return; + /* readahead nat pages to be scanned */ ra_nat_pages(sbi, nid); while (1) { struct page *page = get_current_nat_page(sbi, nid); - fcnt += scan_nat_page(nm_i, page, nid); + scan_nat_page(nm_i, page, nid); f2fs_put_page(page, 1); nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK)); - - if (nid >= nm_i->max_nid) { + if (nid >= nm_i->max_nid) nid = 0; - is_cycled = true; - } - if (fcnt > MAX_FREE_NIDS) - break; - if (is_cycled && nm_i->init_scan_nid <= nid) + + if (i++ == FREE_NID_PAGES) break; } + /* go to the next free nat pages to find free nids abundantly */ nm_i->next_scan_nid = nid; /* find free nids from current sum_pages */ @@ -1315,22 +1367,11 @@ static void build_free_nids(struct f2fs_sb_info *sbi) block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr); nid = le32_to_cpu(nid_in_journal(sum, i)); if (addr == NULL_ADDR) - add_free_nid(nm_i, nid); + add_free_nid(nm_i, nid, true); else remove_free_nid(nm_i, nid); } mutex_unlock(&curseg->curseg_mutex); - - /* remove the free nids from current allocated nids */ - list_for_each_entry_safe(fnid, next_fnid, &nm_i->free_nid_list, list) { - struct nat_entry *ne; - - read_lock(&nm_i->nat_tree_lock); - ne = __lookup_nat_cache(nm_i, fnid->nid); - if (ne && nat_get_blkaddr(ne) != NULL_ADDR) - remove_free_nid(nm_i, fnid->nid); - read_unlock(&nm_i->nat_tree_lock); - } } /* @@ -1344,41 +1385,36 @@ bool alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid) struct free_nid *i = NULL; struct list_head *this; retry: - mutex_lock(&nm_i->build_lock); - if (!nm_i->fcnt) { - /* scan NAT in order to build free nid list */ - build_free_nids(sbi); - if (!nm_i->fcnt) { - mutex_unlock(&nm_i->build_lock); - return false; - } - } - mutex_unlock(&nm_i->build_lock); + if (sbi->total_valid_node_count + 1 >= nm_i->max_nid) + return false; - /* - * We check fcnt again since previous check is racy as - * we didn't hold free_nid_list_lock. So other thread - * could consume all of free nids. - */ spin_lock(&nm_i->free_nid_list_lock); - if (!nm_i->fcnt) { - spin_unlock(&nm_i->free_nid_list_lock); - goto retry; - } - BUG_ON(list_empty(&nm_i->free_nid_list)); - list_for_each(this, &nm_i->free_nid_list) { - i = list_entry(this, struct free_nid, list); - if (i->state == NID_NEW) - break; - } + /* We should not use stale free nids created by build_free_nids */ + if (nm_i->fcnt && !sbi->on_build_free_nids) { + BUG_ON(list_empty(&nm_i->free_nid_list)); + list_for_each(this, &nm_i->free_nid_list) { + i = list_entry(this, struct free_nid, list); + if (i->state == NID_NEW) + break; + } - BUG_ON(i->state != NID_NEW); - *nid = i->nid; - i->state = NID_ALLOC; - nm_i->fcnt--; + BUG_ON(i->state != NID_NEW); + *nid = i->nid; + i->state = NID_ALLOC; + nm_i->fcnt--; + spin_unlock(&nm_i->free_nid_list_lock); + return true; + } spin_unlock(&nm_i->free_nid_list_lock); - return true; + + /* Let's scan nat pages and its caches to get free nids */ + mutex_lock(&nm_i->build_lock); + sbi->on_build_free_nids = 1; + build_free_nids(sbi); + sbi->on_build_free_nids = 0; + mutex_unlock(&nm_i->build_lock); + goto retry; } /* @@ -1391,10 +1427,8 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) spin_lock(&nm_i->free_nid_list_lock); i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); - if (i) { - BUG_ON(i->state != NID_ALLOC); - __del_from_free_nid_list(i); - } + BUG_ON(!i || i->state != NID_ALLOC); + __del_from_free_nid_list(i); spin_unlock(&nm_i->free_nid_list_lock); } @@ -1403,8 +1437,19 @@ void alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid) */ void alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid) { - alloc_nid_done(sbi, nid); - add_free_nid(NM_I(sbi), nid); + struct f2fs_nm_info *nm_i = NM_I(sbi); + struct free_nid *i; + + spin_lock(&nm_i->free_nid_list_lock); + i = __lookup_free_nid_list(nid, &nm_i->free_nid_list); + BUG_ON(!i || i->state != NID_ALLOC); + if (nm_i->fcnt > 2 * MAX_FREE_NIDS) { + __del_from_free_nid_list(i); + } else { + i->state = NID_NEW; + nm_i->fcnt++; + } + spin_unlock(&nm_i->free_nid_list_lock); } void recover_node_page(struct f2fs_sb_info *sbi, struct page *page, @@ -1475,23 +1520,24 @@ int restore_node_summary(struct f2fs_sb_info *sbi, sum_entry = &sum->entries[0]; for (i = 0; i < last_offset; i++, sum_entry++) { + /* + * In order to read next node page, + * we must clear PageUptodate flag. + */ + ClearPageUptodate(page); + if (f2fs_readpage(sbi, page, addr, READ_SYNC)) goto out; + lock_page(page); rn = (struct f2fs_node *)page_address(page); sum_entry->nid = rn->footer.nid; sum_entry->version = 0; sum_entry->ofs_in_node = 0; addr++; - - /* - * In order to read next node page, - * we must clear PageUptodate flag. - */ - ClearPageUptodate(page); } -out: unlock_page(page); +out: __free_pages(page, 0); return 0; } @@ -1614,13 +1660,11 @@ flush_now: nid_in_journal(sum, offset) = cpu_to_le32(nid); } - if (nat_get_blkaddr(ne) == NULL_ADDR) { + if (nat_get_blkaddr(ne) == NULL_ADDR && + add_free_nid(NM_I(sbi), nid, false) <= 0) { write_lock(&nm_i->nat_tree_lock); __del_from_nat_cache(nm_i, ne); write_unlock(&nm_i->nat_tree_lock); - - /* We can reuse this freed nid at this point */ - add_free_nid(NM_I(sbi), nid); } else { write_lock(&nm_i->nat_tree_lock); __clear_nat_cache_dirty(nm_i, ne); @@ -1661,19 +1705,16 @@ static int init_node_manager(struct f2fs_sb_info *sbi) spin_lock_init(&nm_i->free_nid_list_lock); rwlock_init(&nm_i->nat_tree_lock); - nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); - nm_i->init_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid); - - nm_i->nat_bitmap = kzalloc(nm_i->bitmap_size, GFP_KERNEL); - if (!nm_i->nat_bitmap) - return -ENOMEM; + nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP); version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP); if (!version_bitmap) return -EFAULT; - /* copy version bitmap */ - memcpy(nm_i->nat_bitmap, version_bitmap, nm_i->bitmap_size); + nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size, + GFP_KERNEL); + if (!nm_i->nat_bitmap) + return -ENOMEM; return 0; } |