diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-02 12:21:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-06-02 12:21:36 -0700 |
commit | 94709049fb8442fb2f7b91fbec3c2897a75e18df (patch) | |
tree | f1d38ea6bc9db6d5a15ba4821c83abeb7ce7fd35 /fs | |
parent | 17839856fd588f4ab6b789f482ed3ffd7c403e1f (diff) | |
parent | 4fba37586e4e73f9f9a855e610e151ef7da2b481 (diff) | |
download | linux-94709049fb8442fb2f7b91fbec3c2897a75e18df.tar.bz2 |
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
"A few little subsystems and a start of a lot of MM patches.
Subsystems affected by this patch series: squashfs, ocfs2, parisc,
vfs. With mm subsystems: slab-generic, slub, debug, pagecache, gup,
swap, memcg, pagemap, memory-failure, vmalloc, kasan"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (128 commits)
kasan: move kasan_report() into report.c
mm/mm_init.c: report kasan-tag information stored in page->flags
ubsan: entirely disable alignment checks under UBSAN_TRAP
kasan: fix clang compilation warning due to stack protector
x86/mm: remove vmalloc faulting
mm: remove vmalloc_sync_(un)mappings()
x86/mm/32: implement arch_sync_kernel_mappings()
x86/mm/64: implement arch_sync_kernel_mappings()
mm/ioremap: track which page-table levels were modified
mm/vmalloc: track which page-table levels were modified
mm: add functions to track page directory modifications
s390: use __vmalloc_node in stack_alloc
powerpc: use __vmalloc_node in alloc_vm_stack
arm64: use __vmalloc_node in arch_alloc_vmap_stack
mm: remove vmalloc_user_node_flags
mm: switch the test_vmalloc module to use __vmalloc_node
mm: remove __vmalloc_node_flags_caller
mm: remove both instances of __vmalloc_node_flags
mm: remove the prot argument to __vmalloc_node
mm: remove the pgprot argument to __vmalloc
...
Diffstat (limited to 'fs')
72 files changed, 642 insertions, 880 deletions
diff --git a/fs/block_dev.c b/fs/block_dev.c index 93672c3f1c78..f05e2f2c898d 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -614,10 +614,9 @@ static int blkdev_readpage(struct file * file, struct page * page) return block_read_full_page(page, blkdev_get_block); } -static int blkdev_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void blkdev_readahead(struct readahead_control *rac) { - return mpage_readpages(mapping, pages, nr_pages, blkdev_get_block); + mpage_readahead(rac, blkdev_get_block); } static int blkdev_write_begin(struct file *file, struct address_space *mapping, @@ -2085,7 +2084,7 @@ static int blkdev_writepages(struct address_space *mapping, static const struct address_space_operations def_blk_aops = { .readpage = blkdev_readpage, - .readpages = blkdev_readpages, + .readahead = blkdev_readahead, .writepage = blkdev_writepage, .write_begin = blkdev_write_begin, .write_end = blkdev_write_end, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d10c7be10f3b..7278789ff8a7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -980,9 +980,7 @@ static void btree_invalidatepage(struct page *page, unsigned int offset, btrfs_warn(BTRFS_I(page->mapping->host)->root->fs_info, "page private not zero on page %llu", (unsigned long long)page_offset(page)); - ClearPagePrivate(page); - set_page_private(page, 0); - put_page(page); + detach_page_private(page); } } diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 39e45b8a5031..e12eb32d9e17 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -3076,22 +3076,16 @@ static int submit_extent_page(unsigned int opf, static void attach_extent_buffer_page(struct extent_buffer *eb, struct page *page) { - if (!PagePrivate(page)) { - SetPagePrivate(page); - get_page(page); - set_page_private(page, (unsigned long)eb); - } else { + if (!PagePrivate(page)) + attach_page_private(page, eb); + else WARN_ON(page->private != (unsigned long)eb); - } } void set_page_extent_mapped(struct page *page) { - if (!PagePrivate(page)) { - SetPagePrivate(page); - get_page(page); - set_page_private(page, EXTENT_PAGE_PRIVATE); - } + if (!PagePrivate(page)) + attach_page_private(page, (void *)EXTENT_PAGE_PRIVATE); } static struct extent_map * @@ -4367,51 +4361,32 @@ int extent_writepages(struct address_space *mapping, return ret; } -int extent_readpages(struct address_space *mapping, struct list_head *pages, - unsigned nr_pages) +void extent_readahead(struct readahead_control *rac) { struct bio *bio = NULL; unsigned long bio_flags = 0; struct page *pagepool[16]; struct extent_map *em_cached = NULL; - int nr = 0; u64 prev_em_start = (u64)-1; + int nr; - while (!list_empty(pages)) { - u64 contig_end = 0; - - for (nr = 0; nr < ARRAY_SIZE(pagepool) && !list_empty(pages);) { - struct page *page = lru_to_page(pages); - - prefetchw(&page->flags); - list_del(&page->lru); - if (add_to_page_cache_lru(page, mapping, page->index, - readahead_gfp_mask(mapping))) { - put_page(page); - break; - } - - pagepool[nr++] = page; - contig_end = page_offset(page) + PAGE_SIZE - 1; - } + while ((nr = readahead_page_batch(rac, pagepool))) { + u64 contig_start = page_offset(pagepool[0]); + u64 contig_end = page_offset(pagepool[nr - 1]) + PAGE_SIZE - 1; - if (nr) { - u64 contig_start = page_offset(pagepool[0]); + ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end); - ASSERT(contig_start + nr * PAGE_SIZE - 1 == contig_end); - - contiguous_readpages(pagepool, nr, contig_start, - contig_end, &em_cached, &bio, &bio_flags, - &prev_em_start); - } + contiguous_readpages(pagepool, nr, contig_start, contig_end, + &em_cached, &bio, &bio_flags, &prev_em_start); } if (em_cached) free_extent_map(em_cached); - if (bio) - return submit_one_bio(bio, 0, bio_flags); - return 0; + if (bio) { + if (submit_one_bio(bio, 0, bio_flags)) + return; + } } /* @@ -4929,10 +4904,7 @@ static void btrfs_release_extent_buffer_pages(struct extent_buffer *eb) * We need to make sure we haven't be attached * to a new eb. */ - ClearPagePrivate(page); - set_page_private(page, 0); - /* One for the page private */ - put_page(page); + detach_page_private(page); } if (mapped) diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 2ed65bd0760e..25594e09fdcd 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -198,8 +198,7 @@ int extent_writepages(struct address_space *mapping, struct writeback_control *wbc); int btree_write_cache_pages(struct address_space *mapping, struct writeback_control *wbc); -int extent_readpages(struct address_space *mapping, struct list_head *pages, - unsigned nr_pages); +void extent_readahead(struct readahead_control *rac); int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len); void set_page_extent_mapped(struct page *page); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 320d1062068d..8b3489f229c7 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4856,8 +4856,8 @@ static void evict_inode_truncate_pages(struct inode *inode) /* * Keep looping until we have no more ranges in the io tree. - * We can have ongoing bios started by readpages (called from readahead) - * that have their endio callback (extent_io.c:end_bio_extent_readpage) + * We can have ongoing bios started by readahead that have + * their endio callback (extent_io.c:end_bio_extent_readpage) * still in progress (unlocked the pages in the bio but did not yet * unlocked the ranges in the io tree). Therefore this means some * ranges can still be locked and eviction started because before @@ -7050,11 +7050,11 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend, * for it to complete) and then invalidate the pages for * this range (through invalidate_inode_pages2_range()), * but that can lead us to a deadlock with a concurrent - * call to readpages() (a buffered read or a defrag call + * call to readahead (a buffered read or a defrag call * triggered a readahead) on a page lock due to an * ordered dio extent we created before but did not have * yet a corresponding bio submitted (whence it can not - * complete), which makes readpages() wait for that + * complete), which makes readahead wait for that * ordered extent to complete while holding a lock on * that page. */ @@ -8293,21 +8293,16 @@ static int btrfs_writepages(struct address_space *mapping, return extent_writepages(mapping, wbc); } -static int -btrfs_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void btrfs_readahead(struct readahead_control *rac) { - return extent_readpages(mapping, pages, nr_pages); + extent_readahead(rac); } static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags) { int ret = try_release_extent_mapping(page, gfp_flags); - if (ret == 1) { - ClearPagePrivate(page); - set_page_private(page, 0); - put_page(page); - } + if (ret == 1) + detach_page_private(page); return ret; } @@ -8329,14 +8324,8 @@ static int btrfs_migratepage(struct address_space *mapping, if (ret != MIGRATEPAGE_SUCCESS) return ret; - if (page_has_private(page)) { - ClearPagePrivate(page); - get_page(newpage); - set_page_private(newpage, page_private(page)); - set_page_private(page, 0); - put_page(page); - SetPagePrivate(newpage); - } + if (page_has_private(page)) + attach_page_private(newpage, detach_page_private(page)); if (PagePrivate2(page)) { ClearPagePrivate2(page); @@ -8458,11 +8447,7 @@ again: } ClearPageChecked(page); - if (PagePrivate(page)) { - ClearPagePrivate(page); - set_page_private(page, 0); - put_page(page); - } + detach_page_private(page); } /* @@ -10553,7 +10538,7 @@ static const struct address_space_operations btrfs_aops = { .readpage = btrfs_readpage, .writepage = btrfs_writepage, .writepages = btrfs_writepages, - .readpages = btrfs_readpages, + .readahead = btrfs_readahead, .direct_IO = btrfs_direct_IO, .invalidatepage = btrfs_invalidatepage, .releasepage = btrfs_releasepage, diff --git a/fs/buffer.c b/fs/buffer.c index a60f60396cfa..64fe82ec65ff 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -123,14 +123,6 @@ void __wait_on_buffer(struct buffer_head * bh) } EXPORT_SYMBOL(__wait_on_buffer); -static void -__clear_page_buffers(struct page *page) -{ - ClearPagePrivate(page); - set_page_private(page, 0); - put_page(page); -} - static void buffer_io_error(struct buffer_head *bh, char *msg) { if (!test_bit(BH_Quiet, &bh->b_state)) @@ -906,7 +898,7 @@ link_dev_buffers(struct page *page, struct buffer_head *head) bh = bh->b_this_page; } while (bh); tail->b_this_page = head; - attach_page_buffers(page, head); + attach_page_private(page, head); } static sector_t blkdev_max_block(struct block_device *bdev, unsigned int size) @@ -1154,12 +1146,19 @@ EXPORT_SYMBOL(mark_buffer_dirty); void mark_buffer_write_io_error(struct buffer_head *bh) { + struct super_block *sb; + set_buffer_write_io_error(bh); /* FIXME: do we need to set this in both places? */ if (bh->b_page && bh->b_page->mapping) mapping_set_error(bh->b_page->mapping, -EIO); if (bh->b_assoc_map) mapping_set_error(bh->b_assoc_map, -EIO); + rcu_read_lock(); + sb = READ_ONCE(bh->b_bdev->bd_super); + if (sb) + errseq_set(&sb->s_wb_err, -EIO); + rcu_read_unlock(); } EXPORT_SYMBOL(mark_buffer_write_io_error); @@ -1580,7 +1579,7 @@ void create_empty_buffers(struct page *page, bh = bh->b_this_page; } while (bh != head); } - attach_page_buffers(page, head); + attach_page_private(page, head); spin_unlock(&page->mapping->private_lock); } EXPORT_SYMBOL(create_empty_buffers); @@ -2567,7 +2566,7 @@ static void attach_nobh_buffers(struct page *page, struct buffer_head *head) bh->b_this_page = head; bh = bh->b_this_page; } while (bh != head); - attach_page_buffers(page, head); + attach_page_private(page, head); spin_unlock(&page->mapping->private_lock); } @@ -3227,7 +3226,7 @@ drop_buffers(struct page *page, struct buffer_head **buffers_to_free) bh = next; } while (bh != head); *buffers_to_free = head; - __clear_page_buffers(page); + detach_page_private(page); return 1; failed: return 0; diff --git a/fs/erofs/data.c b/fs/erofs/data.c index fc3a8d8064f8..d0542151e8c4 100644 --- a/fs/erofs/data.c +++ b/fs/erofs/data.c @@ -280,47 +280,36 @@ static int erofs_raw_access_readpage(struct file *file, struct page *page) return 0; } -static int erofs_raw_access_readpages(struct file *filp, - struct address_space *mapping, - struct list_head *pages, - unsigned int nr_pages) +static void erofs_raw_access_readahead(struct readahead_control *rac) { erofs_off_t last_block; struct bio *bio = NULL; - gfp_t gfp = readahead_gfp_mask(mapping); - struct page *page = list_last_entry(pages, struct page, lru); - - trace_erofs_readpages(mapping->host, page, nr_pages, true); + struct page *page; - for (; nr_pages; --nr_pages) { - page = list_entry(pages->prev, struct page, lru); + trace_erofs_readpages(rac->mapping->host, readahead_index(rac), + readahead_count(rac), true); + while ((page = readahead_page(rac))) { prefetchw(&page->flags); - list_del(&page->lru); - if (!add_to_page_cache_lru(page, mapping, page->index, gfp)) { - bio = erofs_read_raw_page(bio, mapping, page, - &last_block, nr_pages, true); + bio = erofs_read_raw_page(bio, rac->mapping, page, &last_block, + readahead_count(rac), true); - /* all the page errors are ignored when readahead */ - if (IS_ERR(bio)) { - pr_err("%s, readahead error at page %lu of nid %llu\n", - __func__, page->index, - EROFS_I(mapping->host)->nid); + /* all the page errors are ignored when readahead */ + if (IS_ERR(bio)) { + pr_err("%s, readahead error at page %lu of nid %llu\n", + __func__, page->index, + EROFS_I(rac->mapping->host)->nid); - bio = NULL; - } + bio = NULL; } - /* pages could still be locked */ put_page(page); } - DBG_BUGON(!list_empty(pages)); /* the rare case (end in gaps) */ if (bio) submit_bio(bio); - return 0; } static int erofs_get_block(struct inode *inode, sector_t iblock, @@ -358,7 +347,7 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block) /* for uncompressed (aligned) files and raw access for other files */ const struct address_space_operations erofs_raw_access_aops = { .readpage = erofs_raw_access_readpage, - .readpages = erofs_raw_access_readpages, + .readahead = erofs_raw_access_readahead, .bmap = erofs_bmap, }; diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c index 5d2d81940679..7628816f2453 100644 --- a/fs/erofs/decompressor.c +++ b/fs/erofs/decompressor.c @@ -274,7 +274,7 @@ static int z_erofs_decompress_generic(struct z_erofs_decompress_req *rq, i = 0; while (1) { - dst = vm_map_ram(rq->out, nrpages_out, -1, PAGE_KERNEL); + dst = vm_map_ram(rq->out, nrpages_out, -1); /* retry two more times (totally 3 times) */ if (dst || ++i >= 3) diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index c4b6c9aa87ec..187f93b4900e 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1305,28 +1305,23 @@ static bool should_decompress_synchronously(struct erofs_sb_info *sbi, return nr <= sbi->max_sync_decompress_pages; } -static int z_erofs_readpages(struct file *filp, struct address_space *mapping, - struct list_head *pages, unsigned int nr_pages) +static void z_erofs_readahead(struct readahead_control *rac) { - struct inode *const inode = mapping->host; + struct inode *const inode = rac->mapping->host; struct erofs_sb_info *const sbi = EROFS_I_SB(inode); - bool sync = should_decompress_synchronously(sbi, nr_pages); + bool sync = should_decompress_synchronously(sbi, readahead_count(rac)); struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); - gfp_t gfp = mapping_gfp_constraint(mapping, GFP_KERNEL); - struct page *head = NULL; + struct page *page, *head = NULL; LIST_HEAD(pagepool); - trace_erofs_readpages(mapping->host, lru_to_page(pages), - nr_pages, false); + trace_erofs_readpages(inode, readahead_index(rac), + readahead_count(rac), false); - f.headoffset = (erofs_off_t)lru_to_page(pages)->index << PAGE_SHIFT; - - for (; nr_pages; --nr_pages) { - struct page *page = lru_to_page(pages); + f.headoffset = readahead_pos(rac); + while ((page = readahead_page(rac))) { prefetchw(&page->flags); - list_del(&page->lru); /* * A pure asynchronous readahead is indicated if @@ -1335,11 +1330,6 @@ static int z_erofs_readpages(struct file *filp, struct address_space *mapping, */ sync &= !(PageReadahead(page) && !head); - if (add_to_page_cache_lru(page, mapping, page->index, gfp)) { - list_add(&page->lru, &pagepool); - continue; - } - set_page_private(page, (unsigned long)head); head = page; } @@ -1368,11 +1358,10 @@ static int z_erofs_readpages(struct file *filp, struct address_space *mapping, /* clean up the remaining free pages */ put_pages_list(&pagepool); - return 0; } const struct address_space_operations z_erofs_aops = { .readpage = z_erofs_readpage, - .readpages = z_erofs_readpages, + .readahead = z_erofs_readahead, }; diff --git a/fs/exfat/inode.c b/fs/exfat/inode.c index 06887492f54b..785ead346543 100644 --- a/fs/exfat/inode.c +++ b/fs/exfat/inode.c @@ -372,10 +372,9 @@ static int exfat_readpage(struct file *file, struct page *page) return mpage_readpage(page, exfat_get_block); } -static int exfat_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned int nr_pages) +static void exfat_readahead(struct readahead_control *rac) { - return mpage_readpages(mapping, pages, nr_pages, exfat_get_block); + mpage_readahead(rac, exfat_get_block); } static int exfat_writepage(struct page *page, struct writeback_control *wbc) @@ -502,7 +501,7 @@ int exfat_block_truncate_page(struct inode *inode, loff_t from) static const struct address_space_operations exfat_aops = { .readpage = exfat_readpage, - .readpages = exfat_readpages, + .readahead = exfat_readahead, .writepage = exfat_writepage, .writepages = exfat_writepages, .write_begin = exfat_write_begin, diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index c885cf7d724b..2875c0a705b5 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -877,11 +877,9 @@ static int ext2_readpage(struct file *file, struct page *page) return mpage_readpage(page, ext2_get_block); } -static int -ext2_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void ext2_readahead(struct readahead_control *rac) { - return mpage_readpages(mapping, pages, nr_pages, ext2_get_block); + mpage_readahead(rac, ext2_get_block); } static int @@ -967,7 +965,7 @@ ext2_dax_writepages(struct address_space *mapping, struct writeback_control *wbc const struct address_space_operations ext2_aops = { .readpage = ext2_readpage, - .readpages = ext2_readpages, + .readahead = ext2_readahead, .writepage = ext2_writepage, .write_begin = ext2_write_begin, .write_end = ext2_write_end, @@ -981,7 +979,7 @@ const struct address_space_operations ext2_aops = { const struct address_space_operations ext2_nobh_aops = { .readpage = ext2_readpage, - .readpages = ext2_readpages, + .readahead = ext2_readahead, .writepage = ext2_nobh_writepage, .write_begin = ext2_nobh_write_begin, .write_end = nobh_write_end, diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 3147bb0cf82a..15b062efcff1 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3317,9 +3317,8 @@ static inline void ext4_set_de_type(struct super_block *sb, } /* readpages.c */ -extern int ext4_mpage_readpages(struct address_space *mapping, - struct list_head *pages, struct page *page, - unsigned nr_pages, bool is_readahead); +extern int ext4_mpage_readpages(struct inode *inode, + struct readahead_control *rac, struct page *page); extern int __init ext4_init_post_read_processing(void); extern void ext4_exit_post_read_processing(void); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 2a4aae6acdcb..52be85f96159 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3224,23 +3224,20 @@ static int ext4_readpage(struct file *file, struct page *page) ret = ext4_readpage_inline(inode, page); if (ret == -EAGAIN) - return ext4_mpage_readpages(page->mapping, NULL, page, 1, - false); + return ext4_mpage_readpages(inode, NULL, page); return ret; } -static int -ext4_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void ext4_readahead(struct readahead_control *rac) { - struct inode *inode = mapping->host; + struct inode *inode = rac->mapping->host; - /* If the file has inline data, no need to do readpages. */ + /* If the file has inline data, no need to do readahead. */ if (ext4_has_inline_data(inode)) - return 0; + return; - return ext4_mpage_readpages(mapping, pages, NULL, nr_pages, true); + ext4_mpage_readpages(inode, rac, NULL); } static void ext4_invalidatepage(struct page *page, unsigned int offset, @@ -3605,7 +3602,7 @@ static int ext4_set_page_dirty(struct page *page) static const struct address_space_operations ext4_aops = { .readpage = ext4_readpage, - .readpages = ext4_readpages, + .readahead = ext4_readahead, .writepage = ext4_writepage, .writepages = ext4_writepages, .write_begin = ext4_write_begin, @@ -3622,7 +3619,7 @@ static const struct address_space_operations ext4_aops = { static const struct address_space_operations ext4_journalled_aops = { .readpage = ext4_readpage, - .readpages = ext4_readpages, + .readahead = ext4_readahead, .writepage = ext4_writepage, .writepages = ext4_writepages, .write_begin = ext4_write_begin, @@ -3638,7 +3635,7 @@ static const struct address_space_operations ext4_journalled_aops = { static const struct address_space_operations ext4_da_aops = { .readpage = ext4_readpage, - .readpages = ext4_readpages, + .readahead = ext4_readahead, .writepage = ext4_writepage, .writepages = ext4_writepages, .write_begin = ext4_da_write_begin, diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c index c1769afbf799..5761e9961682 100644 --- a/fs/ext4/readpage.c +++ b/fs/ext4/readpage.c @@ -7,8 +7,8 @@ * * This was originally taken from fs/mpage.c * - * The intent is the ext4_mpage_readpages() function here is intended - * to replace mpage_readpages() in the general case, not just for + * The ext4_mpage_readpages() function here is intended to + * replace mpage_readahead() in the general case, not just for * encrypted files. It has some limitations (see below), where it * will fall back to read_block_full_page(), but these limitations * should only be hit when page_size != block_size. @@ -221,14 +221,12 @@ static inline loff_t ext4_readpage_limit(struct inode *inode) return i_size_read(inode); } -int ext4_mpage_readpages(struct address_space *mapping, - struct list_head *pages, struct page *page, - unsigned nr_pages, bool is_readahead) +int ext4_mpage_readpages(struct inode *inode, + struct readahead_control *rac, struct page *page) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; - struct inode *inode = mapping->host; const unsigned blkbits = inode->i_blkbits; const unsigned blocks_per_page = PAGE_SIZE >> blkbits; const unsigned blocksize = 1 << blkbits; @@ -241,6 +239,7 @@ int ext4_mpage_readpages(struct address_space *mapping, int length; unsigned relative_block = 0; struct ext4_map_blocks map; + unsigned int nr_pages = rac ? readahead_count(rac) : 1; map.m_pblk = 0; map.m_lblk = 0; @@ -251,14 +250,9 @@ int ext4_mpage_readpages(struct address_space *mapping, int fully_mapped = 1; unsigned first_hole = blocks_per_page; - if (pages) { - page = lru_to_page(pages); - + if (rac) { + page = readahead_page(rac); prefetchw(&page->flags); - list_del(&page->lru); - if (add_to_page_cache_lru(page, mapping, page->index, - readahead_gfp_mask(mapping))) - goto next_page; } if (page_has_buffers(page)) @@ -381,7 +375,7 @@ int ext4_mpage_readpages(struct address_space *mapping, bio->bi_iter.bi_sector = blocks[0] << (blkbits - 9); bio->bi_end_io = mpage_end_io; bio_set_op_attrs(bio, REQ_OP_READ, - is_readahead ? REQ_RAHEAD : 0); + rac ? REQ_RAHEAD : 0); } length = first_hole << blkbits; @@ -406,10 +400,9 @@ int ext4_mpage_readpages(struct address_space *mapping, else unlock_page(page); next_page: - if (pages) + if (rac) put_page(page); } - BUG_ON(pages && !list_empty(pages)); if (bio) submit_bio(bio); return 0; diff --git a/fs/ext4/verity.c b/fs/ext4/verity.c index dc5ec724d889..dec1244dd062 100644 --- a/fs/ext4/verity.c +++ b/fs/ext4/verity.c @@ -342,37 +342,6 @@ static int ext4_get_verity_descriptor(struct inode *inode, void *buf, return desc_size; } -/* - * Prefetch some pages from the file's Merkle tree. - * - * This is basically a stripped-down version of __do_page_cache_readahead() - * which works on pages past i_size. - */ -static void ext4_merkle_tree_readahead(struct address_space *mapping, - pgoff_t start_index, unsigned long count) -{ - LIST_HEAD(pages); - unsigned int nr_pages = 0; - struct page *page; - pgoff_t index; - struct blk_plug plug; - - for (index = start_index; index < start_index + count; index++) { - page = xa_load(&mapping->i_pages, index); - if (!page || xa_is_value(page)) { - page = __page_cache_alloc(readahead_gfp_mask(mapping)); - if (!page) - break; - page->index = index; - list_add(&page->lru, &pages); - nr_pages++; - } - } - blk_start_plug(&plug); - ext4_mpage_readpages(mapping, &pages, NULL, nr_pages, true); - blk_finish_plug(&plug); -} - static struct page *ext4_read_merkle_tree_page(struct inode *inode, pgoff_t index, unsigned long num_ra_pages) @@ -386,8 +355,8 @@ static struct page *ext4_read_merkle_tree_page(struct inode *inode, if (page) put_page(page); else if (num_ra_pages > 1) - ext4_merkle_tree_readahead(inode->i_mapping, index, - num_ra_pages); + page_cache_readahead_unbounded(inode->i_mapping, NULL, + index, num_ra_pages, 0); page = read_mapping_page(inode->i_mapping, index, NULL); } return page; diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index cdf2f626bea7..03ec97f28235 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -2177,13 +2177,11 @@ out: * use ->readpage() or do the necessary surgery to decouple ->readpages() * from read-ahead. */ -int f2fs_mpage_readpages(struct address_space *mapping, - struct list_head *pages, struct page *page, - unsigned nr_pages, bool is_readahead) +static int f2fs_mpage_readpages(struct inode *inode, + struct readahead_control *rac, struct page *page) { struct bio *bio = NULL; sector_t last_block_in_bio = 0; - struct inode *inode = mapping->host; struct f2fs_map_blocks map; #ifdef CONFIG_F2FS_FS_COMPRESSION struct compress_ctx cc = { @@ -2197,6 +2195,7 @@ int f2fs_mpage_readpages(struct address_space *mapping, .nr_cpages = 0, }; #endif + unsigned nr_pages = rac ? readahead_count(rac) : 1; unsigned max_nr_pages = nr_pages; int ret = 0; @@ -2210,15 +2209,9 @@ int f2fs_mpage_readpages(struct address_space *mapping, map.m_may_create = false; for (; nr_pages; nr_pages--) { - if (pages) { - page = list_last_entry(pages, struct page, lru); - + if (rac) { + page = readahead_page(rac); prefetchw(&page->flags); - list_del(&page->lru); - if (add_to_page_cache_lru(page, mapping, - page_index(page), - readahead_gfp_mask(mapping))) - goto next_page; } #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -2228,7 +2221,7 @@ int f2fs_mpage_readpages(struct address_space *mapping, ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, - is_readahead, false); + rac != NULL, false); f2fs_destroy_compress_ctx(&cc); if (ret) goto set_error_page; @@ -2251,7 +2244,7 @@ read_single_page: #endif ret = f2fs_read_single_page(inode, page, max_nr_pages, &map, - &bio, &last_block_in_bio, is_readahead); + &bio, &last_block_in_bio, rac); if (ret) { #ifdef CONFIG_F2FS_FS_COMPRESSION set_error_page: @@ -2260,8 +2253,10 @@ set_error_page: zero_user_segment(page, 0, PAGE_SIZE); unlock_page(page); } +#ifdef CONFIG_F2FS_FS_COMPRESSION next_page: - if (pages) +#endif + if (rac) put_page(page); #ifdef CONFIG_F2FS_FS_COMPRESSION @@ -2271,16 +2266,15 @@ next_page: ret = f2fs_read_multi_pages(&cc, &bio, max_nr_pages, &last_block_in_bio, - is_readahead, false); + rac != NULL, false); f2fs_destroy_compress_ctx(&cc); } } #endif } - BUG_ON(pages && !list_empty(pages)); if (bio) __submit_bio(F2FS_I_SB(inode), bio, DATA); - return pages ? 0 : ret; + return ret; } static int f2fs_read_data_page(struct file *file, struct page *page) @@ -2299,28 +2293,24 @@ static int f2fs_read_data_page(struct file *file, struct page *page) if (f2fs_has_inline_data(inode)) ret = f2fs_read_inline_data(inode, page); if (ret == -EAGAIN) - ret = f2fs_mpage_readpages(page_file_mapping(page), - NULL, page, 1, false); + ret = f2fs_mpage_readpages(inode, NULL, page); return ret; } -static int f2fs_read_data_pages(struct file *file, - struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void f2fs_readahead(struct readahead_control *rac) { - struct inode *inode = mapping->host; - struct page *page = list_last_entry(pages, struct page, lru); + struct inode *inode = rac->mapping->host; - trace_f2fs_readpages(inode, page, nr_pages); + trace_f2fs_readpages(inode, readahead_index(rac), readahead_count(rac)); if (!f2fs_is_compress_backend_ready(inode)) - return 0; + return; /* If the file has inline data, skip readpages */ if (f2fs_has_inline_data(inode)) - return 0; + return; - return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages, true); + f2fs_mpage_readpages(inode, rac, NULL); } int f2fs_encrypt_one_page(struct f2fs_io_info *fio) @@ -3805,7 +3795,7 @@ static void f2fs_swap_deactivate(struct file *file) const struct address_space_operations f2fs_dblock_aops = { .readpage = f2fs_read_data_page, - .readpages = f2fs_read_data_pages, + .readahead = f2fs_readahead, .writepage = f2fs_write_data_page, .writepages = f2fs_write_data_pages, .write_begin = f2fs_write_begin, diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 157eec348970..5c0149d2f46a 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -3051,19 +3051,12 @@ static inline void f2fs_set_page_private(struct page *page, if (PagePrivate(page)) return; - get_page(page); - SetPagePrivate(page); - set_page_private(page, data); + attach_page_private(page, (void *)data); } static inline void f2fs_clear_page_private(struct page *page) { - if (!PagePrivate(page)) - return; - - set_page_private(page, 0); - ClearPagePrivate(page); - f2fs_put_page(page, 0); + detach_page_private(page); } /* @@ -3373,9 +3366,6 @@ int f2fs_reserve_new_block(struct dnode_of_data *dn); int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index); int f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from); int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index); -int f2fs_mpage_readpages(struct address_space *mapping, - struct list_head *pages, struct page *page, - unsigned nr_pages, bool is_readahead); struct page *f2fs_get_read_data_page(struct inode *inode, pgoff_t index, int op_flags, bool for_write); struct page *f2fs_find_data_page(struct inode *inode, pgoff_t index); diff --git a/fs/f2fs/verity.c b/fs/f2fs/verity.c index d7d430a6f130..865c9fb774fb 100644 --- a/fs/f2fs/verity.c +++ b/fs/f2fs/verity.c @@ -222,37 +222,6 @@ static int f2fs_get_verity_descriptor(struct inode *inode, void *buf, return size; } -/* - * Prefetch some pages from the file's Merkle tree. - * - * This is basically a stripped-down version of __do_page_cache_readahead() - * which works on pages past i_size. - */ -static void f2fs_merkle_tree_readahead(struct address_space *mapping, - pgoff_t start_index, unsigned long count) -{ - LIST_HEAD(pages); - unsigned int nr_pages = 0; - struct page *page; - pgoff_t index; - struct blk_plug plug; - - for (index = start_index; index < start_index + count; index++) { - page = xa_load(&mapping->i_pages, index); - if (!page || xa_is_value(page)) { - page = __page_cache_alloc(readahead_gfp_mask(mapping)); - if (!page) - break; - page->index = index; - list_add(&page->lru, &pages); - nr_pages++; - } - } - blk_start_plug(&plug); - f2fs_mpage_readpages(mapping, &pages, NULL, nr_pages, true); - blk_finish_plug(&plug); -} - static struct page *f2fs_read_merkle_tree_page(struct inode *inode, pgoff_t index, unsigned long num_ra_pages) @@ -266,8 +235,8 @@ static struct page *f2fs_read_merkle_tree_page(struct inode *inode, if (page) put_page(page); else if (num_ra_pages > 1) - f2fs_merkle_tree_readahead(inode->i_mapping, index, - num_ra_pages); + page_cache_readahead_unbounded(inode->i_mapping, NULL, + index, num_ra_pages, 0); page = read_mapping_page(inode->i_mapping, index, NULL); } return page; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 71946da84388..e6e68b2274a5 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -210,10 +210,9 @@ static int fat_readpage(struct file *file, struct page *page) return mpage_readpage(page, fat_get_block); } -static int fat_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void fat_readahead(struct readahead_control *rac) { - return mpage_readpages(mapping, pages, nr_pages, fat_get_block); + mpage_readahead(rac, fat_get_block); } static void fat_write_failed(struct address_space *mapping, loff_t to) @@ -344,7 +343,7 @@ int fat_block_truncate_page(struct inode *inode, loff_t from) static const struct address_space_operations fat_aops = { .readpage = fat_readpage, - .readpages = fat_readpages, + .readahead = fat_readahead, .writepage = fat_writepage, .writepages = fat_writepages, .write_begin = fat_write_begin, diff --git a/fs/file_table.c b/fs/file_table.c index 30d55c9a1744..676e620948d2 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -198,6 +198,7 @@ static struct file *alloc_file(const struct path *path, int flags, file->f_inode = path->dentry->d_inode; file->f_mapping = path->dentry->d_inode->i_mapping; file->f_wb_err = filemap_sample_wb_err(file->f_mapping); + file->f_sb_err = file_sample_sb_err(file); if ((file->f_mode & FMODE_READ) && likely(fop->read || fop->read_iter)) file->f_mode |= FMODE_CAN_READ; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 76ac9c7d32ec..c5bdf46e3b4b 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1070,7 +1070,6 @@ static void bdi_split_work_to_wbs(struct backing_dev_info *bdi, static unsigned long get_nr_dirty_pages(void) { return global_node_page_state(NR_FILE_DIRTY) + - global_node_page_state(NR_UNSTABLE_NFS) + get_nr_dirty_inodes(); } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 9d67b830fb7a..bac51c32d660 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -915,84 +915,40 @@ static void fuse_send_readpages(struct fuse_io_args *ia, struct file *file) fuse_readpages_end(fc, &ap->args, err); } -struct fuse_fill_data { - struct fuse_io_args *ia; - struct file *file; - struct inode *inode; - unsigned int nr_pages; - unsigned int max_pages; -}; - -static int fuse_readpages_fill(void *_data, struct page *page) +static void fuse_readahead(struct readahead_control *rac) { - struct fuse_fill_data *data = _data; - struct fuse_io_args *ia = data->ia; - struct fuse_args_pages *ap = &ia->ap; - struct inode *inode = data->inode; + struct inode *inode = rac->mapping->host; struct fuse_conn *fc = get_fuse_conn(inode); + unsigned int i, max_pages, nr_pages = 0; - fuse_wait_on_page_writeback(inode, page->index); - - if (ap->num_pages && - (ap->num_pages == fc->max_pages || - (ap->num_pages + 1) * PAGE_SIZE > fc->max_read || - ap->pages[ap->num_pages - 1]->index + 1 != page->index)) { - data->max_pages = min_t(unsigned int, data->nr_pages, - fc->max_pages); - fuse_send_readpages(ia, data->file); - data->ia = ia = fuse_io_alloc(NULL, data->max_pages); - if (!ia) { - unlock_page(page); - return -ENOMEM; - } - ap = &ia->ap; - } - - if (WARN_ON(ap->num_pages >= data->max_pages)) { - unlock_page(page); - fuse_io_free(ia); - return -EIO; - } - - get_page(page); - ap->pages[ap->num_pages] = page; - ap->descs[ap->num_pages].length = PAGE_SIZE; - ap->num_pages++; - data->nr_pages--; - return 0; -} - -static int fuse_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) -{ - struct inode *inode = mapping->host; - struct fuse_conn *fc = get_fuse_conn(inode); - struct fuse_fill_data data; - int err; - - err = -EIO; if (is_bad_inode(inode)) - goto out; + return; - data.file = file; - data.inode = inode; - data.nr_pages = nr_pages; - data.max_pages = min_t(unsigned int, nr_pages, fc->max_pages); -; - data.ia = fuse_io_alloc(NULL, data.max_pages); - err = -ENOMEM; - if (!data.ia) - goto out; + max_pages = min_t(unsigned int, fc->max_pages, + fc->max_read / PAGE_SIZE); - err = read_cache_pages(mapping, pages, fuse_readpages_fill, &data); - if (!err) { - if (data.ia->ap.num_pages) - fuse_send_readpages(data.ia, file); - else - fuse_io_free(data.ia); + for (;;) { + struct fuse_io_args *ia; + struct fuse_args_pages *ap; + + nr_pages = readahead_count(rac) - nr_pages; + if (nr_pages > max_pages) + nr_pages = max_pages; + if (nr_pages == 0) + break; + ia = fuse_io_alloc(NULL, nr_pages); + if (!ia) + return; + ap = &ia->ap; + nr_pages = __readahead_batch(rac, ap->pages, nr_pages); + for (i = 0; i < nr_pages; i++) { + fuse_wait_on_page_writeback(inode, + readahead_index(rac) + i); + ap->descs[i].length = PAGE_SIZE; + } + ap->num_pages = nr_pages; + fuse_send_readpages(ia, rac->file); } -out: - return err; } static ssize_t fuse_cache_read_iter(struct kiocb *iocb, struct iov_iter *to) @@ -3373,10 +3329,10 @@ static const struct file_operations fuse_file_operations = { static const struct address_space_operations fuse_file_aops = { .readpage = fuse_readpage, + .readahead = fuse_readahead, .writepage = fuse_writepage, .writepages = fuse_writepages, .launder_page = fuse_launder_page, - .readpages = fuse_readpages, .set_page_dirty = __set_page_dirty_nobuffers, .bmap = fuse_bmap, .direct_IO = fuse_direct_IO, diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 786c1ce8f030..72c9560f4467 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -577,7 +577,7 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, } /** - * gfs2_readpages - Read a bunch of pages at once + * gfs2_readahead - Read a bunch of pages at once * @file: The file to read from * @mapping: Address space info * @pages: List of pages to read @@ -590,31 +590,24 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos, * obviously not something we'd want to do on too regular a basis. * Any I/O we ignore at this time will be done via readpage later. * 2. We don't handle stuffed files here we let readpage do the honours. - * 3. mpage_readpages() does most of the heavy lifting in the common case. + * 3. mpage_readahead() does most of the heavy lifting in the common case. * 4. gfs2_block_map() is relied upon to set BH_Boundary in the right places. */ -static int gfs2_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void gfs2_readahead(struct readahead_control *rac) { - struct inode *inode = mapping->host; + struct inode *inode = rac->mapping->host; struct gfs2_inode *ip = GFS2_I(inode); - struct gfs2_sbd *sdp = GFS2_SB(inode); struct gfs2_holder gh; - int ret; gfs2_holder_init(ip->i_gl, LM_ST_SHARED, 0, &gh); - ret = gfs2_glock_nq(&gh); - if (unlikely(ret)) + if (gfs2_glock_nq(&gh)) goto out_uninit; if (!gfs2_is_stuffed(ip)) - ret = mpage_readpages(mapping, pages, nr_pages, gfs2_block_map); + mpage_readahead(rac, gfs2_block_map); gfs2_glock_dq(&gh); out_uninit: gfs2_holder_uninit(&gh); - if (unlikely(gfs2_withdrawn(sdp))) - ret = -EIO; - return ret; } /** @@ -833,7 +826,7 @@ static const struct address_space_operations gfs2_aops = { .writepage = gfs2_writepage, .writepages = gfs2_writepages, .readpage = gfs2_readpage, - .readpages = gfs2_readpages, + .readahead = gfs2_readahead, .bmap = gfs2_bmap, .invalidatepage = gfs2_invalidatepage, .releasepage = gfs2_releasepage, @@ -847,7 +840,7 @@ static const struct address_space_operations gfs2_jdata_aops = { .writepage = gfs2_jdata_writepage, .writepages = gfs2_jdata_writepages, .readpage = gfs2_readpage, - .readpages = gfs2_readpages, + .readahead = gfs2_readahead, .set_page_dirty = jdata_set_page_dirty, .bmap = gfs2_bmap, .invalidatepage = gfs2_invalidatepage, diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index c3f7732415be..c0f2875c946c 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -354,7 +354,7 @@ static __be64 *gfs2_dir_get_hash_table(struct gfs2_inode *ip) hc = kmalloc(hsize, GFP_NOFS | __GFP_NOWARN); if (hc == NULL) - hc = __vmalloc(hsize, GFP_NOFS, PAGE_KERNEL); + hc = __vmalloc(hsize, GFP_NOFS); if (hc == NULL) return ERR_PTR(-ENOMEM); @@ -1166,7 +1166,7 @@ static int dir_double_exhash(struct gfs2_inode *dip) hc2 = kmalloc_array(hsize_bytes, 2, GFP_NOFS | __GFP_NOWARN); if (hc2 == NULL) - hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS, PAGE_KERNEL); + hc2 = __vmalloc(hsize_bytes * 2, GFP_NOFS); if (!hc2) return -ENOMEM; @@ -1327,7 +1327,7 @@ static void *gfs2_alloc_sort_buffer(unsigned size) if (size < KMALLOC_MAX_SIZE) ptr = kmalloc(size, GFP_NOFS | __GFP_NOWARN); if (!ptr) - ptr = __vmalloc(size, GFP_NOFS, PAGE_KERNEL); + ptr = __vmalloc(size, GFP_NOFS); return ptr; } @@ -1987,8 +1987,7 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, ht = kzalloc(size, GFP_NOFS | __GFP_NOWARN); if (ht == NULL) - ht = __vmalloc(size, GFP_NOFS | __GFP_NOWARN | __GFP_ZERO, - PAGE_KERNEL); + ht = __vmalloc(size, GFP_NOFS | __GFP_NOWARN | __GFP_ZERO); if (!ht) return -ENOMEM; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 8259fef3f986..4b67d47a7e00 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1365,7 +1365,7 @@ int gfs2_quota_init(struct gfs2_sbd *sdp) sdp->sd_quota_bitmap = kzalloc(bm_size, GFP_NOFS | __GFP_NOWARN); if (sdp->sd_quota_bitmap == NULL) sdp->sd_quota_bitmap = __vmalloc(bm_size, GFP_NOFS | - __GFP_ZERO, PAGE_KERNEL); + __GFP_ZERO); if (!sdp->sd_quota_bitmap) return error; diff --git a/fs/hpfs/file.c b/fs/hpfs/file.c index b36abf9cb345..2de0d3492d15 100644 --- a/fs/hpfs/file.c +++ b/fs/hpfs/file.c @@ -125,10 +125,9 @@ static int hpfs_writepage(struct page *page, struct writeback_control *wbc) return block_write_full_page(page, hpfs_get_block, wbc); } -static int hpfs_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void hpfs_readahead(struct readahead_control *rac) { - return mpage_readpages(mapping, pages, nr_pages, hpfs_get_block); + mpage_readahead(rac, hpfs_get_block); } static int hpfs_writepages(struct address_space *mapping, @@ -198,7 +197,7 @@ static int hpfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, const struct address_space_operations hpfs_aops = { .readpage = hpfs_readpage, .writepage = hpfs_writepage, - .readpages = hpfs_readpages, + .readahead = hpfs_readahead, .writepages = hpfs_writepages, .write_begin = hpfs_write_begin, .write_end = hpfs_write_end, diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 89e21961d1ad..a1ed7620fbac 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -59,24 +59,19 @@ iomap_page_create(struct inode *inode, struct page *page) * migrate_page_move_mapping() assumes that pages with private data have * their count elevated by 1. */ - get_page(page); - set_page_private(page, (unsigned long)iop); - SetPagePrivate(page); + attach_page_private(page, iop); return iop; } static void iomap_page_release(struct page *page) { - struct iomap_page *iop = to_iomap_page(page); + struct iomap_page *iop = detach_page_private(page); if (!iop) return; WARN_ON_ONCE(atomic_read(&iop->read_count)); WARN_ON_ONCE(atomic_read(&iop->write_count)); - ClearPagePrivate(page); - set_page_private(page, 0); - put_page(page); kfree(iop); } @@ -214,9 +209,8 @@ iomap_read_end_io(struct bio *bio) struct iomap_readpage_ctx { struct page *cur_page; bool cur_page_in_bio; - bool is_readahead; struct bio *bio; - struct list_head *pages; + struct readahead_control *rac; }; static void @@ -308,7 +302,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, if (ctx->bio) submit_bio(ctx->bio); - if (ctx->is_readahead) /* same as readahead_gfp_mask */ + if (ctx->rac) /* same as readahead_gfp_mask */ gfp |= __GFP_NORETRY | __GFP_NOWARN; ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs)); /* @@ -319,7 +313,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data, if (!ctx->bio) ctx->bio = bio_alloc(orig_gfp, 1); ctx->bio->bi_opf = REQ_OP_READ; - if (ctx->is_readahead) + if (ctx->rac) ctx->bio->bi_opf |= REQ_RAHEAD; ctx->bio->bi_iter.bi_sector = sector; bio_set_dev(ctx->bio, iomap->bdev); @@ -367,7 +361,7 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops) } /* - * Just like mpage_readpages and block_read_full_page we always + * Just like mpage_readahead and block_read_full_page we always * return 0 and just mark the page as PageError on errors. This * should be cleaned up all through the stack eventually. */ @@ -375,36 +369,8 @@ iomap_readpage(struct page *page, const struct iomap_ops *ops) } EXPORT_SYMBOL_GPL(iomap_readpage); -static struct page * -iomap_next_page(struct inode *inode, struct list_head *pages, loff_t pos, - loff_t length, loff_t *done) -{ - while (!list_empty(pages)) { - struct page *page = lru_to_page(pages); - - if (page_offset(page) >= (u64)pos + length) - break; - - list_del(&page->lru); - if (!add_to_page_cache_lru(page, inode->i_mapping, page->index, - GFP_NOFS)) - return page; - - /* - * If we already have a page in the page cache at index we are - * done. Upper layers don't care if it is uptodate after the - * readpages call itself as every page gets checked again once - * actually needed. - */ - *done += PAGE_SIZE; - put_page(page); - } - - return NULL; -} - static loff_t -iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length, +iomap_readahead_actor(struct inode *inode, loff_t pos, loff_t length, void *data, struct iomap *iomap, struct iomap *srcmap) { struct iomap_readpage_ctx *ctx = data; @@ -418,10 +384,7 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length, ctx->cur_page = NULL; } if (!ctx->cur_page) { - ctx->cur_page = iomap_next_page(inode, ctx->pages, - pos, length, &done); - if (!ctx->cur_page) - break; + ctx->cur_page = readahead_page(ctx->rac); ctx->cur_page_in_bio = false; } ret = iomap_readpage_actor(inode, pos + done, length - done, @@ -431,32 +394,43 @@ iomap_readpages_actor(struct inode *inode, loff_t pos, loff_t length, return done; } -int -iomap_readpages(struct address_space *mapping, struct list_head *pages, - unsigned nr_pages, const struct iomap_ops *ops) +/** + * iomap_readahead - Attempt to read pages from a file. + * @rac: Describes the pages to be read. + * @ops: The operations vector for the filesystem. + * + * This function is for filesystems to call to implement their readahead + * address_space operation. + * + * Context: The @ops callbacks may submit I/O (eg to read the addresses of + * blocks from disc), and may wait for it. The caller may be trying to + * access a different page, and so sleeping excessively should be avoided. + * It may allocate memory, but should avoid costly allocations. This + * function is called with memalloc_nofs set, so allocations will not cause + * the filesystem to be reentered. + */ +void iomap_readahead(struct readahead_control *rac, const struct iomap_ops *ops) { + struct inode *inode = rac->mapping->host; + loff_t pos = readahead_pos(rac); + loff_t length = readahead_length(rac); struct iomap_readpage_ctx ctx = { - .pages = pages, - .is_readahead = true, + .rac = rac, }; - loff_t pos = page_offset(list_entry(pages->prev, struct page, lru)); - loff_t last = page_offset(list_entry(pages->next, struct page, lru)); - loff_t length = last - pos + PAGE_SIZE, ret = 0; - trace_iomap_readpages(mapping->host, nr_pages); + trace_iomap_readahead(inode, readahead_count(rac)); while (length > 0) { - ret = iomap_apply(mapping->host, pos, length, 0, ops, - &ctx, iomap_readpages_actor); + loff_t ret = iomap_apply(inode, pos, length, 0, ops, + &ctx, iomap_readahead_actor); if (ret <= 0) { WARN_ON_ONCE(ret == 0); - goto done; + break; } pos += ret; length -= ret; } - ret = 0; -done: + if (ctx.bio) submit_bio(ctx.bio); if (ctx.cur_page) { @@ -464,15 +438,8 @@ done: unlock_page(ctx.cur_page); put_page(ctx.cur_page); } - - /* - * Check that we didn't lose a page due to the arcance calling - * conventions.. - */ - WARN_ON_ONCE(!ret && !list_empty(ctx.pages)); - return ret; } -EXPORT_SYMBOL_GPL(iomap_readpages); +EXPORT_SYMBOL_GPL(iomap_readahead); /* * iomap_is_partially_uptodate checks whether blocks within a page are @@ -554,14 +521,8 @@ iomap_migrate_page(struct address_space *mapping, struct page *newpage, if (ret != MIGRATEPAGE_SUCCESS) return ret; - if (page_has_private(page)) { - ClearPagePrivate(page); - get_page(newpage); - set_page_private(newpage, page_private(page)); - set_page_private(page, 0); - put_page(page); - SetPagePrivate(newpage); - } + if (page_has_private(page)) + attach_page_private(newpage, detach_page_private(page)); if (mode != MIGRATE_SYNC_NO_COPY) migrate_page_copy(newpage, page); diff --git a/fs/iomap/trace.h b/fs/iomap/trace.h index 4df19c66f597..5693a39d52fb 100644 --- a/fs/iomap/trace.h +++ b/fs/iomap/trace.h @@ -39,7 +39,7 @@ DEFINE_EVENT(iomap_readpage_class, name, \ TP_PROTO(struct inode *inode, int nr_pages), \ TP_ARGS(inode, nr_pages)) DEFINE_READPAGE_EVENT(iomap_readpage); -DEFINE_READPAGE_EVENT(iomap_readpages); +DEFINE_READPAGE_EVENT(iomap_readahead); DECLARE_EVENT_CLASS(iomap_range_class, TP_PROTO(struct inode *inode, unsigned long off, unsigned int len), diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 62c0462dc89f..95b1f377ad09 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -1185,10 +1185,9 @@ static int isofs_readpage(struct file *file, struct page *page) return mpage_readpage(page, isofs_get_block); } -static int isofs_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void isofs_readahead(struct readahead_control *rac) { - return mpage_readpages(mapping, pages, nr_pages, isofs_get_block); + mpage_readahead(rac, isofs_get_block); } static sector_t _isofs_bmap(struct address_space *mapping, sector_t block) @@ -1198,7 +1197,7 @@ static sector_t _isofs_bmap(struct address_space *mapping, sector_t block) static const struct address_space_operations isofs_aops = { .readpage = isofs_readpage, - .readpages = isofs_readpages, + .readahead = isofs_readahead, .bmap = _isofs_bmap }; diff --git a/fs/jfs/inode.c b/fs/jfs/inode.c index 9486afcdac76..6f65bfa9f18d 100644 --- a/fs/jfs/inode.c +++ b/fs/jfs/inode.c @@ -296,10 +296,9 @@ static int jfs_readpage(struct file *file, struct page *page) return mpage_readpage(page, jfs_get_block); } -static int jfs_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void jfs_readahead(struct readahead_control *rac) { - return mpage_readpages(mapping, pages, nr_pages, jfs_get_block); + mpage_readahead(rac, jfs_get_block); } static void jfs_write_failed(struct address_space *mapping, loff_t to) @@ -358,7 +357,7 @@ static ssize_t jfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter) const struct address_space_operations jfs_aops = { .readpage = jfs_readpage, - .readpages = jfs_readpages, + .readahead = jfs_readahead, .writepage = jfs_writepage, .writepages = jfs_writepages, .write_begin = jfs_write_begin, diff --git a/fs/mpage.c b/fs/mpage.c index ccba3c4c4479..830e6cc2a9e7 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -91,7 +91,7 @@ mpage_alloc(struct block_device *bdev, } /* - * support function for mpage_readpages. The fs supplied get_block might + * support function for mpage_readahead. The fs supplied get_block might * return an up to date buffer. This is used to map that buffer into * the page, which allows readpage to avoid triggering a duplicate call * to get_block. @@ -338,13 +338,8 @@ confused: } /** - * mpage_readpages - populate an address space with some pages & start reads against them - * @mapping: the address_space - * @pages: The address of a list_head which contains the target pages. These - * pages have their ->index populated and are otherwise uninitialised. - * The page at @pages->prev has the lowest file offset, and reads should be - * issued in @pages->prev to @pages->next order. - * @nr_pages: The number of pages at *@pages + * mpage_readahead - start reads against pages + * @rac: Describes which pages to read. * @get_block: The filesystem's block mapper function. * * This function walks the pages and the blocks within each page, building and @@ -381,36 +376,25 @@ confused: * * This all causes the disk requests to be issued in the correct order. */ -int -mpage_readpages(struct address_space *mapping, struct list_head *pages, - unsigned nr_pages, get_block_t get_block) +void mpage_readahead(struct readahead_control *rac, get_block_t get_block) { + struct page *page; struct mpage_readpage_args args = { .get_block = get_block, .is_readahead = true, }; - unsigned page_idx; - - for (page_idx = 0; page_idx < nr_pages; page_idx++) { - struct page *page = lru_to_page(pages); + while ((page = readahead_page(rac))) { prefetchw(&page->flags); - list_del(&page->lru); - if (!add_to_page_cache_lru(page, mapping, - page->index, - readahead_gfp_mask(mapping))) { - args.page = page; - args.nr_pages = nr_pages - page_idx; - args.bio = do_mpage_readpage(&args); - } + args.page = page; + args.nr_pages = readahead_count(rac); + args.bio = do_mpage_readpage(&args); put_page(page); } - BUG_ON(!list_empty(pages)); if (args.bio) mpage_bio_submit(REQ_OP_READ, REQ_RAHEAD, args.bio); - return 0; } -EXPORT_SYMBOL(mpage_readpages); +EXPORT_SYMBOL(mpage_readahead); /* * This isn't called much at all @@ -563,7 +547,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc, * Page has buffers, but they are all unmapped. The page was * created by pagein or read over a hole which was handled by * block_read_full_page(). If this address_space is also - * using mpage_readpages then this can rarely happen. + * using mpage_readahead then this can rarely happen. */ goto confused; } diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c index 7a57ff2528af..8f7cff7a4293 100644 --- a/fs/nfs/blocklayout/extent_tree.c +++ b/fs/nfs/blocklayout/extent_tree.c @@ -582,7 +582,7 @@ retry: if (!arg->layoutupdate_pages) return -ENOMEM; - start_p = __vmalloc(buffer_size, GFP_NOFS, PAGE_KERNEL); + start_p = __vmalloc(buffer_size, GFP_NOFS); if (!start_p) { kfree(arg->layoutupdate_pages); return -ENOMEM; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 1f32a9fbfdaf..6673a77884d9 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -668,7 +668,8 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize) } /* - * Record the page as unstable and mark its inode as dirty. + * Record the page as unstable (an extra writeback period) and mark its + * inode as dirty. */ static inline void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo) @@ -676,8 +677,11 @@ void nfs_mark_page_unstable(struct page *page, struct nfs_commit_info *cinfo) if (!cinfo->dreq) { struct inode *inode = page_file_mapping(page)->host; - inc_node_page_state(page, NR_UNSTABLE_NFS); - inc_wb_stat(&inode_to_bdi(inode)->wb, WB_RECLAIMABLE); + /* This page is really still in write-back - just that the + * writeback is happening on the server now. + */ + inc_node_page_state(page, NR_WRITEBACK); + inc_wb_stat(&inode_to_bdi(inode)->wb, WB_WRITEBACK); __mark_inode_dirty(inode, I_DIRTY_DATASYNC); } } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 1e767f779c49..639c34fec04a 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -946,9 +946,9 @@ nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, static void nfs_clear_page_commit(struct page *page) { - dec_node_page_state(page, NR_UNSTABLE_NFS); + dec_node_page_state(page, NR_WRITEBACK); dec_wb_stat(&inode_to_bdi(page_file_mapping(page)->host)->wb, - WB_RECLAIMABLE); + WB_WRITEBACK); } /* Called holding the request lock on @req */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 0aa02eb18bd3..c3fbab1753ec 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -979,12 +979,13 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) /* - * We want less throttling in balance_dirty_pages() - * and shrink_inactive_list() so that nfs to + * We want throttling in balance_dirty_pages() + * and shrink_inactive_list() to only consider + * the backingdev we are writing to, so that nfs to * localhost doesn't cause nfsd to lock up due to all * the client's dirty pages or its congested queue. */ - current->flags |= PF_LESS_THROTTLE; + current->flags |= PF_LOCAL_THROTTLE; exp = fhp->fh_export; use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp); @@ -1037,7 +1038,7 @@ out_nfserr: nfserr = nfserrno(host_err); } if (test_bit(RQ_LOCAL, &rqstp->rq_flags)) - current_restore_flags(pflags, PF_LESS_THROTTLE); + current_restore_flags(pflags, PF_LOCAL_THROTTLE); return nfserr; } diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c index 671085512e0f..ceeb3b441844 100644 --- a/fs/nilfs2/inode.c +++ b/fs/nilfs2/inode.c @@ -145,18 +145,9 @@ static int nilfs_readpage(struct file *file, struct page *page) return mpage_readpage(page, nilfs_get_block); } -/** - * nilfs_readpages() - implement readpages() method of nilfs_aops {} - * address_space_operations. - * @file - file struct of the file to be read - * @mapping - address_space struct used for reading multiple pages - * @pages - the pages to be read - * @nr_pages - number of pages to be read - */ -static int nilfs_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned int nr_pages) +static void nilfs_readahead(struct readahead_control *rac) { - return mpage_readpages(mapping, pages, nr_pages, nilfs_get_block); + mpage_readahead(rac, nilfs_get_block); } static int nilfs_writepages(struct address_space *mapping, @@ -308,7 +299,7 @@ const struct address_space_operations nilfs_aops = { .readpage = nilfs_readpage, .writepages = nilfs_writepages, .set_page_dirty = nilfs_set_page_dirty, - .readpages = nilfs_readpages, + .readahead = nilfs_readahead, .write_begin = nilfs_write_begin, .write_end = nilfs_write_end, /* .releasepage = nilfs_releasepage, */ diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 554b744f41bf..bb0a43860ad2 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1732,7 +1732,7 @@ void mark_ntfs_record_dirty(struct page *page, const unsigned int ofs) { bh = bh->b_this_page; } while (bh); tail->b_this_page = head; - attach_page_buffers(page, head); + attach_page_private(page, head); } else buffers_to_free = bh; } diff --git a/fs/ntfs/malloc.h b/fs/ntfs/malloc.h index 842b0bfc3ac9..7068425735f1 100644 --- a/fs/ntfs/malloc.h +++ b/fs/ntfs/malloc.h @@ -34,7 +34,7 @@ static inline void *__ntfs_malloc(unsigned long size, gfp_t gfp_mask) /* return (void *)__get_free_page(gfp_mask); */ } if (likely((size >> PAGE_SHIFT) < totalram_pages())) - return __vmalloc(size, gfp_mask, PAGE_KERNEL); + return __vmalloc(size, gfp_mask); return NULL; } diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 3aac5c917afe..fbb9f1bc623d 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -504,7 +504,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, bh = bh->b_this_page; } while (bh); tail->b_this_page = head; - attach_page_buffers(page, head); + attach_page_private(page, head); } bh = head = page_buffers(page); BUG_ON(!bh); diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 3a67a6518ddf..3bfb4147895a 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -350,14 +350,11 @@ out: * grow out to a tree. If need be, detecting boundary extents could * trivially be added in a future version of ocfs2_get_block(). */ -static int ocfs2_readpages(struct file *filp, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void ocfs2_readahead(struct readahead_control *rac) { - int ret, err = -EIO; - struct inode *inode = mapping->host; + int ret; + struct inode *inode = rac->mapping->host; struct ocfs2_inode_info *oi = OCFS2_I(inode); - loff_t start; - struct page *last; /* * Use the nonblocking flag for the dlm code to avoid page @@ -365,36 +362,31 @@ static int ocfs2_readpages(struct file *filp, struct address_space *mapping, */ ret = ocfs2_inode_lock_full(inode, NULL, 0, OCFS2_LOCK_NONBLOCK); if (ret) - return err; + return; - if (down_read_trylock(&oi->ip_alloc_sem) == 0) { - ocfs2_inode_unlock(inode, 0); - return err; - } + if (down_read_trylock(&oi->ip_alloc_sem) == 0) + goto out_unlock; /* * Don't bother with inline-data. There isn't anything * to read-ahead in that case anyway... */ if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) - goto out_unlock; + goto out_up; /* * Check whether a remote node truncated this file - we just * drop out in that case as it's not worth handling here. */ - last = lru_to_page(pages); - start = (loff_t)last->index << PAGE_SHIFT; - if (start >= i_size_read(inode)) - goto out_unlock; + if (readahead_pos(rac) >= i_size_read(inode)) + goto out_up; - err = mpage_readpages(mapping, pages, nr_pages, ocfs2_get_block); + mpage_readahead(rac, ocfs2_get_block); -out_unlock: +out_up: up_read(&oi->ip_alloc_sem); +out_unlock: ocfs2_inode_unlock(inode, 0); - - return err; } /* Note: Because we don't support holes, our allocation has @@ -2474,7 +2466,7 @@ static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter) const struct address_space_operations ocfs2_aops = { .readpage = ocfs2_readpage, - .readpages = ocfs2_readpages, + .readahead = ocfs2_readahead, .writepage = ocfs2_writepage, .write_begin = ocfs2_write_begin, .write_end = ocfs2_write_end, diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 55a6512e9fde..f105746063ed 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2760,6 +2760,7 @@ leave: * Returns: 1 if dlm->spinlock was dropped/retaken, 0 if never dropped */ int dlm_empty_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) + __must_hold(&dlm->spinlock) { int ret; int lock_dropped = 0; diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index 9150cfa4df7d..ee5d98516212 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -279,6 +279,7 @@ enum ocfs2_mount_options OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT = 1 << 15, /* Journal Async Commit */ OCFS2_MOUNT_ERRORS_CONT = 1 << 16, /* Return EIO to the calling process on error */ OCFS2_MOUNT_ERRORS_ROFS = 1 << 17, /* Change filesystem to read-only on error */ + OCFS2_MOUNT_NOCLUSTER = 1 << 18, /* No cluster aware filesystem mount */ }; #define OCFS2_OSB_SOFT_RO 0x0001 @@ -673,7 +674,8 @@ static inline int ocfs2_cluster_o2cb_global_heartbeat(struct ocfs2_super *osb) static inline int ocfs2_mount_local(struct ocfs2_super *osb) { - return (osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT); + return ((osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT) + || (osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER)); } static inline int ocfs2_uses_extended_slot_map(struct ocfs2_super *osb) diff --git a/fs/ocfs2/slot_map.c b/fs/ocfs2/slot_map.c index 8caeceeaeda7..4da0e4b1e79b 100644 --- a/fs/ocfs2/slot_map.c +++ b/fs/ocfs2/slot_map.c @@ -254,14 +254,16 @@ static int __ocfs2_find_empty_slot(struct ocfs2_slot_info *si, int i, ret = -ENOSPC; if ((preferred >= 0) && (preferred < si->si_num_slots)) { - if (!si->si_slots[preferred].sl_valid) { + if (!si->si_slots[preferred].sl_valid || + !si->si_slots[preferred].sl_node_num) { ret = preferred; goto out; } } for(i = 0; i < si->si_num_slots; i++) { - if (!si->si_slots[i].sl_valid) { + if (!si->si_slots[i].sl_valid || + !si->si_slots[i].sl_node_num) { ret = i; break; } @@ -456,24 +458,30 @@ int ocfs2_find_slot(struct ocfs2_super *osb) spin_lock(&osb->osb_lock); ocfs2_update_slot_info(si); - /* search for ourselves first and take the slot if it already - * exists. Perhaps we need to mark this in a variable for our - * own journal recovery? Possibly not, though we certainly - * need to warn to the user */ - slot = __ocfs2_node_num_to_slot(si, osb->node_num); - if (slot < 0) { - /* if no slot yet, then just take 1st available - * one. */ - slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); + if (ocfs2_mount_local(osb)) + /* use slot 0 directly in local mode */ + slot = 0; + else { + /* search for ourselves first and take the slot if it already + * exists. Perhaps we need to mark this in a variable for our + * own journal recovery? Possibly not, though we certainly + * need to warn to the user */ + slot = __ocfs2_node_num_to_slot(si, osb->node_num); if (slot < 0) { - spin_unlock(&osb->osb_lock); - mlog(ML_ERROR, "no free slots available!\n"); - status = -EINVAL; - goto bail; - } - } else - printk(KERN_INFO "ocfs2: Slot %d on device (%s) was already " - "allocated to this node!\n", slot, osb->dev_str); + /* if no slot yet, then just take 1st available + * one. */ + slot = __ocfs2_find_empty_slot(si, osb->preferred_slot); + if (slot < 0) { + spin_unlock(&osb->osb_lock); + mlog(ML_ERROR, "no free slots available!\n"); + status = -EINVAL; + goto bail; + } + } else + printk(KERN_INFO "ocfs2: Slot %d on device (%s) was " + "already allocated to this node!\n", + slot, osb->dev_str); + } ocfs2_set_slot(si, slot, osb->node_num); osb->slot_num = slot; diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index ac61eeaf3837..71ea9ce71a6b 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -175,6 +175,7 @@ enum { Opt_dir_resv_level, Opt_journal_async_commit, Opt_err_cont, + Opt_nocluster, Opt_err, }; @@ -208,6 +209,7 @@ static const match_table_t tokens = { {Opt_dir_resv_level, "dir_resv_level=%u"}, {Opt_journal_async_commit, "journal_async_commit"}, {Opt_err_cont, "errors=continue"}, + {Opt_nocluster, "nocluster"}, {Opt_err, NULL} }; @@ -619,6 +621,13 @@ static int ocfs2_remount(struct super_block *sb, int *flags, char *data) goto out; } + tmp = OCFS2_MOUNT_NOCLUSTER; + if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { + ret = -EINVAL; + mlog(ML_ERROR, "Cannot change nocluster option on remount\n"); + goto out; + } + tmp = OCFS2_MOUNT_HB_LOCAL | OCFS2_MOUNT_HB_GLOBAL | OCFS2_MOUNT_HB_NONE; if ((osb->s_mount_opt & tmp) != (parsed_options.mount_opt & tmp)) { @@ -859,6 +868,7 @@ static int ocfs2_verify_userspace_stack(struct ocfs2_super *osb, } if (ocfs2_userspace_stack(osb) && + !(osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && strncmp(osb->osb_cluster_stack, mopt->cluster_stack, OCFS2_STACK_LABEL_LEN)) { mlog(ML_ERROR, @@ -1139,6 +1149,11 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) osb->s_mount_opt & OCFS2_MOUNT_DATA_WRITEBACK ? "writeback" : "ordered"); + if ((osb->s_mount_opt & OCFS2_MOUNT_NOCLUSTER) && + !(osb->s_feature_incompat & OCFS2_FEATURE_INCOMPAT_LOCAL_MOUNT)) + printk(KERN_NOTICE "ocfs2: The shared device (%s) is mounted " + "without cluster aware mode.\n", osb->dev_str); + atomic_set(&osb->vol_state, VOLUME_MOUNTED); wake_up(&osb->osb_mount_event); @@ -1445,6 +1460,9 @@ static int ocfs2_parse_options(struct super_block *sb, case Opt_journal_async_commit: mopt->mount_opt |= OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT; break; + case Opt_nocluster: + mopt->mount_opt |= OCFS2_MOUNT_NOCLUSTER; + break; default: mlog(ML_ERROR, "Unrecognized mount option \"%s\" " @@ -1556,6 +1574,9 @@ static int ocfs2_show_options(struct seq_file *s, struct dentry *root) if (opts & OCFS2_MOUNT_JOURNAL_ASYNC_COMMIT) seq_printf(s, ",journal_async_commit"); + if (opts & OCFS2_MOUNT_NOCLUSTER) + seq_printf(s, ",nocluster"); + return 0; } diff --git a/fs/omfs/file.c b/fs/omfs/file.c index d640b9388238..d7b5f09d298c 100644 --- a/fs/omfs/file.c +++ b/fs/omfs/file.c @@ -289,10 +289,9 @@ static int omfs_readpage(struct file *file, struct page *page) return block_read_full_page(page, omfs_get_block); } -static int omfs_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void omfs_readahead(struct readahead_control *rac) { - return mpage_readpages(mapping, pages, nr_pages, omfs_get_block); + mpage_readahead(rac, omfs_get_block); } static int omfs_writepage(struct page *page, struct writeback_control *wbc) @@ -373,7 +372,7 @@ const struct inode_operations omfs_file_inops = { const struct address_space_operations omfs_aops = { .readpage = omfs_readpage, - .readpages = omfs_readpages, + .readahead = omfs_readahead, .writepage = omfs_writepage, .writepages = omfs_writepages, .write_begin = omfs_write_begin, diff --git a/fs/open.c b/fs/open.c index e62b1db06638..6cd48a61cda3 100644 --- a/fs/open.c +++ b/fs/open.c @@ -775,9 +775,8 @@ static int do_dentry_open(struct file *f, path_get(&f->f_path); f->f_inode = inode; f->f_mapping = inode->i_mapping; - - /* Ensure that we skip any errors that predate opening of the file */ f->f_wb_err = filemap_sample_wb_err(f->f_mapping); + f->f_sb_err = file_sample_sb_err(f); if (unlikely(f->f_flags & O_PATH)) { f->f_mode = FMODE_PATH | FMODE_OPENED; diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 12ae630fbed7..48f0547d4850 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -62,12 +62,7 @@ static int orangefs_writepage_locked(struct page *page, } else { ret = 0; } - if (wr) { - kfree(wr); - set_page_private(page, 0); - ClearPagePrivate(page); - put_page(page); - } + kfree(detach_page_private(page)); return ret; } @@ -409,9 +404,7 @@ static int orangefs_write_begin(struct file *file, wr->len = len; wr->uid = current_fsuid(); wr->gid = current_fsgid(); - SetPagePrivate(page); - set_page_private(page, (unsigned long)wr); - get_page(page); + attach_page_private(page, wr); okay: return 0; } @@ -459,18 +452,12 @@ static void orangefs_invalidatepage(struct page *page, wr = (struct orangefs_write_range *)page_private(page); if (offset == 0 && length == PAGE_SIZE) { - kfree((struct orangefs_write_range *)page_private(page)); - set_page_private(page, 0); - ClearPagePrivate(page); - put_page(page); + kfree(detach_page_private(page)); return; /* write range entirely within invalidate range (or equal) */ } else if (page_offset(page) + offset <= wr->pos && wr->pos + wr->len <= page_offset(page) + offset + length) { - kfree((struct orangefs_write_range *)page_private(page)); - set_page_private(page, 0); - ClearPagePrivate(page); - put_page(page); + kfree(detach_page_private(page)); /* XXX is this right? only caller in fs */ cancel_dirty_page(page); return; @@ -535,12 +522,7 @@ static int orangefs_releasepage(struct page *page, gfp_t foo) static void orangefs_freepage(struct page *page) { - if (PagePrivate(page)) { - kfree((struct orangefs_write_range *)page_private(page)); - set_page_private(page, 0); - ClearPagePrivate(page); - put_page(page); - } + kfree(detach_page_private(page)); } static int orangefs_launder_page(struct page *page) @@ -740,9 +722,7 @@ vm_fault_t orangefs_page_mkwrite(struct vm_fault *vmf) wr->len = PAGE_SIZE; wr->uid = current_fsuid(); wr->gid = current_fsgid(); - SetPagePrivate(page); - set_page_private(page, (unsigned long)wr); - get_page(page); + attach_page_private(page, wr); okay: file_update_time(vmf->vma->vm_file); diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 09cd51c8d23d..ecc63ce01be7 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -110,8 +110,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v) show_val_kb(m, "PageTables: ", global_zone_page_state(NR_PAGETABLE)); - show_val_kb(m, "NFS_Unstable: ", - global_node_page_state(NR_UNSTABLE_NFS)); + show_val_kb(m, "NFS_Unstable: ", 0); show_val_kb(m, "Bounce: ", global_zone_page_state(NR_BOUNCE)); show_val_kb(m, "WritebackTmp: ", diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 10a6d472397f..6ad407d5efe2 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -546,10 +546,17 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, struct mem_size_stats *mss = walk->private; struct vm_area_struct *vma = walk->vma; bool locked = !!(vma->vm_flags & VM_LOCKED); - struct page *page; + struct page *page = NULL; + + if (pmd_present(*pmd)) { + /* FOLL_DUMP will return -EFAULT on huge zero page */ + page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP); + } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { + swp_entry_t entry = pmd_to_swp_entry(*pmd); - /* FOLL_DUMP will return -EFAULT on huge zero page */ - page = follow_trans_huge_pmd(vma, addr, pmd, FOLL_DUMP); + if (is_migration_entry(entry)) + page = migration_entry_to_page(entry); + } if (IS_ERR_OR_NULL(page)) return; if (PageAnon(page)) @@ -578,8 +585,7 @@ static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { - if (pmd_present(*pmd)) - smaps_pmd_entry(pmd, addr, walk); + smaps_pmd_entry(pmd, addr, walk); spin_unlock(ptl); goto out; } diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c index 345db56c98fd..755293c8c71a 100644 --- a/fs/qnx6/inode.c +++ b/fs/qnx6/inode.c @@ -99,10 +99,9 @@ static int qnx6_readpage(struct file *file, struct page *page) return mpage_readpage(page, qnx6_get_block); } -static int qnx6_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void qnx6_readahead(struct readahead_control *rac) { - return mpage_readpages(mapping, pages, nr_pages, qnx6_get_block); + mpage_readahead(rac, qnx6_get_block); } /* @@ -499,7 +498,7 @@ static sector_t qnx6_bmap(struct address_space *mapping, sector_t block) } static const struct address_space_operations qnx6_aops = { .readpage = qnx6_readpage, - .readpages = qnx6_readpages, + .readahead = qnx6_readahead, .bmap = qnx6_bmap }; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 6419e6dacc39..0031070b3692 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -1160,11 +1160,9 @@ failure: return retval; } -static int -reiserfs_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void reiserfs_readahead(struct readahead_control *rac) { - return mpage_readpages(mapping, pages, nr_pages, reiserfs_get_block); + mpage_readahead(rac, reiserfs_get_block); } /* @@ -3434,7 +3432,7 @@ out: const struct address_space_operations reiserfs_address_space_operations = { .writepage = reiserfs_writepage, .readpage = reiserfs_readpage, - .readpages = reiserfs_readpages, + .readahead = reiserfs_readahead, .releasepage = reiserfs_releasepage, .invalidatepage = reiserfs_invalidatepage, .write_begin = reiserfs_write_begin, diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c index 4f9b9fb59362..64f61330564a 100644 --- a/fs/squashfs/block.c +++ b/fs/squashfs/block.c @@ -13,6 +13,7 @@ * datablocks and metadata blocks. */ +#include <linux/blkdev.h> #include <linux/fs.h> #include <linux/vfs.h> #include <linux/slab.h> @@ -27,44 +28,103 @@ #include "page_actor.h" /* - * Read the metadata block length, this is stored in the first two - * bytes of the metadata block. + * Returns the amount of bytes copied to the page actor. */ -static struct buffer_head *get_block_length(struct super_block *sb, - u64 *cur_index, int *offset, int *length) +static int copy_bio_to_actor(struct bio *bio, + struct squashfs_page_actor *actor, + int offset, int req_length) +{ + void *actor_addr = squashfs_first_page(actor); + struct bvec_iter_all iter_all = {}; + struct bio_vec *bvec = bvec_init_iter_all(&iter_all); + int copied_bytes = 0; + int actor_offset = 0; + + if (WARN_ON_ONCE(!bio_next_segment(bio, &iter_all))) + return 0; + + while (copied_bytes < req_length) { + int bytes_to_copy = min_t(int, bvec->bv_len - offset, + PAGE_SIZE - actor_offset); + + bytes_to_copy = min_t(int, bytes_to_copy, + req_length - copied_bytes); + memcpy(actor_addr + actor_offset, + page_address(bvec->bv_page) + bvec->bv_offset + offset, + bytes_to_copy); + + actor_offset += bytes_to_copy; + copied_bytes += bytes_to_copy; + offset += bytes_to_copy; + + if (actor_offset >= PAGE_SIZE) { + actor_addr = squashfs_next_page(actor); + if (!actor_addr) + break; + actor_offset = 0; + } + if (offset >= bvec->bv_len) { + if (!bio_next_segment(bio, &iter_all)) + break; + offset = 0; + } + } + squashfs_finish_page(actor); + return copied_bytes; +} + +static int squashfs_bio_read(struct super_block *sb, u64 index, int length, + struct bio **biop, int *block_offset) { struct squashfs_sb_info *msblk = sb->s_fs_info; - struct buffer_head *bh; - - bh = sb_bread(sb, *cur_index); - if (bh == NULL) - return NULL; - - if (msblk->devblksize - *offset == 1) { - *length = (unsigned char) bh->b_data[*offset]; - put_bh(bh); - bh = sb_bread(sb, ++(*cur_index)); - if (bh == NULL) - return NULL; - *length |= (unsigned char) bh->b_data[0] << 8; - *offset = 1; - } else { - *length = (unsigned char) bh->b_data[*offset] | - (unsigned char) bh->b_data[*offset + 1] << 8; - *offset += 2; - - if (*offset == msblk->devblksize) { - put_bh(bh); - bh = sb_bread(sb, ++(*cur_index)); - if (bh == NULL) - return NULL; - *offset = 0; + const u64 read_start = round_down(index, msblk->devblksize); + const sector_t block = read_start >> msblk->devblksize_log2; + const u64 read_end = round_up(index + length, msblk->devblksize); + const sector_t block_end = read_end >> msblk->devblksize_log2; + int offset = read_start - round_down(index, PAGE_SIZE); + int total_len = (block_end - block) << msblk->devblksize_log2; + const int page_count = DIV_ROUND_UP(total_len + offset, PAGE_SIZE); + int error, i; + struct bio *bio; + + bio = bio_alloc(GFP_NOIO, page_count); + if (!bio) + return -ENOMEM; + + bio_set_dev(bio, sb->s_bdev); + bio->bi_opf = READ; + bio->bi_iter.bi_sector = block * (msblk->devblksize >> SECTOR_SHIFT); + + for (i = 0; i < page_count; ++i) { + unsigned int len = + min_t(unsigned int, PAGE_SIZE - offset, total_len); + struct page *page = alloc_page(GFP_NOIO); + + if (!page) { + error = -ENOMEM; + goto out_free_bio; + } + if (!bio_add_page(bio, page, len, offset)) { + error = -EIO; + goto out_free_bio; } + offset = 0; + total_len -= len; } - return bh; -} + error = submit_bio_wait(bio); + if (error) + goto out_free_bio; + *biop = bio; + *block_offset = index & ((1 << msblk->devblksize_log2) - 1); + return 0; + +out_free_bio: + bio_free_pages(bio); + bio_put(bio); + return error; +} /* * Read and decompress a metadata block or datablock. Length is non-zero @@ -76,129 +136,88 @@ static struct buffer_head *get_block_length(struct super_block *sb, * algorithms). */ int squashfs_read_data(struct super_block *sb, u64 index, int length, - u64 *next_index, struct squashfs_page_actor *output) + u64 *next_index, struct squashfs_page_actor *output) { struct squashfs_sb_info *msblk = sb->s_fs_info; - struct buffer_head **bh; - int offset = index & ((1 << msblk->devblksize_log2) - 1); - u64 cur_index = index >> msblk->devblksize_log2; - int bytes, compressed, b = 0, k = 0, avail, i; - - bh = kcalloc(((output->length + msblk->devblksize - 1) - >> msblk->devblksize_log2) + 1, sizeof(*bh), GFP_KERNEL); - if (bh == NULL) - return -ENOMEM; + struct bio *bio = NULL; + int compressed; + int res; + int offset; if (length) { /* * Datablock. */ - bytes = -offset; compressed = SQUASHFS_COMPRESSED_BLOCK(length); length = SQUASHFS_COMPRESSED_SIZE_BLOCK(length); - if (next_index) - *next_index = index + length; - TRACE("Block @ 0x%llx, %scompressed size %d, src size %d\n", index, compressed ? "" : "un", length, output->length); - - if (length < 0 || length > output->length || - (index + length) > msblk->bytes_used) - goto read_failure; - - for (b = 0; bytes < length; b++, cur_index++) { - bh[b] = sb_getblk(sb, cur_index); - if (bh[b] == NULL) - goto block_release; - bytes += msblk->devblksize; - } - ll_rw_block(REQ_OP_READ, 0, b, bh); } else { /* * Metadata block. */ - if ((index + 2) > msblk->bytes_used) - goto read_failure; + const u8 *data; + struct bvec_iter_all iter_all = {}; + struct bio_vec *bvec = bvec_init_iter_all(&iter_all); - bh[0] = get_block_length(sb, &cur_index, &offset, &length); - if (bh[0] == NULL) - goto read_failure; - b = 1; + if (index + 2 > msblk->bytes_used) { + res = -EIO; + goto out; + } + res = squashfs_bio_read(sb, index, 2, &bio, &offset); + if (res) + goto out; + + if (WARN_ON_ONCE(!bio_next_segment(bio, &iter_all))) { + res = -EIO; + goto out_free_bio; + } + /* Extract the length of the metadata block */ + data = page_address(bvec->bv_page) + bvec->bv_offset; + length = data[offset]; + if (offset <= bvec->bv_len - 1) { + length |= data[offset + 1] << 8; + } else { + if (WARN_ON_ONCE(!bio_next_segment(bio, &iter_all))) { + res = -EIO; + goto out_free_bio; + } + data = page_address(bvec->bv_page) + bvec->bv_offset; + length |= data[0] << 8; + } + bio_free_pages(bio); + bio_put(bio); - bytes = msblk->devblksize - offset; compressed = SQUASHFS_COMPRESSED(length); length = SQUASHFS_COMPRESSED_SIZE(length); - if (next_index) - *next_index = index + length + 2; + index += 2; TRACE("Block @ 0x%llx, %scompressed size %d\n", index, - compressed ? "" : "un", length); - - if (length < 0 || length > output->length || - (index + length) > msblk->bytes_used) - goto block_release; - - for (; bytes < length; b++) { - bh[b] = sb_getblk(sb, ++cur_index); - if (bh[b] == NULL) - goto block_release; - bytes += msblk->devblksize; - } - ll_rw_block(REQ_OP_READ, 0, b - 1, bh + 1); + compressed ? "" : "un", length); } + if (next_index) + *next_index = index + length; - for (i = 0; i < b; i++) { - wait_on_buffer(bh[i]); - if (!buffer_uptodate(bh[i])) - goto block_release; - } + res = squashfs_bio_read(sb, index, length, &bio, &offset); + if (res) + goto out; if (compressed) { - if (!msblk->stream) - goto read_failure; - length = squashfs_decompress(msblk, bh, b, offset, length, - output); - if (length < 0) - goto read_failure; - } else { - /* - * Block is uncompressed. - */ - int in, pg_offset = 0; - void *data = squashfs_first_page(output); - - for (bytes = length; k < b; k++) { - in = min(bytes, msblk->devblksize - offset); - bytes -= in; - while (in) { - if (pg_offset == PAGE_SIZE) { - data = squashfs_next_page(output); - pg_offset = 0; - } - avail = min_t(int, in, PAGE_SIZE - - pg_offset); - memcpy(data + pg_offset, bh[k]->b_data + offset, - avail); - in -= avail; - pg_offset += avail; - offset += avail; - } - offset = 0; - put_bh(bh[k]); + if (!msblk->stream) { + res = -EIO; + goto out_free_bio; } - squashfs_finish_page(output); + res = squashfs_decompress(msblk, bio, offset, length, output); + } else { + res = copy_bio_to_actor(bio, output, offset, length); } - kfree(bh); - return length; - -block_release: - for (; k < b; k++) - put_bh(bh[k]); +out_free_bio: + bio_free_pages(bio); + bio_put(bio); +out: + if (res < 0) + ERROR("Failed to read block 0x%llx: %d\n", index, res); -read_failure: - ERROR("squashfs_read_data failed to read block 0x%llx\n", - (unsigned long long) index); - kfree(bh); - return -EIO; + return res; } diff --git a/fs/squashfs/decompressor.h b/fs/squashfs/decompressor.h index ec8617523e56..1b9ccfd0aa51 100644 --- a/fs/squashfs/decompressor.h +++ b/fs/squashfs/decompressor.h @@ -10,13 +10,14 @@ * decompressor.h */ +#include <linux/bio.h> + struct squashfs_decompressor { void *(*init)(struct squashfs_sb_info *, void *); void *(*comp_opts)(struct squashfs_sb_info *, void *, int); void (*free)(void *); int (*decompress)(struct squashfs_sb_info *, void *, - struct buffer_head **, int, int, int, - struct squashfs_page_actor *); + struct bio *, int, int, struct squashfs_page_actor *); int id; char *name; int supported; diff --git a/fs/squashfs/decompressor_multi.c b/fs/squashfs/decompressor_multi.c index c181dee235bb..db9f12a3ea05 100644 --- a/fs/squashfs/decompressor_multi.c +++ b/fs/squashfs/decompressor_multi.c @@ -6,7 +6,7 @@ #include <linux/types.h> #include <linux/mutex.h> #include <linux/slab.h> -#include <linux/buffer_head.h> +#include <linux/bio.h> #include <linux/sched.h> #include <linux/wait.h> #include <linux/cpumask.h> @@ -180,14 +180,15 @@ wait: } -int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, - int b, int offset, int length, struct squashfs_page_actor *output) +int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio, + int offset, int length, + struct squashfs_page_actor *output) { int res; struct squashfs_stream *stream = msblk->stream; struct decomp_stream *decomp_stream = get_decomp_stream(msblk, stream); res = msblk->decompressor->decompress(msblk, decomp_stream->stream, - bh, b, offset, length, output); + bio, offset, length, output); put_decomp_stream(decomp_stream, stream); if (res < 0) ERROR("%s decompression failed, data probably corrupt\n", diff --git a/fs/squashfs/decompressor_multi_percpu.c b/fs/squashfs/decompressor_multi_percpu.c index e206ebfe003c..b881b9283b7f 100644 --- a/fs/squashfs/decompressor_multi_percpu.c +++ b/fs/squashfs/decompressor_multi_percpu.c @@ -75,8 +75,8 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) } } -int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, - int b, int offset, int length, struct squashfs_page_actor *output) +int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio, + int offset, int length, struct squashfs_page_actor *output) { struct squashfs_stream *stream; int res; @@ -84,8 +84,8 @@ int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, local_lock(&msblk->stream->lock); stream = this_cpu_ptr(msblk->stream); - res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, - offset, length, output); + res = msblk->decompressor->decompress(msblk, stream->stream, bio, + offset, length, output); local_unlock(&msblk->stream->lock); diff --git a/fs/squashfs/decompressor_single.c b/fs/squashfs/decompressor_single.c index 550c3e592032..4eb3d083d45e 100644 --- a/fs/squashfs/decompressor_single.c +++ b/fs/squashfs/decompressor_single.c @@ -7,7 +7,7 @@ #include <linux/types.h> #include <linux/mutex.h> #include <linux/slab.h> -#include <linux/buffer_head.h> +#include <linux/bio.h> #include "squashfs_fs.h" #include "squashfs_fs_sb.h" @@ -59,14 +59,15 @@ void squashfs_decompressor_destroy(struct squashfs_sb_info *msblk) } } -int squashfs_decompress(struct squashfs_sb_info *msblk, struct buffer_head **bh, - int b, int offset, int length, struct squashfs_page_actor *output) +int squashfs_decompress(struct squashfs_sb_info *msblk, struct bio *bio, + int offset, int length, + struct squashfs_page_actor *output) { int res; struct squashfs_stream *stream = msblk->stream; mutex_lock(&stream->mutex); - res = msblk->decompressor->decompress(msblk, stream->stream, bh, b, + res = msblk->decompressor->decompress(msblk, stream->stream, bio, offset, length, output); mutex_unlock(&stream->mutex); diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c index c4e47e0588c7..233d5582fbee 100644 --- a/fs/squashfs/lz4_wrapper.c +++ b/fs/squashfs/lz4_wrapper.c @@ -4,7 +4,7 @@ * Phillip Lougher <phillip@squashfs.org.uk> */ -#include <linux/buffer_head.h> +#include <linux/bio.h> #include <linux/mutex.h> #include <linux/slab.h> #include <linux/vmalloc.h> @@ -89,20 +89,23 @@ static void lz4_free(void *strm) static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm, - struct buffer_head **bh, int b, int offset, int length, + struct bio *bio, int offset, int length, struct squashfs_page_actor *output) { + struct bvec_iter_all iter_all = {}; + struct bio_vec *bvec = bvec_init_iter_all(&iter_all); struct squashfs_lz4 *stream = strm; void *buff = stream->input, *data; - int avail, i, bytes = length, res; + int bytes = length, res; - for (i = 0; i < b; i++) { - avail = min(bytes, msblk->devblksize - offset); - memcpy(buff, bh[i]->b_data + offset, avail); + while (bio_next_segment(bio, &iter_all)) { + int avail = min(bytes, ((int)bvec->bv_len) - offset); + + data = page_address(bvec->bv_page) + bvec->bv_offset; + memcpy(buff, data + offset, avail); buff += avail; bytes -= avail; offset = 0; - put_bh(bh[i]); } res = LZ4_decompress_safe(stream->input, stream->output, diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c index aa3c3dafc33d..97bb7d92ddcd 100644 --- a/fs/squashfs/lzo_wrapper.c +++ b/fs/squashfs/lzo_wrapper.c @@ -9,7 +9,7 @@ */ #include <linux/mutex.h> -#include <linux/buffer_head.h> +#include <linux/bio.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/lzo.h> @@ -63,21 +63,24 @@ static void lzo_free(void *strm) static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm, - struct buffer_head **bh, int b, int offset, int length, + struct bio *bio, int offset, int length, struct squashfs_page_actor *output) { + struct bvec_iter_all iter_all = {}; + struct bio_vec *bvec = bvec_init_iter_all(&iter_all); struct squashfs_lzo *stream = strm; void *buff = stream->input, *data; - int avail, i, bytes = length, res; + int bytes = length, res; size_t out_len = output->length; - for (i = 0; i < b; i++) { - avail = min(bytes, msblk->devblksize - offset); - memcpy(buff, bh[i]->b_data + offset, avail); + while (bio_next_segment(bio, &iter_all)) { + int avail = min(bytes, ((int)bvec->bv_len) - offset); + + data = page_address(bvec->bv_page) + bvec->bv_offset; + memcpy(buff, data + offset, avail); buff += avail; bytes -= avail; offset = 0; - put_bh(bh[i]); } res = lzo1x_decompress_safe(stream->input, (size_t)length, diff --git a/fs/squashfs/squashfs.h b/fs/squashfs/squashfs.h index 2797763ed046..9783e01c8100 100644 --- a/fs/squashfs/squashfs.h +++ b/fs/squashfs/squashfs.h @@ -40,8 +40,8 @@ extern void *squashfs_decompressor_setup(struct super_block *, unsigned short); /* decompressor_xxx.c */ extern void *squashfs_decompressor_create(struct squashfs_sb_info *, void *); extern void squashfs_decompressor_destroy(struct squashfs_sb_info *); -extern int squashfs_decompress(struct squashfs_sb_info *, struct buffer_head **, - int, int, int, struct squashfs_page_actor *); +extern int squashfs_decompress(struct squashfs_sb_info *, struct bio *, + int, int, struct squashfs_page_actor *); extern int squashfs_max_decompressors(void); /* export.c */ diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c index 4b2f2051a6dc..e80419aed862 100644 --- a/fs/squashfs/xz_wrapper.c +++ b/fs/squashfs/xz_wrapper.c @@ -10,7 +10,7 @@ #include <linux/mutex.h> -#include <linux/buffer_head.h> +#include <linux/bio.h> #include <linux/slab.h> #include <linux/xz.h> #include <linux/bitops.h> @@ -117,11 +117,12 @@ static void squashfs_xz_free(void *strm) static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, - struct buffer_head **bh, int b, int offset, int length, + struct bio *bio, int offset, int length, struct squashfs_page_actor *output) { - enum xz_ret xz_err; - int avail, total = 0, k = 0; + struct bvec_iter_all iter_all = {}; + struct bio_vec *bvec = bvec_init_iter_all(&iter_all); + int total = 0, error = 0; struct squashfs_xz *stream = strm; xz_dec_reset(stream->state); @@ -131,11 +132,23 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, stream->buf.out_size = PAGE_SIZE; stream->buf.out = squashfs_first_page(output); - do { - if (stream->buf.in_pos == stream->buf.in_size && k < b) { - avail = min(length, msblk->devblksize - offset); + for (;;) { + enum xz_ret xz_err; + + if (stream->buf.in_pos == stream->buf.in_size) { + const void *data; + int avail; + + if (!bio_next_segment(bio, &iter_all)) { + /* XZ_STREAM_END must be reached. */ + error = -EIO; + break; + } + + avail = min(length, ((int)bvec->bv_len) - offset); + data = page_address(bvec->bv_page) + bvec->bv_offset; length -= avail; - stream->buf.in = bh[k]->b_data + offset; + stream->buf.in = data + offset; stream->buf.in_size = avail; stream->buf.in_pos = 0; offset = 0; @@ -150,23 +163,17 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm, } xz_err = xz_dec_run(stream->state, &stream->buf); - - if (stream->buf.in_pos == stream->buf.in_size && k < b) - put_bh(bh[k++]); - } while (xz_err == XZ_OK); + if (xz_err == XZ_STREAM_END) + break; + if (xz_err != XZ_OK) { + error = -EIO; + break; + } + } squashfs_finish_page(output); - if (xz_err != XZ_STREAM_END || k < b) - goto out; - - return total + stream->buf.out_pos; - -out: - for (; k < b; k++) - put_bh(bh[k]); - - return -EIO; + return error ? error : total + stream->buf.out_pos; } const struct squashfs_decompressor squashfs_xz_comp_ops = { diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c index f2226afa1625..bcb881ec47f2 100644 --- a/fs/squashfs/zlib_wrapper.c +++ b/fs/squashfs/zlib_wrapper.c @@ -10,7 +10,7 @@ #include <linux/mutex.h> -#include <linux/buffer_head.h> +#include <linux/bio.h> #include <linux/slab.h> #include <linux/zlib.h> #include <linux/vmalloc.h> @@ -50,21 +50,35 @@ static void zlib_free(void *strm) static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, - struct buffer_head **bh, int b, int offset, int length, + struct bio *bio, int offset, int length, struct squashfs_page_actor *output) { - int zlib_err, zlib_init = 0, k = 0; + struct bvec_iter_all iter_all = {}; + struct bio_vec *bvec = bvec_init_iter_all(&iter_all); + int zlib_init = 0, error = 0; z_stream *stream = strm; stream->avail_out = PAGE_SIZE; stream->next_out = squashfs_first_page(output); stream->avail_in = 0; - do { - if (stream->avail_in == 0 && k < b) { - int avail = min(length, msblk->devblksize - offset); + for (;;) { + int zlib_err; + + if (stream->avail_in == 0) { + const void *data; + int avail; + + if (!bio_next_segment(bio, &iter_all)) { + /* Z_STREAM_END must be reached. */ + error = -EIO; + break; + } + + avail = min(length, ((int)bvec->bv_len) - offset); + data = page_address(bvec->bv_page) + bvec->bv_offset; length -= avail; - stream->next_in = bh[k]->b_data + offset; + stream->next_in = data + offset; stream->avail_in = avail; offset = 0; } @@ -78,37 +92,28 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm, if (!zlib_init) { zlib_err = zlib_inflateInit(stream); if (zlib_err != Z_OK) { - squashfs_finish_page(output); - goto out; + error = -EIO; + break; } zlib_init = 1; } zlib_err = zlib_inflate(stream, Z_SYNC_FLUSH); - - if (stream->avail_in == 0 && k < b) - put_bh(bh[k++]); - } while (zlib_err == Z_OK); + if (zlib_err == Z_STREAM_END) + break; + if (zlib_err != Z_OK) { + error = -EIO; + break; + } + } squashfs_finish_page(output); - if (zlib_err != Z_STREAM_END) - goto out; - - zlib_err = zlib_inflateEnd(stream); - if (zlib_err != Z_OK) - goto out; - - if (k < b) - goto out; - - return stream->total_out; - -out: - for (; k < b; k++) - put_bh(bh[k]); + if (!error) + if (zlib_inflateEnd(stream) != Z_OK) + error = -EIO; - return -EIO; + return error ? error : stream->total_out; } const struct squashfs_decompressor squashfs_zlib_comp_ops = { diff --git a/fs/squashfs/zstd_wrapper.c b/fs/squashfs/zstd_wrapper.c index b448c2a1d0ed..b7cb1faa652d 100644 --- a/fs/squashfs/zstd_wrapper.c +++ b/fs/squashfs/zstd_wrapper.c @@ -9,7 +9,7 @@ */ #include <linux/mutex.h> -#include <linux/buffer_head.h> +#include <linux/bio.h> #include <linux/slab.h> #include <linux/zstd.h> #include <linux/vmalloc.h> @@ -59,33 +59,44 @@ static void zstd_free(void *strm) static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm, - struct buffer_head **bh, int b, int offset, int length, + struct bio *bio, int offset, int length, struct squashfs_page_actor *output) { struct workspace *wksp = strm; ZSTD_DStream *stream; size_t total_out = 0; - size_t zstd_err; - int k = 0; + int error = 0; ZSTD_inBuffer in_buf = { NULL, 0, 0 }; ZSTD_outBuffer out_buf = { NULL, 0, 0 }; + struct bvec_iter_all iter_all = {}; + struct bio_vec *bvec = bvec_init_iter_all(&iter_all); stream = ZSTD_initDStream(wksp->window_size, wksp->mem, wksp->mem_size); if (!stream) { ERROR("Failed to initialize zstd decompressor\n"); - goto out; + return -EIO; } out_buf.size = PAGE_SIZE; out_buf.dst = squashfs_first_page(output); - do { - if (in_buf.pos == in_buf.size && k < b) { - int avail = min(length, msblk->devblksize - offset); + for (;;) { + size_t zstd_err; + if (in_buf.pos == in_buf.size) { + const void *data; + int avail; + + if (!bio_next_segment(bio, &iter_all)) { + error = -EIO; + break; + } + + avail = min(length, ((int)bvec->bv_len) - offset); + data = page_address(bvec->bv_page) + bvec->bv_offset; length -= avail; - in_buf.src = bh[k]->b_data + offset; + in_buf.src = data + offset; in_buf.size = avail; in_buf.pos = 0; offset = 0; @@ -97,8 +108,8 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm, /* Shouldn't run out of pages * before stream is done. */ - squashfs_finish_page(output); - goto out; + error = -EIO; + break; } out_buf.pos = 0; out_buf.size = PAGE_SIZE; @@ -107,29 +118,20 @@ static int zstd_uncompress(struct squashfs_sb_info *msblk, void *strm, total_out -= out_buf.pos; zstd_err = ZSTD_decompressStream(stream, &out_buf, &in_buf); total_out += out_buf.pos; /* add the additional data produced */ - - if (in_buf.pos == in_buf.size && k < b) - put_bh(bh[k++]); - } while (zstd_err != 0 && !ZSTD_isError(zstd_err)); - - squashfs_finish_page(output); - - if (ZSTD_isError(zstd_err)) { - ERROR("zstd decompression error: %d\n", - (int)ZSTD_getErrorCode(zstd_err)); - goto out; + if (zstd_err == 0) + break; + + if (ZSTD_isError(zstd_err)) { + ERROR("zstd decompression error: %d\n", + (int)ZSTD_getErrorCode(zstd_err)); + error = -EIO; + break; + } } - if (k < b) - goto out; - - return (int)total_out; - -out: - for (; k < b; k++) - put_bh(bh[k]); + squashfs_finish_page(output); - return -EIO; + return error ? error : total_out; } const struct squashfs_decompressor squashfs_zstd_comp_ops = { diff --git a/fs/sync.c b/fs/sync.c index 4d1ff010bc5a..c6f6f5be5682 100644 --- a/fs/sync.c +++ b/fs/sync.c @@ -161,7 +161,7 @@ SYSCALL_DEFINE1(syncfs, int, fd) { struct fd f = fdget(fd); struct super_block *sb; - int ret; + int ret, ret2; if (!f.file) return -EBADF; @@ -171,8 +171,10 @@ SYSCALL_DEFINE1(syncfs, int, fd) ret = sync_filesystem(sb); up_read(&sb->s_umount); + ret2 = errseq_check_and_advance(&sb->s_wb_err, &f.file->f_sb_err); + fdput(f); - return ret; + return ret ? ret : ret2; } /** diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index 0f5a480fe264..31288d8fa2ce 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -815,7 +815,7 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum) pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum); - buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + buf = __vmalloc(c->leb_size, GFP_NOFS); if (!buf) { ubifs_err(c, "cannot allocate memory for dumping LEB %d", lnum); return; diff --git a/fs/ubifs/lprops.c b/fs/ubifs/lprops.c index 29826c51883a..22bfda158f7f 100644 --- a/fs/ubifs/lprops.c +++ b/fs/ubifs/lprops.c @@ -1095,7 +1095,7 @@ static int scan_check_cb(struct ubifs_info *c, return LPT_SCAN_CONTINUE; } - buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + buf = __vmalloc(c->leb_size, GFP_NOFS); if (!buf) return -ENOMEM; diff --git a/fs/ubifs/lpt_commit.c b/fs/ubifs/lpt_commit.c index ff5e0411cf2d..d76a19e460cd 100644 --- a/fs/ubifs/lpt_commit.c +++ b/fs/ubifs/lpt_commit.c @@ -1596,7 +1596,7 @@ static int dbg_check_ltab_lnum(struct ubifs_info *c, int lnum) if (!dbg_is_chk_lprops(c)) return 0; - buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + buf = p = __vmalloc(c->leb_size, GFP_NOFS); if (!buf) { ubifs_err(c, "cannot allocate memory for ltab checking"); return 0; @@ -1845,7 +1845,7 @@ static void dump_lpt_leb(const struct ubifs_info *c, int lnum) void *buf, *p; pr_err("(pid %d) start dumping LEB %d\n", current->pid, lnum); - buf = p = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + buf = p = __vmalloc(c->leb_size, GFP_NOFS); if (!buf) { ubifs_err(c, "cannot allocate memory to dump LPT"); return; diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 283f9eb48410..2c294085ffed 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -977,7 +977,7 @@ static int dbg_scan_orphans(struct ubifs_info *c, struct check_info *ci) if (c->no_orphs) return 0; - buf = __vmalloc(c->leb_size, GFP_NOFS, PAGE_KERNEL); + buf = __vmalloc(c->leb_size, GFP_NOFS); if (!buf) { ubifs_err(c, "cannot allocate memory to check orphans"); return 0; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index e875bc5668ee..adaba8e8b326 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -195,10 +195,9 @@ static int udf_readpage(struct file *file, struct page *page) return mpage_readpage(page, udf_get_block); } -static int udf_readpages(struct file *file, struct address_space *mapping, - struct list_head *pages, unsigned nr_pages) +static void udf_readahead(struct readahead_control *rac) { - return mpage_readpages(mapping, pages, nr_pages, udf_get_block); + mpage_readahead(rac, udf_get_block); } static int udf_write_begin(struct file *file, struct address_space *mapping, @@ -234,7 +233,7 @@ static sector_t udf_bmap(struct address_space *mapping, sector_t block) const struct address_space_operations udf_aops = { .readpage = udf_readpage, - .readpages = udf_readpages, + .readahead = udf_readahead, .writepage = udf_writepage, .writepages = udf_writepages, .write_begin = udf_write_begin, diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 1da94237a8cf..f1366475c389 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -48,7 +48,7 @@ __kmem_vmalloc(size_t size, xfs_km_flags_t flags) if (flags & KM_NOFS) nofs_flag = memalloc_nofs_save(); - ptr = __vmalloc(size, lflags, PAGE_KERNEL); + ptr = __vmalloc(size, lflags); if (flags & KM_NOFS) memalloc_nofs_restore(nofs_flag); diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 9d9cebf18726..1fd4fb7a607c 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -621,14 +621,11 @@ xfs_vm_readpage( return iomap_readpage(page, &xfs_read_iomap_ops); } -STATIC int -xfs_vm_readpages( - struct file *unused, - struct address_space *mapping, - struct list_head *pages, - unsigned nr_pages) +STATIC void +xfs_vm_readahead( + struct readahead_control *rac) { - return iomap_readpages(mapping, pages, nr_pages, &xfs_read_iomap_ops); + iomap_readahead(rac, &xfs_read_iomap_ops); } static int @@ -644,7 +641,7 @@ xfs_iomap_swapfile_activate( const struct address_space_operations xfs_address_space_operations = { .readpage = xfs_vm_readpage, - .readpages = xfs_vm_readpages, + .readahead = xfs_vm_readahead, .writepage = xfs_vm_writepage, .writepages = xfs_vm_writepages, .set_page_dirty = iomap_set_page_dirty, diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 9ec3eaf1c618..65538d18e64f 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -477,7 +477,7 @@ _xfs_buf_map_pages( nofs_flag = memalloc_nofs_save(); do { bp->b_addr = vm_map_ram(bp->b_pages, bp->b_page_count, - -1, PAGE_KERNEL); + -1); if (bp->b_addr) break; vm_unmap_aliases(); diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c index 3ce9829a6936..dba874a61fc5 100644 --- a/fs/zonefs/super.c +++ b/fs/zonefs/super.c @@ -78,10 +78,9 @@ static int zonefs_readpage(struct file *unused, struct page *page) return iomap_readpage(page, &zonefs_iomap_ops); } -static int zonefs_readpages(struct file *unused, struct address_space *mapping, - struct list_head *pages, unsigned int nr_pages) +static void zonefs_readahead(struct readahead_control *rac) { - return iomap_readpages(mapping, pages, nr_pages, &zonefs_iomap_ops); + iomap_readahead(rac, &zonefs_iomap_ops); } /* @@ -128,7 +127,7 @@ static int zonefs_writepages(struct address_space *mapping, static const struct address_space_operations zonefs_file_aops = { .readpage = zonefs_readpage, - .readpages = zonefs_readpages, + .readahead = zonefs_readahead, .writepage = zonefs_writepage, .writepages = zonefs_writepages, .set_page_dirty = iomap_set_page_dirty, |