diff options
author | Gao Xiang <hsiangkao@linux.alibaba.com> | 2021-10-09 04:08:39 +0800 |
---|---|---|
committer | Gao Xiang <hsiangkao@linux.alibaba.com> | 2021-10-19 23:44:30 +0800 |
commit | 386292919c255da42ff6d6d3c51594f796ac146a (patch) | |
tree | 091b5ad7c47dd5350335d45dd7dc3f529a21d6ab /fs/erofs/zdata.c | |
parent | 72bb52620fdffca95a14ee52188a33cd84e574e2 (diff) | |
download | linux-386292919c255da42ff6d6d3c51594f796ac146a.tar.bz2 |
erofs: introduce readmore decompression strategy
Previously, the readahead window was strictly followed by EROFS
decompression strategy in order to minimize extra memory footprint.
However, it could become inefficient if just reading the partial
requested data for much big LZ4 pclusters and the upcoming LZMA
implementation.
Let's try to request the leading data in a pcluster without
triggering memory reclaiming instead for the LZ4 approach first
to boost up 100% randread of large big pclusters, and it has no real
impact on low memory scenarios.
It also introduces a way to expand read lengths in order to decompress
the whole pcluster, which is useful for LZMA since the algorithm
itself is relatively slow and causes CPU bound, but LZ4 is not.
Link: https://lore.kernel.org/r/20211008200839.24541-4-xiang@kernel.org
Reviewed-by: Chao Yu <chao@kernel.org>
Signed-off-by: Gao Xiang <hsiangkao@linux.alibaba.com>
Diffstat (limited to 'fs/erofs/zdata.c')
-rw-r--r-- | fs/erofs/zdata.c | 99 |
1 files changed, 80 insertions, 19 deletions
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c index a9dced07c3c6..98d3bd25d894 100644 --- a/fs/erofs/zdata.c +++ b/fs/erofs/zdata.c @@ -1387,6 +1387,72 @@ static void z_erofs_runqueue(struct super_block *sb, z_erofs_decompress_queue(&io[JQ_SUBMIT], pagepool); } +/* + * Since partial uptodate is still unimplemented for now, we have to use + * approximate readmore strategies as a start. + */ +static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f, + struct readahead_control *rac, + erofs_off_t end, + struct list_head *pagepool, + bool backmost) +{ + struct inode *inode = f->inode; + struct erofs_map_blocks *map = &f->map; + erofs_off_t cur; + int err; + + if (backmost) { + map->m_la = end; + /* TODO: pass in EROFS_GET_BLOCKS_READMORE for LZMA later */ + err = z_erofs_map_blocks_iter(inode, map, 0); + if (err) + return; + + /* expend ra for the trailing edge if readahead */ + if (rac) { + loff_t newstart = readahead_pos(rac); + + cur = round_up(map->m_la + map->m_llen, PAGE_SIZE); + readahead_expand(rac, newstart, cur - newstart); + return; + } + end = round_up(end, PAGE_SIZE); + } else { + end = round_up(map->m_la, PAGE_SIZE); + + if (!map->m_llen) + return; + } + + cur = map->m_la + map->m_llen - 1; + while (cur >= end) { + pgoff_t index = cur >> PAGE_SHIFT; + struct page *page; + + page = erofs_grab_cache_page_nowait(inode->i_mapping, index); + if (!page) + goto skip; + + if (PageUptodate(page)) { + unlock_page(page); + put_page(page); + goto skip; + } + + err = z_erofs_do_read_page(f, page, pagepool); + if (err) + erofs_err(inode->i_sb, + "readmore error at page %lu @ nid %llu", + index, EROFS_I(inode)->nid); + put_page(page); +skip: + if (cur < PAGE_SIZE) + break; + cur = (index << PAGE_SHIFT) - 1; + } +} + static int z_erofs_readpage(struct file *file, struct page *page) { struct inode *const inode = page->mapping->host; @@ -1395,10 +1461,13 @@ static int z_erofs_readpage(struct file *file, struct page *page) LIST_HEAD(pagepool); trace_erofs_readpage(page, false); - f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT; + z_erofs_pcluster_readmore(&f, NULL, f.headoffset + PAGE_SIZE - 1, + &pagepool, true); err = z_erofs_do_read_page(&f, page, &pagepool); + z_erofs_pcluster_readmore(&f, NULL, 0, &pagepool, false); + (void)z_erofs_collector_end(&f.clt); /* if some compressed cluster ready, need submit them anyway */ @@ -1419,29 +1488,20 @@ static void z_erofs_readahead(struct readahead_control *rac) { struct inode *const inode = rac->mapping->host; struct erofs_sb_info *const sbi = EROFS_I_SB(inode); - - unsigned int nr_pages = readahead_count(rac); - bool sync = (sbi->opt.readahead_sync_decompress && - nr_pages <= sbi->opt.max_sync_decompress_pages); struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode); struct page *page, *head = NULL; + unsigned int nr_pages; LIST_HEAD(pagepool); - trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false); - f.readahead = true; f.headoffset = readahead_pos(rac); - while ((page = readahead_page(rac))) { - prefetchw(&page->flags); - - /* - * A pure asynchronous readahead is indicated if - * a PG_readahead marked page is hitted at first. - * Let's also do asynchronous decompression for this case. - */ - sync &= !(PageReadahead(page) && !head); + z_erofs_pcluster_readmore(&f, rac, f.headoffset + + readahead_length(rac) - 1, &pagepool, true); + nr_pages = readahead_count(rac); + trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false); + while ((page = readahead_page(rac))) { set_page_private(page, (unsigned long)head); head = page; } @@ -1460,11 +1520,12 @@ static void z_erofs_readahead(struct readahead_control *rac) page->index, EROFS_I(inode)->nid); put_page(page); } - + z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, false); (void)z_erofs_collector_end(&f.clt); - z_erofs_runqueue(inode->i_sb, &f, &pagepool, sync); - + z_erofs_runqueue(inode->i_sb, &f, &pagepool, + sbi->opt.readahead_sync_decompress && + nr_pages <= sbi->opt.max_sync_decompress_pages); if (f.map.mpage) put_page(f.map.mpage); |