summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-05-02 14:13:46 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-05-02 14:13:46 -0700
commit9ccce092fc64d19504fa54de4fd659e279cc92e7 (patch)
tree050c91cf4d884d9feb27c1074c3f73cab4f5bbf8
parent27787ba3fa4904422b3928b898d1bd3d74d98bea (diff)
parent211f9f2e0503efa4023a46920e7ad07377b4ec58 (diff)
downloadlinux-9ccce092fc64d19504fa54de4fd659e279cc92e7.tar.bz2
Merge tag 'for-linus-5.13-ofs-1' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux
Pull orangefs updates from Mike Marshall: "orangefs: implement orangefs_readahead mm/readahead.c/read_pages was quite a bit different back when I put my open-coded readahead logic into orangefs_readpage. That logic seemed to work as designed back then, it is a trainwreck now. This implements orangefs_readahead using the new xarray and readahead_expand features and removes all my open-coded readahead logic. This results in an extreme read performance improvement, these sample numbers are from my test VM: Here's an example of what's upstream in 5.11.8-200.fc33.x86_64: 30+0 records in 30+0 records out 125829120 bytes (126 MB, 120 MiB) copied, 5.77943 s, 21.8 MB/s And here's this version of orangefs_readahead on top of 5.12.0-rc4: 30+0 records in 30+0 records out 125829120 bytes (126 MB, 120 MiB) copied, 0.325919 s, 386 MB/s There are four xfstest regressions with this patch. David Howells and Matthew Wilcox have been helping me work with this code" * tag 'for-linus-5.13-ofs-1' of git://git.kernel.org/pub/scm/linux/kernel/git/hubcap/linux: orangefs: leave files in the page cache for a few micro seconds at least Orangef: implement orangefs_readahead.
-rw-r--r--fs/orangefs/file.c34
-rw-r--r--fs/orangefs/inode.c122
-rw-r--r--fs/orangefs/orangefs-mod.c2
3 files changed, 54 insertions, 104 deletions
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index ccef8c9dd516..86810e5d7914 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -248,21 +248,7 @@ populate_shared_memory:
* or it can pointers to struct page's
*/
- /*
- * When reading, readahead_size will only be zero when
- * we're doing O_DIRECT, otherwise we got here from
- * orangefs_readpage.
- *
- * If we got here from orangefs_readpage we want to
- * copy either a page or the whole file into the io
- * vector, whichever is smaller.
- */
- if (readahead_size)
- copy_amount =
- min(new_op->downcall.resp.io.amt_complete,
- (__s64)PAGE_SIZE);
- else
- copy_amount = new_op->downcall.resp.io.amt_complete;
+ copy_amount = new_op->downcall.resp.io.amt_complete;
ret = orangefs_bufmap_copy_to_iovec(iter, buffer_index,
copy_amount);
@@ -283,19 +269,11 @@ populate_shared_memory:
out:
if (buffer_index >= 0) {
- if ((readahead_size) && (type == ORANGEFS_IO_READ)) {
- /* readpage */
- *index_return = buffer_index;
- gossip_debug(GOSSIP_FILE_DEBUG,
- "%s: hold on to buffer_index :%d:\n",
- __func__, buffer_index);
- } else {
- /* O_DIRECT */
- orangefs_bufmap_put(buffer_index);
- gossip_debug(GOSSIP_FILE_DEBUG,
- "%s(%pU): PUT buffer_index %d\n",
- __func__, handle, buffer_index);
- }
+ orangefs_bufmap_put(buffer_index);
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s(%pU): PUT buffer_index %d\n",
+ __func__, handle, buffer_index);
+ buffer_index = -1;
}
op_release(new_op);
return ret;
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
index 85b3dd2d769d..6bf35a0d61f3 100644
--- a/fs/orangefs/inode.c
+++ b/fs/orangefs/inode.c
@@ -245,6 +245,50 @@ static int orangefs_writepages(struct address_space *mapping,
static int orangefs_launder_page(struct page *);
+static void orangefs_readahead(struct readahead_control *rac)
+{
+ loff_t offset;
+ struct iov_iter iter;
+ struct file *file = rac->file;
+ struct inode *inode = file->f_mapping->host;
+ struct xarray *i_pages;
+ struct page *page;
+ loff_t new_start = readahead_pos(rac);
+ int ret;
+ size_t new_len = 0;
+
+ loff_t bytes_remaining = inode->i_size - readahead_pos(rac);
+ loff_t pages_remaining = bytes_remaining / PAGE_SIZE;
+
+ if (pages_remaining >= 1024)
+ new_len = 4194304;
+ else if (pages_remaining > readahead_count(rac))
+ new_len = bytes_remaining;
+
+ if (new_len)
+ readahead_expand(rac, new_start, new_len);
+
+ offset = readahead_pos(rac);
+ i_pages = &file->f_mapping->i_pages;
+
+ iov_iter_xarray(&iter, READ, i_pages, offset, readahead_length(rac));
+
+ /* read in the pages. */
+ if ((ret = wait_for_direct_io(ORANGEFS_IO_READ, inode,
+ &offset, &iter, readahead_length(rac),
+ inode->i_size, NULL, NULL, file)) < 0)
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s: wait_for_direct_io failed. \n", __func__);
+ else
+ ret = 0;
+
+ /* clean up. */
+ while ((page = readahead_page(rac))) {
+ page_endio(page, false, ret);
+ put_page(page);
+ }
+}
+
static int orangefs_readpage(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
@@ -252,44 +296,24 @@ static int orangefs_readpage(struct file *file, struct page *page)
struct bio_vec bv;
ssize_t ret;
loff_t off; /* offset into this page */
- pgoff_t index; /* which page */
- struct page *next_page;
- char *kaddr;
- loff_t read_size;
- int buffer_index = -1; /* orangefs shared memory slot */
- int slot_index; /* index into slot */
- int remaining;
-
- /*
- * Get up to this many bytes from Orangefs at a time and try
- * to fill them into the page cache at once. Tests with dd made
- * this seem like a reasonable static number, if there was
- * interest perhaps this number could be made setable through
- * sysfs...
- */
- read_size = 524288;
if (PageDirty(page))
orangefs_launder_page(page);
off = page_offset(page);
- index = off >> PAGE_SHIFT;
bv.bv_page = page;
bv.bv_len = PAGE_SIZE;
bv.bv_offset = 0;
iov_iter_bvec(&iter, READ, &bv, 1, PAGE_SIZE);
ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, &off, &iter,
- read_size, inode->i_size, NULL, &buffer_index, file);
- remaining = ret;
+ PAGE_SIZE, inode->i_size, NULL, NULL, file);
/* this will only zero remaining unread portions of the page data */
iov_iter_zero(~0U, &iter);
/* takes care of potential aliasing */
flush_dcache_page(page);
if (ret < 0) {
SetPageError(page);
- unlock_page(page);
- goto out;
} else {
SetPageUptodate(page);
if (PageError(page))
@@ -298,60 +322,7 @@ static int orangefs_readpage(struct file *file, struct page *page)
}
/* unlock the page after the ->readpage() routine completes */
unlock_page(page);
-
- if (remaining > PAGE_SIZE) {
- slot_index = 0;
- while ((remaining - PAGE_SIZE) >= PAGE_SIZE) {
- remaining -= PAGE_SIZE;
- /*
- * It is an optimization to try and fill more than one
- * page... by now we've already gotten the single
- * page we were after, if stuff doesn't seem to
- * be going our way at this point just return
- * and hope for the best.
- *
- * If we look for pages and they're already there is
- * one reason to give up, and if they're not there
- * and we can't create them is another reason.
- */
-
- index++;
- slot_index++;
- next_page = find_get_page(inode->i_mapping, index);
- if (next_page) {
- gossip_debug(GOSSIP_FILE_DEBUG,
- "%s: found next page, quitting\n",
- __func__);
- put_page(next_page);
- goto out;
- }
- next_page = find_or_create_page(inode->i_mapping,
- index,
- GFP_KERNEL);
- /*
- * I've never hit this, leave it as a printk for
- * now so it will be obvious.
- */
- if (!next_page) {
- printk("%s: can't create next page, quitting\n",
- __func__);
- goto out;
- }
- kaddr = kmap_atomic(next_page);
- orangefs_bufmap_page_fill(kaddr,
- buffer_index,
- slot_index);
- kunmap_atomic(kaddr);
- SetPageUptodate(next_page);
- unlock_page(next_page);
- put_page(next_page);
- }
- }
-
-out:
- if (buffer_index != -1)
- orangefs_bufmap_put(buffer_index);
- return ret;
+ return ret;
}
static int orangefs_write_begin(struct file *file,
@@ -660,6 +631,7 @@ out:
/** ORANGEFS2 implementation of address space operations */
static const struct address_space_operations orangefs_address_operations = {
.writepage = orangefs_writepage,
+ .readahead = orangefs_readahead,
.readpage = orangefs_readpage,
.writepages = orangefs_writepages,
.set_page_dirty = __set_page_dirty_nobuffers,
diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c
index 74a3d6337ef4..cd7297815f91 100644
--- a/fs/orangefs/orangefs-mod.c
+++ b/fs/orangefs/orangefs-mod.c
@@ -31,7 +31,7 @@ static ulong module_parm_debug_mask;
__u64 orangefs_gossip_debug_mask;
int op_timeout_secs = ORANGEFS_DEFAULT_OP_TIMEOUT_SECS;
int slot_timeout_secs = ORANGEFS_DEFAULT_SLOT_TIMEOUT_SECS;
-int orangefs_cache_timeout_msecs = 50;
+int orangefs_cache_timeout_msecs = 500;
int orangefs_dcache_timeout_msecs = 50;
int orangefs_getattr_timeout_msecs = 50;