From 542b994bdb2662d38f1a400adf3e5da3adceb50d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 7 Feb 2020 19:25:56 -0500 Subject: NFS: Assume cred is pinned by open context in I/O requests In read/write/commit, we should be able to assume that the cred is pinned by the open context. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs/pagelist.c') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 20b3717cd7ca..c9c3edefc5be 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -627,7 +627,7 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, .callback_ops = call_ops, .callback_data = hdr, .workqueue = nfsiod_workqueue, - .flags = RPC_TASK_ASYNC | flags, + .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF | flags, }; int ret = 0; -- cgit v1.2.3 From 1de3af9883fe2b689d1f61b205e9f5a0cedca8e7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 28 Mar 2020 11:39:29 -0400 Subject: NFS: Remove unused FLUSH_SYNC support in nfs_initiate_pgio() If the FLUSH_SYNC flag is set, nfs_initiate_pgio() will currently wait for completion, and then return the status of the I/O operation. What we actually want to report in nfs_pageio_doio() is whether or not the RPC call was launched successfully, whereas actual I/O status is intended handled in the reply callbacks. Since FLUSH_SYNC is never set by any of the callers anyway, let's just remove that code altogether. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'fs/nfs/pagelist.c') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index c9c3edefc5be..be5e209399ea 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -629,7 +629,6 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, .workqueue = nfsiod_workqueue, .flags = RPC_TASK_ASYNC | RPC_TASK_CRED_NOREF | flags, }; - int ret = 0; hdr->rw_ops->rw_initiate(hdr, &msg, rpc_ops, &task_setup_data, how); @@ -641,18 +640,10 @@ int nfs_initiate_pgio(struct rpc_clnt *clnt, struct nfs_pgio_header *hdr, (unsigned long long)hdr->args.offset); task = rpc_run_task(&task_setup_data); - if (IS_ERR(task)) { - ret = PTR_ERR(task); - goto out; - } - if (how & FLUSH_SYNC) { - ret = rpc_wait_for_completion_task(task); - if (ret == 0) - ret = task->tk_status; - } + if (IS_ERR(task)) + return PTR_ERR(task); rpc_put_task(task); -out: - return ret; + return 0; } EXPORT_SYMBOL_GPL(nfs_initiate_pgio); -- cgit v1.2.3 From 08ca8b21f760c0ed5034a5c122092eec22ccf8f4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 1 Apr 2020 13:04:49 -0400 Subject: NFS: Fix races nfs_page_group_destroy() vs nfs_destroy_unlinked_subrequests() When a subrequest is being detached from the subgroup, we want to ensure that it is not holding the group lock, or in the process of waiting for the group lock. Fixes: 5b2b5187fa85 ("NFS: Fix nfs_page_group_destroy() and nfs_lock_and_join_requests() race cases") Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 67 ++++++++++++++++++++++++++++++++---------------- fs/nfs/write.c | 10 ++++++-- include/linux/nfs_page.h | 2 ++ 3 files changed, 55 insertions(+), 24 deletions(-) (limited to 'fs/nfs/pagelist.c') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index be5e209399ea..0e3f0f241d83 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -133,47 +133,70 @@ nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx) EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait); /* - * nfs_page_group_lock - lock the head of the page group - * @req - request in group that is to be locked + * nfs_page_set_headlock - set the request PG_HEADLOCK + * @req: request that is to be locked * - * this lock must be held when traversing or modifying the page - * group list + * this lock must be held when modifying req->wb_head * * return 0 on success, < 0 on error */ int -nfs_page_group_lock(struct nfs_page *req) +nfs_page_set_headlock(struct nfs_page *req) { - struct nfs_page *head = req->wb_head; - - WARN_ON_ONCE(head != head->wb_head); - - if (!test_and_set_bit(PG_HEADLOCK, &head->wb_flags)) + if (!test_and_set_bit(PG_HEADLOCK, &req->wb_flags)) return 0; - set_bit(PG_CONTENDED1, &head->wb_flags); + set_bit(PG_CONTENDED1, &req->wb_flags); smp_mb__after_atomic(); - return wait_on_bit_lock(&head->wb_flags, PG_HEADLOCK, + return wait_on_bit_lock(&req->wb_flags, PG_HEADLOCK, TASK_UNINTERRUPTIBLE); } /* - * nfs_page_group_unlock - unlock the head of the page group - * @req - request in group that is to be unlocked + * nfs_page_clear_headlock - clear the request PG_HEADLOCK + * @req: request that is to be locked */ void -nfs_page_group_unlock(struct nfs_page *req) +nfs_page_clear_headlock(struct nfs_page *req) { - struct nfs_page *head = req->wb_head; - - WARN_ON_ONCE(head != head->wb_head); - smp_mb__before_atomic(); - clear_bit(PG_HEADLOCK, &head->wb_flags); + clear_bit(PG_HEADLOCK, &req->wb_flags); smp_mb__after_atomic(); - if (!test_bit(PG_CONTENDED1, &head->wb_flags)) + if (!test_bit(PG_CONTENDED1, &req->wb_flags)) return; - wake_up_bit(&head->wb_flags, PG_HEADLOCK); + wake_up_bit(&req->wb_flags, PG_HEADLOCK); +} + +/* + * nfs_page_group_lock - lock the head of the page group + * @req: request in group that is to be locked + * + * this lock must be held when traversing or modifying the page + * group list + * + * return 0 on success, < 0 on error + */ +int +nfs_page_group_lock(struct nfs_page *req) +{ + int ret; + + ret = nfs_page_set_headlock(req); + if (ret || req->wb_head == req) + return ret; + return nfs_page_set_headlock(req->wb_head); +} + +/* + * nfs_page_group_unlock - unlock the head of the page group + * @req: request in group that is to be unlocked + */ +void +nfs_page_group_unlock(struct nfs_page *req) +{ + if (req != req->wb_head) + nfs_page_clear_headlock(req->wb_head); + nfs_page_clear_headlock(req); } /* diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 626e99cbb50e..a6d7926b0653 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -428,22 +428,28 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, destroy_list = (subreq->wb_this_page == old_head) ? NULL : subreq->wb_this_page; + /* Note: lock subreq in order to change subreq->wb_head */ + nfs_page_set_headlock(subreq); WARN_ON_ONCE(old_head != subreq->wb_head); /* make sure old group is not used */ subreq->wb_this_page = subreq; + subreq->wb_head = subreq; clear_bit(PG_REMOVE, &subreq->wb_flags); /* Note: races with nfs_page_group_destroy() */ if (!kref_read(&subreq->wb_kref)) { /* Check if we raced with nfs_page_group_destroy() */ - if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags)) + if (test_and_clear_bit(PG_TEARDOWN, &subreq->wb_flags)) { + nfs_page_clear_headlock(subreq); nfs_free_request(subreq); + } else + nfs_page_clear_headlock(subreq); continue; } + nfs_page_clear_headlock(subreq); - subreq->wb_head = subreq; nfs_release_request(old_head); if (test_and_clear_bit(PG_INODE_REF, &subreq->wb_flags)) { diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 0bbd587fac6a..7e9419d74b86 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -142,6 +142,8 @@ extern void nfs_unlock_and_release_request(struct nfs_page *); extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); +extern int nfs_page_set_headlock(struct nfs_page *req); +extern void nfs_page_clear_headlock(struct nfs_page *req); extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); /* -- cgit v1.2.3 From dc9dc2febb17f72e9878eb540ad3996f7984239a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 29 Mar 2020 19:55:05 -0400 Subject: NFS: Fix use-after-free issues in nfs_pageio_add_request() We need to ensure that we create the mirror requests before calling nfs_pageio_add_request_mirror() on the request we are adding. Otherwise, we can end up with a use-after-free if the call to nfs_pageio_add_request_mirror() triggers I/O. Fixes: c917cfaf9bbe ("NFS: Fix up NFS I/O subrequest creation") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 48 ++++++++++++++++++++++++------------------------ 1 file changed, 24 insertions(+), 24 deletions(-) (limited to 'fs/nfs/pagelist.c') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 0e3f0f241d83..99eb839c5778 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1191,38 +1191,38 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, if (desc->pg_error < 0) goto out_failed; - for (midx = 0; midx < desc->pg_mirror_count; midx++) { - if (midx) { - nfs_page_group_lock(req); - - /* find the last request */ - for (lastreq = req->wb_head; - lastreq->wb_this_page != req->wb_head; - lastreq = lastreq->wb_this_page) - ; - - dupreq = nfs_create_subreq(req, lastreq, - pgbase, offset, bytes); - - nfs_page_group_unlock(req); - if (IS_ERR(dupreq)) { - desc->pg_error = PTR_ERR(dupreq); - goto out_failed; - } - } else - dupreq = req; + /* Create the mirror instances first, and fire them off */ + for (midx = 1; midx < desc->pg_mirror_count; midx++) { + nfs_page_group_lock(req); + + /* find the last request */ + for (lastreq = req->wb_head; + lastreq->wb_this_page != req->wb_head; + lastreq = lastreq->wb_this_page) + ; + + dupreq = nfs_create_subreq(req, lastreq, + pgbase, offset, bytes); + + nfs_page_group_unlock(req); + if (IS_ERR(dupreq)) { + desc->pg_error = PTR_ERR(dupreq); + goto out_failed; + } - if (nfs_pgio_has_mirroring(desc)) - desc->pg_mirror_idx = midx; + desc->pg_mirror_idx = midx; if (!nfs_pageio_add_request_mirror(desc, dupreq)) goto out_cleanup_subreq; } + desc->pg_mirror_idx = 0; + if (!nfs_pageio_add_request_mirror(desc, req)) + goto out_failed; + return 1; out_cleanup_subreq: - if (req != dupreq) - nfs_pageio_cleanup_request(desc, dupreq); + nfs_pageio_cleanup_request(desc, dupreq); out_failed: nfs_pageio_error_cleanup(desc); return 0; -- cgit v1.2.3 From 862f35c94730c9270833f3ad05bd758a29f204ed Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 29 Mar 2020 20:06:45 -0400 Subject: NFS: Fix memory leaks in nfs_pageio_stop_mirroring() If we just set the mirror count to 1 without first clearing out the mirrors, we can leak queued up requests. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'fs/nfs/pagelist.c') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 99eb839c5778..1fd4862217e0 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -900,15 +900,6 @@ static void nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, pgio->pg_mirror_count = mirror_count; } -/* - * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1) - */ -void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio) -{ - pgio->pg_mirror_count = 1; - pgio->pg_mirror_idx = 0; -} - static void nfs_pageio_cleanup_mirroring(struct nfs_pageio_descriptor *pgio) { pgio->pg_mirror_count = 1; @@ -1334,6 +1325,14 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index) } } +/* + * nfs_pageio_stop_mirroring - stop using mirroring (set mirror count to 1) + */ +void nfs_pageio_stop_mirroring(struct nfs_pageio_descriptor *pgio) +{ + nfs_pageio_complete(pgio); +} + int __init nfs_init_nfspagecache(void) { nfs_page_cachep = kmem_cache_create("nfs_page", -- cgit v1.2.3 From 377840ee48cde0700678ef14141106bbd13e00b5 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 29 Mar 2020 20:03:33 -0400 Subject: NFS: Remove the redundant function nfs_pgio_has_mirroring() We need to trust that desc->pg_mirror_idx is set correctly, whether or not mirroring is enabled. Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 6 ------ fs/nfs/pagelist.c | 7 ++----- 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'fs/nfs/pagelist.c') diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 78f317fac940..1f32a9fbfdaf 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -274,12 +274,6 @@ void nfs_free_request(struct nfs_page *req); struct nfs_pgio_mirror * nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc); -static inline bool nfs_pgio_has_mirroring(struct nfs_pageio_descriptor *desc) -{ - WARN_ON_ONCE(desc->pg_mirror_count < 1); - return desc->pg_mirror_count > 1; -} - static inline bool nfs_match_open_context(const struct nfs_open_context *ctx1, const struct nfs_open_context *ctx2) { diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 1fd4862217e0..f535a92403bf 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -33,9 +33,7 @@ static const struct rpc_call_ops nfs_pgio_common_ops; struct nfs_pgio_mirror * nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc) { - return nfs_pgio_has_mirroring(desc) ? - &desc->pg_mirrors[desc->pg_mirror_idx] : - &desc->pg_mirrors[0]; + return &desc->pg_mirrors[desc->pg_mirror_idx]; } EXPORT_SYMBOL_GPL(nfs_pgio_current_mirror); @@ -1231,8 +1229,7 @@ static void nfs_pageio_complete_mirror(struct nfs_pageio_descriptor *desc, struct nfs_pgio_mirror *mirror = &desc->pg_mirrors[mirror_idx]; u32 restore_idx = desc->pg_mirror_idx; - if (nfs_pgio_has_mirroring(desc)) - desc->pg_mirror_idx = mirror_idx; + desc->pg_mirror_idx = mirror_idx; for (;;) { nfs_pageio_doio(desc); if (desc->pg_error < 0 || !mirror->pg_recoalesce) -- cgit v1.2.3 From a62f8e3bd836bf1abde1648a45e14afd050dbd23 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Mar 2020 11:12:16 -0400 Subject: NFS: Clean up nfs_lock_and_join_requests() Clean up nfs_lock_and_join_requests() to simplify the calculation of the range covered by the page group, taking into account the presence of mirrors. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 74 +++++++++++++++++++++++++++++++++++++++ fs/nfs/write.c | 91 +++++++++++------------------------------------- include/linux/nfs_page.h | 1 + 3 files changed, 95 insertions(+), 71 deletions(-) (limited to 'fs/nfs/pagelist.c') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index f535a92403bf..261236157e33 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -130,6 +130,80 @@ nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx) } EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait); +/* + * nfs_unroll_locks - unlock all newly locked reqs and wait on @req + * @head: head request of page group, must be holding head lock + * @req: request that couldn't lock and needs to wait on the req bit lock + * + * This is a helper function for nfs_lock_and_join_requests + * returns 0 on success, < 0 on error. + */ +static void +nfs_unroll_locks(struct nfs_page *head, struct nfs_page *req) +{ + struct nfs_page *tmp; + + /* relinquish all the locks successfully grabbed this run */ + for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) { + if (!kref_read(&tmp->wb_kref)) + continue; + nfs_unlock_and_release_request(tmp); + } +} + +/* + * nfs_page_group_lock_subreq - try to lock a subrequest + * @head: head request of page group + * @subreq: request to lock + * + * This is a helper function for nfs_lock_and_join_requests which + * must be called with the head request and page group both locked. + * On error, it returns with the page group unlocked. + */ +static int +nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq) +{ + int ret; + + if (!kref_get_unless_zero(&subreq->wb_kref)) + return 0; + while (!nfs_lock_request(subreq)) { + nfs_page_group_unlock(head); + ret = nfs_wait_on_request(subreq); + if (!ret) + ret = nfs_page_group_lock(head); + if (ret < 0) { + nfs_unroll_locks(head, subreq); + nfs_release_request(subreq); + return ret; + } + } + return 0; +} + +/* + * nfs_page_group_lock_subrequests - try to lock the subrequests + * @head: head request of page group + * + * This is a helper function for nfs_lock_and_join_requests which + * must be called with the head request and page group both locked. + * On error, it returns with the page group unlocked. + */ +int nfs_page_group_lock_subrequests(struct nfs_page *head) +{ + struct nfs_page *subreq; + int ret; + + /* lock each request in the page group */ + for (subreq = head->wb_this_page; subreq != head; + subreq = subreq->wb_this_page) { + ret = nfs_page_group_lock_subreq(head, subreq); + if (ret < 0) + return ret; + } + return 0; +} + /* * nfs_page_set_headlock - set the request PG_HEADLOCK * @req: request that is to be locked diff --git a/fs/nfs/write.c b/fs/nfs/write.c index a6d7926b0653..832cf57ea442 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -379,34 +379,6 @@ static void nfs_end_page_writeback(struct nfs_page *req) clear_bdi_congested(inode_to_bdi(inode), BLK_RW_ASYNC); } -/* - * nfs_unroll_locks_and_wait - unlock all newly locked reqs and wait on @req - * - * this is a helper function for nfs_lock_and_join_requests - * - * @inode - inode associated with request page group, must be holding inode lock - * @head - head request of page group, must be holding head lock - * @req - request that couldn't lock and needs to wait on the req bit lock - * - * NOTE: this must be called holding page_group bit lock - * which will be released before returning. - * - * returns 0 on success, < 0 on error. - */ -static void -nfs_unroll_locks(struct inode *inode, struct nfs_page *head, - struct nfs_page *req) -{ - struct nfs_page *tmp; - - /* relinquish all the locks successfully grabbed this run */ - for (tmp = head->wb_this_page ; tmp != req; tmp = tmp->wb_this_page) { - if (!kref_read(&tmp->wb_kref)) - continue; - nfs_unlock_and_release_request(tmp); - } -} - /* * nfs_destroy_unlinked_subrequests - destroy recently unlinked subrequests * @@ -487,7 +459,7 @@ nfs_lock_and_join_requests(struct page *page) struct inode *inode = page_file_mapping(page)->host; struct nfs_page *head, *subreq; struct nfs_page *destroy_list = NULL; - unsigned int total_bytes; + unsigned int pgbase, off, bytes; int ret; try_again: @@ -520,49 +492,30 @@ try_again: goto release_request; /* lock each request in the page group */ - total_bytes = head->wb_bytes; + ret = nfs_page_group_lock_subrequests(head); + if (ret < 0) + goto release_request; + + pgbase = head->wb_pgbase; + bytes = head->wb_bytes; + off = head->wb_offset; for (subreq = head->wb_this_page; subreq != head; subreq = subreq->wb_this_page) { - - if (!kref_get_unless_zero(&subreq->wb_kref)) { - if (subreq->wb_offset == head->wb_offset + total_bytes) - total_bytes += subreq->wb_bytes; - continue; - } - - while (!nfs_lock_request(subreq)) { - /* - * Unlock page to allow nfs_page_group_sync_on_bit() - * to succeed - */ - nfs_page_group_unlock(head); - ret = nfs_wait_on_request(subreq); - if (!ret) - ret = nfs_page_group_lock(head); - if (ret < 0) { - nfs_unroll_locks(inode, head, subreq); - nfs_release_request(subreq); - goto release_request; - } - } - /* - * Subrequests are always contiguous, non overlapping - * and in order - but may be repeated (mirrored writes). - */ - if (subreq->wb_offset == (head->wb_offset + total_bytes)) { - /* keep track of how many bytes this group covers */ - total_bytes += subreq->wb_bytes; - } else if (WARN_ON_ONCE(subreq->wb_offset < head->wb_offset || - ((subreq->wb_offset + subreq->wb_bytes) > - (head->wb_offset + total_bytes)))) { - nfs_page_group_unlock(head); - nfs_unroll_locks(inode, head, subreq); - nfs_unlock_and_release_request(subreq); - ret = -EIO; - goto release_request; + /* Subrequests should always form a contiguous range */ + if (pgbase > subreq->wb_pgbase) { + off -= pgbase - subreq->wb_pgbase; + bytes += pgbase - subreq->wb_pgbase; + pgbase = subreq->wb_pgbase; } + bytes = max(subreq->wb_pgbase + subreq->wb_bytes + - pgbase, bytes); } + /* Set the head request's range to cover the former page group */ + head->wb_pgbase = pgbase; + head->wb_bytes = bytes; + head->wb_offset = off; + /* Now that all requests are locked, make sure they aren't on any list. * Commit list removal accounting is done after locks are dropped */ subreq = head; @@ -576,10 +529,6 @@ try_again: /* destroy list will be terminated by head */ destroy_list = head->wb_this_page; head->wb_this_page = head; - - /* change head request to cover whole range that - * the former page group covered */ - head->wb_bytes = total_bytes; } /* Postpone destruction of this request */ diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 7e9419d74b86..dd205bc6bc58 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -139,6 +139,7 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); +extern int nfs_page_group_lock_subrequests(struct nfs_page *head); extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); -- cgit v1.2.3 From 44a65a0c278336719892287a185836fddeabb933 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 31 Mar 2020 18:27:26 -0400 Subject: NFS: Reverse the submission order of requests in __nfs_pageio_add_request() If we have to split the request up into subrequests, we have to submit the request pointed to by the function call parameter last, in case there is an error or other issue that causes us to exit before the last request is submitted. The reason is that the caller is expected to perform cleanup in those cases. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 133 ++++++++++++++++++++++++++---------------------------- 1 file changed, 64 insertions(+), 69 deletions(-) (limited to 'fs/nfs/pagelist.c') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 261236157e33..b9805d1dac75 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -454,15 +454,23 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page, } static struct nfs_page * -nfs_create_subreq(struct nfs_page *req, struct nfs_page *last, - unsigned int pgbase, unsigned int offset, +nfs_create_subreq(struct nfs_page *req, + unsigned int pgbase, + unsigned int offset, unsigned int count) { + struct nfs_page *last; struct nfs_page *ret; ret = __nfs_create_request(req->wb_lock_context, req->wb_page, pgbase, offset, count); if (!IS_ERR(ret)) { + /* find the last request */ + for (last = req->wb_head; + last->wb_this_page != req->wb_head; + last = last->wb_this_page) + ; + nfs_lock_request(ret); ret->wb_index = req->wb_index; nfs_page_group_init(ret, last); @@ -988,7 +996,7 @@ static bool nfs_match_lock_context(const struct nfs_lock_context *l1, } /** - * nfs_can_coalesce_requests - test two requests for compatibility + * nfs_coalesce_size - test two requests for compatibility * @prev: pointer to nfs_page * @req: pointer to nfs_page * @pgio: pointer to nfs_pagio_descriptor @@ -997,41 +1005,36 @@ static bool nfs_match_lock_context(const struct nfs_lock_context *l1, * page data area they describe is contiguous, and that their RPC * credentials, NFSv4 open state, and lockowners are the same. * - * Return 'true' if this is the case, else return 'false'. + * Returns size of the request that can be coalesced */ -static bool nfs_can_coalesce_requests(struct nfs_page *prev, +static unsigned int nfs_coalesce_size(struct nfs_page *prev, struct nfs_page *req, struct nfs_pageio_descriptor *pgio) { - size_t size; struct file_lock_context *flctx; if (prev) { if (!nfs_match_open_context(nfs_req_openctx(req), nfs_req_openctx(prev))) - return false; + return 0; flctx = d_inode(nfs_req_openctx(req)->dentry)->i_flctx; if (flctx != NULL && !(list_empty_careful(&flctx->flc_posix) && list_empty_careful(&flctx->flc_flock)) && !nfs_match_lock_context(req->wb_lock_context, prev->wb_lock_context)) - return false; + return 0; if (req_offset(req) != req_offset(prev) + prev->wb_bytes) - return false; + return 0; if (req->wb_page == prev->wb_page) { if (req->wb_pgbase != prev->wb_pgbase + prev->wb_bytes) - return false; + return 0; } else { if (req->wb_pgbase != 0 || prev->wb_pgbase + prev->wb_bytes != PAGE_SIZE) - return false; + return 0; } } - size = pgio->pg_ops->pg_test(pgio, prev, req); - WARN_ON_ONCE(size > req->wb_bytes); - if (size && size < req->wb_bytes) - req->wb_bytes = size; - return size > 0; + return pgio->pg_ops->pg_test(pgio, prev, req); } /** @@ -1039,15 +1042,16 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev, * @desc: destination io descriptor * @req: request * - * Returns true if the request 'req' was successfully coalesced into the - * existing list of pages 'desc'. + * If the request 'req' was successfully coalesced into the existing list + * of pages 'desc', it returns the size of req. */ -static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, - struct nfs_page *req) +static unsigned int +nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, + struct nfs_page *req) { struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); - struct nfs_page *prev = NULL; + unsigned int size; if (mirror->pg_count != 0) { prev = nfs_list_entry(mirror->pg_list.prev); @@ -1067,11 +1071,12 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, return 0; } - if (!nfs_can_coalesce_requests(prev, req, desc)) - return 0; + size = nfs_coalesce_size(prev, req, desc); + if (size < req->wb_bytes) + return size; nfs_list_move_request(req, &mirror->pg_list); mirror->pg_count += req->wb_bytes; - return 1; + return req->wb_bytes; } /* @@ -1111,7 +1116,8 @@ nfs_pageio_cleanup_request(struct nfs_pageio_descriptor *desc, * @req: request * * This may split a request into subrequests which are all part of the - * same page group. + * same page group. If so, it will submit @req as the last one, to ensure + * the pointer to @req is still valid in case of failure. * * Returns true if the request 'req' was successfully coalesced into the * existing list of pages 'desc'. @@ -1120,51 +1126,50 @@ static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); - struct nfs_page *subreq; - unsigned int bytes_left = 0; - unsigned int offset, pgbase; + unsigned int size, subreq_size; nfs_page_group_lock(req); subreq = req; - bytes_left = subreq->wb_bytes; - offset = subreq->wb_offset; - pgbase = subreq->wb_pgbase; - - do { - if (!nfs_pageio_do_add_request(desc, subreq)) { - /* make sure pg_test call(s) did nothing */ - WARN_ON_ONCE(subreq->wb_bytes != bytes_left); - WARN_ON_ONCE(subreq->wb_offset != offset); - WARN_ON_ONCE(subreq->wb_pgbase != pgbase); - + subreq_size = subreq->wb_bytes; + for(;;) { + size = nfs_pageio_do_add_request(desc, subreq); + if (size == subreq_size) { + /* We successfully submitted a request */ + if (subreq == req) + break; + req->wb_pgbase += size; + req->wb_bytes -= size; + req->wb_offset += size; + subreq_size = req->wb_bytes; + subreq = req; + continue; + } + if (WARN_ON_ONCE(subreq != req)) { + nfs_page_group_unlock(req); + nfs_pageio_cleanup_request(desc, subreq); + subreq = req; + subreq_size = req->wb_bytes; + nfs_page_group_lock(req); + } + if (!size) { + /* Can't coalesce any more, so do I/O */ nfs_page_group_unlock(req); desc->pg_moreio = 1; nfs_pageio_doio(desc); if (desc->pg_error < 0 || mirror->pg_recoalesce) - goto out_cleanup_subreq; + return 0; /* retry add_request for this subreq */ nfs_page_group_lock(req); continue; } - - /* check for buggy pg_test call(s) */ - WARN_ON_ONCE(subreq->wb_bytes + subreq->wb_pgbase > PAGE_SIZE); - WARN_ON_ONCE(subreq->wb_bytes > bytes_left); - WARN_ON_ONCE(subreq->wb_bytes == 0); - - bytes_left -= subreq->wb_bytes; - offset += subreq->wb_bytes; - pgbase += subreq->wb_bytes; - - if (bytes_left) { - subreq = nfs_create_subreq(req, subreq, pgbase, - offset, bytes_left); - if (IS_ERR(subreq)) - goto err_ptr; - } - } while (bytes_left > 0); + subreq = nfs_create_subreq(req, req->wb_pgbase, + req->wb_offset, size); + if (IS_ERR(subreq)) + goto err_ptr; + subreq_size = size; + } nfs_page_group_unlock(req); return 1; @@ -1172,10 +1177,6 @@ err_ptr: desc->pg_error = PTR_ERR(subreq); nfs_page_group_unlock(req); return 0; -out_cleanup_subreq: - if (req != subreq) - nfs_pageio_cleanup_request(desc, subreq); - return 0; } static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) @@ -1244,7 +1245,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, { u32 midx; unsigned int pgbase, offset, bytes; - struct nfs_page *dupreq, *lastreq; + struct nfs_page *dupreq; pgbase = req->wb_pgbase; offset = req->wb_offset; @@ -1258,13 +1259,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, for (midx = 1; midx < desc->pg_mirror_count; midx++) { nfs_page_group_lock(req); - /* find the last request */ - for (lastreq = req->wb_head; - lastreq->wb_this_page != req->wb_head; - lastreq = lastreq->wb_this_page) - ; - - dupreq = nfs_create_subreq(req, lastreq, + dupreq = nfs_create_subreq(req, pgbase, offset, bytes); nfs_page_group_unlock(req); -- cgit v1.2.3 From e00ed89d7bd59c4ae49d6aeeee567187b1357a4b Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 30 Mar 2020 12:40:47 -0400 Subject: NFS: Refactor nfs_lock_and_join_requests() Refactor nfs_lock_and_join_requests() in order to separate out the subrequest merging into its own function nfs_lock_and_join_group() that can be used by O_DIRECT. Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 26 +++++++- fs/nfs/write.c | 164 ++++++++++++++++++++++++++++------------------- include/linux/nfs_page.h | 1 + 3 files changed, 123 insertions(+), 68 deletions(-) (limited to 'fs/nfs/pagelist.c') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index b9805d1dac75..f61f96603df7 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -130,6 +130,25 @@ nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx) } EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait); +/* + * nfs_page_lock_head_request - page lock the head of the page group + * @req: any member of the page group + */ +struct nfs_page * +nfs_page_group_lock_head(struct nfs_page *req) +{ + struct nfs_page *head = req->wb_head; + + while (!nfs_lock_request(head)) { + int ret = nfs_wait_on_request(head); + if (ret < 0) + return ERR_PTR(ret); + } + if (head != req) + kref_get(&head->wb_kref); + return head; +} + /* * nfs_unroll_locks - unlock all newly locked reqs and wait on @req * @head: head request of page group, must be holding head lock @@ -186,14 +205,16 @@ nfs_page_group_lock_subreq(struct nfs_page *head, struct nfs_page *subreq) * @head: head request of page group * * This is a helper function for nfs_lock_and_join_requests which - * must be called with the head request and page group both locked. - * On error, it returns with the page group unlocked. + * must be called with the head request locked. */ int nfs_page_group_lock_subrequests(struct nfs_page *head) { struct nfs_page *subreq; int ret; + ret = nfs_page_group_lock(head); + if (ret < 0) + return ret; /* lock each request in the page group */ for (subreq = head->wb_this_page; subreq != head; subreq = subreq->wb_this_page) { @@ -201,6 +222,7 @@ int nfs_page_group_lock_subrequests(struct nfs_page *head) if (ret < 0) return ret; } + nfs_page_group_unlock(head); return 0; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 832cf57ea442..63b64333c3ea 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -149,6 +149,31 @@ static void nfs_io_completion_put(struct nfs_io_completion *ioc) kref_put(&ioc->refcount, nfs_io_completion_release); } +static void +nfs_page_set_inode_ref(struct nfs_page *req, struct inode *inode) +{ + if (!test_and_set_bit(PG_INODE_REF, &req->wb_flags)) { + kref_get(&req->wb_kref); + atomic_long_inc(&NFS_I(inode)->nrequests); + } +} + +static int +nfs_cancel_remove_inode(struct nfs_page *req, struct inode *inode) +{ + int ret; + + if (!test_bit(PG_REMOVE, &req->wb_flags)) + return 0; + ret = nfs_page_group_lock(req); + if (ret) + return ret; + if (test_and_clear_bit(PG_REMOVE, &req->wb_flags)) + nfs_page_set_inode_ref(req, inode); + nfs_page_group_unlock(req); + return 0; +} + static struct nfs_page * nfs_page_private_request(struct page *page) { @@ -218,6 +243,36 @@ static struct nfs_page *nfs_page_find_head_request(struct page *page) return req; } +static struct nfs_page *nfs_find_and_lock_page_request(struct page *page) +{ + struct inode *inode = page_file_mapping(page)->host; + struct nfs_page *req, *head; + int ret; + + for (;;) { + req = nfs_page_find_head_request(page); + if (!req) + return req; + head = nfs_page_group_lock_head(req); + if (head != req) + nfs_release_request(req); + if (IS_ERR(head)) + return head; + ret = nfs_cancel_remove_inode(head, inode); + if (ret < 0) { + nfs_unlock_and_release_request(head); + return ERR_PTR(ret); + } + /* Ensure that nobody removed the request before we locked it */ + if (head == nfs_page_private_request(page)) + break; + if (PageSwapCache(page)) + break; + nfs_unlock_and_release_request(head); + } + return head; +} + /* Adjust the file length if we're writing beyond the end */ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) { @@ -436,65 +491,22 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list, } /* - * nfs_lock_and_join_requests - join all subreqs to the head req and return - * a locked reference, cancelling any pending - * operations for this page. - * - * @page - the page used to lookup the "page group" of nfs_page structures + * nfs_join_page_group - destroy subrequests of the head req + * @head: the page used to lookup the "page group" of nfs_page structures + * @inode: Inode to which the request belongs. * * This function joins all sub requests to the head request by first * locking all requests in the group, cancelling any pending operations * and finally updating the head request to cover the whole range covered by * the (former) group. All subrequests are removed from any write or commit * lists, unlinked from the group and destroyed. - * - * Returns a locked, referenced pointer to the head request - which after - * this call is guaranteed to be the only request associated with the page. - * Returns NULL if no requests are found for @page, or a ERR_PTR if an - * error was encountered. */ -static struct nfs_page * -nfs_lock_and_join_requests(struct page *page) +static void +nfs_join_page_group(struct nfs_page *head, struct inode *inode) { - struct inode *inode = page_file_mapping(page)->host; - struct nfs_page *head, *subreq; + struct nfs_page *subreq; struct nfs_page *destroy_list = NULL; unsigned int pgbase, off, bytes; - int ret; - -try_again: - /* - * A reference is taken only on the head request which acts as a - * reference to the whole page group - the group will not be destroyed - * until the head reference is released. - */ - head = nfs_page_find_head_request(page); - if (!head) - return NULL; - - /* lock the page head first in order to avoid an ABBA inefficiency */ - if (!nfs_lock_request(head)) { - ret = nfs_wait_on_request(head); - nfs_release_request(head); - if (ret < 0) - return ERR_PTR(ret); - goto try_again; - } - - /* Ensure that nobody removed the request before we locked it */ - if (head != nfs_page_private_request(page) && !PageSwapCache(page)) { - nfs_unlock_and_release_request(head); - goto try_again; - } - - ret = nfs_page_group_lock(head); - if (ret < 0) - goto release_request; - - /* lock each request in the page group */ - ret = nfs_page_group_lock_subrequests(head); - if (ret < 0) - goto release_request; pgbase = head->wb_pgbase; bytes = head->wb_bytes; @@ -531,30 +543,50 @@ try_again: head->wb_this_page = head; } - /* Postpone destruction of this request */ - if (test_and_clear_bit(PG_REMOVE, &head->wb_flags)) { - set_bit(PG_INODE_REF, &head->wb_flags); - kref_get(&head->wb_kref); - atomic_long_inc(&NFS_I(inode)->nrequests); - } + nfs_destroy_unlinked_subrequests(destroy_list, head, inode); +} - nfs_page_group_unlock(head); +/* + * nfs_lock_and_join_requests - join all subreqs to the head req + * @page: the page used to lookup the "page group" of nfs_page structures + * + * This function joins all sub requests to the head request by first + * locking all requests in the group, cancelling any pending operations + * and finally updating the head request to cover the whole range covered by + * the (former) group. All subrequests are removed from any write or commit + * lists, unlinked from the group and destroyed. + * + * Returns a locked, referenced pointer to the head request - which after + * this call is guaranteed to be the only request associated with the page. + * Returns NULL if no requests are found for @page, or a ERR_PTR if an + * error was encountered. + */ +static struct nfs_page * +nfs_lock_and_join_requests(struct page *page) +{ + struct inode *inode = page_file_mapping(page)->host; + struct nfs_page *head; + int ret; - nfs_destroy_unlinked_subrequests(destroy_list, head, inode); + /* + * A reference is taken only on the head request which acts as a + * reference to the whole page group - the group will not be destroyed + * until the head reference is released. + */ + head = nfs_find_and_lock_page_request(page); + if (IS_ERR_OR_NULL(head)) + return head; - /* Did we lose a race with nfs_inode_remove_request()? */ - if (!(PagePrivate(page) || PageSwapCache(page))) { + /* lock each request in the page group */ + ret = nfs_page_group_lock_subrequests(head); + if (ret < 0) { nfs_unlock_and_release_request(head); - return NULL; + return ERR_PTR(ret); } - /* still holds ref on head from nfs_page_find_head_request - * and still has lock on head from lock loop */ - return head; + nfs_join_page_group(head, inode); -release_request: - nfs_unlock_and_release_request(head); - return ERR_PTR(ret); + return head; } static void nfs_write_error(struct nfs_page *req, int error) diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index dd205bc6bc58..99198c039bd6 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -139,6 +139,7 @@ extern size_t nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, extern int nfs_wait_on_request(struct nfs_page *); extern void nfs_unlock_request(struct nfs_page *req); extern void nfs_unlock_and_release_request(struct nfs_page *); +extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req); extern int nfs_page_group_lock_subrequests(struct nfs_page *head); extern int nfs_page_group_lock(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); -- cgit v1.2.3