From d0379a5d066a998b0210a81dc52e266ce4daaa36 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 16 Nov 2015 11:26:07 -0500 Subject: pNFS/flexfiles: Support server-supplied layoutstats sampling period Some servers want to be able to control the frequency with which clients report layoutstats, for instance, in order to monitor QoS for a particular file or set of file. In order to support this, the flexfiles layout allows the server to pass this info as a hint in the layout payload. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 16 +++++++++++++--- fs/nfs/flexfilelayout/flexfilelayout.h | 1 + 2 files changed, 14 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 03516c80855a..616817a46410 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -505,9 +505,17 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, } p = xdr_inline_decode(&stream, 4); - if (p) - fls->flags = be32_to_cpup(p); + if (!p) + goto out_sort_mirrors; + fls->flags = be32_to_cpup(p); + + p = xdr_inline_decode(&stream, 4); + if (!p) + goto out_sort_mirrors; + for (i=0; i < fls->mirror_array_cnt; i++) + fls->mirror_array[i]->report_interval = be32_to_cpup(p); +out_sort_mirrors: ff_layout_sort_mirrors(fls); rc = ff_layout_check_layout(lgr); if (rc) @@ -603,7 +611,9 @@ nfs4_ff_layoutstat_start_io(struct nfs4_ff_layout_mirror *mirror, mirror->start_time = now; if (ktime_equal(mirror->last_report_time, notime)) mirror->last_report_time = now; - if (layoutstats_timer != 0) + if (mirror->report_interval != 0) + report_interval = (s64)mirror->report_interval * 1000LL; + else if (layoutstats_timer != 0) report_interval = (s64)layoutstats_timer * 1000LL; if (ktime_to_ms(ktime_sub(now, mirror->last_report_time)) >= report_interval) { diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h index 2bb08bc6aaf0..dd353bb7dc0a 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.h +++ b/fs/nfs/flexfilelayout/flexfilelayout.h @@ -85,6 +85,7 @@ struct nfs4_ff_layout_mirror { struct nfs4_ff_layoutstat write_stat; ktime_t start_time; ktime_t last_report_time; + u32 report_interval; }; struct nfs4_ff_layout_segment { -- cgit v1.2.3 From d600ad1f2bdbf97c4818dcc85b174f72c90c21bd Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Fri, 4 Dec 2015 02:57:48 +0800 Subject: NFS41: pop some layoutget errors to application For ERESTARTSYS/EIO/EROFS/ENOSPC/E2BIG in layoutget, we should just bail out instead of hiding the error and retrying inband IO. Change all the call sites to pop the error all the way up. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 15 ++++++++++++++- fs/nfs/filelayout/filelayout.c | 17 +++++++++++++++-- fs/nfs/flexfilelayout/flexfilelayout.c | 25 ++++++++++++++++++++++--- fs/nfs/pagelist.c | 9 ++++++++- fs/nfs/pnfs.c | 24 ++++++++++++++++++------ fs/nfs/read.c | 2 +- 6 files changed, 78 insertions(+), 14 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 4b1d08f56aba..2e7142bcb4c8 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -670,6 +670,10 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) req = nfs_list_entry(reqs.next); nfs_direct_setup_mirroring(dreq, &desc, req); + if (desc.pg_error < 0) { + list_splice_init(&reqs, &failed); + goto out_failed; + } list_for_each_entry_safe(req, tmp, &reqs, wb_list) { if (!nfs_pageio_add_request(&desc, req)) { @@ -677,13 +681,17 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) nfs_list_add_request(req, &failed); spin_lock(cinfo.lock); dreq->flags = 0; - dreq->error = -EIO; + if (desc.pg_error < 0) + dreq->error = desc.pg_error; + else + dreq->error = -EIO; spin_unlock(cinfo.lock); } nfs_release_request(req); } nfs_pageio_complete(&desc); +out_failed: while (!list_empty(&failed)) { req = nfs_list_entry(failed.next); nfs_list_remove_request(req); @@ -900,6 +908,11 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq, } nfs_direct_setup_mirroring(dreq, &desc, req); + if (desc.pg_error < 0) { + nfs_free_request(req); + result = desc.pg_error; + break; + } nfs_lock_request(req); req->wb_index = pos >> PAGE_SHIFT; diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index 02ec07973bc4..ae07b0f56659 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -883,13 +883,19 @@ static void filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - if (!pgio->pg_lseg) + if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, NFS4_MAX_UINT64, IOMODE_READ, GFP_KERNEL); + if (IS_ERR(pgio->pg_lseg)) { + pgio->pg_error = PTR_ERR(pgio->pg_lseg); + pgio->pg_lseg = NULL; + return; + } + } /* If no lseg, fall back to read through mds */ if (pgio->pg_lseg == NULL) nfs_pageio_reset_read_mds(pgio); @@ -902,13 +908,20 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_commit_info cinfo; int status; - if (!pgio->pg_lseg) + if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, NFS4_MAX_UINT64, IOMODE_RW, GFP_NOFS); + if (IS_ERR(pgio->pg_lseg)) { + pgio->pg_error = PTR_ERR(pgio->pg_lseg); + pgio->pg_lseg = NULL; + return; + } + } + /* If no lseg, fall back to write through mds */ if (pgio->pg_lseg == NULL) goto out_mds; diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 616817a46410..57e4010e3cde 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -795,13 +795,19 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, int ds_idx; /* Use full layout for now */ - if (!pgio->pg_lseg) + if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, NFS4_MAX_UINT64, IOMODE_READ, GFP_KERNEL); + if (IS_ERR(pgio->pg_lseg)) { + pgio->pg_error = PTR_ERR(pgio->pg_lseg); + pgio->pg_lseg = NULL; + return; + } + } /* If no lseg, fall back to read through mds */ if (pgio->pg_lseg == NULL) goto out_mds; @@ -835,13 +841,19 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, int i; int status; - if (!pgio->pg_lseg) + if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, NFS4_MAX_UINT64, IOMODE_RW, GFP_NOFS); + if (IS_ERR(pgio->pg_lseg)) { + pgio->pg_error = PTR_ERR(pgio->pg_lseg); + pgio->pg_lseg = NULL; + return; + } + } /* If no lseg, fall back to write through mds */ if (pgio->pg_lseg == NULL) goto out_mds; @@ -877,18 +889,25 @@ static unsigned int ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) { - if (!pgio->pg_lseg) + if (!pgio->pg_lseg) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, 0, NFS4_MAX_UINT64, IOMODE_RW, GFP_NOFS); + if (IS_ERR(pgio->pg_lseg)) { + pgio->pg_error = PTR_ERR(pgio->pg_lseg); + pgio->pg_lseg = NULL; + goto out; + } + } if (pgio->pg_lseg) return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg); /* no lseg means that pnfs is not in use, so no mirroring here */ nfs_pageio_reset_write_mds(pgio); +out: return 1; } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 452a011ba0d8..728f65884cea 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -874,6 +874,9 @@ static int nfs_pageio_setup_mirroring(struct nfs_pageio_descriptor *pgio, mirror_count = pgio->pg_ops->pg_get_mirror_count(pgio, req); + if (pgio->pg_error < 0) + return pgio->pg_error; + if (!mirror_count || mirror_count > NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX) return -EINVAL; @@ -982,6 +985,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc, } else { if (desc->pg_ops->pg_init) desc->pg_ops->pg_init(desc, req); + if (desc->pg_error < 0) + return 0; mirror->pg_base = req->wb_pgbase; } if (!nfs_can_coalesce_requests(prev, req, desc)) @@ -1147,6 +1152,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, bytes = req->wb_bytes; nfs_pageio_setup_mirroring(desc, req); + if (desc->pg_error < 0) + return 0; for (midx = 0; midx < desc->pg_mirror_count; midx++) { if (midx) { @@ -1232,7 +1239,7 @@ int nfs_pageio_resend(struct nfs_pageio_descriptor *desc, nfs_pageio_complete(desc); if (!list_empty(&failed)) { list_move(&failed, &hdr->pages); - return -EIO; + return desc->pg_error < 0 ? desc->pg_error : -EIO; } return 0; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6095a8d42766..b1acc4135c3c 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -906,14 +906,15 @@ send_layoutget(struct pnfs_layout_hdr *lo, if (IS_ERR(lseg)) { switch (PTR_ERR(lseg)) { - case -ENOMEM: case -ERESTARTSYS: + case -EIO: + case -ENOSPC: + case -EROFS: + case -E2BIG: break; default: - /* remember that LAYOUTGET failed and suspend trying */ - pnfs_layout_io_set_failed(lo, range->iomode); + return NULL; } - return NULL; } else pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(range->iomode)); @@ -1649,7 +1650,7 @@ out: "(%s, offset: %llu, length: %llu)\n", __func__, ino->i_sb->s_id, (unsigned long long)NFS_FILEID(ino), - lseg == NULL ? "not found" : "found", + IS_ERR_OR_NULL(lseg) ? "not found" : "found", iomode==IOMODE_RW ? "read/write" : "read-only", (unsigned long long)pos, (unsigned long long)count); @@ -1828,6 +1829,11 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r rd_size, IOMODE_READ, GFP_KERNEL); + if (IS_ERR(pgio->pg_lseg)) { + pgio->pg_error = PTR_ERR(pgio->pg_lseg); + pgio->pg_lseg = NULL; + return; + } } /* If no lseg, fall back to read through mds */ if (pgio->pg_lseg == NULL) @@ -1840,13 +1846,19 @@ void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req, u64 wb_size) { - if (pgio->pg_lseg == NULL) + if (pgio->pg_lseg == NULL) { pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, req->wb_context, req_offset(req), wb_size, IOMODE_RW, GFP_NOFS); + if (IS_ERR(pgio->pg_lseg)) { + pgio->pg_error = PTR_ERR(pgio->pg_lseg); + pgio->pg_lseg = NULL; + return; + } + } /* If no lseg, fall back to write through mds */ if (pgio->pg_lseg == NULL) nfs_pageio_reset_write_mds(pgio); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 0a5e33f33b5c..0bb580174cb3 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -115,7 +115,7 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, pgm = &pgio.pg_mirrors[0]; NFS_I(inode)->read_io += pgm->pg_bytes_written; - return 0; + return pgio.pg_error < 0 ? pgio.pg_error : 0; } static void nfs_readpage_release(struct nfs_page *req) -- cgit v1.2.3 From c18b96a1b8629db8cf5ac9f8974ccb4abcc209ab Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 5 Dec 2015 01:59:56 +0800 Subject: nfs: clean up rest of reqs when failing to add one If we fail to set up things before sending anything over wire, we need to clean up the reqs that are still attached to the IO descriptor. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 728f65884cea..13faa303eb3b 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -1126,6 +1126,7 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { + struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); int ret; do { @@ -1153,7 +1154,7 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, nfs_pageio_setup_mirroring(desc, req); if (desc->pg_error < 0) - return 0; + goto out_failed; for (midx = 0; midx < desc->pg_mirror_count; midx++) { if (midx) { @@ -1170,7 +1171,8 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, if (IS_ERR(dupreq)) { nfs_page_group_unlock(req); - return 0; + desc->pg_error = PTR_ERR(dupreq); + goto out_failed; } nfs_lock_request(dupreq); @@ -1183,10 +1185,19 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, if (nfs_pgio_has_mirroring(desc)) desc->pg_mirror_idx = midx; if (!nfs_pageio_add_request_mirror(desc, dupreq)) - return 0; + goto out_failed; } return 1; + +out_failed: + /* + * We might have failed before sending any reqs over wire. + * clean up rest of the reqs in mirror pg_list + */ + if (desc->pg_error) + desc->pg_completion_ops->error_cleanup(&mirror->pg_list); + return 0; } /* -- cgit v1.2.3 From 2bff2288579f1e4af2f05a7f7443c85b7766d5ac Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 5 Dec 2015 02:03:17 +0800 Subject: nfs: centralize pgio error cleanup In case we fail during setting things up for read/write IO, set pg_error in IO descriptor and do the cleanup in nfs_pageio_add_request, where we clean up all pages that are still hanging around on the IO descriptor. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/pagelist.c | 53 +++++++++++++++++++++++++++++------------------------ fs/nfs/pnfs.c | 12 ++++-------- 2 files changed, 33 insertions(+), 32 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index 13faa303eb3b..7c71b71016b5 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -664,22 +664,11 @@ EXPORT_SYMBOL_GPL(nfs_initiate_pgio); * @desc: IO descriptor * @hdr: pageio header */ -static int nfs_pgio_error(struct nfs_pageio_descriptor *desc, - struct nfs_pgio_header *hdr) +static void nfs_pgio_error(struct nfs_pgio_header *hdr) { - struct nfs_pgio_mirror *mirror; - u32 midx; - set_bit(NFS_IOHDR_REDO, &hdr->flags); nfs_pgio_data_destroy(hdr); hdr->completion_ops->completion(hdr); - /* TODO: Make sure it's right to clean up all mirrors here - * and not just hdr->pgio_mirror_idx */ - for (midx = 0; midx < desc->pg_mirror_count; midx++) { - mirror = &desc->pg_mirrors[midx]; - desc->pg_completion_ops->error_cleanup(&mirror->pg_list); - } - return -ENOMEM; } /** @@ -800,8 +789,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, unsigned int pagecount, pageused; pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); - if (!nfs_pgarray_set(&hdr->page_array, pagecount)) - return nfs_pgio_error(desc, hdr); + if (!nfs_pgarray_set(&hdr->page_array, pagecount)) { + nfs_pgio_error(hdr); + desc->pg_error = -ENOMEM; + return desc->pg_error; + } nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); pages = hdr->page_array.pagevec; @@ -819,8 +811,11 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc, *pages++ = last_page = req->wb_page; } } - if (WARN_ON_ONCE(pageused != pagecount)) - return nfs_pgio_error(desc, hdr); + if (WARN_ON_ONCE(pageused != pagecount)) { + nfs_pgio_error(hdr); + desc->pg_error = -EINVAL; + return desc->pg_error; + } if ((desc->pg_ioflags & FLUSH_COND_STABLE) && (desc->pg_moreio || nfs_reqs_to_commit(&cinfo))) @@ -843,10 +838,8 @@ static int nfs_generic_pg_pgios(struct nfs_pageio_descriptor *desc) hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { - /* TODO: make sure this is right with mirroring - or - * should it back out all mirrors? */ - desc->pg_completion_ops->error_cleanup(&mirror->pg_list); - return -ENOMEM; + desc->pg_error = -ENOMEM; + return desc->pg_error; } nfs_pgheader_init(desc, hdr, nfs_pgio_header_free); ret = nfs_generic_pgio(desc, hdr); @@ -1126,7 +1119,6 @@ static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc) static int nfs_pageio_add_request_mirror(struct nfs_pageio_descriptor *desc, struct nfs_page *req) { - struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); int ret; do { @@ -1193,10 +1185,23 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, out_failed: /* * We might have failed before sending any reqs over wire. - * clean up rest of the reqs in mirror pg_list + * Clean up rest of the reqs in mirror pg_list. */ - if (desc->pg_error) - desc->pg_completion_ops->error_cleanup(&mirror->pg_list); + if (desc->pg_error) { + struct nfs_pgio_mirror *mirror; + void (*func)(struct list_head *); + + /* remember fatal errors */ + if (nfs_error_is_fatal(desc->pg_error)) + mapping_set_error(desc->pg_inode->i_mapping, + desc->pg_error); + + func = desc->pg_completion_ops->error_cleanup; + for (midx = 0; midx < desc->pg_mirror_count; midx++) { + mirror = &desc->pg_mirrors[midx]; + func(&mirror->pg_list); + } + } return 0; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b1acc4135c3c..0fb3552ccfbe 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -2026,15 +2026,13 @@ static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc) { - struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); - struct nfs_pgio_header *hdr; int ret; hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { - desc->pg_completion_ops->error_cleanup(&mirror->pg_list); - return -ENOMEM; + desc->pg_error = -ENOMEM; + return desc->pg_error; } nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); @@ -2157,15 +2155,13 @@ static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) { - struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc); - struct nfs_pgio_header *hdr; int ret; hdr = nfs_pgio_header_alloc(desc->pg_rw_ops); if (!hdr) { - desc->pg_completion_ops->error_cleanup(&mirror->pg_list); - return -ENOMEM; + desc->pg_error = -ENOMEM; + return desc->pg_error; } nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); hdr->lseg = pnfs_get_lseg(desc->pg_lseg); -- cgit v1.2.3 From 0bcbf039f6b2bcefe4f5dada76079080edf9ecd0 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 5 Dec 2015 15:57:31 +0800 Subject: nfs: handle request add failure properly When we fail to queue a read page to IO descriptor, we need to clean it up otherwise it is hanging around preventing nfs module from being removed. When we fail to queue a write page to IO descriptor, we need to clean it up and also save the failure status to open context. Then at file close, we can try to write pages back again and drop the page if it fails to writeback in .launder_page, which will be done in the next patch. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 6 ++++++ fs/nfs/internal.h | 14 ++++++++++++++ fs/nfs/pnfs.c | 15 +++------------ fs/nfs/read.c | 41 +++++++++++++++++++++++------------------ fs/nfs/write.c | 22 +++++++++++++++++++++- 5 files changed, 67 insertions(+), 31 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index c7e8b87da5b2..74fb1223c2f5 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -912,6 +912,12 @@ void nfs_file_clear_open_context(struct file *filp) if (ctx) { struct inode *inode = d_inode(ctx->dentry); + /* + * We fatal error on write before. Try to writeback + * every page again. + */ + if (ctx->error < 0) + invalidate_inode_pages2(inode->i_mapping); filp->private_data = NULL; spin_lock(&inode->i_lock); list_move_tail(&ctx->list, &NFS_I(inode)->open_files); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 313d55402238..68f773dc226e 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -711,3 +711,17 @@ static inline u32 nfs_stateid_hash(nfs4_stateid *stateid) return 0; } #endif + +static inline bool nfs_error_is_fatal(int err) +{ + switch (err) { + case -ERESTARTSYS: + case -EIO: + case -ENOSPC: + case -EROFS: + case -E2BIG: + return true; + default: + return false; + } +} diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 0fb3552ccfbe..580207bc52a5 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -904,18 +904,9 @@ send_layoutget(struct pnfs_layout_hdr *lo, lseg = nfs4_proc_layoutget(lgp, gfp_flags); } while (lseg == ERR_PTR(-EAGAIN)); - if (IS_ERR(lseg)) { - switch (PTR_ERR(lseg)) { - case -ERESTARTSYS: - case -EIO: - case -ENOSPC: - case -EROFS: - case -E2BIG: - break; - default: - return NULL; - } - } else + if (IS_ERR(lseg) && !nfs_error_is_fatal(PTR_ERR(lseg))) + lseg = NULL; + else pnfs_layout_clear_fail_bit(lo, pnfs_iomode_to_fail_bit(range->iomode)); diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 0bb580174cb3..eb31e23e7def 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -85,6 +85,23 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) } EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); +static void nfs_readpage_release(struct nfs_page *req) +{ + struct inode *inode = d_inode(req->wb_context->dentry); + + dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id, + (unsigned long long)NFS_FILEID(inode), req->wb_bytes, + (long long)req_offset(req)); + + if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) { + if (PageUptodate(req->wb_page)) + nfs_readpage_to_fscache(inode, req->wb_page, 0); + + unlock_page(req->wb_page); + } + nfs_release_request(req); +} + int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, struct page *page) { @@ -106,7 +123,10 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, nfs_pageio_init_read(&pgio, inode, false, &nfs_async_read_completion_ops); - nfs_pageio_add_request(&pgio, new); + if (!nfs_pageio_add_request(&pgio, new)) { + nfs_list_remove_request(new); + nfs_readpage_release(new); + } nfs_pageio_complete(&pgio); /* It doesn't make sense to do mirrored reads! */ @@ -118,23 +138,6 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, return pgio.pg_error < 0 ? pgio.pg_error : 0; } -static void nfs_readpage_release(struct nfs_page *req) -{ - struct inode *inode = d_inode(req->wb_context->dentry); - - dprintk("NFS: read done (%s/%llu %d@%lld)\n", inode->i_sb->s_id, - (unsigned long long)NFS_FILEID(inode), req->wb_bytes, - (long long)req_offset(req)); - - if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE)) { - if (PageUptodate(req->wb_page)) - nfs_readpage_to_fscache(inode, req->wb_page, 0); - - unlock_page(req->wb_page); - } - nfs_release_request(req); -} - static void nfs_page_group_set_uptodate(struct nfs_page *req) { if (nfs_page_group_sync_on_bit(req, PG_UPTODATE)) @@ -361,6 +364,8 @@ readpage_async_filler(void *data, struct page *page) if (len < PAGE_CACHE_SIZE) zero_user_segment(page, len, PAGE_CACHE_SIZE); if (!nfs_pageio_add_request(desc->pgio, new)) { + nfs_list_remove_request(new); + nfs_readpage_release(new); error = desc->pgio->pg_error; goto out_unlock; } diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 7b9316406930..9dafb08ddae5 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -545,6 +545,15 @@ try_again: return head; } +static void nfs_write_error_remove_page(struct nfs_page *req) +{ + nfs_unlock_request(req); + nfs_end_page_writeback(req); + nfs_release_request(req); + generic_error_remove_page(page_file_mapping(req->wb_page), + req->wb_page); +} + /* * Find an associated nfs write request, and prepare to flush it out * May return an error if the user signalled nfs_wait_on_request(). @@ -567,8 +576,19 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, ret = 0; if (!nfs_pageio_add_request(pgio, req)) { - nfs_redirty_request(req); ret = pgio->pg_error; + /* + * Remove the problematic req upon fatal errors, + * while other dirty pages can still be around + * until they get flushed. + */ + if (nfs_error_is_fatal(ret)) { + nfs_context_set_write_error(req->wb_context, ret); + nfs_write_error_remove_page(req); + } else { + nfs_redirty_request(req); + ret = -EAGAIN; + } } else nfs_add_stats(page_file_mapping(page)->host, NFSIOS_WRITEPAGES, 1); -- cgit v1.2.3 From d6c843b96e1cb5199147e3281a724e3c0b69a9ab Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 5 Dec 2015 16:20:43 +0800 Subject: nfs: only remove page from mapping if launder_page fails Instead of dropping pages when write fails, only do it when we get fatal failure in launder_page write back. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 2 +- fs/nfs/write.c | 39 +++++++++++++++++++++++---------------- include/linux/nfs_fs.h | 14 +++++++++++++- 3 files changed, 37 insertions(+), 18 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 93e236429c5d..f188dd071dfc 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -545,7 +545,7 @@ static int nfs_launder_page(struct page *page) inode->i_ino, (long long)page_offset(page)); nfs_fscache_wait_on_page_write(nfsi, page); - return nfs_wb_page(inode, page); + return nfs_wb_launder_page(inode, page); } static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 9dafb08ddae5..4d254232d728 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -559,7 +559,8 @@ static void nfs_write_error_remove_page(struct nfs_page *req) * May return an error if the user signalled nfs_wait_on_request(). */ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, - struct page *page, bool nonblock) + struct page *page, bool nonblock, + bool launder) { struct nfs_page *req; int ret = 0; @@ -578,17 +579,19 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, if (!nfs_pageio_add_request(pgio, req)) { ret = pgio->pg_error; /* - * Remove the problematic req upon fatal errors, - * while other dirty pages can still be around - * until they get flushed. + * Remove the problematic req upon fatal errors + * in launder case, while other dirty pages can + * still be around until they get flushed. */ if (nfs_error_is_fatal(ret)) { nfs_context_set_write_error(req->wb_context, ret); - nfs_write_error_remove_page(req); - } else { - nfs_redirty_request(req); - ret = -EAGAIN; + if (launder) { + nfs_write_error_remove_page(req); + goto out; + } } + nfs_redirty_request(req); + ret = -EAGAIN; } else nfs_add_stats(page_file_mapping(page)->host, NFSIOS_WRITEPAGES, 1); @@ -596,12 +599,14 @@ out: return ret; } -static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, struct nfs_pageio_descriptor *pgio) +static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, + struct nfs_pageio_descriptor *pgio, bool launder) { int ret; nfs_pageio_cond_complete(pgio, page_file_index(page)); - ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE); + ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE, + launder); if (ret == -EAGAIN) { redirty_page_for_writepage(wbc, page); ret = 0; @@ -612,7 +617,9 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, st /* * Write an mmapped page to the server. */ -static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc) +static int nfs_writepage_locked(struct page *page, + struct writeback_control *wbc, + bool launder) { struct nfs_pageio_descriptor pgio; struct inode *inode = page_file_mapping(page)->host; @@ -621,7 +628,7 @@ static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false, &nfs_async_write_completion_ops); - err = nfs_do_writepage(page, wbc, &pgio); + err = nfs_do_writepage(page, wbc, &pgio, launder); nfs_pageio_complete(&pgio); if (err < 0) return err; @@ -634,7 +641,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc) { int ret; - ret = nfs_writepage_locked(page, wbc); + ret = nfs_writepage_locked(page, wbc, false); unlock_page(page); return ret; } @@ -643,7 +650,7 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control * { int ret; - ret = nfs_do_writepage(page, wbc, data); + ret = nfs_do_writepage(page, wbc, data, false); unlock_page(page); return ret; } @@ -1931,7 +1938,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page) /* * Write back all requests on one page - we do this before reading it. */ -int nfs_wb_page(struct inode *inode, struct page *page) +int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder) { loff_t range_start = page_file_offset(page); loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); @@ -1948,7 +1955,7 @@ int nfs_wb_page(struct inode *inode, struct page *page) for (;;) { wait_on_page_writeback(page); if (clear_page_dirty_for_io(page)) { - ret = nfs_writepage_locked(page, &wbc); + ret = nfs_writepage_locked(page, &wbc, launder); if (ret < 0) goto out_error; continue; diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index c0e961474a52..b88fc46cfbb8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -517,12 +517,24 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned */ extern int nfs_sync_inode(struct inode *inode); extern int nfs_wb_all(struct inode *inode); -extern int nfs_wb_page(struct inode *inode, struct page* page); +extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_commit_data *data); +static inline int +nfs_wb_launder_page(struct inode *inode, struct page *page) +{ + return nfs_wb_single_page(inode, page, true); +} + +static inline int +nfs_wb_page(struct inode *inode, struct page *page) +{ + return nfs_wb_single_page(inode, page, false); +} + static inline int nfs_have_writebacks(struct inode *inode) { -- cgit v1.2.3 From 7c1e6e58e2aa8b95d1b9888db1945eb0a388efb9 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 5 Dec 2015 17:01:01 +0800 Subject: NFS41: map NFS4ERR_LAYOUTUNAVAILABLE to ENODATA Instead of mapping it to EIO that is a fatal error and fails application. We'll go inband after getting NFS4ERR_LAYOUTUNAVAILABLE. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/nfs4proc.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'fs/nfs') diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 18d862db15b6..0065bbdc53c3 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7795,6 +7795,15 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) switch (task->tk_status) { case 0: goto out; + + /* + * NFS4ERR_LAYOUTUNAVAILABLE means we are not supposed to use pnfs + * on the file. set tk_status to -ENODATA to tell upper layer to + * retry go inband. + */ + case -NFS4ERR_LAYOUTUNAVAILABLE: + task->tk_status = -ENODATA; + goto out; /* * NFS4ERR_BADLAYOUT means the MDS cannot return a layout of * length lgp->args.minlength != 0 (see RFC5661 section 18.43.3). -- cgit v1.2.3 From c22eeb8697352bd696300aabaf08eaff99bdb635 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 7 Dec 2015 17:31:38 -0500 Subject: pnfs/flexfiles: do not mark delay-like status as DS failure We just need to delay and retry in these cases. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 57e4010e3cde..b392156e6743 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1188,6 +1188,14 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, } } + switch (status) { + case NFS4ERR_DELAY: + case NFS4ERR_GRACE: + return; + default: + break; + } + mirror = FF_LAYOUT_COMP(lseg, idx); err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), mirror, offset, length, status, opnum, @@ -1399,7 +1407,6 @@ static int ff_layout_write_done_cb(struct rpc_task *task, ff_layout_reset_write(hdr, false); return task->tk_status; case -EAGAIN: - rpc_restart_call_prepare(task); return -EAGAIN; } -- cgit v1.2.3 From 141b9b59ed8ae2602b2c285149ec8d4f6b05c4d9 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Mon, 7 Dec 2015 18:33:14 -0500 Subject: pnfs/flexfiles: count io stat in rpc_count_stats callback If client ever restarts IO due to some errors, we'll endup mis-counting IO stats if we do the counting in .rpc_done callback. Move it to .rpc_count_stats callback that is only called when releasing RPC. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index b392156e6743..5ede5c26c757 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1516,11 +1516,6 @@ static void ff_layout_write_call_done(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - nfs4_ff_layout_stat_io_end_write(task, - FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), - hdr->args.count, hdr->res.count, - hdr->res.verf->committed); - if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); @@ -1535,6 +1530,11 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; + nfs4_ff_layout_stat_io_end_write(task, + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count, hdr->res.count, + hdr->res.verf->committed); + rpc_count_iostats_metrics(task, &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]); } @@ -1566,6 +1566,11 @@ static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data) } static void ff_layout_commit_done(struct rpc_task *task, void *data) +{ + pnfs_generic_write_commit_done(task, data); +} + +static void ff_layout_commit_count_stats(struct rpc_task *task, void *data) { struct nfs_commit_data *cdata = data; struct nfs_page *req; @@ -1580,13 +1585,6 @@ static void ff_layout_commit_done(struct rpc_task *task, void *data) FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), count, count, NFS_FILE_SYNC); - pnfs_generic_write_commit_done(task, data); -} - -static void ff_layout_commit_count_stats(struct rpc_task *task, void *data) -{ - struct nfs_commit_data *cdata = data; - rpc_count_iostats_metrics(task, &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]); } -- cgit v1.2.3 From 2e5b29f0448be9ea8da3ee0412a2043fee59e131 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 14 Dec 2015 16:25:11 -0500 Subject: pNFS/flexfiles: Don't prevent flexfiles client from retrying LAYOUTGET Fix a bug in which flexfiles clients are falling back to I/O through the MDS even when the FF_FLAGS_NO_IO_THRU_MDS flag is set. The flexfiles client will always report errors through the LAYOUTRETURN and/or LAYOUTERROR mechanisms, so it should normally be safe for it to retry the LAYOUTGET until it fails or succeeds. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 4 ---- fs/nfs/flexfilelayout/flexfilelayoutdev.c | 16 ++++------------ fs/nfs/pnfs.c | 18 ++---------------- fs/nfs/pnfs.h | 21 --------------------- 4 files changed, 6 insertions(+), 53 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 5ede5c26c757..1da19d709458 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1399,11 +1399,9 @@ static int ff_layout_write_done_cb(struct rpc_task *task, switch (err) { case -NFS4ERR_RESET_TO_PNFS: - pnfs_set_retry_layoutget(hdr->lseg->pls_layout); ff_layout_reset_write(hdr, true); return task->tk_status; case -NFS4ERR_RESET_TO_MDS: - pnfs_clear_retry_layoutget(hdr->lseg->pls_layout); ff_layout_reset_write(hdr, false); return task->tk_status; case -EAGAIN: @@ -1438,11 +1436,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, switch (err) { case -NFS4ERR_RESET_TO_PNFS: - pnfs_set_retry_layoutget(data->lseg->pls_layout); pnfs_generic_prepare_to_resend_writes(data); return -EAGAIN; case -NFS4ERR_RESET_TO_MDS: - pnfs_clear_retry_layoutget(data->lseg->pls_layout); pnfs_generic_prepare_to_resend_writes(data); return -EAGAIN; case -EAGAIN: diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c index e125e55de86d..bd0327541366 100644 --- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c @@ -429,22 +429,14 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, mirror, lseg->pls_range.offset, lseg->pls_range.length, NFS4ERR_NXIO, OP_ILLEGAL, GFP_NOIO); - if (fail_return) { - pnfs_error_mark_layout_for_return(ino, lseg); - if (ff_layout_has_available_ds(lseg)) - pnfs_set_retry_layoutget(lseg->pls_layout); - else - pnfs_clear_retry_layoutget(lseg->pls_layout); - - } else { + if (!fail_return) { if (ff_layout_has_available_ds(lseg)) set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lseg->pls_layout->plh_flags); - else { + else pnfs_error_mark_layout_for_return(ino, lseg); - pnfs_clear_retry_layoutget(lseg->pls_layout); - } - } + } else + pnfs_error_mark_layout_for_return(ino, lseg); } out_update_creds: if (ff_layout_update_mirror_cred(mirror, ds)) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 580207bc52a5..6b42362cdbb0 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -618,7 +618,6 @@ pnfs_destroy_layout(struct nfs_inode *nfsi) pnfs_get_layout_hdr(lo); pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED); pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); - pnfs_clear_retry_layoutget(lo); spin_unlock(&nfsi->vfs_inode.i_lock); pnfs_free_lseg_list(&tmp_list); pnfs_put_layout_hdr(lo); @@ -1094,7 +1093,6 @@ bool pnfs_roc(struct inode *ino) &lo->plh_flags)) layoutreturn = pnfs_prepare_layoutreturn(lo); - pnfs_clear_retry_layoutget(lo); list_for_each_entry_safe(lseg, tmp, &lo->plh_segs, pls_list) /* If we are sending layoutreturn, invalidate all valid lsegs */ if (layoutreturn || test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { @@ -1457,25 +1455,15 @@ static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, return ret; } -/* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */ -static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key, int mode) -{ - if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags)) - return 1; - return nfs_wait_bit_killable(key, mode); -} - static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) { - if (!pnfs_should_retry_layoutget(lo)) - return false; /* * send layoutcommit as it can hold up layoutreturn due to lseg * reference */ pnfs_layoutcommit_inode(lo->plh_inode, false); return !wait_on_bit_action(&lo->plh_flags, NFS_LAYOUT_RETURN, - pnfs_layoutget_retry_bit_wait, + nfs_wait_bit_killable, TASK_UNINTERRUPTIBLE); } @@ -1550,8 +1538,7 @@ lookup_again: } /* if LAYOUTGET already failed once we don't try again */ - if (pnfs_layout_io_test_failed(lo, iomode) && - !pnfs_should_retry_layoutget(lo)) { + if (pnfs_layout_io_test_failed(lo, iomode)) { trace_pnfs_update_layout(ino, pos, count, iomode, lo, PNFS_UPDATE_LAYOUT_IO_TEST_FAIL); goto out_unlock; @@ -1628,7 +1615,6 @@ lookup_again: arg.length = PAGE_CACHE_ALIGN(arg.length); lseg = send_layoutget(lo, ctx, &arg, gfp_flags); - pnfs_clear_retry_layoutget(lo); atomic_dec(&lo->plh_outstanding); trace_pnfs_update_layout(ino, pos, count, iomode, lo, PNFS_UPDATE_LAYOUT_SEND_LAYOUTGET); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d1990e90e7a0..6916ff4e86f9 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -98,7 +98,6 @@ enum { NFS_LAYOUT_RETURN_BEFORE_CLOSE, /* Return this layout before close */ NFS_LAYOUT_INVALID_STID, /* layout stateid id is invalid */ NFS_LAYOUT_FIRST_LAYOUTGET, /* Serialize first layoutget */ - NFS_LAYOUT_RETRY_LAYOUTGET, /* Retry layoutget */ }; enum layoutdriver_policy_flags { @@ -379,26 +378,6 @@ nfs4_get_deviceid(struct nfs4_deviceid_node *d) return d; } -static inline void pnfs_set_retry_layoutget(struct pnfs_layout_hdr *lo) -{ - if (!test_and_set_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) - atomic_inc(&lo->plh_refcount); -} - -static inline void pnfs_clear_retry_layoutget(struct pnfs_layout_hdr *lo) -{ - if (test_and_clear_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags)) { - atomic_dec(&lo->plh_refcount); - /* wake up waiters for LAYOUTRETURN as that is not needed */ - wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); - } -} - -static inline bool pnfs_should_retry_layoutget(struct pnfs_layout_hdr *lo) -{ - return test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, &lo->plh_flags); -} - static inline struct pnfs_layout_segment * pnfs_get_lseg(struct pnfs_layout_segment *lseg) { -- cgit v1.2.3 From b9fc773ef512dd3db71560463d0e5543c35fe976 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 15 Dec 2015 11:15:05 -0500 Subject: pNFS/flexfiles: Don't mark the entire layout as failed, when returning it In pNFS/flexfiles, we want to return the layout without necessarily marking it as having completely failed. We therefore move the call to pnfs_layout_io_set_failed() out of pnfs_error_mark_layout_for_return(), and then ensura that pNFS/files layout calls it separately. Signed-off-by: Trond Myklebust --- fs/nfs/filelayout/filelayout.c | 1 + fs/nfs/pnfs.c | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index ae07b0f56659..bb1f4e7a3270 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -202,6 +202,7 @@ static int filelayout_async_handle_error(struct rpc_task *task, task->tk_status); nfs4_mark_deviceid_unavailable(devid); pnfs_error_mark_layout_for_return(inode, lseg); + pnfs_set_lo_fail(lseg); rpc_wake_up(&tbl->slot_tbl_waitq); /* fall through */ default: diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 6b42362cdbb0..113c3b327e24 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1763,7 +1763,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, struct pnfs_layout_segment *lseg) { struct pnfs_layout_hdr *lo = NFS_I(inode)->layout; - int iomode = pnfs_iomode_to_fail_bit(lseg->pls_range.iomode); struct pnfs_layout_range range = { .iomode = lseg->pls_range.iomode, .offset = 0, @@ -1772,8 +1771,6 @@ void pnfs_error_mark_layout_for_return(struct inode *inode, LIST_HEAD(free_me); spin_lock(&inode->i_lock); - /* set failure bit so that pnfs path will be retried later */ - pnfs_layout_set_fail_bit(lo, iomode); if (lo->plh_return_iomode == 0) lo->plh_return_iomode = range.iomode; else if (lo->plh_return_iomode != range.iomode) -- cgit v1.2.3 From 7eeea1679783e7c1c38074e8c562900b7e3d3dd7 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 17 Dec 2015 09:22:15 -0500 Subject: pNFS/flexfiles: Fix a statistics gathering imbalance When we replay a failed read, write or commit to the dataserver, we need to ensure that we call ff_layout_read_prepare_v3(), ff_layout_write_prepare_v3 or ff_layout_commit_prepare_v3() so that we reset the statistics. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 1da19d709458..14109a82ce84 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1130,7 +1130,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task, return -NFS4ERR_RESET_TO_PNFS; out_retry: task->tk_status = 0; - rpc_restart_call(task); + rpc_restart_call_prepare(task); rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); return -EAGAIN; } -- cgit v1.2.3 From 37e9ed22b1552fa94ee7db2901a5e7d8bdf60b15 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Dec 2015 12:30:24 -0500 Subject: pNFS: Add flag to track if we've called nfs4_ff_layout_stat_io_start_read/write Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 95 +++++++++++++++++++++++++--------- include/linux/nfs_xdr.h | 2 + 2 files changed, 72 insertions(+), 25 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 14109a82ce84..9257679a15ba 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1279,14 +1279,31 @@ ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx) return ff_layout_test_devid_unavailable(node); } -static int ff_layout_read_prepare_common(struct rpc_task *task, - struct nfs_pgio_header *hdr) +static void ff_layout_read_record_layoutstats_start(struct rpc_task *task, + struct nfs_pgio_header *hdr) { + if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags)) + return; nfs4_ff_layout_stat_io_start_read(hdr->inode, FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), hdr->args.count, task->tk_start); +} + +static void ff_layout_read_record_layoutstats_done(struct rpc_task *task, + struct nfs_pgio_header *hdr) +{ + if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags)) + return; + nfs4_ff_layout_stat_io_end_read(task, + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count, + hdr->res.count); +} +static int ff_layout_read_prepare_common(struct rpc_task *task, + struct nfs_pgio_header *hdr) +{ if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return -EIO; @@ -1302,6 +1319,7 @@ static int ff_layout_read_prepare_common(struct rpc_task *task, } hdr->pgio_done_cb = ff_layout_read_done_cb; + ff_layout_read_record_layoutstats_start(task, hdr); return 0; } @@ -1360,10 +1378,6 @@ static void ff_layout_read_call_done(struct rpc_task *task, void *data) dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); - nfs4_ff_layout_stat_io_end_read(task, - FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), - hdr->args.count, hdr->res.count); - if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && task->tk_status == 0) { nfs4_sequence_done(task, &hdr->res.seq_res); @@ -1378,6 +1392,7 @@ static void ff_layout_read_count_stats(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; + ff_layout_read_record_layoutstats_done(task, hdr); rpc_count_iostats_metrics(task, &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]); } @@ -1453,14 +1468,31 @@ static int ff_layout_commit_done_cb(struct rpc_task *task, return 0; } -static int ff_layout_write_prepare_common(struct rpc_task *task, - struct nfs_pgio_header *hdr) +static void ff_layout_write_record_layoutstats_start(struct rpc_task *task, + struct nfs_pgio_header *hdr) { + if (test_and_set_bit(NFS_IOHDR_STAT, &hdr->flags)) + return; nfs4_ff_layout_stat_io_start_write(hdr->inode, FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), hdr->args.count, task->tk_start); +} + +static void ff_layout_write_record_layoutstats_done(struct rpc_task *task, + struct nfs_pgio_header *hdr) +{ + if (!test_and_clear_bit(NFS_IOHDR_STAT, &hdr->flags)) + return; + nfs4_ff_layout_stat_io_end_write(task, + FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), + hdr->args.count, hdr->res.count, + hdr->res.verf->committed); +} +static int ff_layout_write_prepare_common(struct rpc_task *task, + struct nfs_pgio_header *hdr) +{ if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { rpc_exit(task, -EIO); return -EIO; @@ -1477,6 +1509,7 @@ static int ff_layout_write_prepare_common(struct rpc_task *task, return -EAGAIN; } + ff_layout_write_record_layoutstats_start(task, hdr); return 0; } @@ -1526,23 +1559,45 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data) { struct nfs_pgio_header *hdr = data; - nfs4_ff_layout_stat_io_end_write(task, - FF_LAYOUT_COMP(hdr->lseg, hdr->pgio_mirror_idx), - hdr->args.count, hdr->res.count, - hdr->res.verf->committed); - + ff_layout_write_record_layoutstats_done(task, hdr); rpc_count_iostats_metrics(task, &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]); } -static void ff_layout_commit_prepare_common(struct rpc_task *task, +static void ff_layout_commit_record_layoutstats_start(struct rpc_task *task, struct nfs_commit_data *cdata) { + if (test_and_set_bit(NFS_IOHDR_STAT, &cdata->flags)) + return; nfs4_ff_layout_stat_io_start_write(cdata->inode, FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), 0, task->tk_start); } +static void ff_layout_commit_record_layoutstats_done(struct rpc_task *task, + struct nfs_commit_data *cdata) +{ + struct nfs_page *req; + __u64 count = 0; + + if (!test_and_clear_bit(NFS_IOHDR_STAT, &cdata->flags)) + return; + + if (task->tk_status == 0) { + list_for_each_entry(req, &cdata->pages, wb_list) + count += req->wb_bytes; + } + nfs4_ff_layout_stat_io_end_write(task, + FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), + count, count, NFS_FILE_SYNC); +} + +static void ff_layout_commit_prepare_common(struct rpc_task *task, + struct nfs_commit_data *cdata) +{ + ff_layout_commit_record_layoutstats_start(task, cdata); +} + static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data) { ff_layout_commit_prepare_common(task, data); @@ -1569,18 +1624,8 @@ static void ff_layout_commit_done(struct rpc_task *task, void *data) static void ff_layout_commit_count_stats(struct rpc_task *task, void *data) { struct nfs_commit_data *cdata = data; - struct nfs_page *req; - __u64 count = 0; - - if (task->tk_status == 0) { - list_for_each_entry(req, &cdata->pages, wb_list) - count += req->wb_bytes; - } - - nfs4_ff_layout_stat_io_end_write(task, - FF_LAYOUT_COMP(cdata->lseg, cdata->ds_commit_index), - count, count, NFS_FILE_SYNC); + ff_layout_commit_record_layoutstats_done(task, cdata); rpc_count_iostats_metrics(task, &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]); } diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 11bbae44f4cb..7b30ac0c7def 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1375,6 +1375,7 @@ enum { NFS_IOHDR_ERROR = 0, NFS_IOHDR_EOF, NFS_IOHDR_REDO, + NFS_IOHDR_STAT, }; struct nfs_pgio_header { @@ -1454,6 +1455,7 @@ struct nfs_commit_data { const struct rpc_call_ops *mds_ops; const struct nfs_commit_completion_ops *completion_ops; int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); + unsigned long flags; }; struct nfs_pgio_completion_ops { -- cgit v1.2.3 From 4d0ac22109740c36174c5aac630876b2af0f6943 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 22 Dec 2015 15:32:00 -0500 Subject: pNFS/flexfiles: Ensure we record layoutstats even if RPC is terminated early Currently, we will only record the layoutstats correctly if the RPC call successfully obtains a slot. If we exit before that happens, then we may find ourselves starting the busy timer through the call in ff_layout_(read|write)_prepare_layoutstats, but never stopping it. The same thing happens if we're doing DA-DS. The fix is to ensure that we catch these cases in the rpc_release() callback. Signed-off-by: Trond Myklebust --- fs/nfs/flexfilelayout/flexfilelayout.c | 37 ++++++++++++++++++++++++++++------ 1 file changed, 31 insertions(+), 6 deletions(-) (limited to 'fs/nfs') diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c index 9257679a15ba..2981cd190bfd 100644 --- a/fs/nfs/flexfilelayout/flexfilelayout.c +++ b/fs/nfs/flexfilelayout/flexfilelayout.c @@ -1397,6 +1397,15 @@ static void ff_layout_read_count_stats(struct rpc_task *task, void *data) &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]); } +static void ff_layout_read_release(void *data) +{ + struct nfs_pgio_header *hdr = data; + + ff_layout_read_record_layoutstats_done(&hdr->task, hdr); + pnfs_generic_rw_release(data); +} + + static int ff_layout_write_done_cb(struct rpc_task *task, struct nfs_pgio_header *hdr) { @@ -1564,6 +1573,14 @@ static void ff_layout_write_count_stats(struct rpc_task *task, void *data) &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]); } +static void ff_layout_write_release(void *data) +{ + struct nfs_pgio_header *hdr = data; + + ff_layout_write_record_layoutstats_done(&hdr->task, hdr); + pnfs_generic_rw_release(data); +} + static void ff_layout_commit_record_layoutstats_start(struct rpc_task *task, struct nfs_commit_data *cdata) { @@ -1630,46 +1647,54 @@ static void ff_layout_commit_count_stats(struct rpc_task *task, void *data) &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]); } +static void ff_layout_commit_release(void *data) +{ + struct nfs_commit_data *cdata = data; + + ff_layout_commit_record_layoutstats_done(&cdata->task, cdata); + pnfs_generic_commit_release(data); +} + static const struct rpc_call_ops ff_layout_read_call_ops_v3 = { .rpc_call_prepare = ff_layout_read_prepare_v3, .rpc_call_done = ff_layout_read_call_done, .rpc_count_stats = ff_layout_read_count_stats, - .rpc_release = pnfs_generic_rw_release, + .rpc_release = ff_layout_read_release, }; static const struct rpc_call_ops ff_layout_read_call_ops_v4 = { .rpc_call_prepare = ff_layout_read_prepare_v4, .rpc_call_done = ff_layout_read_call_done, .rpc_count_stats = ff_layout_read_count_stats, - .rpc_release = pnfs_generic_rw_release, + .rpc_release = ff_layout_read_release, }; static const struct rpc_call_ops ff_layout_write_call_ops_v3 = { .rpc_call_prepare = ff_layout_write_prepare_v3, .rpc_call_done = ff_layout_write_call_done, .rpc_count_stats = ff_layout_write_count_stats, - .rpc_release = pnfs_generic_rw_release, + .rpc_release = ff_layout_write_release, }; static const struct rpc_call_ops ff_layout_write_call_ops_v4 = { .rpc_call_prepare = ff_layout_write_prepare_v4, .rpc_call_done = ff_layout_write_call_done, .rpc_count_stats = ff_layout_write_count_stats, - .rpc_release = pnfs_generic_rw_release, + .rpc_release = ff_layout_write_release, }; static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = { .rpc_call_prepare = ff_layout_commit_prepare_v3, .rpc_call_done = ff_layout_commit_done, .rpc_count_stats = ff_layout_commit_count_stats, - .rpc_release = pnfs_generic_commit_release, + .rpc_release = ff_layout_commit_release, }; static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = { .rpc_call_prepare = ff_layout_commit_prepare_v4, .rpc_call_done = ff_layout_commit_done, .rpc_count_stats = ff_layout_commit_count_stats, - .rpc_release = pnfs_generic_commit_release, + .rpc_release = ff_layout_commit_release, }; static enum pnfs_try_status -- cgit v1.2.3