From 8bbfa9f3889b643fc7de82c0c761ef17097f8faf Mon Sep 17 00:00:00 2001 From: Greg Banks Date: Tue, 13 Jan 2009 21:26:34 +1100 Subject: knfsd: remove the nfsd thread busy histogram Stop gathering the data that feeds the 'th' line in /proc/net/rpc/nfsd because the questionable data provided is not worth the scalability impact of calculating it. Instead, always report zeroes. The current approach suffers from three major issues: 1. update_thread_usage() increments buckets by call service time or call arrival time...in jiffies. On lightly loaded machines, call service times are usually < 1 jiffy; on heavily loaded machines call arrival times will be << 1 jiffy. So a large portion of the updates to the buckets are rounded down to zero, and the histogram is undercounting. 2. As seen previously on the nfs mailing list, the format in which the histogram is presented is cryptic, difficult to explain, and difficult to use. 3. Updating the histogram requires taking a global spinlock and dirtying the global variables nfsd_last_call, nfsd_busy, and nfsdstats *twice* on every RPC call, which is a significant scaling limitation. Testing on a 4 CPU 4 NIC Altix using 4 IRIX clients each doing 1K streaming reads at full line rate, shows the stats update code (inlined into nfsd()) takes about 1.7% of each CPU. This patch drops the contribution from nfsd() into the profile noise. This patch is a forward-ported version of knfsd-remove-nfsd-threadstats which has been shipping in the SGI "Enhanced NFS" product since 2006. In that time, exactly one customer has noticed that the threadstats were missing. It has been previously posted: http://article.gmane.org/gmane.linux.nfs/10376 and more recently requested to be posted again. Signed-off-by: Greg Banks Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfssvc.c | 28 ---------------------------- 1 file changed, 28 deletions(-) (limited to 'fs/nfsd/nfssvc.c') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 07e4f5d7baa8..c3eb0759fd57 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -40,9 +40,6 @@ extern struct svc_program nfsd_program; static int nfsd(void *vrqstp); struct timeval nfssvc_boot; -static atomic_t nfsd_busy; -static unsigned long nfsd_last_call; -static DEFINE_SPINLOCK(nfsd_call_lock); /* * nfsd_mutex protects nfsd_serv -- both the pointer itself and the members @@ -227,7 +224,6 @@ int nfsd_create_serv(void) nfsd_max_blksize /= 2; } - atomic_set(&nfsd_busy, 0); nfsd_serv = svc_create_pooled(&nfsd_program, nfsd_max_blksize, AF_INET, nfsd_last_thread, nfsd, THIS_MODULE); @@ -376,26 +372,6 @@ nfsd_svc(unsigned short port, int nrservs) return error; } -static inline void -update_thread_usage(int busy_threads) -{ - unsigned long prev_call; - unsigned long diff; - int decile; - - spin_lock(&nfsd_call_lock); - prev_call = nfsd_last_call; - nfsd_last_call = jiffies; - decile = busy_threads*10/nfsdstats.th_cnt; - if (decile>0 && decile <= 10) { - diff = nfsd_last_call - prev_call; - if ( (nfsdstats.th_usage[decile-1] += diff) >= NFSD_USAGE_WRAP) - nfsdstats.th_usage[decile-1] -= NFSD_USAGE_WRAP; - if (decile == 10) - nfsdstats.th_fullcnt++; - } - spin_unlock(&nfsd_call_lock); -} /* * This is the NFS server kernel thread @@ -464,8 +440,6 @@ nfsd(void *vrqstp) continue; } - update_thread_usage(atomic_read(&nfsd_busy)); - atomic_inc(&nfsd_busy); /* Lock the export hash tables for reading. 
*/ exp_readlock(); @@ -474,8 +448,6 @@ nfsd(void *vrqstp) /* Unlock export hash tables */ exp_readunlock(); - update_thread_usage(atomic_read(&nfsd_busy)); - atomic_dec(&nfsd_busy); } /* Clear signals before calling svc_exit_thread() */ -- cgit v1.2.3 From 03cf6c9f49a8fea953d38648d016e3f46e814991 Mon Sep 17 00:00:00 2001 From: Greg Banks Date: Tue, 13 Jan 2009 21:26:36 +1100 Subject: knfsd: add file to export stats about nfsd pools Add /proc/fs/nfsd/pool_stats to export to userspace various statistics about the operation of rpc server thread pools. This patch is based on a forward-ported version of knfsd-add-pool-thread-stats which has been shipping in the SGI "Enhanced NFS" product since 2006 and which was previously posted: http://article.gmane.org/gmane.linux.nfs/10375 It has also been updated thus: * moved EXPORT_SYMBOL() to near the function it exports * made the new struct struct seq_operations const * used SEQ_START_TOKEN instead of ((void *)1) * merged fix from SGI PV 990526 "sunrpc: use dprintk instead of printk in svc_pool_stats_*()" by Harshula Jayasuriya. * merged fix from SGI PV 964001 "Crash reading pool_stats before nfsds are started". Signed-off-by: Greg Banks Signed-off-by: Harshula Jayasuriya Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfsctl.c | 12 ++++++ fs/nfsd/nfssvc.c | 7 ++++ include/linux/sunrpc/svc.h | 11 +++++ net/sunrpc/svc_xprt.c | 100 ++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 129 insertions(+), 1 deletion(-) (limited to 'fs/nfsd/nfssvc.c') diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 3d93b2064ce5..4adebb6312c4 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -60,6 +60,7 @@ enum { NFSD_FO_UnlockFS, NFSD_Threads, NFSD_Pool_Threads, + NFSD_Pool_Stats, NFSD_Versions, NFSD_Ports, NFSD_MaxBlkSize, @@ -172,6 +173,16 @@ static const struct file_operations exports_operations = { .owner = THIS_MODULE, }; +extern int nfsd_pool_stats_open(struct inode *inode, struct file *file); + +static struct file_operations pool_stats_operations = { + .open = nfsd_pool_stats_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release, + .owner = THIS_MODULE, +}; + /*----------------------------------------------------------------------------*/ /* * payload - write methods @@ -1246,6 +1257,7 @@ static int nfsd_fill_super(struct super_block * sb, void * data, int silent) [NFSD_Fh] = {"filehandle", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Threads] = {"threads", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Pool_Threads] = {"pool_threads", &transaction_ops, S_IWUSR|S_IRUSR}, + [NFSD_Pool_Stats] = {"pool_stats", &pool_stats_operations, S_IRUGO}, [NFSD_Versions] = {"versions", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Ports] = {"portlist", &transaction_ops, S_IWUSR|S_IRUGO}, [NFSD_MaxBlkSize] = {"max_block_size", &transaction_ops, S_IWUSR|S_IRUGO}, diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index c3eb0759fd57..ef0a3686639d 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -546,3 +546,10 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) nfsd_cache_update(rqstp, proc->pc_cachetype, statp + 1); return 1; } + +int nfsd_pool_stats_open(struct inode *inode, struct file *file) +{ + if (nfsd_serv == NULL) + return -ENODEV; + return 
svc_pool_stats_open(nfsd_serv, file); +} diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 39ec186a492d..9f9f699dd469 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -24,6 +24,15 @@ */ typedef int (*svc_thread_fn)(void *); +/* statistics for svc_pool structures */ +struct svc_pool_stats { + unsigned long packets; + unsigned long sockets_queued; + unsigned long threads_woken; + unsigned long overloads_avoided; + unsigned long threads_timedout; +}; + /* * * RPC service thread pool. @@ -42,6 +51,7 @@ struct svc_pool { unsigned int sp_nrthreads; /* # of threads in pool */ struct list_head sp_all_threads; /* all server threads */ int sp_nwaking; /* number of threads woken but not yet active */ + struct svc_pool_stats sp_stats; /* statistics on pool operation */ } ____cacheline_aligned_in_smp; /* @@ -396,6 +406,7 @@ struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, sa_family_t, void (*shutdown)(struct svc_serv *), svc_thread_fn, struct module *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); +int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_destroy(struct svc_serv *); int svc_process(struct svc_rqst *); int svc_register(const struct svc_serv *, const unsigned short, diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 0551b6b6cf8c..1e66f2491460 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -318,6 +318,8 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) goto out_unlock; } + pool->sp_stats.packets++; + /* Mark transport as busy. It will remain in this state until * the provider calls svc_xprt_received. 
We update XPT_BUSY * atomically because it also guards against trying to enqueue @@ -355,6 +357,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) if (pool->sp_nwaking >= SVC_MAX_WAKING) { /* too many threads are runnable and trying to wake up */ thread_avail = 0; + pool->sp_stats.overloads_avoided++; } if (thread_avail) { @@ -374,11 +377,13 @@ void svc_xprt_enqueue(struct svc_xprt *xprt) atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved); rqstp->rq_waking = 1; pool->sp_nwaking++; + pool->sp_stats.threads_woken++; BUG_ON(xprt->xpt_pool != pool); wake_up(&rqstp->rq_wait); } else { dprintk("svc: transport %p put into queue\n", xprt); list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); + pool->sp_stats.sockets_queued++; BUG_ON(xprt->xpt_pool != pool); } @@ -591,6 +596,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) int pages; struct xdr_buf *arg; DECLARE_WAITQUEUE(wait, current); + long time_left; dprintk("svc: server %p waiting for data (to = %ld)\n", rqstp, timeout); @@ -676,12 +682,14 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) add_wait_queue(&rqstp->rq_wait, &wait); spin_unlock_bh(&pool->sp_lock); - schedule_timeout(timeout); + time_left = schedule_timeout(timeout); try_to_freeze(); spin_lock_bh(&pool->sp_lock); remove_wait_queue(&rqstp->rq_wait, &wait); + if (!time_left) + pool->sp_stats.threads_timedout++; xprt = rqstp->rq_xprt; if (!xprt) { @@ -1114,3 +1122,93 @@ int svc_xprt_names(struct svc_serv *serv, char *buf, int buflen) return totlen; } EXPORT_SYMBOL_GPL(svc_xprt_names); + + +/*----------------------------------------------------------------------------*/ + +static void *svc_pool_stats_start(struct seq_file *m, loff_t *pos) +{ + unsigned int pidx = (unsigned int)*pos; + struct svc_serv *serv = m->private; + + dprintk("svc_pool_stats_start, *pidx=%u\n", pidx); + + lock_kernel(); + /* bump up the pseudo refcount while traversing */ + svc_get(serv); + unlock_kernel(); + + if (!pidx) + return SEQ_START_TOKEN; + return (pidx > 
serv->sv_nrpools ? NULL : &serv->sv_pools[pidx-1]); +} + +static void *svc_pool_stats_next(struct seq_file *m, void *p, loff_t *pos) +{ + struct svc_pool *pool = p; + struct svc_serv *serv = m->private; + + dprintk("svc_pool_stats_next, *pos=%llu\n", *pos); + + if (p == SEQ_START_TOKEN) { + pool = &serv->sv_pools[0]; + } else { + unsigned int pidx = (pool - &serv->sv_pools[0]); + if (pidx < serv->sv_nrpools-1) + pool = &serv->sv_pools[pidx+1]; + else + pool = NULL; + } + ++*pos; + return pool; +} + +static void svc_pool_stats_stop(struct seq_file *m, void *p) +{ + struct svc_serv *serv = m->private; + + lock_kernel(); + /* this function really, really should have been called svc_put() */ + svc_destroy(serv); + unlock_kernel(); +} + +static int svc_pool_stats_show(struct seq_file *m, void *p) +{ + struct svc_pool *pool = p; + + if (p == SEQ_START_TOKEN) { + seq_puts(m, "# pool packets-arrived sockets-enqueued threads-woken overloads-avoided threads-timedout\n"); + return 0; + } + + seq_printf(m, "%u %lu %lu %lu %lu %lu\n", + pool->sp_id, + pool->sp_stats.packets, + pool->sp_stats.sockets_queued, + pool->sp_stats.threads_woken, + pool->sp_stats.overloads_avoided, + pool->sp_stats.threads_timedout); + + return 0; +} + +static const struct seq_operations svc_pool_stats_seq_ops = { + .start = svc_pool_stats_start, + .next = svc_pool_stats_next, + .stop = svc_pool_stats_stop, + .show = svc_pool_stats_show, +}; + +int svc_pool_stats_open(struct svc_serv *serv, struct file *file) +{ + int err; + + err = seq_open(file, &svc_pool_stats_seq_ops); + if (!err) + ((struct seq_file *) file->private_data)->private = serv; + return err; +} +EXPORT_SYMBOL(svc_pool_stats_open); + +/*----------------------------------------------------------------------------*/ -- cgit v1.2.3 From 074fe897536f095309c5aaffcf912952882ab2cb Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:15 +0300 Subject: nfsd41: DRC save, restore, and clear functions Cache all the result pages, 
including the rpc header in rq_respages[0], for a request in the slot table cache entry. Cache the statp pointer from nfsd_dispatch which points into rq_respages[0] just past the rpc header. When setting a cache entry, calculate and save the length of the nfs data minus the rpc header for rq_respages[0]. When replaying a cache entry, replace the cached rpc header with the replayed request rpc result header, unless there is not enough room in the cached results first page. In that case, use the cached rpc header. The session's fore channel maxresponse size cached is set to NFSD_PAGES_PER_SLOT * PAGE_SIZE. For compounds we are caching with operations such as READDIR that use the xdr_buf->pages to hold data, we choose to cache the extra page of data rather than copying data from xdr_buf->pages into the xdr_buf->head page. [nfsd41: limit cache to maxresponsesize_cached] [nfsd41: mv nfsd4_set_statp under CONFIG_NFSD_V4_1] [nfsd41: rename nfsd4_move_pages] [nfsd41: rename page_no variable] [nfsd41: rename nfsd4_set_cache_entry] [nfsd41: fix nfsd41_copy_replay_data comment] [nfsd41: add to nfsd4_set_cache_entry] Signed-off-by: Andy Adamson Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4state.c | 142 +++++++++++++++++++++++++++++++++++++++++++++ fs/nfsd/nfssvc.c | 4 ++ include/linux/nfsd/cache.h | 1 + include/linux/nfsd/state.h | 13 +++++ include/linux/nfsd/xdr4.h | 4 ++ 5 files changed, 164 insertions(+) (limited to 'fs/nfsd/nfssvc.c') diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 9243dca3576c..a37b91dab1bf 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -852,6 +852,148 @@ out_err: return; } +void +nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp) +{ + struct nfsd4_compoundres *resp = rqstp->rq_resp; + + resp->cstate.statp = statp; +} + +/* + * Dereference the result pages.
+ */ +static void +nfsd4_release_respages(struct page **respages, short resused) +{ + int i; + + dprintk("--> %s\n", __func__); + for (i = 0; i < resused; i++) { + if (!respages[i]) + continue; + put_page(respages[i]); + respages[i] = NULL; + } +} + +static void +nfsd4_copy_pages(struct page **topages, struct page **frompages, short count) +{ + int i; + + for (i = 0; i < count; i++) { + topages[i] = frompages[i]; + if (!topages[i]) + continue; + get_page(topages[i]); + } +} + +/* + * Cache the reply pages up to NFSD_PAGES_PER_SLOT + 1, clearing the previous + * pages. We add a page to NFSD_PAGES_PER_SLOT for the case where the total + * length of the XDR response is less than se_fmaxresp_cached + * (NFSD_PAGES_PER_SLOT * PAGE_SIZE) but the xdr_buf pages is used for a + * part of the reply (e.g. readdir). + * + * Store the base and length of the rq_res.head[0] page + * of the NFSv4.1 data, just past the rpc header. + */ +void +nfsd4_store_cache_entry(struct nfsd4_compoundres *resp) +{ + struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; + struct svc_rqst *rqstp = resp->rqstp; + struct nfsd4_compoundargs *args = rqstp->rq_argp; + struct nfsd4_op *op = &args->ops[resp->opcnt]; + struct kvec *resv = &rqstp->rq_res.head[0]; + + dprintk("--> %s entry %p\n", __func__, entry); + + /* Don't cache a failed OP_SEQUENCE.
*/ + if (resp->opcnt == 1 && op->opnum == OP_SEQUENCE && resp->cstate.status) + return; + nfsd4_release_respages(entry->ce_respages, entry->ce_resused); + entry->ce_resused = rqstp->rq_resused; + if (entry->ce_resused > NFSD_PAGES_PER_SLOT + 1) + entry->ce_resused = NFSD_PAGES_PER_SLOT + 1; + nfsd4_copy_pages(entry->ce_respages, rqstp->rq_respages, + entry->ce_resused); + entry->ce_status = resp->cstate.status; + entry->ce_datav.iov_base = resp->cstate.statp; + entry->ce_datav.iov_len = resv->iov_len - ((char *)resp->cstate.statp - + (char *)page_address(rqstp->rq_respages[0])); + entry->ce_opcnt = resp->opcnt; + /* Current request rpc header length*/ + entry->ce_rpchdrlen = (char *)resp->cstate.statp - + (char *)page_address(rqstp->rq_respages[0]); +} + +/* + * We keep the rpc header, but take the nfs reply from the replycache. + */ +static int +nfsd41_copy_replay_data(struct nfsd4_compoundres *resp, + struct nfsd4_cache_entry *entry) +{ + struct svc_rqst *rqstp = resp->rqstp; + struct kvec *resv = &resp->rqstp->rq_res.head[0]; + int len; + + /* Current request rpc header length*/ + len = (char *)resp->cstate.statp - + (char *)page_address(rqstp->rq_respages[0]); + if (entry->ce_datav.iov_len + len > PAGE_SIZE) { + dprintk("%s v41 cached reply too large (%Zd).\n", __func__, + entry->ce_datav.iov_len); + return 0; + } + /* copy the cached reply nfsd data past the current rpc header */ + memcpy((char *)resv->iov_base + len, entry->ce_datav.iov_base, + entry->ce_datav.iov_len); + resv->iov_len = len + entry->ce_datav.iov_len; + return 1; +} + +/* + * Keep the first page of the replay. Copy the NFSv4.1 data from the first + * cached page. Replace any further replay pages from the cache.
+ */ +__be32 +nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp) +{ + struct nfsd4_cache_entry *entry = &resp->cstate.slot->sl_cache_entry; + __be32 status; + + dprintk("--> %s entry %p\n", __func__, entry); + + + if (!nfsd41_copy_replay_data(resp, entry)) { + /* + * Not enough room to use the replay rpc header, send the + * cached header. Release all the allocated result pages. + */ + svc_free_res_pages(resp->rqstp); + nfsd4_copy_pages(resp->rqstp->rq_respages, entry->ce_respages, + entry->ce_resused); + } else { + /* Release all but the first allocated result page */ + + resp->rqstp->rq_resused--; + svc_free_res_pages(resp->rqstp); + + nfsd4_copy_pages(&resp->rqstp->rq_respages[1], + &entry->ce_respages[1], + entry->ce_resused - 1); + } + + resp->rqstp->rq_resused = entry->ce_resused; + status = entry->ce_status; + + return status; +} + /* * Set the exchange_id flags returned by the server. */ diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index ef0a3686639d..b5168d1898ec 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -515,6 +515,10 @@ nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp) + rqstp->rq_res.head[0].iov_len; rqstp->rq_res.head[0].iov_len += sizeof(__be32); + /* NFSv4.1 DRC requires statp */ + if (rqstp->rq_vers == 4) + nfsd4_set_statp(rqstp, statp); + /* Now call the procedure handler, and encode NFS status. 
*/ nfserr = proc->pc_func(rqstp, rqstp->rq_argp, rqstp->rq_resp); nfserr = map_new_errors(rqstp->rq_vers, nfserr); diff --git a/include/linux/nfsd/cache.h b/include/linux/nfsd/cache.h index 04b355c801d8..a59a2df6d079 100644 --- a/include/linux/nfsd/cache.h +++ b/include/linux/nfsd/cache.h @@ -75,5 +75,6 @@ int nfsd_reply_cache_init(void); void nfsd_reply_cache_shutdown(void); int nfsd_cache_lookup(struct svc_rqst *, int); void nfsd_cache_update(struct svc_rqst *, int, __be32 *); +void nfsd4_set_statp(struct svc_rqst *rqstp, __be32 *statp); #endif /* NFSCACHE_H */ diff --git a/include/linux/nfsd/state.h b/include/linux/nfsd/state.h index 90829db76861..f1edb1d98523 100644 --- a/include/linux/nfsd/state.h +++ b/include/linux/nfsd/state.h @@ -99,9 +99,22 @@ struct nfs4_callback { struct rpc_clnt * cb_client; }; +/* Maximum number of pages per slot cache entry */ +#define NFSD_PAGES_PER_SLOT 1 + +struct nfsd4_cache_entry { + __be32 ce_status; + struct kvec ce_datav; /* encoded NFSv4.1 data in rq_res.head[0] */ + struct page *ce_respages[NFSD_PAGES_PER_SLOT + 1]; + short ce_resused; + int ce_opcnt; + int ce_rpchdrlen; +}; + struct nfsd4_slot { bool sl_inuse; u32 sl_seqid; + struct nfsd4_cache_entry sl_cache_entry; }; struct nfsd4_session { diff --git a/include/linux/nfsd/xdr4.h b/include/linux/nfsd/xdr4.h index 6e28a041008d..d091684325af 100644 --- a/include/linux/nfsd/xdr4.h +++ b/include/linux/nfsd/xdr4.h @@ -51,6 +51,8 @@ struct nfsd4_compound_state { /* For sessions DRC */ struct nfsd4_session *session; struct nfsd4_slot *slot; + __be32 *statp; + u32 status; }; struct nfsd4_change_info { @@ -487,6 +489,8 @@ extern __be32 nfsd4_setclientid(struct svc_rqst *rqstp, extern __be32 nfsd4_setclientid_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_setclientid_confirm *setclientid_confirm); +extern void nfsd4_store_cache_entry(struct nfsd4_compoundres *resp); +extern __be32 nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp); extern __be32 
nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *, struct nfsd4_exchange_id *); -- cgit v1.2.3 From c3d06f9ce8544fecfe13e377d1e2c2e47fe18dbc Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Fri, 3 Apr 2009 08:28:18 +0300 Subject: nfsd41: hard page limit for DRC Use no more than 1/128th of the number of free pages at nfsd startup for the v4.1 DRC. This is an arbitrary default which should probably end up under the control of an administrator. Signed-off-by: Andy Adamson [moved added fields in struct svc_serv under CONFIG_NFSD_V4_1] Signed-off-by: Benny Halevy [fix set_max_drc calculation of sv_drc_max_pages] [moved NFSD_DRC_SIZE_SHIFT's declaration up in header file] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 23 +++++++++++++++++++++++ include/linux/nfsd/nfsd.h | 3 +++ include/linux/sunrpc/svc.h | 2 ++ 3 files changed, 28 insertions(+) (limited to 'fs/nfsd/nfssvc.c') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index b5168d1898ec..b53a098e97a4 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -197,6 +198,26 @@ void nfsd_reset_versions(void) } } +/* + * Each session guarantees a negotiated per slot memory cache for replies + * which in turn consumes memory beyond the v2/v3/v4.0 server. A dedicated + * NFSv4.1 server might want to use more memory for a DRC than a machine + * with multiple services. + * + * Impose a hard limit on the number of pages for the DRC which varies + * according to the machine's free pages. This is of course only a default. + * + * For now this is a #defined shift which could be under admin control + * in the future.
+ */ +static void set_max_drc(void) +{ + nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() + >> NFSD_DRC_SIZE_SHIFT; + nfsd_serv->sv_drc_pages_used = 0; + dprintk("%s svc_drc_max_pages %u\n", __func__, + nfsd_serv->sv_drc_max_pages); +} int nfsd_create_serv(void) { @@ -229,6 +250,8 @@ int nfsd_create_serv(void) nfsd_last_thread, nfsd, THIS_MODULE); if (nfsd_serv == NULL) err = -ENOMEM; + else + set_max_drc(); do_gettimeofday(&nfssvc_boot); /* record boot time */ return err; diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index ab9616d09204..1f063d495159 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -331,6 +331,9 @@ extern struct timeval nfssvc_boot; #define NFSD_LEASE_TIME (nfs4_lease_time()) #define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ +/* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ +#define NFSD_DRC_SIZE_SHIFT 7 + /* * The following attributes are currently not supported by the NFSv4 server: * ARCHIVE (deprecated anyway) diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 815dd589d4db..d209c630a4a1 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -95,6 +95,8 @@ struct svc_serv { struct module * sv_module; /* optional module to count when * adding threads */ svc_thread_fn sv_function; /* main function for threads */ + unsigned int sv_drc_max_pages; /* Total pages for DRC */ + unsigned int sv_drc_pages_used;/* DRC pages used */ }; /* -- cgit v1.2.3 From 8daf220a6a83c47b9648c28bb819c14c60bad7f9 Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Fri, 3 Apr 2009 08:28:59 +0300 Subject: nfsd41: control nfsv4.1 svc via /proc/fs/nfsd/versions Support enabling and disabling nfsv4.1 via /proc/fs/nfsd/versions by writing the strings "+4.1" or "-4.1" correspondingly. Use user mode nfs-utils (rpc.nfsd option) to enable. 
This will allow us to get rid of CONFIG_NFSD_V4_1 [nfsd41: disable support for minorversion by default] Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 2 +- fs/nfsd/nfsctl.c | 26 +++++++++++++++++++++++--- fs/nfsd/nfssvc.c | 24 ++++++++++++++++++++++++ include/linux/nfsd/nfsd.h | 2 ++ 4 files changed, 50 insertions(+), 4 deletions(-) (limited to 'fs/nfsd/nfssvc.c') diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index db208dd8fdca..52bb14debb30 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -921,7 +921,7 @@ nfsd4_proc_compound(struct svc_rqst *rqstp, * According to RFC3010, this takes precedence over all other errors. */ status = nfserr_minor_vers_mismatch; - if (args->minorversion > NFSD_SUPPORTED_MINOR_VERSION) + if (args->minorversion > nfsd_supported_minorversion) goto out; if (!nfs41_op_ordering_ok(args)) { diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index 4adebb6312c4..a9b8c75bf0bf 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -792,8 +792,9 @@ out_free: static ssize_t __write_versions(struct file *file, char *buf, size_t size) { char *mesg = buf; - char *vers, sign; + char *vers, *minorp, sign; int len, num; + unsigned minor; ssize_t tlen = 0; char *sep; @@ -814,9 +815,20 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) do { sign = *vers; if (sign == '+' || sign == '-') - num = simple_strtol((vers+1), NULL, 0); + num = simple_strtol((vers+1), &minorp, 0); else - num = simple_strtol(vers, NULL, 0); + num = simple_strtol(vers, &minorp, 0); + if (*minorp == '.') { + if (num < 4) + return -EINVAL; + minor = simple_strtoul(minorp+1, NULL, 0); + if (minor == 0) + return -EINVAL; + if (nfsd_minorversion(minor, sign == '-' ? 
+ NFSD_CLEAR : NFSD_SET) < 0) + return -EINVAL; + goto next; + } switch(num) { case 2: case 3: @@ -826,6 +838,7 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) default: return -EINVAL; } + next: vers += len + 1; tlen += len; } while ((len = qword_get(&mesg, vers, size)) > 0); @@ -844,6 +857,13 @@ static ssize_t __write_versions(struct file *file, char *buf, size_t size) num); sep = " "; } + if (nfsd_vers(4, NFSD_AVAIL)) + for (minor = 1; minor <= NFSD_SUPPORTED_MINOR_VERSION; minor++) + len += sprintf(buf+len, " %c4.%u", + (nfsd_vers(4, NFSD_TEST) && + nfsd_minorversion(minor, NFSD_TEST)) ? + '+' : '-', + minor); len += sprintf(buf+len, "\n"); return len; } diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index b53a098e97a4..e9d57734a348 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -121,6 +121,8 @@ struct svc_program nfsd_program = { }; +u32 nfsd_supported_minorversion; + int nfsd_vers(int vers, enum vers_op change) { if (vers < NFSD_MINVERS || vers >= NFSD_NRVERS) @@ -147,6 +149,28 @@ int nfsd_vers(int vers, enum vers_op change) } return 0; } + +int nfsd_minorversion(u32 minorversion, enum vers_op change) +{ + if (minorversion > NFSD_SUPPORTED_MINOR_VERSION) + return -1; + switch(change) { + case NFSD_SET: + nfsd_supported_minorversion = minorversion; + break; + case NFSD_CLEAR: + if (minorversion == 0) + return -1; + nfsd_supported_minorversion = minorversion - 1; + break; + case NFSD_TEST: + return minorversion <= nfsd_supported_minorversion; + case NFSD_AVAIL: + return minorversion <= NFSD_SUPPORTED_MINOR_VERSION; + } + return 0; +} + /* * Maximum number of nfsd processes */ diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 1f063d495159..70339b7b83c0 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -53,6 +53,7 @@ typedef int (*nfsd_dirop_t)(struct inode *, struct dentry *, int, int); extern struct svc_program nfsd_program; extern struct svc_version nfsd_version2, 
nfsd_version3, nfsd_version4; +extern u32 nfsd_supported_minorversion; extern struct mutex nfsd_mutex; extern struct svc_serv *nfsd_serv; @@ -149,6 +150,7 @@ int nfsd_set_posix_acl(struct svc_fh *, int, struct posix_acl *); enum vers_op {NFSD_SET, NFSD_CLEAR, NFSD_TEST, NFSD_AVAIL }; int nfsd_vers(int vers, enum vers_op change); +int nfsd_minorversion(u32 minorversion, enum vers_op change); void nfsd_reset_versions(void); int nfsd_create_serv(void); -- cgit v1.2.3 From f0ad670d7061efad138df19aefe569263c4ea37b Mon Sep 17 00:00:00 2001 From: Benny Halevy Date: Mon, 6 Apr 2009 12:00:36 +0300 Subject: nfsd41: define NFSD_DRC_SIZE_SHIFT in set_max_drc Fixes the following compiler error: fs/nfsd/nfssvc.c: In function 'set_max_drc': fs/nfsd/nfssvc.c:240: error: 'NFSD_DRC_SIZE_SHIFT' undeclared CONFIG_NFSD_V4 is not set Reported-by: Alexander Beregalov Signed-off-by: Benny Halevy Signed-off-by: J. Bruce Fields --- fs/nfsd/nfssvc.c | 2 ++ include/linux/nfsd/nfsd.h | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/nfsd/nfssvc.c') diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index e9d57734a348..469c931cca95 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -236,6 +236,8 @@ void nfsd_reset_versions(void) */ static void set_max_drc(void) { + /* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ + #define NFSD_DRC_SIZE_SHIFT 7 nfsd_serv->sv_drc_max_pages = nr_free_buffer_pages() >> NFSD_DRC_SIZE_SHIFT; nfsd_serv->sv_drc_pages_used = 0; diff --git a/include/linux/nfsd/nfsd.h b/include/linux/nfsd/nfsd.h index 0ec4d142c503..2b49d676d0c9 100644 --- a/include/linux/nfsd/nfsd.h +++ b/include/linux/nfsd/nfsd.h @@ -333,9 +333,6 @@ extern struct timeval nfssvc_boot; #define NFSD_LEASE_TIME (nfs4_lease_time()) #define NFSD_LAUNDROMAT_MINTIMEOUT 10 /* seconds */ -/* The percent of nr_free_buffer_pages used by the V4.1 server DRC */ -#define NFSD_DRC_SIZE_SHIFT 7 - /* * The following attributes are currently not supported by the NFSv4 server: 
* ARCHIVE (deprecated anyway) -- cgit v1.2.3