From b85d88068444ae5dcb1639bcef770ccbf085dd4e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 May 2006 01:40:49 -0400 Subject: SUNRPC: select privileged port numbers at random Make the RPC client select privileged ephemeral source ports at random. This improves DRC behavior on the server by using the same port when reconnecting for the same mount point, but using a different port for fresh mounts. The Linux TCP implementation already does this for nonprivileged ports. Note that TCP sockets in TIME_WAIT will prevent quick reuse of a random ephemeral port number by leaving the port INUSE until the connection transitions out of TIME_WAIT. Test plan: Connectathon against every known server implementation using multiple mount points. Locking especially. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 4b4e7dfdff14..21006b109101 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -930,6 +930,13 @@ static void xs_udp_timer(struct rpc_task *task) xprt_adjust_cwnd(task, -ETIMEDOUT); } +static unsigned short xs_get_random_port(void) +{ + unsigned short range = xprt_max_resvport - xprt_min_resvport; + unsigned short rand = (unsigned short) net_random() % range; + return rand + xprt_min_resvport; +} + /** * xs_set_port - reset the port number in the remote endpoint address * @xprt: generic transport @@ -1275,7 +1282,7 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to) memset(xprt->slot, 0, slot_table_size); xprt->prot = IPPROTO_UDP; - xprt->port = xprt_max_resvport; + xprt->port = xs_get_random_port(); xprt->tsh_size = 0; xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; /* XXX: header size can vary due to auth type, IPv6, etc. */ @@ -1317,7 +1324,7 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to) memset(xprt->slot, 0, slot_table_size); xprt->prot = IPPROTO_TCP; - xprt->port = xprt_max_resvport; + xprt->port = xs_get_random_port(); xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32); xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; xprt->max_payload = RPC_MAX_FRAGMENT_SIZE; -- cgit v1.2.3 From bf3fcf89552f24657bcfb6a9d73cd167ebb496c6 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 25 May 2006 01:40:51 -0400 Subject: SUNRPC: NFS_ROOT always uses the same XIDs The XID generator uses get_random_bytes to generate an initial XID. NFS_ROOT starts up before the random driver, though, so get_random_bytes doesn't set a random XID for NFS_ROOT. This causes NFS_ROOT mount points to reuse XIDs every time the client is booted. If the client boots often enough, the server will start serving old replies out of its DRC. Use net_random() instead. Test plan: I/O intensive workloads should perform well and generate no errors. Traces taken during client reboots should show that NFS_ROOT mounts use unique XIDs after every reboot. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 4dd5b3cfe754..02060d0e7be8 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include @@ -830,7 +830,7 @@ static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) static inline void xprt_init_xid(struct rpc_xprt *xprt) { - get_random_bytes(&xprt->xid, sizeof(xprt->xid)); + xprt->xid = net_random(); } static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt) -- cgit v1.2.3 From 1f5ce9e93aa96a867f195ed45f6f77935175f12e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Jun 2006 09:34:16 -0400 Subject: VFS: Unexport do_kern_mount() and clean up simple_pin_fs() Replace all module uses with the new vfs_kern_mount() interface, and fix up simple_pin_fs(). Signed-off-by: Trond Myklebust --- Documentation/filesystems/automount-support.txt | 2 +- drivers/usb/core/inode.c | 2 +- fs/afs/mntpt.c | 2 +- fs/afs/super.c | 2 +- fs/afs/super.h | 2 ++ fs/binfmt_misc.c | 3 ++- fs/configfs/mount.c | 2 +- fs/debugfs/inode.c | 2 +- fs/libfs.c | 4 ++-- fs/super.c | 4 +--- include/linux/fs.h | 2 +- mm/shmem.c | 2 +- net/sunrpc/rpc_pipe.c | 2 +- security/inode.c | 2 +- 14 files changed, 17 insertions(+), 16 deletions(-) (limited to 'net') diff --git a/Documentation/filesystems/automount-support.txt b/Documentation/filesystems/automount-support.txt index 58c65a1713e5..7cac200e2a85 100644 --- a/Documentation/filesystems/automount-support.txt +++ b/Documentation/filesystems/automount-support.txt @@ -19,7 +19,7 @@ following procedure: (2) Have the follow_link() op do the following steps: - (a) Call do_kern_mount() to call the appropriate filesystem to set up a + (a) Call vfs_kern_mount() to call the appropriate filesystem to set up a superblock and gain a vfsmount structure representing it. (b) Copy the nameidata provided as an argument and substitute the dentry diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c index 3cf945cc5b9a..695b90a17a68 100644 --- a/drivers/usb/core/inode.c +++ b/drivers/usb/core/inode.c @@ -569,7 +569,7 @@ static int create_special_files (void) ignore_mount = 1; /* create the devices special file */ - retval = simple_pin_fs("usbfs", &usbfs_mount, &usbfs_mount_count); + retval = simple_pin_fs(&usb_fs_type, &usbfs_mount, &usbfs_mount_count); if (retval) { err ("Unable to get usbfs mount"); goto exit; diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 4e6eeb59b83c..7b6dc03caf44 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -210,7 +210,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt) /* try and do the mount */ kdebug("--- attempting mount %s -o %s ---", devname, options); - mnt = do_kern_mount("afs", 0, devname, options); + mnt = vfs_kern_mount(&afs_fs_type, 0, devname, options); kdebug("--- mount result %p ---", mnt); free_page((unsigned long) devname); diff --git a/fs/afs/super.c b/fs/afs/super.c index 53c56e7231ab..93a7821db0d7 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -48,7 +48,7 @@ static void afs_put_super(struct super_block *sb); static void afs_destroy_inode(struct inode *inode); -static struct file_system_type afs_fs_type = { +struct file_system_type afs_fs_type = { .owner = THIS_MODULE, .name = "afs", .get_sb = afs_get_sb, diff --git a/fs/afs/super.h b/fs/afs/super.h index ac11362f4e95..32de8cc6fae8 100644 --- a/fs/afs/super.h +++ b/fs/afs/super.h @@ -38,6 +38,8 @@ static inline struct afs_super_info *AFS_FS_S(struct super_block *sb) return sb->s_fs_info; } +extern struct file_system_type afs_fs_type; + #endif /* __KERNEL__ */ #endif /* _LINUX_AFS_SUPER_H */ diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index d73d75591a39..c0a909e1d290 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -55,6 +55,7 @@ typedef struct { } Node; static DEFINE_RWLOCK(entries_lock); +static struct file_system_type bm_fs_type; static struct vfsmount *bm_mnt; static int entry_count; @@ -638,7 +639,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, if (!inode) goto out2; - err = simple_pin_fs("binfmt_misc", &bm_mnt, &entry_count); + err = simple_pin_fs(&bm_fs_type, &bm_mnt, &entry_count); if (err) { iput(inode); inode = NULL; diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c index f920d30478e5..be5d86ae56f0 100644 --- a/fs/configfs/mount.c +++ b/fs/configfs/mount.c @@ -118,7 +118,7 @@ static struct file_system_type configfs_fs_type = { int configfs_pin_fs(void) { - return simple_pin_fs("configfs", &configfs_mount, + return simple_pin_fs(&configfs_fs_type, &configfs_mount, &configfs_mnt_count); } diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index b55b4ea9a676..90f9417181fd 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -199,7 +199,7 @@ struct dentry *debugfs_create_file(const char *name, mode_t mode, pr_debug("debugfs: creating file '%s'\n",name); - error = simple_pin_fs("debugfs", &debugfs_mount, &debugfs_mount_count); + error = simple_pin_fs(&debug_fs_type, &debugfs_mount, &debugfs_mount_count); if (error) goto exit; diff --git a/fs/libfs.c b/fs/libfs.c index 7145ba7a48d0..4a3ec9ad8bed 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -424,13 +424,13 @@ out: static DEFINE_SPINLOCK(pin_fs_lock); -int simple_pin_fs(char *name, struct vfsmount **mount, int *count) +int simple_pin_fs(struct file_system_type *type, struct vfsmount **mount, int *count) { struct vfsmount *mnt = NULL; spin_lock(&pin_fs_lock); if (unlikely(!*mount)) { spin_unlock(&pin_fs_lock); - mnt = do_kern_mount(name, 0, name, NULL); + mnt = vfs_kern_mount(type, 0, type->name, NULL); if (IS_ERR(mnt)) return PTR_ERR(mnt); spin_lock(&pin_fs_lock); diff --git a/fs/super.c b/fs/super.c index 848be4fc67a2..15f2afdbf82e 100644 --- a/fs/super.c +++ b/fs/super.c @@ -864,11 +864,9 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data) return mnt; } -EXPORT_SYMBOL_GPL(do_kern_mount); - struct vfsmount *kern_mount(struct file_system_type *type) { - return do_kern_mount(type->name, 0, type->name, NULL); + return vfs_kern_mount(type, 0, type->name, NULL); } EXPORT_SYMBOL(kern_mount); diff --git a/include/linux/fs.h b/include/linux/fs.h index f813bc8266aa..eca70cfe5b85 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1763,7 +1763,7 @@ extern struct inode_operations simple_dir_inode_operations; struct tree_descr { char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); extern int simple_fill_super(struct super_block *, int, struct tree_descr *); -extern int simple_pin_fs(char *name, struct vfsmount **mount, int *count); +extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); extern void simple_release_fs(struct vfsmount **mount, int *count); extern ssize_t simple_read_from_buffer(void __user *, size_t, loff_t *, const void *, size_t); diff --git a/mm/shmem.c b/mm/shmem.c index 4c5e68e4e9ae..8184342440f0 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2261,7 +2261,7 @@ static int __init init_tmpfs(void) #ifdef CONFIG_TMPFS devfs_mk_dir("shm"); #endif - shm_mnt = do_kern_mount(tmpfs_fs_type.name, MS_NOUSER, + shm_mnt = vfs_kern_mount(&tmpfs_fs_type, MS_NOUSER, tmpfs_fs_type.name, NULL); if (IS_ERR(shm_mnt)) { error = PTR_ERR(shm_mnt); diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index cc673dd8433f..a5226df8ac03 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -439,7 +439,7 @@ struct vfsmount *rpc_get_mount(void) { int err; - err = simple_pin_fs("rpc_pipefs", &rpc_mount, &rpc_mount_count); + err = simple_pin_fs(&rpc_pipe_fs_type, &rpc_mount, &rpc_mount_count); if (err != 0) return ERR_PTR(err); return rpc_mount; diff --git a/security/inode.c b/security/inode.c index 0f77b0223662..8bf40625c670 100644 --- a/security/inode.c +++ b/security/inode.c @@ -224,7 +224,7 @@ struct dentry *securityfs_create_file(const char *name, mode_t mode, pr_debug("securityfs: creating file '%s'\n",name); - error = simple_pin_fs("securityfs", &mount, &mount_count); + error = simple_pin_fs(&fs_type, &mount, &mount_count); if (error) { dentry = ERR_PTR(error); goto exit; -- cgit v1.2.3 From 8b23ea7bedb8b45a5bb56745fa3ff11018acf04e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Jun 2006 09:34:21 -0400 Subject: RPC: Allow struc xdr_stream to read the page section of an xdr_buf Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/xdr.c | 28 ++++++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 84c35d42d250..e6d3d349506c 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -194,6 +194,7 @@ extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, uint32_t *p); extern uint32_t *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); +extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); #endif /* __KERNEL__ */ diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index ca4bfa57e116..49174f0d0a3e 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -568,8 +568,7 @@ EXPORT_SYMBOL(xdr_inline_decode); * * Moves data beyond the current pointer position from the XDR head[] buffer * into the page list. Any data that lies beyond current position + "len" - * bytes is moved into the XDR tail[]. The current pointer is then - * repositioned at the beginning of the XDR tail. + * bytes is moved into the XDR tail[]. */ void xdr_read_pages(struct xdr_stream *xdr, unsigned int len) { @@ -606,6 +605,31 @@ void xdr_read_pages(struct xdr_stream *xdr, unsigned int len) } EXPORT_SYMBOL(xdr_read_pages); +/** + * xdr_enter_page - decode data from the XDR page + * @xdr: pointer to xdr_stream struct + * @len: number of bytes of page data + * + * Moves data beyond the current pointer position from the XDR head[] buffer + * into the page list. Any data that lies beyond current position + "len" + * bytes is moved into the XDR tail[]. The current pointer is then + * repositioned at the beginning of the first XDR page. + */ +void xdr_enter_page(struct xdr_stream *xdr, unsigned int len) +{ + char * kaddr = page_address(xdr->buf->pages[0]); + xdr_read_pages(xdr, len); + /* + * Position current pointer at beginning of tail, and + * set remaining message length. + */ + if (len > PAGE_CACHE_SIZE - xdr->buf->page_base) + len = PAGE_CACHE_SIZE - xdr->buf->page_base; + xdr->p = (uint32_t *)(kaddr + xdr->buf->page_base); + xdr->end = (uint32_t *)((char *)xdr->p + len); +} +EXPORT_SYMBOL(xdr_enter_page); + static struct kvec empty_iov = {.iov_base = NULL, .iov_len = 0}; void -- cgit v1.2.3 From 81039f1f204a0fd2952112a240284e114f1a25e6 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 9 Jun 2006 09:34:34 -0400 Subject: NFS: Display the chosen RPCSEC_GSS security flavour in /proc/mounts Signed-off-by: Trond Myklebust --- fs/nfs/super.c | 29 +++++++++++++++++++++++++++++ net/sunrpc/auth_null.c | 2 ++ net/sunrpc/auth_unix.c | 1 + 3 files changed, 32 insertions(+) (limited to 'net') diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 4acd3ee96427..30f939bcb724 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -318,6 +318,34 @@ static int nfs_statfs(struct super_block *sb, struct kstatfs *buf) } +static const char *nfs_pseudoflavour_to_name(rpc_authflavor_t flavour) +{ + static struct { + rpc_authflavor_t flavour; + const char *str; + } sec_flavours[] = { + { RPC_AUTH_NULL, "null" }, + { RPC_AUTH_UNIX, "sys" }, + { RPC_AUTH_GSS_KRB5, "krb5" }, + { RPC_AUTH_GSS_KRB5I, "krb5i" }, + { RPC_AUTH_GSS_KRB5P, "krb5p" }, + { RPC_AUTH_GSS_LKEY, "lkey" }, + { RPC_AUTH_GSS_LKEYI, "lkeyi" }, + { RPC_AUTH_GSS_LKEYP, "lkeyp" }, + { RPC_AUTH_GSS_SPKM, "spkm" }, + { RPC_AUTH_GSS_SPKMI, "spkmi" }, + { RPC_AUTH_GSS_SPKMP, "spkmp" }, + { -1, "unknown" } + }; + int i; + + for (i=0; sec_flavours[i].flavour != -1; i++) { + if (sec_flavours[i].flavour == flavour) + break; + } + return sec_flavours[i].str; +} + /* * Describe the mount options in force on this server representation */ @@ -371,6 +399,7 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss, seq_printf(m, ",proto=%s", proto); seq_printf(m, ",timeo=%lu", 10U * nfss->retrans_timeo / HZ); seq_printf(m, ",retrans=%u", nfss->retrans_count); + seq_printf(m, ",sec=%s", nfs_pseudoflavour_to_name(nfss->client->cl_auth->au_flavor)); } /* diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index f56767aaa927..2eccffa96ba1 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -118,6 +118,8 @@ struct rpc_auth null_auth = { .au_cslack = 4, .au_rslack = 2, .au_ops = &authnull_ops, + .au_flavor = RPC_AUTH_NULL, + .au_count = ATOMIC_INIT(0), }; static diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index df14b6bfbf10..74c7406a1054 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -225,6 +225,7 @@ struct rpc_auth unix_auth = { .au_cslack = UNX_WRITESLACK, .au_rslack = 2, /* assume AUTH_NULL verf */ .au_ops = &authunix_ops, + .au_flavor = RPC_AUTH_UNIX, .au_count = ATOMIC_INIT(0), .au_credcache = &unix_cred_cache, }; -- cgit v1.2.3