From 20d4947353be60e909e6b1a79d241457edd6833f Mon Sep 17 00:00:00 2001 From: Patrick Ohly Date: Thu, 12 Feb 2009 05:03:38 +0000 Subject: net: socket infrastructure for SO_TIMESTAMPING The overlap with the old SO_TIMESTAMP[NS] options is handled so that time stamping in software (net_enable_timestamp()) is enabled when SO_TIMESTAMP[NS] and/or SO_TIMESTAMPING_RX_SOFTWARE is set. It's disabled if all of these are off. Signed-off-by: Patrick Ohly Signed-off-by: David S. Miller --- net/socket.c | 84 +++++++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 64 insertions(+), 20 deletions(-) (limited to 'net/socket.c') diff --git a/net/socket.c b/net/socket.c index 35dd7371752a..47a3dc074eb0 100644 --- a/net/socket.c +++ b/net/socket.c @@ -545,6 +545,18 @@ void sock_release(struct socket *sock) sock->file = NULL; } +int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, + union skb_shared_tx *shtx) +{ + shtx->flags = 0; + if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) + shtx->hardware = 1; + if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) + shtx->software = 1; + return 0; +} +EXPORT_SYMBOL(sock_tx_timestamp); + static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size) { @@ -595,33 +607,65 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, return result; } +static int ktime2ts(ktime_t kt, struct timespec *ts) +{ + if (kt.tv64) { + *ts = ktime_to_timespec(kt); + return 1; + } else { + return 0; + } +} + /* * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP) */ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) { - ktime_t kt = skb->tstamp; - - if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { - struct timeval tv; - /* Race occurred between timestamp enabling and packet - receiving. Fill in the current time for now. */ - if (kt.tv64 == 0) - kt = ktime_get_real(); - skb->tstamp = kt; - tv = ktime_to_timeval(kt); - put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv); - } else { - struct timespec ts; - /* Race occurred between timestamp enabling and packet - receiving. Fill in the current time for now. */ - if (kt.tv64 == 0) - kt = ktime_get_real(); - skb->tstamp = kt; - ts = ktime_to_timespec(kt); - put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts); + int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP); + struct timespec ts[3]; + int empty = 1; + struct skb_shared_hwtstamps *shhwtstamps = + skb_hwtstamps(skb); + + /* Race occurred between timestamp enabling and packet + receiving. Fill in the current time for now. */ + if (need_software_tstamp && skb->tstamp.tv64 == 0) + __net_timestamp(skb); + + if (need_software_tstamp) { + if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) { + struct timeval tv; + skb_get_timestamp(skb, &tv); + put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, + sizeof(tv), &tv); + } else { + struct timespec ts; + skb_get_timestampns(skb, &ts); + put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, + sizeof(ts), &ts); + } + } + + + memset(ts, 0, sizeof(ts)); + if (skb->tstamp.tv64 && + sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) { + skb_get_timestampns(skb, ts + 0); + empty = 0; + } + if (shhwtstamps) { + if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) && + ktime2ts(shhwtstamps->syststamp, ts + 1)) + empty = 0; + if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) && + ktime2ts(shhwtstamps->hwtstamp, ts + 2)) + empty = 0; } + if (!empty) + put_cmsg(msg, SOL_SOCKET, + SCM_TIMESTAMPING, sizeof(ts), &ts); } EXPORT_SYMBOL_GPL(__sock_recv_timestamp); -- cgit v1.2.3 From 76398425bb06b07cc3a3b1ce169c67dc9d6874ed Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Sun, 1 Feb 2009 14:26:59 -0700 Subject: Move FASYNC bit handling to f_op->fasync() Removing the BKL from FASYNC handling ran into the challenge of keeping the setting of the FASYNC bit in filp->f_flags atomic with regard to calls to the underlying fasync() function. Andi Kleen suggested moving the handling of that bit into fasync(); this patch does exactly that. As a result, we have a couple of internal API changes: fasync() must now manage the FASYNC bit, and it will be called without the BKL held. As it happens, every fasync() implementation in the kernel with one exception calls fasync_helper(). So, if we make fasync_helper() set the FASYNC bit, we can avoid making any changes to the other fasync() functions - as long as those functions, themselves, have proper locking. Most fasync() implementations do nothing but call fasync_helper() - which has its own lock - so they are easily verified as correct. The BKL had already been pushed down into the rest. The networking code has its own version of fasync_helper(), so that code has been augmented with explicit FASYNC bit handling. Cc: Al Viro Cc: David Miller Reviewed-by: Christoph Hellwig Signed-off-by: Jonathan Corbet --- Documentation/filesystems/Locking | 7 +++++-- fs/fcntl.c | 29 ++++++++++++++++------------- fs/ioctl.c | 13 +------------ net/socket.c | 7 +++++++ 4 files changed, 29 insertions(+), 27 deletions(-) (limited to 'net/socket.c') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index ec6a9392a173..4e78ce677843 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -437,8 +437,11 @@ grab BKL for cases when we close a file that had been opened r/w, but that can and should be done using the internal locking with smaller critical areas). Current worst offender is ext2_get_block()... -->fasync() is a mess. This area needs a big cleanup and that will probably -affect locking. +->fasync() is called without BKL protection, and is responsible for +maintaining the FASYNC bit in filp->f_flags. Most instances call +fasync_helper(), which does that maintenance, so it's not normally +something one needs to worry about. Return values > 0 will be mapped to +zero in the VFS layer. ->readdir() and ->ioctl() on directories must be changed. Ideally we would move ->readdir() to inode_operations and use a separate method for directory diff --git a/fs/fcntl.c b/fs/fcntl.c index 04df8570a2d2..431bb6459273 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -141,7 +141,7 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes) return ret; } -#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME) +#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) static int setfl(int fd, struct file * filp, unsigned long arg) { @@ -177,23 +177,19 @@ static int setfl(int fd, struct file * filp, unsigned long arg) return error; /* - * We still need a lock here for now to keep multiple FASYNC calls - * from racing with each other. + * ->fasync() is responsible for setting the FASYNC bit. */ - lock_kernel(); - if ((arg ^ filp->f_flags) & FASYNC) { - if (filp->f_op && filp->f_op->fasync) { - error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); - if (error < 0) - goto out; - } + if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op && + filp->f_op->fasync) { + error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); + if (error < 0) + goto out; } - spin_lock(&filp->f_lock); filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); spin_unlock(&filp->f_lock); + out: - unlock_kernel(); return error; } @@ -518,7 +514,7 @@ static DEFINE_RWLOCK(fasync_lock); static struct kmem_cache *fasync_cache __read_mostly; /* - * fasync_helper() is used by some character device drivers (mainly mice) + * fasync_helper() is used by almost all character device drivers * to set up the fasync queue. It returns negative on error, 0 if it did * no changes and positive if it added/deleted the entry. */ @@ -557,6 +553,13 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap result = 1; } out: + /* Fix up FASYNC bit while still holding fasync_lock */ + spin_lock(&filp->f_lock); + if (on) + filp->f_flags |= FASYNC; + else + filp->f_flags &= ~FASYNC; + spin_unlock(&filp->f_lock); write_unlock_irq(&fasync_lock); return result; } diff --git a/fs/ioctl.c b/fs/ioctl.c index 421aab465dab..e8e89edba576 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -427,19 +427,11 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp, /* Did FASYNC state change ? */ if ((flag ^ filp->f_flags) & FASYNC) { if (filp->f_op && filp->f_op->fasync) + /* fasync() adjusts filp->f_flags */ error = filp->f_op->fasync(fd, filp, on); else error = -ENOTTY; } - if (error) - return error; - - spin_lock(&filp->f_lock); - if (on) - filp->f_flags |= FASYNC; - else - filp->f_flags &= ~FASYNC; - spin_unlock(&filp->f_lock); return error; } @@ -507,10 +499,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, break; case FIOASYNC: - /* BKL needed to avoid races tweaking f_flags */ - lock_kernel(); error = ioctl_fioasync(fd, filp, argp); - unlock_kernel(); break; case FIOQSIZE: diff --git a/net/socket.c b/net/socket.c index 35dd7371752a..0f75746ab06e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1030,6 +1030,13 @@ static int sock_fasync(int fd, struct file *filp, int on) lock_sock(sk); + spin_lock(&filp->f_lock); + if (on) + filp->f_flags |= FASYNC; + else + filp->f_flags &= ~FASYNC; + spin_unlock(&filp->f_lock); + prev = &(sock->fasync_list); for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) -- cgit v1.2.3 From 3ba13d179e8c24c68eac32b93593a6b10fcd1572 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 20 Feb 2009 06:02:22 +0000 Subject: constify dentry_operations: rest Signed-off-by: Al Viro --- arch/ia64/kernel/perfmon.c | 2 +- fs/anon_inodes.c | 2 +- fs/libfs.c | 2 +- fs/pipe.c | 2 +- kernel/cgroup.c | 2 +- net/socket.c | 2 +- net/sunrpc/rpc_pipe.c | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) (limited to 'net/socket.c') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 0e499757309b..5c0f408cfd71 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2196,7 +2196,7 @@ pfmfs_delete_dentry(struct dentry *dentry) return 1; } -static struct dentry_operations pfmfs_dentry_operations = { +static const struct dentry_operations pfmfs_dentry_operations = { .d_delete = pfmfs_delete_dentry, }; diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c index 3bbdb9d02376..1dd96d4406c0 100644 --- a/fs/anon_inodes.c +++ b/fs/anon_inodes.c @@ -48,7 +48,7 @@ static struct file_system_type anon_inode_fs_type = { .get_sb = anon_inodefs_get_sb, .kill_sb = kill_anon_super, }; -static struct dentry_operations anon_inodefs_dentry_operations = { +static const struct dentry_operations anon_inodefs_dentry_operations = { .d_delete = anon_inodefs_delete_dentry, }; diff --git a/fs/libfs.c b/fs/libfs.c index 49b44099dabb..ec600bd33e75 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -44,7 +44,7 @@ static int simple_delete_dentry(struct dentry *dentry) */ struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd) { - static struct dentry_operations simple_dentry_operations = { + static const struct dentry_operations simple_dentry_operations = { .d_delete = simple_delete_dentry, }; diff --git a/fs/pipe.c b/fs/pipe.c index df3719562fc1..6ddf05209a4c 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -870,7 +870,7 @@ static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen) dentry->d_inode->i_ino); } -static struct dentry_operations pipefs_dentry_operations = { +static const struct dentry_operations pipefs_dentry_operations = { .d_delete = pipefs_delete_dentry, .d_dname = pipefs_dname, }; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 9edb5c4b79b4..b01100ebd074 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1627,7 +1627,7 @@ static struct inode_operations cgroup_dir_inode_operations = { static int cgroup_create_file(struct dentry *dentry, int mode, struct super_block *sb) { - static struct dentry_operations cgroup_dops = { + static const struct dentry_operations cgroup_dops = { .d_iput = cgroup_diput, }; diff --git a/net/socket.c b/net/socket.c index 35dd7371752a..2f895f60ca8a 100644 --- a/net/socket.c +++ b/net/socket.c @@ -328,7 +328,7 @@ static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen) dentry->d_inode->i_ino); } -static struct dentry_operations sockfs_dentry_operations = { +static const struct dentry_operations sockfs_dentry_operations = { .d_delete = sockfs_delete_dentry, .d_dname = sockfs_dname, }; diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 577385a4a5dc..9ced0628d69c 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -480,7 +480,7 @@ static int rpc_delete_dentry(struct dentry *dentry) return 1; } -static struct dentry_operations rpc_dentry_operations = { +static const struct dentry_operations rpc_dentry_operations = { .d_delete = rpc_delete_dentry, }; -- cgit v1.2.3 From 8651d5c0b1f874c5b8307ae2b858bc40f9f02482 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 27 Mar 2009 17:10:48 -0400 Subject: lsm: Remove the socket_post_accept() hook The socket_post_accept() hook is not currently used by any in-tree modules and its existence continues to cause problems by confusing people about what can be safely accomplished using this hook. If a legitimate need for this hook arises in the future it can always be reintroduced. Signed-off-by: Paul Moore Signed-off-by: James Morris --- include/linux/security.h | 13 ------------- net/socket.c | 2 -- security/capability.c | 5 ----- security/security.c | 5 ----- 4 files changed, 25 deletions(-) (limited to 'net/socket.c') diff --git a/include/linux/security.h b/include/linux/security.h index 1f2ab6353c00..54ed15799a83 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -880,11 +880,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts) * @sock contains the listening socket structure. * @newsock contains the newly created server socket for connection. * Return 0 if permission is granted. - * @socket_post_accept: - * This hook allows a security module to copy security - * information into the newly created socket's inode. - * @sock contains the listening socket structure. - * @newsock contains the newly created server socket for connection. * @socket_sendmsg: * Check permission before transmitting a message to another socket. * @sock contains the socket structure. @@ -1554,8 +1549,6 @@ struct security_operations { struct sockaddr *address, int addrlen); int (*socket_listen) (struct socket *sock, int backlog); int (*socket_accept) (struct socket *sock, struct socket *newsock); - void (*socket_post_accept) (struct socket *sock, - struct socket *newsock); int (*socket_sendmsg) (struct socket *sock, struct msghdr *msg, int size); int (*socket_recvmsg) (struct socket *sock, @@ -2537,7 +2530,6 @@ int security_socket_bind(struct socket *sock, struct sockaddr *address, int addr int security_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen); int security_socket_listen(struct socket *sock, int backlog); int security_socket_accept(struct socket *sock, struct socket *newsock); -void security_socket_post_accept(struct socket *sock, struct socket *newsock); int security_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size); int security_socket_recvmsg(struct socket *sock, struct msghdr *msg, int size, int flags); @@ -2616,11 +2608,6 @@ static inline int security_socket_accept(struct socket *sock, return 0; } -static inline void security_socket_post_accept(struct socket *sock, - struct socket *newsock) -{ -} - static inline int security_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) { diff --git a/net/socket.c b/net/socket.c index 0b14b79c03af..91d0c0254ffe 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1536,8 +1536,6 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr, fd_install(newfd, newfile); err = newfd; - security_socket_post_accept(sock, newsock); - out_put: fput_light(sock->file, fput_needed); out: diff --git a/security/capability.c b/security/capability.c index c545bd1300b5..21b6cead6a8e 100644 --- a/security/capability.c +++ b/security/capability.c @@ -620,10 +620,6 @@ static int cap_socket_accept(struct socket *sock, struct socket *newsock) return 0; } -static void cap_socket_post_accept(struct socket *sock, struct socket *newsock) -{ -} - static int cap_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) { return 0; @@ -1014,7 +1010,6 @@ void security_fixup_ops(struct security_operations *ops) set_to_cap_if_null(ops, socket_connect); set_to_cap_if_null(ops, socket_listen); set_to_cap_if_null(ops, socket_accept); - set_to_cap_if_null(ops, socket_post_accept); set_to_cap_if_null(ops, socket_sendmsg); set_to_cap_if_null(ops, socket_recvmsg); set_to_cap_if_null(ops, socket_getsockname); diff --git a/security/security.c b/security/security.c index c3586c0d97e2..206e53844d2f 100644 --- a/security/security.c +++ b/security/security.c @@ -1007,11 +1007,6 @@ int security_socket_accept(struct socket *sock, struct socket *newsock) return security_ops->socket_accept(sock, newsock); } -void security_socket_post_accept(struct socket *sock, struct socket *newsock) -{ - security_ops->socket_post_accept(sock, newsock); -} - int security_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size) { return security_ops->socket_sendmsg(sock, msg, size); -- cgit v1.2.3