From 20d4947353be60e909e6b1a79d241457edd6833f Mon Sep 17 00:00:00 2001
From: Patrick Ohly <patrick.ohly@intel.com>
Date: Thu, 12 Feb 2009 05:03:38 +0000
Subject: net: socket infrastructure for SO_TIMESTAMPING

The overlap with the old SO_TIMESTAMP[NS] options is handled so
that time stamping in software (net_enable_timestamp()) is
enabled when SO_TIMESTAMP[NS] and/or SO_TIMESTAMPING_RX_SOFTWARE
is set.  It's disabled if all of these are off.

Signed-off-by: Patrick Ohly <patrick.ohly@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/socket.c | 84 +++++++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 64 insertions(+), 20 deletions(-)

(limited to 'net/socket.c')

diff --git a/net/socket.c b/net/socket.c
index 35dd7371752a..47a3dc074eb0 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -545,6 +545,18 @@ void sock_release(struct socket *sock)
 	sock->file = NULL;
 }
 
+int sock_tx_timestamp(struct msghdr *msg, struct sock *sk,
+		      union skb_shared_tx *shtx)
+{
+	shtx->flags = 0;
+	if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE))
+		shtx->hardware = 1;
+	if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE))
+		shtx->software = 1;
+	return 0;
+}
+EXPORT_SYMBOL(sock_tx_timestamp);
+
 static inline int __sock_sendmsg(struct kiocb *iocb, struct socket *sock,
 				 struct msghdr *msg, size_t size)
 {
@@ -595,33 +607,65 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 	return result;
 }
 
+static int ktime2ts(ktime_t kt, struct timespec *ts)
+{
+	if (kt.tv64) {
+		*ts = ktime_to_timespec(kt);
+		return 1;
+	} else {
+		return 0;
+	}
+}
+
 /*
  * called from sock_recv_timestamp() if sock_flag(sk, SOCK_RCVTSTAMP)
  */
 void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk,
 	struct sk_buff *skb)
 {
-	ktime_t kt = skb->tstamp;
-
-	if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
-		struct timeval tv;
-		/* Race occurred between timestamp enabling and packet
-		   receiving.  Fill in the current time for now. */
-		if (kt.tv64 == 0)
-			kt = ktime_get_real();
-		skb->tstamp = kt;
-		tv = ktime_to_timeval(kt);
-		put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, sizeof(tv), &tv);
-	} else {
-		struct timespec ts;
-		/* Race occurred between timestamp enabling and packet
-		   receiving.  Fill in the current time for now. */
-		if (kt.tv64 == 0)
-			kt = ktime_get_real();
-		skb->tstamp = kt;
-		ts = ktime_to_timespec(kt);
-		put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, sizeof(ts), &ts);
+	int need_software_tstamp = sock_flag(sk, SOCK_RCVTSTAMP);
+	struct timespec ts[3];
+	int empty = 1;
+	struct skb_shared_hwtstamps *shhwtstamps =
+		skb_hwtstamps(skb);
+
+	/* Race occurred between timestamp enabling and packet
+	   receiving.  Fill in the current time for now. */
+	if (need_software_tstamp && skb->tstamp.tv64 == 0)
+		__net_timestamp(skb);
+
+	if (need_software_tstamp) {
+		if (!sock_flag(sk, SOCK_RCVTSTAMPNS)) {
+			struct timeval tv;
+			skb_get_timestamp(skb, &tv);
+			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP,
+				 sizeof(tv), &tv);
+		} else {
+			struct timespec ts;
+			skb_get_timestampns(skb, &ts);
+			put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS,
+				 sizeof(ts), &ts);
+		}
+	}
+
+
+	memset(ts, 0, sizeof(ts));
+	if (skb->tstamp.tv64 &&
+	    sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) {
+		skb_get_timestampns(skb, ts + 0);
+		empty = 0;
+	}
+	if (shhwtstamps) {
+		if (sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE) &&
+		    ktime2ts(shhwtstamps->syststamp, ts + 1))
+			empty = 0;
+		if (sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE) &&
+		    ktime2ts(shhwtstamps->hwtstamp, ts + 2))
+			empty = 0;
 	}
+	if (!empty)
+		put_cmsg(msg, SOL_SOCKET,
+			 SCM_TIMESTAMPING, sizeof(ts), &ts);
 }
 
 EXPORT_SYMBOL_GPL(__sock_recv_timestamp);
-- 
cgit v1.2.3


From 76398425bb06b07cc3a3b1ce169c67dc9d6874ed Mon Sep 17 00:00:00 2001
From: Jonathan Corbet <corbet@lwn.net>
Date: Sun, 1 Feb 2009 14:26:59 -0700
Subject: Move FASYNC bit handling to f_op->fasync()

Removing the BKL from FASYNC handling ran into the challenge of keeping the
setting of the FASYNC bit in filp->f_flags atomic with regard to calls to
the underlying fasync() function.  Andi Kleen suggested moving the handling
of that bit into fasync(); this patch does exactly that.  As a result, we
have a couple of internal API changes: fasync() must now manage the FASYNC
bit, and it will be called without the BKL held.

As it happens, every fasync() implementation in the kernel with one
exception calls fasync_helper().  So, if we make fasync_helper() set the
FASYNC bit, we can avoid making any changes to the other fasync()
functions - as long as those functions, themselves, have proper locking.
Most fasync() implementations do nothing but call fasync_helper() - which
has its own lock - so they are easily verified as correct.  The BKL had
already been pushed down into the rest.

The networking code has its own version of fasync_helper(), so that code
has been augmented with explicit FASYNC bit handling.

Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: David Miller <davem@davemloft.net>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
---
 Documentation/filesystems/Locking |  7 +++++--
 fs/fcntl.c                        | 29 ++++++++++++++++-------------
 fs/ioctl.c                        | 13 +------------
 net/socket.c                      |  7 +++++++
 4 files changed, 29 insertions(+), 27 deletions(-)

(limited to 'net/socket.c')

diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index ec6a9392a173..4e78ce677843 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -437,8 +437,11 @@ grab BKL for cases when we close a file that had been opened r/w, but that
 can and should be done using the internal locking with smaller critical areas).
 Current worst offender is ext2_get_block()...
 
-->fasync() is a mess. This area needs a big cleanup and that will probably
-affect locking.
+->fasync() is called without BKL protection, and is responsible for
+maintaining the FASYNC bit in filp->f_flags.  Most instances call
+fasync_helper(), which does that maintenance, so it's not normally
+something one needs to worry about.  Return values > 0 will be mapped to
+zero in the VFS layer.
 
 ->readdir() and ->ioctl() on directories must be changed. Ideally we would
 move ->readdir() to inode_operations and use a separate method for directory
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 04df8570a2d2..431bb6459273 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -141,7 +141,7 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes)
 	return ret;
 }
 
-#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME)
+#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME)
 
 static int setfl(int fd, struct file * filp, unsigned long arg)
 {
@@ -177,23 +177,19 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
 		return error;
 
 	/*
-	 * We still need a lock here for now to keep multiple FASYNC calls
-	 * from racing with each other.
+	 * ->fasync() is responsible for setting the FASYNC bit.
 	 */
-	lock_kernel();
-	if ((arg ^ filp->f_flags) & FASYNC) {
-		if (filp->f_op && filp->f_op->fasync) {
-			error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
-			if (error < 0)
-				goto out;
-		}
+	if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op &&
+			filp->f_op->fasync) {
+		error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0);
+		if (error < 0)
+			goto out;
 	}
-
 	spin_lock(&filp->f_lock);
 	filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK);
 	spin_unlock(&filp->f_lock);
+
  out:
-	unlock_kernel();
 	return error;
 }
 
@@ -518,7 +514,7 @@ static DEFINE_RWLOCK(fasync_lock);
 static struct kmem_cache *fasync_cache __read_mostly;
 
 /*
- * fasync_helper() is used by some character device drivers (mainly mice)
+ * fasync_helper() is used by almost all character device drivers
  * to set up the fasync queue. It returns negative on error, 0 if it did
  * no changes and positive if it added/deleted the entry.
  */
@@ -557,6 +553,13 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap
 		result = 1;
 	}
 out:
+	/* Fix up FASYNC bit while still holding fasync_lock */
+	spin_lock(&filp->f_lock);
+	if (on)
+		filp->f_flags |= FASYNC;
+	else
+		filp->f_flags &= ~FASYNC;
+	spin_unlock(&filp->f_lock);
 	write_unlock_irq(&fasync_lock);
 	return result;
 }
diff --git a/fs/ioctl.c b/fs/ioctl.c
index 421aab465dab..e8e89edba576 100644
--- a/fs/ioctl.c
+++ b/fs/ioctl.c
@@ -427,19 +427,11 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp,
 	/* Did FASYNC state change ? */
 	if ((flag ^ filp->f_flags) & FASYNC) {
 		if (filp->f_op && filp->f_op->fasync)
+			/* fasync() adjusts filp->f_flags */
 			error = filp->f_op->fasync(fd, filp, on);
 		else
 			error = -ENOTTY;
 	}
-	if (error)
-		return error;
-
-	spin_lock(&filp->f_lock);
-	if (on)
-		filp->f_flags |= FASYNC;
-	else
-		filp->f_flags &= ~FASYNC;
-	spin_unlock(&filp->f_lock);
 	return error;
 }
 
@@ -507,10 +499,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd,
 		break;
 
 	case FIOASYNC:
-		/* BKL needed to avoid races tweaking f_flags */
-		lock_kernel();
 		error = ioctl_fioasync(fd, filp, argp);
-		unlock_kernel();
 		break;
 
 	case FIOQSIZE:
diff --git a/net/socket.c b/net/socket.c
index 35dd7371752a..0f75746ab06e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1030,6 +1030,13 @@ static int sock_fasync(int fd, struct file *filp, int on)
 
 	lock_sock(sk);
 
+	spin_lock(&filp->f_lock);
+	if (on)
+		filp->f_flags |= FASYNC;
+	else
+		filp->f_flags &= ~FASYNC;
+	spin_unlock(&filp->f_lock);
+
 	prev = &(sock->fasync_list);
 
 	for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
-- 
cgit v1.2.3


From 3ba13d179e8c24c68eac32b93593a6b10fcd1572 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Fri, 20 Feb 2009 06:02:22 +0000
Subject: constify dentry_operations: rest

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/ia64/kernel/perfmon.c | 2 +-
 fs/anon_inodes.c           | 2 +-
 fs/libfs.c                 | 2 +-
 fs/pipe.c                  | 2 +-
 kernel/cgroup.c            | 2 +-
 net/socket.c               | 2 +-
 net/sunrpc/rpc_pipe.c      | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

(limited to 'net/socket.c')

diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c
index 0e499757309b..5c0f408cfd71 100644
--- a/arch/ia64/kernel/perfmon.c
+++ b/arch/ia64/kernel/perfmon.c
@@ -2196,7 +2196,7 @@ pfmfs_delete_dentry(struct dentry *dentry)
 	return 1;
 }
 
-static struct dentry_operations pfmfs_dentry_operations = {
+static const struct dentry_operations pfmfs_dentry_operations = {
 	.d_delete = pfmfs_delete_dentry,
 };
 
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c
index 3bbdb9d02376..1dd96d4406c0 100644
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -48,7 +48,7 @@ static struct file_system_type anon_inode_fs_type = {
 	.get_sb		= anon_inodefs_get_sb,
 	.kill_sb	= kill_anon_super,
 };
-static struct dentry_operations anon_inodefs_dentry_operations = {
+static const struct dentry_operations anon_inodefs_dentry_operations = {
 	.d_delete	= anon_inodefs_delete_dentry,
 };
 
diff --git a/fs/libfs.c b/fs/libfs.c
index 49b44099dabb..ec600bd33e75 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -44,7 +44,7 @@ static int simple_delete_dentry(struct dentry *dentry)
  */
 struct dentry *simple_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
 {
-	static struct dentry_operations simple_dentry_operations = {
+	static const struct dentry_operations simple_dentry_operations = {
 		.d_delete = simple_delete_dentry,
 	};
 
diff --git a/fs/pipe.c b/fs/pipe.c
index df3719562fc1..6ddf05209a4c 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -870,7 +870,7 @@ static char *pipefs_dname(struct dentry *dentry, char *buffer, int buflen)
 				dentry->d_inode->i_ino);
 }
 
-static struct dentry_operations pipefs_dentry_operations = {
+static const struct dentry_operations pipefs_dentry_operations = {
 	.d_delete	= pipefs_delete_dentry,
 	.d_dname	= pipefs_dname,
 };
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 9edb5c4b79b4..b01100ebd074 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1627,7 +1627,7 @@ static struct inode_operations cgroup_dir_inode_operations = {
 static int cgroup_create_file(struct dentry *dentry, int mode,
 				struct super_block *sb)
 {
-	static struct dentry_operations cgroup_dops = {
+	static const struct dentry_operations cgroup_dops = {
 		.d_iput = cgroup_diput,
 	};
 
diff --git a/net/socket.c b/net/socket.c
index 35dd7371752a..2f895f60ca8a 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -328,7 +328,7 @@ static char *sockfs_dname(struct dentry *dentry, char *buffer, int buflen)
 				dentry->d_inode->i_ino);
 }
 
-static struct dentry_operations sockfs_dentry_operations = {
+static const struct dentry_operations sockfs_dentry_operations = {
 	.d_delete = sockfs_delete_dentry,
 	.d_dname  = sockfs_dname,
 };
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 577385a4a5dc..9ced0628d69c 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -480,7 +480,7 @@ static int rpc_delete_dentry(struct dentry *dentry)
 	return 1;
 }
 
-static struct dentry_operations rpc_dentry_operations = {
+static const struct dentry_operations rpc_dentry_operations = {
 	.d_delete = rpc_delete_dentry,
 };
 
-- 
cgit v1.2.3


From 8651d5c0b1f874c5b8307ae2b858bc40f9f02482 Mon Sep 17 00:00:00 2001
From: Paul Moore <paul.moore@hp.com>
Date: Fri, 27 Mar 2009 17:10:48 -0400
Subject: lsm: Remove the socket_post_accept() hook

The socket_post_accept() hook is not currently used by any in-tree modules
and its existence continues to cause problems by confusing people about
what can be safely accomplished using this hook.  If a legitimate need for
this hook arises in the future it can always be reintroduced.

Signed-off-by: Paul Moore <paul.moore@hp.com>
Signed-off-by: James Morris <jmorris@namei.org>
---
 include/linux/security.h | 13 -------------
 net/socket.c             |  2 --
 security/capability.c    |  5 -----
 security/security.c      |  5 -----
 4 files changed, 25 deletions(-)

(limited to 'net/socket.c')

diff --git a/include/linux/security.h b/include/linux/security.h
index 1f2ab6353c00..54ed15799a83 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -880,11 +880,6 @@ static inline void security_free_mnt_opts(struct security_mnt_opts *opts)
  *	@sock contains the listening socket structure.
  *	@newsock contains the newly created server socket for connection.
  *	Return 0 if permission is granted.
- * @socket_post_accept:
- *	This hook allows a security module to copy security
- *	information into the newly created socket's inode.
- *	@sock contains the listening socket structure.
- *	@newsock contains the newly created server socket for connection.
  * @socket_sendmsg:
  *	Check permission before transmitting a message to another socket.
  *	@sock contains the socket structure.
@@ -1554,8 +1549,6 @@ struct security_operations {
 			       struct sockaddr *address, int addrlen);
 	int (*socket_listen) (struct socket *sock, int backlog);
 	int (*socket_accept) (struct socket *sock, struct socket *newsock);
-	void (*socket_post_accept) (struct socket *sock,
-				    struct socket *newsock);
 	int (*socket_sendmsg) (struct socket *sock,
 			       struct msghdr *msg, int size);
 	int (*socket_recvmsg) (struct socket *sock,
@@ -2537,7 +2530,6 @@ int security_socket_bind(struct socket *sock, struct sockaddr *address, int addr
 int security_socket_connect(struct socket *sock, struct sockaddr *address, int addrlen);
 int security_socket_listen(struct socket *sock, int backlog);
 int security_socket_accept(struct socket *sock, struct socket *newsock);
-void security_socket_post_accept(struct socket *sock, struct socket *newsock);
 int security_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size);
 int security_socket_recvmsg(struct socket *sock, struct msghdr *msg,
 			    int size, int flags);
@@ -2616,11 +2608,6 @@ static inline int security_socket_accept(struct socket *sock,
 	return 0;
 }
 
-static inline void security_socket_post_accept(struct socket *sock,
-					       struct socket *newsock)
-{
-}
-
 static inline int security_socket_sendmsg(struct socket *sock,
 					  struct msghdr *msg, int size)
 {
diff --git a/net/socket.c b/net/socket.c
index 0b14b79c03af..91d0c0254ffe 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1536,8 +1536,6 @@ SYSCALL_DEFINE4(accept4, int, fd, struct sockaddr __user *, upeer_sockaddr,
 	fd_install(newfd, newfile);
 	err = newfd;
 
-	security_socket_post_accept(sock, newsock);
-
 out_put:
 	fput_light(sock->file, fput_needed);
 out:
diff --git a/security/capability.c b/security/capability.c
index c545bd1300b5..21b6cead6a8e 100644
--- a/security/capability.c
+++ b/security/capability.c
@@ -620,10 +620,6 @@ static int cap_socket_accept(struct socket *sock, struct socket *newsock)
 	return 0;
 }
 
-static void cap_socket_post_accept(struct socket *sock, struct socket *newsock)
-{
-}
-
 static int cap_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size)
 {
 	return 0;
@@ -1014,7 +1010,6 @@ void security_fixup_ops(struct security_operations *ops)
 	set_to_cap_if_null(ops, socket_connect);
 	set_to_cap_if_null(ops, socket_listen);
 	set_to_cap_if_null(ops, socket_accept);
-	set_to_cap_if_null(ops, socket_post_accept);
 	set_to_cap_if_null(ops, socket_sendmsg);
 	set_to_cap_if_null(ops, socket_recvmsg);
 	set_to_cap_if_null(ops, socket_getsockname);
diff --git a/security/security.c b/security/security.c
index c3586c0d97e2..206e53844d2f 100644
--- a/security/security.c
+++ b/security/security.c
@@ -1007,11 +1007,6 @@ int security_socket_accept(struct socket *sock, struct socket *newsock)
 	return security_ops->socket_accept(sock, newsock);
 }
 
-void security_socket_post_accept(struct socket *sock, struct socket *newsock)
-{
-	security_ops->socket_post_accept(sock, newsock);
-}
-
 int security_socket_sendmsg(struct socket *sock, struct msghdr *msg, int size)
 {
 	return security_ops->socket_sendmsg(sock, msg, size);
-- 
cgit v1.2.3