From 109f6e39fa07c48f580125f531f46cb7c245b528 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 13 Jun 2010 03:30:14 +0000 Subject: af_unix: Allow SO_PEERCRED to work across namespaces. Use struct pid and struct cred to store the peer credentials on struct sock. This gives enough information to convert the peer credential information to a value relative to whatever namespace the socket is in at the time. This removes nasty surprises when using SO_PEERCRED on socket connetions where the processes on either side are in different pid and user namespaces. Signed-off-by: Eric W. Biederman Acked-by: Daniel Lezcano Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/unix/af_unix.c | 37 ++++++++++++++++++++++++++++--------- 1 file changed, 28 insertions(+), 9 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index fef2cc5e9d2b..e1f1349fae86 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -450,11 +450,31 @@ static int unix_release_sock(struct sock *sk, int embrion) return 0; } +static void init_peercred(struct sock *sk) +{ + put_pid(sk->sk_peer_pid); + if (sk->sk_peer_cred) + put_cred(sk->sk_peer_cred); + sk->sk_peer_pid = get_pid(task_tgid(current)); + sk->sk_peer_cred = get_current_cred(); +} + +static void copy_peercred(struct sock *sk, struct sock *peersk) +{ + put_pid(sk->sk_peer_pid); + if (sk->sk_peer_cred) + put_cred(sk->sk_peer_cred); + sk->sk_peer_pid = get_pid(peersk->sk_peer_pid); + sk->sk_peer_cred = get_cred(peersk->sk_peer_cred); +} + static int unix_listen(struct socket *sock, int backlog) { int err; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); + struct pid *old_pid = NULL; + const struct cred *old_cred = NULL; err = -EOPNOTSUPP; if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) @@ -470,12 +490,14 @@ static int unix_listen(struct socket *sock, int backlog) sk->sk_max_ack_backlog = backlog; sk->sk_state = TCP_LISTEN; /* set credentials so connect can copy them */ - sk->sk_peercred.pid = task_tgid_vnr(current); - current_euid_egid(&sk->sk_peercred.uid, &sk->sk_peercred.gid); + init_peercred(sk); err = 0; out_unlock: unix_state_unlock(sk); + put_pid(old_pid); + if (old_cred) + put_cred(old_cred); out: return err; } @@ -1140,8 +1162,7 @@ restart: unix_peer(newsk) = sk; newsk->sk_state = TCP_ESTABLISHED; newsk->sk_type = sk->sk_type; - newsk->sk_peercred.pid = task_tgid_vnr(current); - current_euid_egid(&newsk->sk_peercred.uid, &newsk->sk_peercred.gid); + init_peercred(newsk); newu = unix_sk(newsk); newsk->sk_wq = &newu->peer_wq; otheru = unix_sk(other); @@ -1157,7 +1178,7 @@ restart: } /* Set credentials */ - sk->sk_peercred = other->sk_peercred; + copy_peercred(sk, other); sock->state = SS_CONNECTED; sk->sk_state = TCP_ESTABLISHED; @@ -1199,10 +1220,8 @@ static int unix_socketpair(struct socket *socka, struct socket *sockb) sock_hold(skb); unix_peer(ska) = skb; unix_peer(skb) = ska; - ska->sk_peercred.pid = skb->sk_peercred.pid = task_tgid_vnr(current); - current_euid_egid(&skb->sk_peercred.uid, &skb->sk_peercred.gid); - ska->sk_peercred.uid = skb->sk_peercred.uid; - ska->sk_peercred.gid = skb->sk_peercred.gid; + init_peercred(ska); + init_peercred(skb); if (ska->sk_type != SOCK_DGRAM) { ska->sk_state = TCP_ESTABLISHED; -- cgit v1.2.3 From 7361c36c5224519b258219fe3d0e8abc865d8134 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 13 Jun 2010 03:34:33 +0000 Subject: af_unix: Allow credentials to work across user and pid namespaces. In unix_skb_parms store pointers to struct pid and struct cred instead of raw uid, gid, and pid values, then translate the credentials on reception into values that are meaningful in the receiving processes namespaces. Signed-off-by: Eric W. Biederman Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- include/net/af_unix.h | 4 ++-- net/unix/af_unix.c | 53 ++++++++++++++++++++++++++++++--------------------- 2 files changed, 33 insertions(+), 24 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 20725e213aee..90c9e2872f27 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -23,7 +23,8 @@ struct unix_address { }; struct unix_skb_parms { - struct ucred creds; /* Skb credentials */ + struct pid *pid; /* Skb credentials */ + const struct cred *cred; struct scm_fp_list *fp; /* Passed files */ #ifdef CONFIG_SECURITY_NETWORK u32 secid; /* Security ID */ @@ -31,7 +32,6 @@ struct unix_skb_parms { }; #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) -#define UNIXCREDS(skb) (&UNIXCB((skb)).creds) #define UNIXSID(skb) (&UNIXCB((skb)).secid) #define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e1f1349fae86..5fe9d6fe08b8 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1316,18 +1316,20 @@ static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) int i; scm->fp = UNIXCB(skb).fp; - skb->destructor = sock_wfree; UNIXCB(skb).fp = NULL; for (i = scm->fp->count-1; i >= 0; i--) unix_notinflight(scm->fp->fp[i]); } -static void unix_destruct_fds(struct sk_buff *skb) +static void unix_destruct_scm(struct sk_buff *skb) { struct scm_cookie scm; memset(&scm, 0, sizeof(scm)); - unix_detach_fds(&scm, skb); + scm.pid = UNIXCB(skb).pid; + scm.cred = UNIXCB(skb).cred; + if (UNIXCB(skb).fp) + unix_detach_fds(&scm, skb); /* Alas, it calls VFS */ /* So fscking what? fput() had been SMP-safe since the last Summer */ @@ -1350,10 +1352,22 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) for (i = scm->fp->count-1; i >= 0; i--) unix_inflight(scm->fp->fp[i]); - skb->destructor = unix_destruct_fds; return 0; } +static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) +{ + int err = 0; + UNIXCB(skb).pid = get_pid(scm->pid); + UNIXCB(skb).cred = get_cred(scm->cred); + UNIXCB(skb).fp = NULL; + if (scm->fp && send_fds) + err = unix_attach_fds(scm, skb); + + skb->destructor = unix_destruct_scm; + return err; +} + /* * Send AF_UNIX data. */ @@ -1410,12 +1424,9 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; - memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); - if (siocb->scm->fp) { - err = unix_attach_fds(siocb->scm, skb); - if (err) - goto out_free; - } + err = unix_scm_to_skb(siocb->scm, skb, true); + if (err) + goto out_free; unix_get_secdata(siocb->scm, skb); skb_reset_transport_header(skb); @@ -1585,16 +1596,14 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, */ size = min_t(int, size, skb_tailroom(skb)); - memcpy(UNIXCREDS(skb), &siocb->scm->creds, sizeof(struct ucred)); + /* Only send the fds in the first buffer */ - if (siocb->scm->fp && !fds_sent) { - err = unix_attach_fds(siocb->scm, skb); - if (err) { - kfree_skb(skb); - goto out_err; - } - fds_sent = true; + err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); + if (err) { + kfree_skb(skb); + goto out_err; } + fds_sent = true; err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err) { @@ -1711,7 +1720,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, siocb->scm = &tmp_scm; memset(&tmp_scm, 0, sizeof(tmp_scm)); } - siocb->scm->creds = *UNIXCREDS(skb); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); unix_set_secdata(siocb->scm, skb); if (!(flags & MSG_PEEK)) { @@ -1860,14 +1869,14 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (check_creds) { /* Never glue messages from different writers */ - if (memcmp(UNIXCREDS(skb), &siocb->scm->creds, - sizeof(siocb->scm->creds)) != 0) { + if ((UNIXCB(skb).pid != siocb->scm->pid) || + (UNIXCB(skb).cred != siocb->scm->cred)) { skb_queue_head(&sk->sk_receive_queue, skb); break; } } else { /* Copy credentials */ - siocb->scm->creds = *UNIXCREDS(skb); + scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).cred); check_creds = 1; } -- cgit v1.2.3 From 6616f7888c6088324727363276f6de05a1dca6fc Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 13 Jun 2010 03:35:48 +0000 Subject: af_unix: Allow connecting to sockets in other network namespaces. Remove the restriction that only allows connecting to a unix domain socket identified by unix path that is in the same network namespace. Crossing network namespaces is always tricky and we did not support this at first, because of a strict policy of don't mix the namespaces. Later after Pavel proposed this we did not support this because no one had performed the audit to make certain using unix domain sockets across namespaces is safe. What fundamentally makes connecting to af_unix sockets in other namespaces is safe is that you have to have the proper permissions on the unix domain socket inode that lives in the filesystem. If you want strict isolation you just don't create inodes where unfriendlys can get at them, or with permissions that allow unfriendlys to open them. All nicely handled for us by the mount namespace and other standard file system facilities. I looked through unix domain sockets and they are a very controlled environment so none of the work that goes on in dev_forward_skb to make crossing namespaces safe appears needed, we are not loosing controll of the skb and so do not need to set up the skb to look like it is comming in fresh from the outside world. Further the fields in struct unix_skb_parms should not have any problems crossing network namespaces. Now that we handle SCM_CREDENTIALS in a way that gives useable values across namespaces. There does not appear to be any operational problems with encouraging the use of unix domain sockets across containers either. Signed-off-by: Eric W. Biederman Acked-by: Daniel Lezcano Acked-by: Pavel Emelyanov Signed-off-by: David S. Miller --- net/unix/af_unix.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5fe9d6fe08b8..75ba48b0d12a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -282,7 +282,7 @@ static inline struct sock *unix_find_socket_byname(struct net *net, return s; } -static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) +static struct sock *unix_find_socket_byinode(struct inode *i) { struct sock *s; struct hlist_node *node; @@ -292,9 +292,6 @@ static struct sock *unix_find_socket_byinode(struct net *net, struct inode *i) &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) { struct dentry *dentry = unix_sk(s)->dentry; - if (!net_eq(sock_net(s), net)) - continue; - if (dentry && dentry->d_inode == i) { sock_hold(s); goto found; @@ -758,7 +755,7 @@ static struct sock *unix_find_other(struct net *net, err = -ECONNREFUSED; if (!S_ISSOCK(inode->i_mode)) goto put_fail; - u = unix_find_socket_byinode(net, inode); + u = unix_find_socket_byinode(inode); if (!u) goto put_fail; -- cgit v1.2.3 From 70d4bf6d467a330ccc947df9b2608e329d9e7708 Mon Sep 17 00:00:00 2001 From: Neil Horman Date: Tue, 20 Jul 2010 06:45:56 +0000 Subject: drop_monitor: convert some kfree_skb call sites to consume_skb Convert a few calls from kfree_skb to consume_skb Noticed while I was working on dropwatch that I was detecting lots of internal skb drops in several places. While some are legitimate, several were not, freeing skbs that were at the end of their life, rather than being discarded due to an error. This patch converts those calls sites from using kfree_skb to consume_skb, which quiets the in-kernel drop_monitor code from detecting them as drops. Tested successfully by myself Signed-off-by: Neil Horman Signed-off-by: David S. Miller --- kernel/audit.c | 2 +- net/netlink/af_netlink.c | 9 +++++---- net/unix/af_unix.c | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/kernel/audit.c b/kernel/audit.c index c71bd26631a2..8296aa516c5a 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -407,7 +407,7 @@ static void kauditd_send_skb(struct sk_buff *skb) audit_hold_skb(skb); } else /* drop the extra reference if sent ok */ - kfree_skb(skb); + consume_skb(skb); } static int kauditd_thread(void *dummy) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 7aeaa83193db..8648a9922aab 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1076,14 +1076,15 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, sk_for_each_bound(sk, node, &nl_table[ssk->sk_protocol].mc_list) do_one_broadcast(sk, &info); - kfree_skb(skb); + consume_skb(skb); netlink_unlock_table(); - kfree_skb(info.skb2); - - if (info.delivery_failure) + if (info.delivery_failure) { + kfree_skb(info.skb2); return -ENOBUFS; + } else + consume_skb(info.skb2); if (info.delivered) { if (info.congested && (allocation & __GFP_WAIT)) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 75ba48b0d12a..4414a18c63b4 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1906,7 +1906,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, break; } - kfree_skb(skb); + consume_skb(skb); if (siocb->scm->fp) break; -- cgit v1.2.3