From f55bb7f9cb82dec2f2e803d7bd0fc5929248e4d8 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 21 Feb 2012 07:31:51 +0000 Subject: unix: Support peeking offset for datagram and seqpacket sockets The sk_peek_off manipulations are protected with the unix_sk->readlock mutex. This mutex is enough since all we need is to syncronize setting the offset vs reading the queue head. The latter is fully covered with the mentioned lock. The recently added __skb_recv_datagram's offset is used to pick the skb to read the data from. Signed-off-by: Pavel Emelyanov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/unix/af_unix.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'net/unix') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 85d3bb7490aa..3d9481de031f 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -530,6 +530,16 @@ static int unix_seqpacket_sendmsg(struct kiocb *, struct socket *, static int unix_seqpacket_recvmsg(struct kiocb *, struct socket *, struct msghdr *, size_t, int); +static void unix_set_peek_off(struct sock *sk, int val) +{ + struct unix_sock *u = unix_sk(sk); + + mutex_lock(&u->readlock); + sk->sk_peek_off = val; + mutex_unlock(&u->readlock); +} + + static const struct proto_ops unix_stream_ops = { .family = PF_UNIX, .owner = THIS_MODULE, @@ -570,6 +580,7 @@ static const struct proto_ops unix_dgram_ops = { .recvmsg = unix_dgram_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, + .set_peek_off = unix_set_peek_off, }; static const struct proto_ops unix_seqpacket_ops = { @@ -591,6 +602,7 @@ static const struct proto_ops unix_seqpacket_ops = { .recvmsg = unix_seqpacket_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, + .set_peek_off = unix_set_peek_off, }; static struct proto unix_proto = { @@ -1756,6 +1768,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, int noblock = flags & MSG_DONTWAIT; struct sk_buff *skb; int err; + int peeked, skip; err = -EOPNOTSUPP; if (flags&MSG_OOB) @@ -1769,7 +1782,9 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; } - skb = skb_recv_datagram(sk, flags, noblock, &err); + skip = sk_peek_offset(sk, flags); + + skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err); if (!skb) { unix_state_lock(sk); /* Signal EOF on disconnected non-blocking SEQPACKET socket. */ @@ -1786,12 +1801,12 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (msg->msg_name) unix_copy_addr(msg, skb->sk); - if (size > skb->len) - size = skb->len; - else if (size < skb->len) + if (size > skb->len - skip) + size = skb->len - skip; + else if (size < skb->len - skip) msg->msg_flags |= MSG_TRUNC; - err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, size); + err = skb_copy_datagram_iovec(skb, skip, msg->msg_iov, size); if (err) goto out_free; @@ -1808,6 +1823,8 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (!(flags & MSG_PEEK)) { if (UNIXCB(skb).fp) unix_detach_fds(siocb->scm, skb); + + sk_peek_offset_bwd(sk, skb->len); } else { /* It is questionable: on PEEK we could: - do not return fds - good, but too simple 8) @@ -1821,6 +1838,9 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, clearly however! */ + + sk_peek_offset_fwd(sk, size); + if (UNIXCB(skb).fp) siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); } -- cgit v1.2.3 From fc0d753641f7b919c7273d9bd21ae6ab45e757f3 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Tue, 21 Feb 2012 07:32:06 +0000 Subject: unix: Support peeking offset for stream sockets The same here -- we can protect the sk_peek_off manipulations with the unix_sk->readlock mutex. The peeking of data from a stream socket is done in the datagram style, i.e. even if there's enough room for more data in the user buffer, only the head skb's data is copied in there. This feature is preserved when peeking data from a given offset -- the data is read till the nearest skb's boundary. Signed-off-by: Pavel Emelyanov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/unix/af_unix.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'net/unix') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 3d9481de031f..0be4d24f6ae8 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -559,6 +559,7 @@ static const struct proto_ops unix_stream_ops = { .recvmsg = unix_stream_recvmsg, .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, + .set_peek_off = unix_set_peek_off, }; static const struct proto_ops unix_dgram_ops = { @@ -1904,6 +1905,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, int target; int err = 0; long timeo; + int skip; err = -EINVAL; if (sk->sk_state != TCP_ESTABLISHED) @@ -1933,12 +1935,15 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, goto out; } + skip = sk_peek_offset(sk, flags); + do { int chunk; struct sk_buff *skb; unix_state_lock(sk); skb = skb_peek(&sk->sk_receive_queue); +again: if (skb == NULL) { unix_sk(sk)->recursion_level = 0; if (copied >= target) @@ -1973,6 +1978,13 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, unix_state_unlock(sk); break; } + + if (skip >= skb->len) { + skip -= skb->len; + skb = skb_peek_next(skb, &sk->sk_receive_queue); + goto again; + } + unix_state_unlock(sk); if (check_creds) { @@ -1992,8 +2004,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, sunaddr = NULL; } - chunk = min_t(unsigned int, skb->len, size); - if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { + chunk = min_t(unsigned int, skb->len - skip, size); + if (memcpy_toiovec(msg->msg_iov, skb->data + skip, chunk)) { if (copied == 0) copied = -EFAULT; break; @@ -2005,6 +2017,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (!(flags & MSG_PEEK)) { skb_pull(skb, chunk); + sk_peek_offset_bwd(sk, chunk); + if (UNIXCB(skb).fp) unix_detach_fds(siocb->scm, skb); @@ -2022,6 +2036,8 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, if (UNIXCB(skb).fp) siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); + sk_peek_offset_fwd(sk, chunk); + break; } } while (size); -- cgit v1.2.3 From 9f6f9af7694ede6314bed281eec74d588ba9474f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Feb 2012 23:24:55 +0000 Subject: af_unix: MSG_TRUNC support for dgram sockets Piergiorgio Beruto expressed the need to fetch size of first datagram in queue for AF_UNIX sockets and suggested a patch against SIOCINQ ioctl. I suggested instead to implement MSG_TRUNC support as a recv() input flag, as already done for RAW, UDP & NETLINK sockets. len = recv(fd, &byte, 1, MSG_PEEK | MSG_TRUNC); MSG_TRUNC asks recv() to return the real length of the packet, even when is was longer than the passed buffer. There is risk that a userland application used MSG_TRUNC by accident (since it had no effect on af_unix sockets) and this might break after this patch. Signed-off-by: Eric Dumazet Tested-by: Piergiorgio Beruto CC: Michael Kerrisk Signed-off-by: David S. Miller --- net/unix/af_unix.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/unix') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0be4d24f6ae8..8ee85aa79fa7 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1845,7 +1845,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (UNIXCB(skb).fp) siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); } - err = size; + err = (flags & MSG_TRUNC) ? skb->len - skip : size; scm_recv(sock, msg, siocb->scm, flags); -- cgit v1.2.3 From 80d326fab534a5380e8f6e509a0b9076655a9670 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 24 Feb 2012 14:30:15 +0000 Subject: netlink: add netlink_dump_control structure for netlink_dump_start() Davem considers that the argument list of this interface is getting out of control. This patch tries to address this issue following his proposal: struct netlink_dump_control c = { .dump = dump, .done = done, ... }; netlink_dump_start(..., &c); Suggested by David S. Miller. Signed-off-by: Pablo Neira Ayuso Signed-off-by: David S. Miller --- crypto/crypto_user.c | 10 +++++++--- drivers/infiniband/core/netlink.c | 10 +++++++--- include/linux/netlink.h | 10 +++++++--- net/core/rtnetlink.c | 9 +++++++-- net/ipv4/inet_diag.c | 18 ++++++++++++------ net/netfilter/ipset/ip_set_core.c | 10 +++++++--- net/netfilter/nf_conntrack_netlink.c | 18 ++++++++++++------ net/netfilter/nfnetlink_acct.c | 6 ++++-- net/netlink/af_netlink.c | 11 ++++------- net/netlink/genetlink.c | 9 +++++++-- net/unix/diag.c | 10 ++++++---- net/xfrm/xfrm_user.c | 9 +++++++-- 12 files changed, 87 insertions(+), 43 deletions(-) (limited to 'net/unix') diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 16f8693cc147..b6ac1387770c 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -389,9 +389,13 @@ static int crypto_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) (nlh->nlmsg_flags & NLM_F_DUMP))) { if (link->dump == NULL) return -EINVAL; - - return netlink_dump_start(crypto_nlsk, skb, nlh, - link->dump, link->done, 0); + { + struct netlink_dump_control c = { + .dump = link->dump, + .done = link->done, + }; + return netlink_dump_start(crypto_nlsk, skb, nlh, &c); + } } err = nlmsg_parse(nlh, crypto_msg_min[type], attrs, CRYPTOCFGA_MAX, diff --git a/drivers/infiniband/core/netlink.c b/drivers/infiniband/core/netlink.c index d1c8196d15d7..396e29370304 100644 --- a/drivers/infiniband/core/netlink.c +++ b/drivers/infiniband/core/netlink.c @@ -147,9 +147,13 @@ static int ibnl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (op < 0 || op >= client->nops || !client->cb_table[RDMA_NL_GET_OP(op)].dump) return -EINVAL; - return netlink_dump_start(nls, skb, nlh, - client->cb_table[op].dump, - NULL, 0); + + { + struct netlink_dump_control c = { + .dump = client->cb_table[op].dump, + }; + return netlink_dump_start(nls, skb, nlh, &c); + } } } diff --git a/include/linux/netlink.h b/include/linux/netlink.h index a390e9d54827..1f8c1a95f57c 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -248,11 +248,15 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) #define NLMSG_PUT(skb, pid, seq, type, len) \ NLMSG_NEW(skb, pid, seq, type, len, 0) +struct netlink_dump_control { + int (*dump)(struct sk_buff *skb, struct netlink_callback *); + int (*done)(struct netlink_callback*); + u16 min_dump_alloc; +}; + extern int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, const struct nlmsghdr *nlh, - int (*dump)(struct sk_buff *skb, struct netlink_callback*), - int (*done)(struct netlink_callback*), - u16 min_dump_alloc); + struct netlink_dump_control *control); #define NL_NONROOT_RECV 0x1 diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 65aebd450027..7aef62e53113 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1981,8 +1981,13 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) __rtnl_unlock(); rtnl = net->rtnl; - err = netlink_dump_start(rtnl, skb, nlh, dumpit, - NULL, min_dump_alloc); + { + struct netlink_dump_control c = { + .dump = dumpit, + .min_dump_alloc = min_dump_alloc, + }; + err = netlink_dump_start(rtnl, skb, nlh, &c); + } rtnl_lock(); return err; } diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index fcf281819cd4..8d25a1c557eb 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -960,9 +960,12 @@ static int inet_diag_rcv_msg_compat(struct sk_buff *skb, struct nlmsghdr *nlh) inet_diag_bc_audit(nla_data(attr), nla_len(attr))) return -EINVAL; } - - return netlink_dump_start(sock_diag_nlsk, skb, nlh, - inet_diag_dump_compat, NULL, 0); + { + struct netlink_dump_control c = { + .dump = inet_diag_dump_compat, + }; + return netlink_dump_start(sock_diag_nlsk, skb, nlh, &c); + } } return inet_diag_get_exact_compat(skb, nlh); @@ -985,9 +988,12 @@ static int inet_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) inet_diag_bc_audit(nla_data(attr), nla_len(attr))) return -EINVAL; } - - return netlink_dump_start(sock_diag_nlsk, skb, h, - inet_diag_dump, NULL, 0); + { + struct netlink_dump_control c = { + .dump = inet_diag_dump, + }; + return netlink_dump_start(sock_diag_nlsk, skb, h, &c); + } } return inet_diag_get_exact(skb, h, (struct inet_diag_req_v2 *)NLMSG_DATA(h)); diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 32dbf0fa89db..e7f90e7082b4 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1162,9 +1162,13 @@ ip_set_dump(struct sock *ctnl, struct sk_buff *skb, if (unlikely(protocol_failed(attr))) return -IPSET_ERR_PROTOCOL; - return netlink_dump_start(ctnl, skb, nlh, - ip_set_dump_start, - ip_set_dump_done, 0); + { + struct netlink_dump_control c = { + .dump = ip_set_dump_start, + .done = ip_set_dump_done, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } } /* Add, del and test */ diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 9307b033c0c9..61f7feb7932b 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -977,9 +977,13 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, u16 zone; int err; - if (nlh->nlmsg_flags & NLM_F_DUMP) - return netlink_dump_start(ctnl, skb, nlh, ctnetlink_dump_table, - ctnetlink_done, 0); + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = ctnetlink_dump_table, + .done = ctnetlink_done, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); + } err = ctnetlink_parse_zone(cda[CTA_ZONE], &zone); if (err < 0) @@ -1850,9 +1854,11 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, int err; if (nlh->nlmsg_flags & NLM_F_DUMP) { - return netlink_dump_start(ctnl, skb, nlh, - ctnetlink_exp_dump_table, - ctnetlink_exp_done, 0); + struct netlink_dump_control c = { + .dump = ctnetlink_exp_dump_table, + .done = ctnetlink_exp_done, + }; + return netlink_dump_start(ctnl, skb, nlh, &c); } err = ctnetlink_parse_zone(cda[CTA_EXPECT_ZONE], &zone); diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index 11ba013e47f6..3eb348bfc4fb 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -171,8 +171,10 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, char *acct_name; if (nlh->nlmsg_flags & NLM_F_DUMP) { - return netlink_dump_start(nfnl, skb, nlh, nfnl_acct_dump, - NULL, 0); + struct netlink_dump_control c = { + .dump = nfnl_acct_dump, + }; + return netlink_dump_start(nfnl, skb, nlh, &c); } if (!tb[NFACCT_NAME]) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4d751e3d4b4b..ab74845876d2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1736,10 +1736,7 @@ errout_skb: int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, const struct nlmsghdr *nlh, - int (*dump)(struct sk_buff *skb, - struct netlink_callback *), - int (*done)(struct netlink_callback *), - u16 min_dump_alloc) + struct netlink_dump_control *control) { struct netlink_callback *cb; struct sock *sk; @@ -1750,10 +1747,10 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, if (cb == NULL) return -ENOBUFS; - cb->dump = dump; - cb->done = done; + cb->dump = control->dump; + cb->done = control->done; cb->nlh = nlh; - cb->min_dump_alloc = min_dump_alloc; + cb->min_dump_alloc = control->min_dump_alloc; atomic_inc(&skb->users); cb->skb = skb; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index a1154717219e..9f40441d7a7d 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -563,8 +563,13 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) return -EOPNOTSUPP; genl_unlock(); - err = netlink_dump_start(net->genl_sock, skb, nlh, - ops->dumpit, ops->done, 0); + { + struct netlink_dump_control c = { + .dump = ops->dumpit, + .done = ops->done, + }; + err = netlink_dump_start(net->genl_sock, skb, nlh, &c); + } genl_lock(); return err; } diff --git a/net/unix/diag.c b/net/unix/diag.c index 6b7697fd911b..4195555aea65 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -301,10 +301,12 @@ static int unix_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) if (nlmsg_len(h) < hdrlen) return -EINVAL; - if (h->nlmsg_flags & NLM_F_DUMP) - return netlink_dump_start(sock_diag_nlsk, skb, h, - unix_diag_dump, NULL, 0); - else + if (h->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = unix_diag_dump, + }; + return netlink_dump_start(sock_diag_nlsk, skb, h, &c); + } else return unix_diag_get_exact(skb, h, (struct unix_diag_req *)NLMSG_DATA(h)); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 66b84fbf2746..7128dde0fe1a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -2299,8 +2299,13 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) if (link->dump == NULL) return -EINVAL; - return netlink_dump_start(net->xfrm.nlsk, skb, nlh, - link->dump, link->done, 0); + { + struct netlink_dump_control c = { + .dump = link->dump, + .done = link->done, + }; + return netlink_dump_start(net->xfrm.nlsk, skb, nlh, &c); + } } err = nlmsg_parse(nlh, xfrm_msg_min[type], attrs, XFRMA_MAX, -- cgit v1.2.3