diff options
author | Cong Wang <cong.wang@bytedance.com> | 2021-03-30 19:32:33 -0700 |
---|---|---|
committer | Alexei Starovoitov <ast@kernel.org> | 2021-04-01 10:56:14 -0700 |
commit | 2bc793e3272a13e337416c057cb81c5396ad91d1 (patch) | |
tree | 1a611b3ce3edf032e4f6a9430fa8c5cd719e1728 /net/core | |
parent | d7f571188ecf25c244789b883c878ec7c64b5b08 (diff) | |
download | linux-2bc793e3272a13e337416c057cb81c5396ad91d1.tar.bz2 |
skmsg: Extract __tcp_bpf_recvmsg() and tcp_bpf_wait_data()
Although these two functions are only used by TCP, they are not
specific to TCP at all, both operate on skmsg and ingress_msg,
so fit in net/core/skmsg.c very well.
And we will need them for non-TCP, so rename and move them to
skmsg.c and export them to modules.
Signed-off-by: Cong Wang <cong.wang@bytedance.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20210331023237.41094-13-xiyou.wangcong@gmail.com
Diffstat (limited to 'net/core')
-rw-r--r-- | net/core/skmsg.c | 98 |
1 files changed, 98 insertions, 0 deletions
diff --git a/net/core/skmsg.c b/net/core/skmsg.c index 9fc83f7cc1a0..92a83c02562a 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -399,6 +399,104 @@ out: } EXPORT_SYMBOL_GPL(sk_msg_memcopy_from_iter); +int sk_msg_wait_data(struct sock *sk, struct sk_psock *psock, int flags, + long timeo, int *err) +{ + DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret = 0; + + if (sk->sk_shutdown & RCV_SHUTDOWN) + return 1; + + if (!timeo) + return ret; + + add_wait_queue(sk_sleep(sk), &wait); + sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); + ret = sk_wait_event(sk, &timeo, + !list_empty(&psock->ingress_msg) || + !skb_queue_empty(&sk->sk_receive_queue), &wait); + sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); + remove_wait_queue(sk_sleep(sk), &wait); + return ret; +} +EXPORT_SYMBOL_GPL(sk_msg_wait_data); + +/* Receive sk_msg from psock->ingress_msg to @msg. */ +int sk_msg_recvmsg(struct sock *sk, struct sk_psock *psock, struct msghdr *msg, + int len, int flags) +{ + struct iov_iter *iter = &msg->msg_iter; + int peek = flags & MSG_PEEK; + struct sk_msg *msg_rx; + int i, copied = 0; + + msg_rx = sk_psock_peek_msg(psock); + while (copied != len) { + struct scatterlist *sge; + + if (unlikely(!msg_rx)) + break; + + i = msg_rx->sg.start; + do { + struct page *page; + int copy; + + sge = sk_msg_elem(msg_rx, i); + copy = sge->length; + page = sg_page(sge); + if (copied + copy > len) + copy = len - copied; + copy = copy_page_to_iter(page, sge->offset, copy, iter); + if (!copy) + return copied ? copied : -EFAULT; + + copied += copy; + if (likely(!peek)) { + sge->offset += copy; + sge->length -= copy; + if (!msg_rx->skb) + sk_mem_uncharge(sk, copy); + msg_rx->sg.size -= copy; + + if (!sge->length) { + sk_msg_iter_var_next(i); + if (!msg_rx->skb) + put_page(page); + } + } else { + /* Lets not optimize peek case if copy_page_to_iter + * didn't copy the entire length lets just break. + */ + if (copy != sge->length) + return copied; + sk_msg_iter_var_next(i); + } + + if (copied == len) + break; + } while (i != msg_rx->sg.end); + + if (unlikely(peek)) { + msg_rx = sk_psock_next_msg(psock, msg_rx); + if (!msg_rx) + break; + continue; + } + + msg_rx->sg.start = i; + if (!sge->length && msg_rx->sg.start == msg_rx->sg.end) { + msg_rx = sk_psock_dequeue_msg(psock); + kfree_sk_msg(msg_rx); + } + msg_rx = sk_psock_peek_msg(psock); + } + + return copied; +} +EXPORT_SYMBOL_GPL(sk_msg_recvmsg); + static struct sk_msg *sk_psock_create_ingress_msg(struct sock *sk, struct sk_buff *skb) { |