From b00c51ef8f72ced0965d021a291b98ff822c5337 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 22 Jan 2023 10:02:55 -0700 Subject: io_uring/net: cache provided buffer group value for multishot receives If we're using ring provided buffers with multishot receive, and we end up doing an io-wq based issue at some points that also needs to select a buffer, we'll lose the initially assigned buffer group as io_ring_buffer_select() correctly clears the buffer group list as the issue isn't serialized by the ctx uring_lock. This is fine for normal receives as the request puts the buffer and finishes, but for multishot, we will re-arm and do further receives. On the next trigger for this multishot receive, the receive will try and pick from a buffer group whose value is the same as the buffer ID of the las receive. That is obviously incorrect, and will result in a premature -ENOUFS error for the receive even if we had available buffers in the correct group. Cache the buffer group value at prep time, so we can restore it for future receives. This only needs doing for the above mentioned case, but just do it by default to keep it easier to read. Cc: stable@vger.kernel.org Fixes: b3fdea6ecb55 ("io_uring: multishot recv") Fixes: 9bb66906f23e ("io_uring: support multishot in recvmsg") Cc: Dylan Yudaken Signed-off-by: Jens Axboe --- io_uring/net.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/io_uring/net.c b/io_uring/net.c index fbc34a7c2743..90326b279965 100644 --- a/io_uring/net.c +++ b/io_uring/net.c @@ -62,6 +62,7 @@ struct io_sr_msg { u16 flags; /* initialised and used only by !msg send variants */ u16 addr_len; + u16 buf_group; void __user *addr; /* used only for send zerocopy */ struct io_kiocb *notif; @@ -580,6 +581,15 @@ int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) if (req->opcode == IORING_OP_RECV && sr->len) return -EINVAL; req->flags |= REQ_F_APOLL_MULTISHOT; + /* + * Store the buffer group for this multishot receive separately, + * as if we end up doing an io-wq based issue that selects a + * buffer, it has to be committed immediately and that will + * clear ->buf_list. This means we lose the link to the buffer + * list, and the eventual buffer put on completion then cannot + * restore it. + */ + sr->buf_group = req->buf_index; } #ifdef CONFIG_COMPAT @@ -596,6 +606,7 @@ static inline void io_recv_prep_retry(struct io_kiocb *req) sr->done_io = 0; sr->len = 0; /* get from the provided buffer */ + req->buf_index = sr->buf_group; } /* -- cgit v1.2.3 From ef5c600adb1d985513d2b612cc90403a148ff287 Mon Sep 17 00:00:00 2001 From: Dylan Yudaken Date: Fri, 27 Jan 2023 02:59:11 -0800 Subject: io_uring: always prep_async for drain requests Drain requests all go through io_drain_req, which has a quick exit in case there is nothing pending (ie the drain is not useful). In that case it can run the issue the request immediately. However for safety it queues it through task work. The problem is that in this case the request is run asynchronously, but the async work has not been prepared through io_req_prep_async. This has not been a problem up to now, as the task work always would run before returning to userspace, and so the user would not have a chance to race with it. However - with IORING_SETUP_DEFER_TASKRUN - this is no longer the case and the work might be defered, giving userspace a chance to change data being referred to in the request. Instead _always_ prep_async for drain requests, which is simpler anyway and removes this issue. Cc: stable@vger.kernel.org Fixes: c0e0d6ba25f1 ("io_uring: add IORING_SETUP_DEFER_TASKRUN") Signed-off-by: Dylan Yudaken Link: https://lore.kernel.org/r/20230127105911.2420061-1-dylany@meta.com Signed-off-by: Jens Axboe --- io_uring/io_uring.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/io_uring/io_uring.c b/io_uring/io_uring.c index 0a4efada9b3c..db623b3185c8 100644 --- a/io_uring/io_uring.c +++ b/io_uring/io_uring.c @@ -1765,17 +1765,12 @@ queue: } spin_unlock(&ctx->completion_lock); - ret = io_req_prep_async(req); - if (ret) { -fail: - io_req_defer_failed(req, ret); - return; - } io_prep_async_link(req); de = kmalloc(sizeof(*de), GFP_KERNEL); if (!de) { ret = -ENOMEM; - goto fail; + io_req_defer_failed(req, ret); + return; } spin_lock(&ctx->completion_lock); @@ -2048,13 +2043,16 @@ static void io_queue_sqe_fallback(struct io_kiocb *req) req->flags &= ~REQ_F_HARDLINK; req->flags |= REQ_F_LINK; io_req_defer_failed(req, req->cqe.res); - } else if (unlikely(req->ctx->drain_active)) { - io_drain_req(req); } else { int ret = io_req_prep_async(req); - if (unlikely(ret)) + if (unlikely(ret)) { io_req_defer_failed(req, ret); + return; + } + + if (unlikely(req->ctx->drain_active)) + io_drain_req(req); else io_queue_iowq(req, NULL); } -- cgit v1.2.3