summaryrefslogtreecommitdiffstats
path: root/tools/io_uring/queue.c
diff options
context:
space:
mode:
Diffstat (limited to 'tools/io_uring/queue.c')
-rw-r--r--tools/io_uring/queue.c164
1 files changed, 164 insertions, 0 deletions
diff --git a/tools/io_uring/queue.c b/tools/io_uring/queue.c
new file mode 100644
index 000000000000..88505e873ad9
--- /dev/null
+++ b/tools/io_uring/queue.c
@@ -0,0 +1,164 @@
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <errno.h>
+#include <string.h>
+
+#include "liburing.h"
+#include "barrier.h"
+
+static int __io_uring_get_completion(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr, int wait)
+{
+ struct io_uring_cq *cq = &ring->cq;
+ const unsigned mask = *cq->kring_mask;
+ unsigned head;
+ int ret;
+
+ *cqe_ptr = NULL;
+ head = *cq->khead;
+ do {
+ /*
+ * It's necessary to use a read_barrier() before reading
+ * the CQ tail, since the kernel updates it locklessly. The
+ * kernel has the matching store barrier for the update. The
+ * kernel also ensures that previous stores to CQEs are ordered
+ * with the tail update.
+ */
+ read_barrier();
+ if (head != *cq->ktail) {
+ *cqe_ptr = &cq->cqes[head & mask];
+ break;
+ }
+ if (!wait)
+ break;
+ ret = io_uring_enter(ring->ring_fd, 0, 1,
+ IORING_ENTER_GETEVENTS, NULL);
+ if (ret < 0)
+ return -errno;
+ } while (1);
+
+ if (*cqe_ptr) {
+ *cq->khead = head + 1;
+ /*
+ * Ensure that the kernel sees our new head, the kernel has
+ * the matching read barrier.
+ */
+ write_barrier();
+ }
+
+ return 0;
+}
+
+/*
+ * Return an IO completion, if one is readily available
+ */
+int io_uring_get_completion(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr)
+{
+ return __io_uring_get_completion(ring, cqe_ptr, 0);
+}
+
+/*
+ * Return an IO completion, waiting for it if necessary
+ */
+int io_uring_wait_completion(struct io_uring *ring,
+ struct io_uring_cqe **cqe_ptr)
+{
+ return __io_uring_get_completion(ring, cqe_ptr, 1);
+}
+
+/*
+ * Submit sqes acquired from io_uring_get_sqe() to the kernel.
+ *
+ * Returns number of sqes submitted
+ */
+int io_uring_submit(struct io_uring *ring)
+{
+ struct io_uring_sq *sq = &ring->sq;
+ const unsigned mask = *sq->kring_mask;
+ unsigned ktail, ktail_next, submitted;
+ int ret;
+
+ /*
+ * If we have pending IO in the kring, submit it first. We need a
+ * read barrier here to match the kernels store barrier when updating
+ * the SQ head.
+ */
+ read_barrier();
+ if (*sq->khead != *sq->ktail) {
+ submitted = *sq->kring_entries;
+ goto submit;
+ }
+
+ if (sq->sqe_head == sq->sqe_tail)
+ return 0;
+
+ /*
+ * Fill in sqes that we have queued up, adding them to the kernel ring
+ */
+ submitted = 0;
+ ktail = ktail_next = *sq->ktail;
+ while (sq->sqe_head < sq->sqe_tail) {
+ ktail_next++;
+ read_barrier();
+
+ sq->array[ktail & mask] = sq->sqe_head & mask;
+ ktail = ktail_next;
+
+ sq->sqe_head++;
+ submitted++;
+ }
+
+ if (!submitted)
+ return 0;
+
+ if (*sq->ktail != ktail) {
+ /*
+ * First write barrier ensures that the SQE stores are updated
+ * with the tail update. This is needed so that the kernel
+ * will never see a tail update without the preceeding sQE
+ * stores being done.
+ */
+ write_barrier();
+ *sq->ktail = ktail;
+ /*
+ * The kernel has the matching read barrier for reading the
+ * SQ tail.
+ */
+ write_barrier();
+ }
+
+submit:
+ ret = io_uring_enter(ring->ring_fd, submitted, 0,
+ IORING_ENTER_GETEVENTS, NULL);
+ if (ret < 0)
+ return -errno;
+
+ return 0;
+}
+
+/*
+ * Return an sqe to fill. Application must later call io_uring_submit()
+ * when it's ready to tell the kernel about it. The caller may call this
+ * function multiple times before calling io_uring_submit().
+ *
+ * Returns a vacant sqe, or NULL if we're full.
+ */
+struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring)
+{
+ struct io_uring_sq *sq = &ring->sq;
+ unsigned next = sq->sqe_tail + 1;
+ struct io_uring_sqe *sqe;
+
+ /*
+ * All sqes are used
+ */
+ if (next - sq->sqe_head > *sq->kring_entries)
+ return NULL;
+
+ sqe = &sq->sqes[sq->sqe_tail & *sq->kring_mask];
+ sq->sqe_tail = next;
+ return sqe;
+}