diff options
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r-- | fs/io_uring.c | 194 |
1 files changed, 188 insertions, 6 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c index 84efb8956734..48ea3977012a 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -222,6 +222,8 @@ struct io_ring_ctx { unsigned sq_mask; unsigned sq_thread_idle; struct io_uring_sqe *sq_sqes; + + struct list_head defer_list; } ____cacheline_aligned_in_smp; /* IO offload */ @@ -239,6 +241,7 @@ struct io_ring_ctx { unsigned cq_mask; struct wait_queue_head cq_wait; struct fasync_struct *cq_fasync; + struct eventfd_ctx *cq_ev_fd; } ____cacheline_aligned_in_smp; /* @@ -327,8 +330,11 @@ struct io_kiocb { #define REQ_F_FIXED_FILE 4 /* ctx owns file */ #define REQ_F_SEQ_PREV 8 /* sequential with previous */ #define REQ_F_PREPPED 16 /* prep already done */ +#define REQ_F_IO_DRAIN 32 /* drain existing IO first */ +#define REQ_F_IO_DRAINED 64 /* drain done */ u64 user_data; - u64 error; + u32 error; /* iopoll result from callback */ + u32 sequence; struct work_struct work; }; @@ -356,6 +362,8 @@ struct io_submit_state { unsigned int ios_left; }; +static void io_sq_wq_submit_work(struct work_struct *work); + static struct kmem_cache *req_cachep; static const struct file_operations io_uring_fops; @@ -407,10 +415,36 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) spin_lock_init(&ctx->completion_lock); INIT_LIST_HEAD(&ctx->poll_list); INIT_LIST_HEAD(&ctx->cancel_list); + INIT_LIST_HEAD(&ctx->defer_list); return ctx; } -static void io_commit_cqring(struct io_ring_ctx *ctx) +static inline bool io_sequence_defer(struct io_ring_ctx *ctx, + struct io_kiocb *req) +{ + if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN) + return false; + + return req->sequence > ctx->cached_cq_tail + ctx->sq_ring->dropped; +} + +static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx) +{ + struct io_kiocb *req; + + if (list_empty(&ctx->defer_list)) + return NULL; + + req = list_first_entry(&ctx->defer_list, struct io_kiocb, list); + if (!io_sequence_defer(ctx, req)) { + list_del_init(&req->list); + return req; + } + + return NULL; +} + +static void __io_commit_cqring(struct io_ring_ctx *ctx) { struct io_cq_ring *ring = ctx->cq_ring; @@ -425,6 +459,18 @@ static void io_commit_cqring(struct io_ring_ctx *ctx) } } +static void io_commit_cqring(struct io_ring_ctx *ctx) +{ + struct io_kiocb *req; + + __io_commit_cqring(ctx); + + while ((req = io_get_deferred_req(ctx)) != NULL) { + req->flags |= REQ_F_IO_DRAINED; + queue_work(ctx->sqo_wq, &req->work); + } +} + static struct io_uring_cqe *io_get_cqring(struct io_ring_ctx *ctx) { struct io_cq_ring *ring = ctx->cq_ring; @@ -471,6 +517,8 @@ static void io_cqring_ev_posted(struct io_ring_ctx *ctx) wake_up(&ctx->wait); if (waitqueue_active(&ctx->sqo_wait)) wake_up(&ctx->sqo_wait); + if (ctx->cq_ev_fd) + eventfd_signal(ctx->cq_ev_fd, 1); } static void io_cqring_add_event(struct io_ring_ctx *ctx, u64 user_data, @@ -1222,6 +1270,54 @@ static int io_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe, return 0; } +static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe) +{ + struct io_ring_ctx *ctx = req->ctx; + int ret = 0; + + if (!req->file) + return -EBADF; + /* Prep already done (EAGAIN retry) */ + if (req->flags & REQ_F_PREPPED) + return 0; + + if (unlikely(ctx->flags & IORING_SETUP_IOPOLL)) + return -EINVAL; + if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index)) + return -EINVAL; + + req->flags |= REQ_F_PREPPED; + return ret; +} + +static int io_sync_file_range(struct io_kiocb *req, + const struct io_uring_sqe *sqe, + bool force_nonblock) +{ + loff_t sqe_off; + loff_t sqe_len; + unsigned flags; + int ret; + + ret = io_prep_sfr(req, sqe); + if (ret) + return ret; + + /* sync_file_range always requires a blocking context */ + if (force_nonblock) + return -EAGAIN; + + sqe_off = READ_ONCE(sqe->off); + sqe_len = READ_ONCE(sqe->len); + flags = READ_ONCE(sqe->sync_range_flags); + + ret = sync_file_range(req->rw.ki_filp, sqe_off, sqe_len, flags); + + io_cqring_add_event(req->ctx, sqe->user_data, ret, 0); + io_put_req(req); + return 0; +} + static void io_poll_remove_one(struct io_kiocb *req) { struct io_poll_iocb *poll = &req->poll; @@ -1424,7 +1520,6 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) spin_unlock(&poll->head->lock); } if (mask) { /* no async, we'd stolen it */ - req->error = mangle_poll(mask); ipt.error = 0; io_poll_complete(ctx, req, mask); } @@ -1437,6 +1532,34 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe) return ipt.error; } +static int io_req_defer(struct io_ring_ctx *ctx, struct io_kiocb *req, + const struct io_uring_sqe *sqe) +{ + struct io_uring_sqe *sqe_copy; + + if (!io_sequence_defer(ctx, req) && list_empty(&ctx->defer_list)) + return 0; + + sqe_copy = kmalloc(sizeof(*sqe_copy), GFP_KERNEL); + if (!sqe_copy) + return -EAGAIN; + + spin_lock_irq(&ctx->completion_lock); + if (!io_sequence_defer(ctx, req) && list_empty(&ctx->defer_list)) { + spin_unlock_irq(&ctx->completion_lock); + kfree(sqe_copy); + return 0; + } + + memcpy(sqe_copy, sqe, sizeof(*sqe_copy)); + req->submit.sqe = sqe_copy; + + INIT_WORK(&req->work, io_sq_wq_submit_work); + list_add_tail(&req->list, &ctx->defer_list); + spin_unlock_irq(&ctx->completion_lock); + return -EIOCBQUEUED; +} + static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, const struct sqe_submit *s, bool force_nonblock) { @@ -1476,6 +1599,9 @@ static int __io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, case IORING_OP_POLL_REMOVE: ret = io_poll_remove(req, s->sqe); break; + case IORING_OP_SYNC_FILE_RANGE: + ret = io_sync_file_range(req, s->sqe, force_nonblock); + break; default: ret = -EINVAL; break; @@ -1684,6 +1810,11 @@ static int io_req_set_file(struct io_ring_ctx *ctx, const struct sqe_submit *s, flags = READ_ONCE(s->sqe->flags); fd = READ_ONCE(s->sqe->fd); + if (flags & IOSQE_IO_DRAIN) { + req->flags |= REQ_F_IO_DRAIN; + req->sequence = ctx->cached_sq_head - 1; + } + if (!io_op_needs_file(s->sqe)) { req->file = NULL; return 0; @@ -1713,7 +1844,7 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, int ret; /* enforce forwards compatibility on users */ - if (unlikely(s->sqe->flags & ~IOSQE_FIXED_FILE)) + if (unlikely(s->sqe->flags & ~(IOSQE_FIXED_FILE | IOSQE_IO_DRAIN))) return -EINVAL; req = io_get_req(ctx, state); @@ -1724,6 +1855,13 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct sqe_submit *s, if (unlikely(ret)) goto out; + ret = io_req_defer(ctx, req, s->sqe); + if (ret) { + if (ret == -EIOCBQUEUED) + ret = 0; + return ret; + } + ret = __io_submit_sqe(ctx, req, s, true); if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { struct io_uring_sqe *sqe_copy; @@ -2225,7 +2363,6 @@ static int io_sqe_files_scm(struct io_ring_ctx *ctx) left = ctx->nr_user_files; while (left) { unsigned this_files = min_t(unsigned, left, SCM_MAX_FD); - int ret; ret = __io_sqe_files_scm(ctx, this_files, total); if (ret) @@ -2334,7 +2471,7 @@ static int io_sq_offload_start(struct io_ring_ctx *ctx, nr_cpu_ids); ret = -EINVAL; - if (!cpu_possible(cpu)) + if (!cpu_online(cpu)) goto err; ctx->sqo_thread = kthread_create_on_cpu(io_sq_thread, @@ -2621,6 +2758,38 @@ err: return ret; } +static int io_eventfd_register(struct io_ring_ctx *ctx, void __user *arg) +{ + __s32 __user *fds = arg; + int fd; + + if (ctx->cq_ev_fd) + return -EBUSY; + + if (copy_from_user(&fd, fds, sizeof(*fds))) + return -EFAULT; + + ctx->cq_ev_fd = eventfd_ctx_fdget(fd); + if (IS_ERR(ctx->cq_ev_fd)) { + int ret = PTR_ERR(ctx->cq_ev_fd); + ctx->cq_ev_fd = NULL; + return ret; + } + + return 0; +} + +static int io_eventfd_unregister(struct io_ring_ctx *ctx) +{ + if (ctx->cq_ev_fd) { + eventfd_ctx_put(ctx->cq_ev_fd); + ctx->cq_ev_fd = NULL; + return 0; + } + + return -ENXIO; +} + static void io_ring_ctx_free(struct io_ring_ctx *ctx) { io_finish_async(ctx); @@ -2630,6 +2799,7 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx) io_iopoll_reap_events(ctx); io_sqe_buffer_unregister(ctx); io_sqe_files_unregister(ctx); + io_eventfd_unregister(ctx); #if defined(CONFIG_UNIX) if (ctx->ring_sock) @@ -3043,6 +3213,18 @@ static int __io_uring_register(struct io_ring_ctx *ctx, unsigned opcode, break; ret = io_sqe_files_unregister(ctx); break; + case IORING_REGISTER_EVENTFD: + ret = -EINVAL; + if (nr_args != 1) + break; + ret = io_eventfd_register(ctx, arg); + break; + case IORING_UNREGISTER_EVENTFD: + ret = -EINVAL; + if (arg || nr_args) + break; + ret = io_eventfd_unregister(ctx); + break; default: ret = -EINVAL; break; |