summaryrefslogtreecommitdiffstats
path: root/fs/io_uring.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/io_uring.c')
-rw-r--r--fs/io_uring.c209
1 files changed, 140 insertions, 69 deletions
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 2f305c097bd5..1f68105a41ed 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -354,6 +354,7 @@ struct io_ring_ctx {
unsigned cq_entries;
unsigned cq_mask;
atomic_t cq_timeouts;
+ unsigned cq_last_tm_flush;
unsigned long cq_check_overflow;
struct wait_queue_head cq_wait;
struct fasync_struct *cq_fasync;
@@ -1024,6 +1025,8 @@ static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
static int io_setup_async_rw(struct io_kiocb *req, const struct iovec *iovec,
const struct iovec *fast_iov,
struct iov_iter *iter, bool force);
+static void io_req_drop_files(struct io_kiocb *req);
+static void io_req_task_queue(struct io_kiocb *req);
static struct kmem_cache *req_cachep;
@@ -1047,8 +1050,7 @@ EXPORT_SYMBOL(io_uring_get_socket);
static inline void io_clean_op(struct io_kiocb *req)
{
- if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED |
- REQ_F_INFLIGHT))
+ if (req->flags & (REQ_F_NEED_CLEANUP | REQ_F_BUFFER_SELECTED))
__io_clean_op(req);
}
@@ -1068,14 +1070,21 @@ static bool io_match_task(struct io_kiocb *head,
{
struct io_kiocb *req;
- if (task && head->task != task)
+ if (task && head->task != task) {
+ /* in terms of cancelation, always match if req task is dead */
+ if (head->task->flags & PF_EXITING)
+ return true;
return false;
+ }
if (!files)
return true;
io_for_each_link(req, head) {
- if ((req->flags & REQ_F_WORK_INITIALIZED) &&
- (req->work.flags & IO_WQ_WORK_FILES) &&
+ if (!(req->flags & REQ_F_WORK_INITIALIZED))
+ continue;
+ if (req->file && req->file->f_op == &io_uring_fops)
+ return true;
+ if ((req->work.flags & IO_WQ_WORK_FILES) &&
req->work.identity->files == files)
return true;
}
@@ -1106,6 +1115,9 @@ static void io_sq_thread_drop_mm_files(void)
static int __io_sq_thread_acquire_files(struct io_ring_ctx *ctx)
{
+ if (current->flags & PF_EXITING)
+ return -EFAULT;
+
if (!current->files) {
struct files_struct *files;
struct nsproxy *nsproxy;
@@ -1133,6 +1145,8 @@ static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx)
{
struct mm_struct *mm;
+ if (current->flags & PF_EXITING)
+ return -EFAULT;
if (current->mm)
return 0;
@@ -1388,6 +1402,8 @@ static void io_req_clean_work(struct io_kiocb *req)
free_fs_struct(fs);
req->work.flags &= ~IO_WQ_WORK_FS;
}
+ if (req->flags & REQ_F_INFLIGHT)
+ io_req_drop_files(req);
io_put_identity(req->task->io_uring, req);
}
@@ -1497,11 +1513,14 @@ static bool io_grab_identity(struct io_kiocb *req)
return false;
atomic_inc(&id->files->count);
get_nsproxy(id->nsproxy);
- req->flags |= REQ_F_INFLIGHT;
- spin_lock_irq(&ctx->inflight_lock);
- list_add(&req->inflight_entry, &ctx->inflight_list);
- spin_unlock_irq(&ctx->inflight_lock);
+ if (!(req->flags & REQ_F_INFLIGHT)) {
+ req->flags |= REQ_F_INFLIGHT;
+
+ spin_lock_irq(&ctx->inflight_lock);
+ list_add(&req->inflight_entry, &ctx->inflight_list);
+ spin_unlock_irq(&ctx->inflight_lock);
+ }
req->work.flags |= IO_WQ_WORK_FILES;
}
if (!(req->work.flags & IO_WQ_WORK_MM) &&
@@ -1616,37 +1635,49 @@ static void __io_queue_deferred(struct io_ring_ctx *ctx)
do {
struct io_defer_entry *de = list_first_entry(&ctx->defer_list,
struct io_defer_entry, list);
- struct io_kiocb *link;
if (req_need_defer(de->req, de->seq))
break;
list_del_init(&de->list);
- /* punt-init is done before queueing for defer */
- link = __io_queue_async_work(de->req);
- if (link) {
- __io_queue_linked_timeout(link);
- /* drop submission reference */
- io_put_req_deferred(link, 1);
- }
+ io_req_task_queue(de->req);
kfree(de);
} while (!list_empty(&ctx->defer_list));
}
static void io_flush_timeouts(struct io_ring_ctx *ctx)
{
- while (!list_empty(&ctx->timeout_list)) {
+ u32 seq;
+
+ if (list_empty(&ctx->timeout_list))
+ return;
+
+ seq = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
+
+ do {
+ u32 events_needed, events_got;
struct io_kiocb *req = list_first_entry(&ctx->timeout_list,
struct io_kiocb, timeout.list);
if (io_is_timeout_noseq(req))
break;
- if (req->timeout.target_seq != ctx->cached_cq_tail
- - atomic_read(&ctx->cq_timeouts))
+
+ /*
+ * Since seq can easily wrap around over time, subtract
+ * the last seq at which timeouts were flushed before comparing.
+ * Assuming not more than 2^31-1 events have happened since,
+ * these subtractions won't have wrapped, so we can check if
+ * target is in [last_seq, current_seq] by comparing the two.
+ */
+ events_needed = req->timeout.target_seq - ctx->cq_last_tm_flush;
+ events_got = seq - ctx->cq_last_tm_flush;
+ if (events_got < events_needed)
break;
list_del_init(&req->timeout.list);
io_kill_timeout(req);
- }
+ } while (!list_empty(&ctx->timeout_list));
+
+ ctx->cq_last_tm_flush = seq;
}
static void io_commit_cqring(struct io_ring_ctx *ctx)
@@ -1742,12 +1773,13 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
struct io_kiocb *req, *tmp;
struct io_uring_cqe *cqe;
unsigned long flags;
- bool all_flushed;
+ bool all_flushed, posted;
LIST_HEAD(list);
if (!force && __io_cqring_events(ctx) == rings->cq_ring_entries)
return false;
+ posted = false;
spin_lock_irqsave(&ctx->completion_lock, flags);
list_for_each_entry_safe(req, tmp, &ctx->cq_overflow_list, compl.list) {
if (!io_match_task(req, tsk, files))
@@ -1767,6 +1799,7 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
WRITE_ONCE(ctx->rings->cq_overflow,
ctx->cached_cq_overflow);
}
+ posted = true;
}
all_flushed = list_empty(&ctx->cq_overflow_list);
@@ -1776,9 +1809,11 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
ctx->rings->sq_flags &= ~IORING_SQ_CQ_OVERFLOW;
}
- io_commit_cqring(ctx);
+ if (posted)
+ io_commit_cqring(ctx);
spin_unlock_irqrestore(&ctx->completion_lock, flags);
- io_cqring_ev_posted(ctx);
+ if (posted)
+ io_cqring_ev_posted(ctx);
while (!list_empty(&list)) {
req = list_first_entry(&list, struct io_kiocb, compl.list);
@@ -2170,6 +2205,9 @@ static void __io_req_task_submit(struct io_kiocb *req)
else
__io_req_task_cancel(req, -EFAULT);
mutex_unlock(&ctx->uring_lock);
+
+ if (ctx->flags & IORING_SETUP_SQPOLL)
+ io_sq_thread_drop_mm_files();
}
static void io_req_task_submit(struct callback_head *cb)
@@ -2245,6 +2283,8 @@ static void io_req_free_batch_finish(struct io_ring_ctx *ctx,
struct io_uring_task *tctx = rb->task->io_uring;
percpu_counter_sub(&tctx->inflight, rb->task_refs);
+ if (atomic_read(&tctx->in_idle))
+ wake_up(&tctx->wait);
put_task_struct_many(rb->task, rb->task_refs);
rb->task = NULL;
}
@@ -2263,6 +2303,8 @@ static void io_req_free_batch(struct req_batch *rb, struct io_kiocb *req)
struct io_uring_task *tctx = rb->task->io_uring;
percpu_counter_sub(&tctx->inflight, rb->task_refs);
+ if (atomic_read(&tctx->in_idle))
+ wake_up(&tctx->wait);
put_task_struct_many(rb->task, rb->task_refs);
}
rb->task = req->task;
@@ -3523,7 +3565,7 @@ static int io_read(struct io_kiocb *req, bool force_nonblock,
/* read it all, or we did blocking attempt. no retry. */
if (!iov_iter_count(iter) || !force_nonblock ||
- (req->file->f_flags & O_NONBLOCK))
+ (req->file->f_flags & O_NONBLOCK) || !(req->flags & REQ_F_ISREG))
goto done;
io_size -= ret;
@@ -4443,7 +4485,6 @@ static int io_close_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
* io_wq_work.flags, so initialize io_wq_work firstly.
*/
io_req_init_async(req);
- req->work.flags |= IO_WQ_WORK_NO_CANCEL;
if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
return -EINVAL;
@@ -4476,6 +4517,8 @@ static int io_close(struct io_kiocb *req, bool force_nonblock,
/* if the file has a flush method, be safe and punt to async */
if (close->put_file->f_op->flush && force_nonblock) {
+ /* not safe to cancel at this point */
+ req->work.flags |= IO_WQ_WORK_NO_CANCEL;
/* was never set, but play safe */
req->flags &= ~REQ_F_NOWAIT;
/* avoid grabbing files - we don't need the files */
@@ -5832,6 +5875,12 @@ static int io_timeout(struct io_kiocb *req)
tail = ctx->cached_cq_tail - atomic_read(&ctx->cq_timeouts);
req->timeout.target_seq = tail + off;
+ /* Update the last seq here in case io_flush_timeouts() hasn't.
+ * This is safe because ->completion_lock is held, and submissions
+ * and completions are never mixed in the same ->completion_lock section.
+ */
+ ctx->cq_last_tm_flush = tail;
+
/*
* Insertion sort, ensuring the first entry in the list is always
* the one we need first.
@@ -6126,8 +6175,10 @@ static void io_req_drop_files(struct io_kiocb *req)
struct io_uring_task *tctx = req->task->io_uring;
unsigned long flags;
- put_files_struct(req->work.identity->files);
- put_nsproxy(req->work.identity->nsproxy);
+ if (req->work.flags & IO_WQ_WORK_FILES) {
+ put_files_struct(req->work.identity->files);
+ put_nsproxy(req->work.identity->nsproxy);
+ }
spin_lock_irqsave(&ctx->inflight_lock, flags);
list_del(&req->inflight_entry);
spin_unlock_irqrestore(&ctx->inflight_lock, flags);
@@ -6194,9 +6245,6 @@ static void __io_clean_op(struct io_kiocb *req)
}
req->flags &= ~REQ_F_NEED_CLEANUP;
}
-
- if (req->flags & REQ_F_INFLIGHT)
- io_req_drop_files(req);
}
static int io_issue_sqe(struct io_kiocb *req, bool force_nonblock,
@@ -6415,6 +6463,16 @@ static struct file *io_file_get(struct io_submit_state *state,
file = __io_file_get(state, fd);
}
+ if (file && file->f_op == &io_uring_fops &&
+ !(req->flags & REQ_F_INFLIGHT)) {
+ io_req_init_async(req);
+ req->flags |= REQ_F_INFLIGHT;
+
+ spin_lock_irq(&ctx->inflight_lock);
+ list_add(&req->inflight_entry, &ctx->inflight_list);
+ spin_unlock_irq(&ctx->inflight_lock);
+ }
+
return file;
}
@@ -7056,6 +7114,7 @@ static int io_sq_thread(void *data)
if (sqt_spin || !time_after(jiffies, timeout)) {
io_run_task_work();
+ io_sq_thread_drop_mm_files();
cond_resched();
if (sqt_spin)
timeout = jiffies + sqd->sq_thread_idle;
@@ -7093,6 +7152,7 @@ static int io_sq_thread(void *data)
}
io_run_task_work();
+ io_sq_thread_drop_mm_files();
if (cur_css)
io_sq_thread_unassociate_blkcg();
@@ -7212,14 +7272,18 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
TASK_INTERRUPTIBLE);
/* make sure we run task_work before checking for signals */
ret = io_run_task_work_sig();
- if (ret > 0)
+ if (ret > 0) {
+ finish_wait(&ctx->wait, &iowq.wq);
continue;
+ }
else if (ret < 0)
break;
if (io_should_wake(&iowq))
break;
- if (test_bit(0, &ctx->cq_check_overflow))
+ if (test_bit(0, &ctx->cq_check_overflow)) {
+ finish_wait(&ctx->wait, &iowq.wq);
continue;
+ }
if (uts) {
timeout = schedule_timeout(timeout);
if (timeout == 0) {
@@ -8811,39 +8875,44 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
}
}
+static int io_uring_count_inflight(struct io_ring_ctx *ctx,
+ struct task_struct *task,
+ struct files_struct *files)
+{
+ struct io_kiocb *req;
+ int cnt = 0;
+
+ spin_lock_irq(&ctx->inflight_lock);
+ list_for_each_entry(req, &ctx->inflight_list, inflight_entry)
+ cnt += io_match_task(req, task, files);
+ spin_unlock_irq(&ctx->inflight_lock);
+ return cnt;
+}
+
static void io_uring_cancel_files(struct io_ring_ctx *ctx,
struct task_struct *task,
struct files_struct *files)
{
while (!list_empty_careful(&ctx->inflight_list)) {
struct io_task_cancel cancel = { .task = task, .files = files };
- struct io_kiocb *req;
DEFINE_WAIT(wait);
- bool found = false;
-
- spin_lock_irq(&ctx->inflight_lock);
- list_for_each_entry(req, &ctx->inflight_list, inflight_entry) {
- if (req->task != task ||
- req->work.identity->files != files)
- continue;
- found = true;
- break;
- }
- if (found)
- prepare_to_wait(&task->io_uring->wait, &wait,
- TASK_UNINTERRUPTIBLE);
- spin_unlock_irq(&ctx->inflight_lock);
+ int inflight;
- /* We need to keep going until we don't find a matching req */
- if (!found)
+ inflight = io_uring_count_inflight(ctx, task, files);
+ if (!inflight)
break;
io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb, &cancel, true);
io_poll_remove_all(ctx, task, files);
io_kill_timeouts(ctx, task, files);
+ io_cqring_overflow_flush(ctx, true, task, files);
/* cancellations _may_ trigger task work */
io_run_task_work();
- schedule();
+
+ prepare_to_wait(&task->io_uring->wait, &wait,
+ TASK_UNINTERRUPTIBLE);
+ if (inflight == io_uring_count_inflight(ctx, task, files))
+ schedule();
finish_wait(&task->io_uring->wait, &wait);
}
}
@@ -8881,14 +8950,13 @@ static void __io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
static void io_disable_sqo_submit(struct io_ring_ctx *ctx)
{
- WARN_ON_ONCE(ctx->sqo_task != current);
-
mutex_lock(&ctx->uring_lock);
ctx->sqo_dead = 1;
mutex_unlock(&ctx->uring_lock);
/* make sure callers enter the ring to get error */
- io_ring_set_wakeup_flag(ctx);
+ if (ctx->rings)
+ io_ring_set_wakeup_flag(ctx);
}
/*
@@ -8902,7 +8970,6 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
struct task_struct *task = current;
if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
- /* for SQPOLL only sqo_task has task notes */
io_disable_sqo_submit(ctx);
task = ctx->sq_data->thread;
atomic_inc(&task->io_uring->in_idle);
@@ -8912,19 +8979,12 @@ static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
io_cancel_defer_files(ctx, task, files);
io_cqring_overflow_flush(ctx, true, task, files);
+ io_uring_cancel_files(ctx, task, files);
if (!files)
__io_uring_cancel_task_requests(ctx, task);
- else
- io_uring_cancel_files(ctx, task, files);
if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
atomic_dec(&task->io_uring->in_idle);
- /*
- * If the files that are going away are the ones in the thread
- * identity, clear them out.
- */
- if (task->io_uring->identity->files == files)
- task->io_uring->identity->files = NULL;
io_sq_thread_unpark(ctx->sq_data);
}
}
@@ -9048,6 +9108,10 @@ void __io_uring_task_cancel(void)
/* make sure overflow events are dropped */
atomic_inc(&tctx->in_idle);
+ /* trigger io_disable_sqo_submit() */
+ if (tctx->sqpoll)
+ __io_uring_files_cancel(NULL);
+
do {
/* read completions before cancelations */
inflight = tctx_inflight(tctx);
@@ -9058,12 +9122,12 @@ void __io_uring_task_cancel(void)
prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
/*
- * If we've seen completions, retry. This avoids a race where
- * a completion comes in before we did prepare_to_wait().
+ * If we've seen completions, retry without waiting. This
+ * avoids a race where a completion comes in before we did
+ * prepare_to_wait().
*/
- if (inflight != tctx_inflight(tctx))
- continue;
- schedule();
+ if (inflight == tctx_inflight(tctx))
+ schedule();
finish_wait(&tctx->wait, &wait);
} while (1);
@@ -9077,6 +9141,9 @@ static int io_uring_flush(struct file *file, void *data)
struct io_uring_task *tctx = current->io_uring;
struct io_ring_ctx *ctx = file->private_data;
+ if (fatal_signal_pending(current) || (current->flags & PF_EXITING))
+ io_uring_cancel_task_requests(ctx, NULL);
+
if (!tctx)
return 0;
@@ -9093,7 +9160,10 @@ static int io_uring_flush(struct file *file, void *data)
if (ctx->flags & IORING_SETUP_SQPOLL) {
/* there is only one file note, which is owned by sqo_task */
- WARN_ON_ONCE((ctx->sqo_task == current) ==
+ WARN_ON_ONCE(ctx->sqo_task != current &&
+ xa_load(&tctx->xa, (unsigned long)file));
+ /* sqo_dead check is for when this happens after cancellation */
+ WARN_ON_ONCE(ctx->sqo_task == current && !ctx->sqo_dead &&
!xa_load(&tctx->xa, (unsigned long)file));
io_disable_sqo_submit(ctx);
@@ -9700,6 +9770,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
*/
ret = io_uring_install_fd(ctx, file);
if (ret < 0) {
+ io_disable_sqo_submit(ctx);
/* fput will clean it up */
fput(file);
return ret;