diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ceph/caps.c | 3 | ||||
-rw-r--r-- | fs/ceph/debugfs.c | 2 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 8 | ||||
-rw-r--r-- | fs/ceph/quota.c | 4 | ||||
-rw-r--r-- | fs/configfs/dir.c | 1 | ||||
-rw-r--r-- | fs/coredump.c | 8 | ||||
-rw-r--r-- | fs/eventpoll.c | 107 | ||||
-rw-r--r-- | fs/gfs2/bmap.c | 16 | ||||
-rw-r--r-- | fs/gfs2/glock.c | 6 | ||||
-rw-r--r-- | fs/gfs2/inode.c | 7 | ||||
-rw-r--r-- | fs/gfs2/log.c | 11 | ||||
-rw-r--r-- | fs/gfs2/lops.c | 19 | ||||
-rw-r--r-- | fs/gfs2/meta_io.c | 2 | ||||
-rw-r--r-- | fs/gfs2/quota.c | 13 | ||||
-rw-r--r-- | fs/gfs2/quota.h | 3 | ||||
-rw-r--r-- | fs/gfs2/super.c | 1 | ||||
-rw-r--r-- | fs/gfs2/util.c | 10 | ||||
-rw-r--r-- | fs/io_uring.c | 65 | ||||
-rw-r--r-- | fs/splice.c | 45 | ||||
-rw-r--r-- | fs/vboxsf/super.c | 2 |
20 files changed, 167 insertions, 166 deletions
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 185db76300b3..5f3aa4d607de 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2749,7 +2749,7 @@ int ceph_try_get_caps(struct inode *inode, int need, int want, ret = try_get_cap_refs(inode, need, want, 0, flags, got); /* three special error codes */ - if (ret == -EAGAIN || ret == -EFBIG || ret == -EAGAIN) + if (ret == -EAGAIN || ret == -EFBIG || ret == -ESTALE) ret = 0; return ret; } @@ -3746,6 +3746,7 @@ retry: WARN_ON(1); tsession = NULL; target = -1; + mutex_lock(&session->s_mutex); } goto retry; diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index 481ac97b4d25..dcaed75de9e6 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -271,7 +271,7 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc) &congestion_kb_fops); snprintf(name, sizeof(name), "../../bdi/%s", - dev_name(fsc->sb->s_bdi->dev)); + bdi_dev_name(fsc->sb->s_bdi)); fsc->debugfs_bdi = debugfs_create_symlink("bdi", fsc->client->debugfs_dir, diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 486f91f9685b..7c63abf5bea9 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -3251,8 +3251,7 @@ static void handle_session(struct ceph_mds_session *session, void *end = p + msg->front.iov_len; struct ceph_mds_session_head *h; u32 op; - u64 seq; - unsigned long features = 0; + u64 seq, features = 0; int wake = 0; bool blacklisted = false; @@ -3271,9 +3270,8 @@ static void handle_session(struct ceph_mds_session *session, goto bad; /* version >= 3, feature bits */ ceph_decode_32_safe(&p, end, len, bad); - ceph_decode_need(&p, end, len, bad); - memcpy(&features, p, min_t(size_t, len, sizeof(features))); - p += len; + ceph_decode_64_safe(&p, end, features, bad); + p += len - sizeof(features); } mutex_lock(&mdsc->mutex); diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c index de56dee60540..19507e2fdb57 100644 --- a/fs/ceph/quota.c +++ b/fs/ceph/quota.c @@ -159,8 +159,8 @@ static struct inode *lookup_quotarealm_inode(struct ceph_mds_client *mdsc, } if (IS_ERR(in)) { - pr_warn("Can't lookup inode %llx (err: %ld)\n", - realm->ino, PTR_ERR(in)); + dout("Can't lookup inode %llx (err: %ld)\n", + realm->ino, PTR_ERR(in)); qri->timeout = jiffies + msecs_to_jiffies(60 * 1000); /* XXX */ } else { qri->timeout = 0; diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index cf7b7e1d5bd7..cb733652ecca 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1519,6 +1519,7 @@ static int configfs_rmdir(struct inode *dir, struct dentry *dentry) spin_lock(&configfs_dirent_lock); configfs_detach_rollback(dentry); spin_unlock(&configfs_dirent_lock); + config_item_put(parent_item); return -EINTR; } frag->frag_dead = true; diff --git a/fs/coredump.c b/fs/coredump.c index 408418e6aa13..478a0d810136 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -788,6 +788,14 @@ void do_coredump(const kernel_siginfo_t *siginfo) if (displaced) put_files_struct(displaced); if (!dump_interrupted()) { + /* + * umh disabled with CONFIG_STATIC_USERMODEHELPER_PATH="" would + * have this set to NULL. + */ + if (!cprm.file) { + pr_info("Core dump to |%s disabled\n", cn.corename); + goto close_fail; + } file_start_write(cprm.file); core_dumped = binfmt->core_dump(&cprm); file_end_write(cprm.file); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 8c596641a72b..12eebcdea9c8 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1171,6 +1171,10 @@ static inline bool chain_epi_lockless(struct epitem *epi) { struct eventpoll *ep = epi->ep; + /* Fast preliminary check */ + if (epi->next != EP_UNACTIVE_PTR) + return false; + /* Check that the same epi has not been just chained from another CPU */ if (cmpxchg(&epi->next, EP_UNACTIVE_PTR, NULL) != EP_UNACTIVE_PTR) return false; @@ -1237,16 +1241,12 @@ static int ep_poll_callback(wait_queue_entry_t *wait, unsigned mode, int sync, v * chained in ep->ovflist and requeued later on. */ if (READ_ONCE(ep->ovflist) != EP_UNACTIVE_PTR) { - if (epi->next == EP_UNACTIVE_PTR && - chain_epi_lockless(epi)) + if (chain_epi_lockless(epi)) + ep_pm_stay_awake_rcu(epi); + } else if (!ep_is_linked(epi)) { + /* In the usual case, add event to ready list. */ + if (list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) ep_pm_stay_awake_rcu(epi); - goto out_unlock; - } - - /* If this file is already in the ready list we exit soon */ - if (!ep_is_linked(epi) && - list_add_tail_lockless(&epi->rdllink, &ep->rdllist)) { - ep_pm_stay_awake_rcu(epi); } /* @@ -1822,7 +1822,6 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, { int res = 0, eavail, timed_out = 0; u64 slack = 0; - bool waiter = false; wait_queue_entry_t wait; ktime_t expires, *to = NULL; @@ -1867,55 +1866,75 @@ fetch_events: */ ep_reset_busy_poll_napi_id(ep); - /* - * We don't have any available event to return to the caller. We need - * to sleep here, and we will be woken by ep_poll_callback() when events - * become available. - */ - if (!waiter) { - waiter = true; - init_waitqueue_entry(&wait, current); + do { + /* + * Internally init_wait() uses autoremove_wake_function(), + * thus wait entry is removed from the wait queue on each + * wakeup. Why it is important? In case of several waiters + * each new wakeup will hit the next waiter, giving it the + * chance to harvest new event. Otherwise wakeup can be + * lost. This is also good performance-wise, because on + * normal wakeup path no need to call __remove_wait_queue() + * explicitly, thus ep->lock is not taken, which halts the + * event delivery. + */ + init_wait(&wait); write_lock_irq(&ep->lock); - __add_wait_queue_exclusive(&ep->wq, &wait); - write_unlock_irq(&ep->lock); - } - - for (;;) { /* - * We don't want to sleep if the ep_poll_callback() sends us - * a wakeup in between. That's why we set the task state - * to TASK_INTERRUPTIBLE before doing the checks. + * Barrierless variant, waitqueue_active() is called under + * the same lock on wakeup ep_poll_callback() side, so it + * is safe to avoid an explicit barrier. */ - set_current_state(TASK_INTERRUPTIBLE); + __set_current_state(TASK_INTERRUPTIBLE); + /* - * Always short-circuit for fatal signals to allow - * threads to make a timely exit without the chance of - * finding more events available and fetching - * repeatedly. + * Do the final check under the lock. ep_scan_ready_list() + * plays with two lists (->rdllist and ->ovflist) and there + * is always a race when both lists are empty for short + * period of time although events are pending, so lock is + * important. */ - if (fatal_signal_pending(current)) { - res = -EINTR; - break; + eavail = ep_events_available(ep); + if (!eavail) { + if (signal_pending(current)) + res = -EINTR; + else + __add_wait_queue_exclusive(&ep->wq, &wait); } + write_unlock_irq(&ep->lock); - eavail = ep_events_available(ep); - if (eavail) + if (eavail || res) break; - if (signal_pending(current)) { - res = -EINTR; - break; - } if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) { timed_out = 1; break; } - } + + /* We were woken up, thus go and try to harvest some events */ + eavail = 1; + + } while (0); __set_current_state(TASK_RUNNING); + if (!list_empty_careful(&wait.entry)) { + write_lock_irq(&ep->lock); + __remove_wait_queue(&ep->wq, &wait); + write_unlock_irq(&ep->lock); + } + send_events: + if (fatal_signal_pending(current)) { + /* + * Always short-circuit for fatal signals to allow + * threads to make a timely exit without the chance of + * finding more events available and fetching + * repeatedly. + */ + res = -EINTR; + } /* * Try to transfer events to user space. In case we get 0 events and * there's still timeout left over, we go trying again in search of @@ -1925,12 +1944,6 @@ send_events: !(res = ep_send_events(ep, events, maxevents)) && !timed_out) goto fetch_events; - if (waiter) { - write_lock_irq(&ep->lock); - __remove_wait_queue(&ep->wq, &wait); - write_unlock_irq(&ep->lock); - } - return res; } diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 936a8ec6b48e..6306eaae378b 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -528,10 +528,12 @@ lower_metapath: /* Advance in metadata tree. */ (mp->mp_list[hgt])++; - if (mp->mp_list[hgt] >= sdp->sd_inptrs) { - if (!hgt) + if (hgt) { + if (mp->mp_list[hgt] >= sdp->sd_inptrs) + goto lower_metapath; + } else { + if (mp->mp_list[hgt] >= sdp->sd_diptrs) break; - goto lower_metapath; } fill_up_metapath: @@ -876,10 +878,9 @@ static int gfs2_iomap_get(struct inode *inode, loff_t pos, loff_t length, ret = -ENOENT; goto unlock; } else { - /* report a hole */ iomap->offset = pos; iomap->length = length; - goto do_alloc; + goto hole_found; } } iomap->length = size; @@ -933,8 +934,6 @@ unlock: return ret; do_alloc: - iomap->addr = IOMAP_NULL_ADDR; - iomap->type = IOMAP_HOLE; if (flags & IOMAP_REPORT) { if (pos >= size) ret = -ENOENT; @@ -956,6 +955,9 @@ do_alloc: if (pos < size && height == ip->i_height) ret = gfs2_hole_size(inode, lblock, len, mp, iomap); } +hole_found: + iomap->addr = IOMAP_NULL_ADDR; + iomap->type = IOMAP_HOLE; goto out; } diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 29f9b6684b74..bf70e3b14938 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -613,7 +613,7 @@ __acquires(&gl->gl_lockref.lock) fs_err(sdp, "Error %d syncing glock \n", ret); gfs2_dump_glock(NULL, gl, true); } - return; + goto skip_inval; } } if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) { @@ -633,6 +633,7 @@ __acquires(&gl->gl_lockref.lock) clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); } +skip_inval: gfs2_glock_hold(gl); /* * Check for an error encountered since we called go_sync and go_inval. @@ -722,9 +723,6 @@ __acquires(&gl->gl_lockref.lock) goto out_unlock; if (nonblock) goto out_sched; - smp_mb(); - if (atomic_read(&gl->gl_revokes) != 0) - goto out_sched; set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE); gl->gl_target = gl->gl_demote_state; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 70b2d3a1e866..5acd3ce30759 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -622,7 +622,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = finish_no_open(file, NULL); } gfs2_glock_dq_uninit(ghs); - return error; + goto fail; } else if (error != -ENOENT) { goto fail_gunlock; } @@ -764,9 +764,11 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, error = finish_open(file, dentry, gfs2_open_common); } gfs2_glock_dq_uninit(ghs); + gfs2_qa_put(ip); gfs2_glock_dq_uninit(ghs + 1); clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); gfs2_glock_put(io_gl); + gfs2_qa_put(dip); return error; fail_gunlock3: @@ -776,7 +778,6 @@ fail_gunlock2: clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); gfs2_glock_put(io_gl); fail_free_inode: - gfs2_qa_put(ip); if (ip->i_gl) { glock_clear_object(ip->i_gl, ip); gfs2_glock_put(ip->i_gl); @@ -1005,7 +1006,7 @@ out_gunlock: out_child: gfs2_glock_dq(ghs); out_parent: - gfs2_qa_put(ip); + gfs2_qa_put(dip); gfs2_holder_uninit(ghs); gfs2_holder_uninit(ghs + 1); return error; diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c index 3a75843ae580..0644e58c6191 100644 --- a/fs/gfs2/log.c +++ b/fs/gfs2/log.c @@ -669,13 +669,13 @@ void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd) struct buffer_head *bh = bd->bd_bh; struct gfs2_glock *gl = bd->bd_gl; + sdp->sd_log_num_revoke++; + if (atomic_inc_return(&gl->gl_revokes) == 1) + gfs2_glock_hold(gl); bh->b_private = NULL; bd->bd_blkno = bh->b_blocknr; gfs2_remove_from_ail(bd); /* drops ref on bh */ bd->bd_bh = NULL; - sdp->sd_log_num_revoke++; - if (atomic_inc_return(&gl->gl_revokes) == 1) - gfs2_glock_hold(gl); set_bit(GLF_LFLUSH, &gl->gl_flags); list_add(&bd->bd_list, &sdp->sd_log_revokes); } @@ -1131,6 +1131,10 @@ int gfs2_logd(void *data) while (!kthread_should_stop()) { + if (gfs2_withdrawn(sdp)) { + msleep_interruptible(HZ); + continue; + } /* Check for errors writing to the journal */ if (sdp->sd_log_error) { gfs2_lm(sdp, @@ -1139,6 +1143,7 @@ int gfs2_logd(void *data) "prevent further damage.\n", sdp->sd_fsname, sdp->sd_log_error); gfs2_withdraw(sdp); + continue; } did_flush = false; diff --git a/fs/gfs2/lops.c b/fs/gfs2/lops.c index 5ea96757afc4..48b54ec1c793 100644 --- a/fs/gfs2/lops.c +++ b/fs/gfs2/lops.c @@ -263,7 +263,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno, struct super_block *sb = sdp->sd_vfs; struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES); - bio->bi_iter.bi_sector = blkno << (sb->s_blocksize_bits - 9); + bio->bi_iter.bi_sector = blkno << sdp->sd_fsb2bb_shift; bio_set_dev(bio, sb->s_bdev); bio->bi_end_io = end_io; bio->bi_private = sdp; @@ -509,7 +509,7 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, unsigned int bsize = sdp->sd_sb.sb_bsize, off; unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift; unsigned int shift = PAGE_SHIFT - bsize_shift; - unsigned int readahead_blocks = BIO_MAX_PAGES << shift; + unsigned int max_bio_size = 2 * 1024 * 1024; struct gfs2_journal_extent *je; int sz, ret = 0; struct bio *bio = NULL; @@ -537,12 +537,17 @@ int gfs2_find_jhead(struct gfs2_jdesc *jd, struct gfs2_log_header_host *head, off = 0; } - if (!bio || (bio_chained && !off)) { + if (!bio || (bio_chained && !off) || + bio->bi_iter.bi_size >= max_bio_size) { /* start new bio */ } else { - sz = bio_add_page(bio, page, bsize, off); - if (sz == bsize) - goto block_added; + sector_t sector = dblock << sdp->sd_fsb2bb_shift; + + if (bio_end_sector(bio) == sector) { + sz = bio_add_page(bio, page, bsize, off); + if (sz == bsize) + goto block_added; + } if (off) { unsigned int blocks = (PAGE_SIZE - off) >> bsize_shift; @@ -568,7 +573,7 @@ block_added: off += bsize; if (off == PAGE_SIZE) page = NULL; - if (blocks_submitted < blocks_read + readahead_blocks) { + if (blocks_submitted < 2 * max_bio_size >> bsize_shift) { /* Keep at least one bio in flight */ continue; } diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 4b72abcf83b2..9856cc2e0795 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -252,7 +252,7 @@ int gfs2_meta_read(struct gfs2_glock *gl, u64 blkno, int flags, int num = 0; if (unlikely(gfs2_withdrawn(sdp)) && - (!sdp->sd_jdesc || (blkno != sdp->sd_jdesc->jd_no_addr))) { + (!sdp->sd_jdesc || gl != sdp->sd_jinode_gl)) { *bhp = NULL; return -EIO; } diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index cc0c4b5800be..8259fef3f986 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -1051,8 +1051,7 @@ int gfs2_quota_lock(struct gfs2_inode *ip, kuid_t uid, kgid_t gid) u32 x; int error = 0; - if (capable(CAP_SYS_RESOURCE) || - sdp->sd_args.ar_quota != GFS2_QUOTA_ON) + if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) return 0; error = gfs2_quota_hold(ip, uid, gid); @@ -1125,7 +1124,7 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) int found; if (!test_and_clear_bit(GIF_QD_LOCKED, &ip->i_flags)) - goto out; + return; for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { struct gfs2_quota_data *qd; @@ -1162,7 +1161,6 @@ void gfs2_quota_unlock(struct gfs2_inode *ip) qd_unlock(qda[x]); } -out: gfs2_quota_unhold(ip); } @@ -1210,9 +1208,6 @@ int gfs2_quota_check(struct gfs2_inode *ip, kuid_t uid, kgid_t gid, if (!test_bit(GIF_QD_LOCKED, &ip->i_flags)) return 0; - if (sdp->sd_args.ar_quota != GFS2_QUOTA_ON) - return 0; - for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { qd = ip->i_qadata->qa_qd[x]; @@ -1270,7 +1265,9 @@ void gfs2_quota_change(struct gfs2_inode *ip, s64 change, if (ip->i_diskflags & GFS2_DIF_SYSTEM) return; - BUG_ON(ip->i_qadata->qa_ref <= 0); + if (gfs2_assert_withdraw(sdp, ip->i_qadata && + ip->i_qadata->qa_ref > 0)) + return; for (x = 0; x < ip->i_qadata->qa_qd_num; x++) { qd = ip->i_qadata->qa_qd[x]; diff --git a/fs/gfs2/quota.h b/fs/gfs2/quota.h index 7f9ca8ef40fc..21ada332d555 100644 --- a/fs/gfs2/quota.h +++ b/fs/gfs2/quota.h @@ -44,7 +44,8 @@ static inline int gfs2_quota_lock_check(struct gfs2_inode *ip, int ret; ap->allowed = UINT_MAX; /* Assume we are permitted a whole lot */ - if (sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) + if (capable(CAP_SYS_RESOURCE) || + sdp->sd_args.ar_quota == GFS2_QUOTA_OFF) return 0; ret = gfs2_quota_lock(ip, NO_UID_QUOTA_CHANGE, NO_GID_QUOTA_CHANGE); if (ret) diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 37fc41632aa2..956fced0a8ec 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1404,7 +1404,6 @@ out: if (ip->i_qadata) gfs2_assert_warn(sdp, ip->i_qadata->qa_ref == 0); gfs2_rs_delete(ip, NULL); - gfs2_qa_put(ip); gfs2_ordered_del_inode(ip); clear_inode(inode); gfs2_dir_hash_inval(ip); diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 9b64d40ab379..aa087a5675af 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -119,6 +119,12 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) if (!sb_rdonly(sdp->sd_vfs)) ret = gfs2_make_fs_ro(sdp); + if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */ + if (!ret) + ret = -EIO; + clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); + goto skip_recovery; + } /* * Drop the glock for our journal so another node can recover it. */ @@ -159,10 +165,6 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) wait_on_bit(&gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE); } - if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */ - clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags); - goto skip_recovery; - } /* * Dequeue the "live" glock, but keep a reference so it's never freed. */ diff --git a/fs/io_uring.c b/fs/io_uring.c index 0b91b0631173..979d9f977409 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -680,8 +680,6 @@ struct io_op_def { unsigned needs_mm : 1; /* needs req->file assigned */ unsigned needs_file : 1; - /* needs req->file assigned IFF fd is >= 0 */ - unsigned fd_non_neg : 1; /* hash wq insertion if file is a regular file */ unsigned hash_reg_file : 1; /* unbound wq insertion if file is a non-regular file */ @@ -784,8 +782,6 @@ static const struct io_op_def io_op_defs[] = { .needs_file = 1, }, [IORING_OP_OPENAT] = { - .needs_file = 1, - .fd_non_neg = 1, .file_table = 1, .needs_fs = 1, }, @@ -799,8 +795,6 @@ static const struct io_op_def io_op_defs[] = { }, [IORING_OP_STATX] = { .needs_mm = 1, - .needs_file = 1, - .fd_non_neg = 1, .needs_fs = 1, .file_table = 1, }, @@ -837,8 +831,6 @@ static const struct io_op_def io_op_defs[] = { .buffer_select = 1, }, [IORING_OP_OPENAT2] = { - .needs_file = 1, - .fd_non_neg = 1, .file_table = 1, .needs_fs = 1, }, @@ -5368,15 +5360,6 @@ static void io_wq_submit_work(struct io_wq_work **workptr) io_steal_work(req, workptr); } -static int io_req_needs_file(struct io_kiocb *req, int fd) -{ - if (!io_op_defs[req->opcode].needs_file) - return 0; - if ((fd == -1 || fd == AT_FDCWD) && io_op_defs[req->opcode].fd_non_neg) - return 0; - return 1; -} - static inline struct file *io_file_from_index(struct io_ring_ctx *ctx, int index) { @@ -5414,14 +5397,11 @@ static int io_file_get(struct io_submit_state *state, struct io_kiocb *req, } static int io_req_set_file(struct io_submit_state *state, struct io_kiocb *req, - int fd, unsigned int flags) + int fd) { bool fixed; - if (!io_req_needs_file(req, fd)) - return 0; - - fixed = (flags & IOSQE_FIXED_FILE); + fixed = (req->flags & REQ_F_FIXED_FILE) != 0; if (unlikely(!fixed && req->needs_fixed_file)) return -EBADF; @@ -5798,7 +5778,7 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, struct io_submit_state *state, bool async) { unsigned int sqe_flags; - int id, fd; + int id; /* * All io need record the previous position, if LINK vs DARIN, @@ -5850,8 +5830,10 @@ static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req, IOSQE_ASYNC | IOSQE_FIXED_FILE | IOSQE_BUFFER_SELECT | IOSQE_IO_LINK); - fd = READ_ONCE(sqe->fd); - return io_req_set_file(state, req, fd, sqe_flags); + if (!io_op_defs[req->opcode].needs_file) + return 0; + + return io_req_set_file(state, req, READ_ONCE(sqe->fd)); } static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr, @@ -7360,11 +7342,9 @@ static int io_uring_release(struct inode *inode, struct file *file) static void io_uring_cancel_files(struct io_ring_ctx *ctx, struct files_struct *files) { - struct io_kiocb *req; - DEFINE_WAIT(wait); - while (!list_empty_careful(&ctx->inflight_list)) { - struct io_kiocb *cancel_req = NULL; + struct io_kiocb *cancel_req = NULL, *req; + DEFINE_WAIT(wait); spin_lock_irq(&ctx->inflight_lock); list_for_each_entry(req, &ctx->inflight_list, inflight_entry) { @@ -7404,6 +7384,7 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, */ if (refcount_sub_and_test(2, &cancel_req->refs)) { io_put_req(cancel_req); + finish_wait(&ctx->inflight_wait, &wait); continue; } } @@ -7411,8 +7392,8 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx, io_wq_cancel_work(ctx->io_wq, &cancel_req->work); io_put_req(cancel_req); schedule(); + finish_wait(&ctx->inflight_wait, &wait); } - finish_wait(&ctx->inflight_wait, &wait); } static int io_uring_flush(struct file *file, void *data) @@ -7761,7 +7742,8 @@ err: return ret; } -static int io_uring_create(unsigned entries, struct io_uring_params *p) +static int io_uring_create(unsigned entries, struct io_uring_params *p, + struct io_uring_params __user *params) { struct user_struct *user = NULL; struct io_ring_ctx *ctx; @@ -7853,6 +7835,14 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) p->cq_off.overflow = offsetof(struct io_rings, cq_overflow); p->cq_off.cqes = offsetof(struct io_rings, cqes); + p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | + IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | + IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL; + + if (copy_to_user(params, p, sizeof(*p))) { + ret = -EFAULT; + goto err; + } /* * Install ring fd as the very last thing, so we don't risk someone * having closed it before we finish setup @@ -7861,9 +7851,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p) if (ret < 0) goto err; - p->features = IORING_FEAT_SINGLE_MMAP | IORING_FEAT_NODROP | - IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS | - IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL; trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags); return ret; err: @@ -7879,7 +7866,6 @@ err: static long io_uring_setup(u32 entries, struct io_uring_params __user *params) { struct io_uring_params p; - long ret; int i; if (copy_from_user(&p, params, sizeof(p))) @@ -7894,14 +7880,7 @@ static long io_uring_setup(u32 entries, struct io_uring_params __user *params) IORING_SETUP_CLAMP | IORING_SETUP_ATTACH_WQ)) return -EINVAL; - ret = io_uring_create(entries, &p); - if (ret < 0) - return ret; - - if (copy_to_user(params, &p, sizeof(p))) - return -EFAULT; - - return ret; + return io_uring_create(entries, &p, params); } SYSCALL_DEFINE2(io_uring_setup, u32, entries, diff --git a/fs/splice.c b/fs/splice.c index 4735defc46ee..fd0a1e7e5959 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -1118,6 +1118,10 @@ long do_splice(struct file *in, loff_t __user *off_in, loff_t offset; long ret; + if (unlikely(!(in->f_mode & FMODE_READ) || + !(out->f_mode & FMODE_WRITE))) + return -EBADF; + ipipe = get_pipe_info(in); opipe = get_pipe_info(out); @@ -1125,12 +1129,6 @@ long do_splice(struct file *in, loff_t __user *off_in, if (off_in || off_out) return -ESPIPE; - if (!(in->f_mode & FMODE_READ)) - return -EBADF; - - if (!(out->f_mode & FMODE_WRITE)) - return -EBADF; - /* Splicing to self would be fun, but... */ if (ipipe == opipe) return -EINVAL; @@ -1153,9 +1151,6 @@ long do_splice(struct file *in, loff_t __user *off_in, offset = out->f_pos; } - if (unlikely(!(out->f_mode & FMODE_WRITE))) - return -EBADF; - if (unlikely(out->f_flags & O_APPEND)) return -EINVAL; @@ -1440,15 +1435,11 @@ SYSCALL_DEFINE6(splice, int, fd_in, loff_t __user *, off_in, error = -EBADF; in = fdget(fd_in); if (in.file) { - if (in.file->f_mode & FMODE_READ) { - out = fdget(fd_out); - if (out.file) { - if (out.file->f_mode & FMODE_WRITE) - error = do_splice(in.file, off_in, - out.file, off_out, - len, flags); - fdput(out); - } + out = fdget(fd_out); + if (out.file) { + error = do_splice(in.file, off_in, out.file, off_out, + len, flags); + fdput(out); } fdput(in); } @@ -1770,6 +1761,10 @@ static long do_tee(struct file *in, struct file *out, size_t len, struct pipe_inode_info *opipe = get_pipe_info(out); int ret = -EINVAL; + if (unlikely(!(in->f_mode & FMODE_READ) || + !(out->f_mode & FMODE_WRITE))) + return -EBADF; + /* * Duplicate the contents of ipipe to opipe without actually * copying the data. @@ -1795,7 +1790,7 @@ static long do_tee(struct file *in, struct file *out, size_t len, SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) { - struct fd in; + struct fd in, out; int error; if (unlikely(flags & ~SPLICE_F_ALL)) @@ -1807,14 +1802,10 @@ SYSCALL_DEFINE4(tee, int, fdin, int, fdout, size_t, len, unsigned int, flags) error = -EBADF; in = fdget(fdin); if (in.file) { - if (in.file->f_mode & FMODE_READ) { - struct fd out = fdget(fdout); - if (out.file) { - if (out.file->f_mode & FMODE_WRITE) - error = do_tee(in.file, out.file, - len, flags); - fdput(out); - } + out = fdget(fdout); + if (out.file) { + error = do_tee(in.file, out.file, len, flags); + fdput(out); } fdput(in); } diff --git a/fs/vboxsf/super.c b/fs/vboxsf/super.c index 675e26989376..8fe03b4a0d2b 100644 --- a/fs/vboxsf/super.c +++ b/fs/vboxsf/super.c @@ -164,7 +164,7 @@ static int vboxsf_fill_super(struct super_block *sb, struct fs_context *fc) goto fail_free; } - err = super_setup_bdi_name(sb, "vboxsf-%s.%d", fc->source, sbi->bdi_id); + err = super_setup_bdi_name(sb, "vboxsf-%d", sbi->bdi_id); if (err) goto fail_free; |