summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/Locking9
-rw-r--r--Documentation/filesystems/vfs.txt15
-rw-r--r--arch/x86/entry/syscalls/syscall_32.tbl1
-rw-r--r--arch/x86/entry/syscalls/syscall_64.tbl1
-rw-r--r--crypto/af_alg.c14
-rw-r--r--crypto/algif_aead.c4
-rw-r--r--crypto/algif_hash.c2
-rw-r--r--crypto/algif_rng.c1
-rw-r--r--crypto/algif_skcipher.c4
-rw-r--r--drivers/char/random.c29
-rw-r--r--drivers/isdn/mISDN/socket.c3
-rw-r--r--drivers/net/ppp/pppoe.c2
-rw-r--r--drivers/net/ppp/pptp.c1
-rw-r--r--drivers/staging/comedi/drivers/serial2002.c4
-rw-r--r--drivers/staging/ipx/af_ipx.c2
-rw-r--r--drivers/vfio/virqfd.c2
-rw-r--r--drivers/vhost/vhost.c2
-rw-r--r--fs/aio.c731
-rw-r--r--fs/eventfd.c15
-rw-r--r--fs/eventpoll.c5
-rw-r--r--fs/pipe.c22
-rw-r--r--fs/select.c85
-rw-r--r--fs/timerfd.c22
-rw-r--r--include/crypto/if_alg.h3
-rw-r--r--include/linux/aio.h2
-rw-r--r--include/linux/compat.h7
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/net.h1
-rw-r--r--include/linux/poll.h14
-rw-r--r--include/linux/skbuff.h3
-rw-r--r--include/linux/syscalls.h6
-rw-r--r--include/net/bluetooth/bluetooth.h2
-rw-r--r--include/net/busy_poll.h15
-rw-r--r--include/net/iucv/af_iucv.h2
-rw-r--r--include/net/sctp/sctp.h3
-rw-r--r--include/net/sock.h2
-rw-r--r--include/net/tcp.h3
-rw-r--r--include/net/udp.h2
-rw-r--r--include/uapi/asm-generic/unistd.h4
-rw-r--r--include/uapi/linux/aio_abi.h12
-rw-r--r--include/uapi/linux/types.h4
-rw-r--r--kernel/sys_ni.c2
-rw-r--r--mm/memcontrol.c2
-rw-r--r--net/9p/trans_fd.c18
-rw-r--r--net/appletalk/ddp.c2
-rw-r--r--net/atm/common.c11
-rw-r--r--net/atm/common.h2
-rw-r--r--net/atm/pvc.c2
-rw-r--r--net/atm/svc.c2
-rw-r--r--net/ax25/af_ax25.c2
-rw-r--r--net/bluetooth/af_bluetooth.c7
-rw-r--r--net/bluetooth/bnep/sock.c1
-rw-r--r--net/bluetooth/cmtp/sock.c1
-rw-r--r--net/bluetooth/hci_sock.c2
-rw-r--r--net/bluetooth/hidp/sock.c1
-rw-r--r--net/bluetooth/l2cap_sock.c2
-rw-r--r--net/bluetooth/rfcomm/sock.c2
-rw-r--r--net/bluetooth/sco.c2
-rw-r--r--net/caif/caif_socket.c12
-rw-r--r--net/can/bcm.c2
-rw-r--r--net/can/raw.c2
-rw-r--r--net/core/datagram.c13
-rw-r--r--net/core/sock.c6
-rw-r--r--net/dccp/dccp.h3
-rw-r--r--net/dccp/ipv4.c2
-rw-r--r--net/dccp/ipv6.c2
-rw-r--r--net/dccp/proto.c13
-rw-r--r--net/decnet/af_decnet.c6
-rw-r--r--net/ieee802154/socket.c4
-rw-r--r--net/ipv4/af_inet.c8
-rw-r--r--net/ipv4/tcp.c23
-rw-r--r--net/ipv4/udp.c10
-rw-r--r--net/ipv6/af_inet6.c4
-rw-r--r--net/ipv6/raw.c4
-rw-r--r--net/iucv/af_iucv.c7
-rw-r--r--net/kcm/kcmsock.c10
-rw-r--r--net/key/af_key.c2
-rw-r--r--net/l2tp/l2tp_ip.c2
-rw-r--r--net/l2tp/l2tp_ip6.c2
-rw-r--r--net/l2tp/l2tp_ppp.c2
-rw-r--r--net/llc/af_llc.c2
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/netrom/af_netrom.c2
-rw-r--r--net/nfc/llcp_sock.c9
-rw-r--r--net/nfc/rawsock.c4
-rw-r--r--net/packet/af_packet.c9
-rw-r--r--net/phonet/socket.c9
-rw-r--r--net/qrtr/qrtr.c2
-rw-r--r--net/rose/af_rose.c2
-rw-r--r--net/rxrpc/af_rxrpc.c10
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/protocol.c2
-rw-r--r--net/sctp/socket.c4
-rw-r--r--net/socket.c55
-rw-r--r--net/tipc/socket.c14
-rw-r--r--net/unix/af_unix.c30
-rw-r--r--net/vmw_vsock/af_vsock.c19
-rw-r--r--net/x25/af_x25.c2
-rw-r--r--virt/kvm/eventfd.c2
99 files changed, 849 insertions, 595 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 15853d522941..2c391338c675 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -440,7 +440,9 @@ prototypes:
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
- unsigned int (*poll) (struct file *, struct poll_table_struct *);
+ __poll_t (*poll) (struct file *, struct poll_table_struct *);
+ struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
+ __poll_t (*poll_mask) (struct file *, __poll_t);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
@@ -471,7 +473,7 @@ prototypes:
};
locking rules:
- All may block.
+ All except for ->poll_mask may block.
->llseek() locking has moved from llseek to the individual llseek
implementations. If your fs is not using generic_file_llseek, you
@@ -503,6 +505,9 @@ in sys_read() and friends.
the lease within the individual filesystem to record the result of the
operation
+->poll_mask can be called with or without the waitqueue lock for the waitqueue
+returned from ->get_poll_head.
+
--------------------------- dquot_operations -------------------------------
prototypes:
int (*write_dquot) (struct dquot *);
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 5fd325df59e2..829a7b7857a4 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -856,7 +856,9 @@ struct file_operations {
ssize_t (*read_iter) (struct kiocb *, struct iov_iter *);
ssize_t (*write_iter) (struct kiocb *, struct iov_iter *);
int (*iterate) (struct file *, struct dir_context *);
- unsigned int (*poll) (struct file *, struct poll_table_struct *);
+ __poll_t (*poll) (struct file *, struct poll_table_struct *);
+ struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
+ __poll_t (*poll_mask) (struct file *, __poll_t);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
@@ -901,6 +903,17 @@ otherwise noted.
activity on this file and (optionally) go to sleep until there
is activity. Called by the select(2) and poll(2) system calls
+ get_poll_head: Returns the struct wait_queue_head that callers can
+ wait on. Callers need to check the returned events using ->poll_mask
+ once woken. Can return NULL to indicate polling is not supported,
+ or any error code using the ERR_PTR convention to indicate that a
+ grave error occured and ->poll_mask shall not be called.
+
+ poll_mask: return the mask of EPOLL* values describing the file descriptor
+ state. Called either before going to sleep on the waitqueue returned by
+ get_poll_head, or after it has been woken. If ->get_poll_head and
+ ->poll_mask are implemented ->poll does not need to be implement.
+
unlocked_ioctl: called by the ioctl(2) system call.
compat_ioctl: called by the ioctl(2) system call when 32 bit system calls
diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl
index d6b27dab1b30..14a2f996e543 100644
--- a/arch/x86/entry/syscalls/syscall_32.tbl
+++ b/arch/x86/entry/syscalls/syscall_32.tbl
@@ -396,3 +396,4 @@
382 i386 pkey_free sys_pkey_free __ia32_sys_pkey_free
383 i386 statx sys_statx __ia32_sys_statx
384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl
+385 i386 io_pgetevents sys_io_pgetevents __ia32_compat_sys_io_pgetevents
diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl
index 4dfe42666d0c..cd36232ab62f 100644
--- a/arch/x86/entry/syscalls/syscall_64.tbl
+++ b/arch/x86/entry/syscalls/syscall_64.tbl
@@ -341,6 +341,7 @@
330 common pkey_alloc __x64_sys_pkey_alloc
331 common pkey_free __x64_sys_pkey_free
332 common statx __x64_sys_statx
+333 common io_pgetevents __x64_sys_io_pgetevents
#
# x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 7846c0c20cfe..89ed613c017e 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -347,7 +347,6 @@ static const struct proto_ops alg_proto_ops = {
.sendpage = sock_no_sendpage,
.sendmsg = sock_no_sendmsg,
.recvmsg = sock_no_recvmsg,
- .poll = sock_no_poll,
.bind = alg_bind,
.release = af_alg_release,
@@ -1061,19 +1060,12 @@ void af_alg_async_cb(struct crypto_async_request *_req, int err)
}
EXPORT_SYMBOL_GPL(af_alg_async_cb);
-/**
- * af_alg_poll - poll system call handler
- */
-__poll_t af_alg_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+__poll_t af_alg_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
struct alg_sock *ask = alg_sk(sk);
struct af_alg_ctx *ctx = ask->private;
- __poll_t mask;
-
- sock_poll_wait(file, sk_sleep(sk), wait);
- mask = 0;
+ __poll_t mask = 0;
if (!ctx->more || ctx->used)
mask |= EPOLLIN | EPOLLRDNORM;
@@ -1083,7 +1075,7 @@ __poll_t af_alg_poll(struct file *file, struct socket *sock,
return mask;
}
-EXPORT_SYMBOL_GPL(af_alg_poll);
+EXPORT_SYMBOL_GPL(af_alg_poll_mask);
/**
* af_alg_alloc_areq - allocate struct af_alg_async_req
diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c
index 4b07edd5a9ff..330cf9f2b767 100644
--- a/crypto/algif_aead.c
+++ b/crypto/algif_aead.c
@@ -375,7 +375,7 @@ static struct proto_ops algif_aead_ops = {
.sendmsg = aead_sendmsg,
.sendpage = af_alg_sendpage,
.recvmsg = aead_recvmsg,
- .poll = af_alg_poll,
+ .poll_mask = af_alg_poll_mask,
};
static int aead_check_key(struct socket *sock)
@@ -471,7 +471,7 @@ static struct proto_ops algif_aead_ops_nokey = {
.sendmsg = aead_sendmsg_nokey,
.sendpage = aead_sendpage_nokey,
.recvmsg = aead_recvmsg_nokey,
- .poll = af_alg_poll,
+ .poll_mask = af_alg_poll_mask,
};
static void *aead_bind(const char *name, u32 type, u32 mask)
diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c
index 6c9b1927a520..bfcf595fd8f9 100644
--- a/crypto/algif_hash.c
+++ b/crypto/algif_hash.c
@@ -288,7 +288,6 @@ static struct proto_ops algif_hash_ops = {
.mmap = sock_no_mmap,
.bind = sock_no_bind,
.setsockopt = sock_no_setsockopt,
- .poll = sock_no_poll,
.release = af_alg_release,
.sendmsg = hash_sendmsg,
@@ -396,7 +395,6 @@ static struct proto_ops algif_hash_ops_nokey = {
.mmap = sock_no_mmap,
.bind = sock_no_bind,
.setsockopt = sock_no_setsockopt,
- .poll = sock_no_poll,
.release = af_alg_release,
.sendmsg = hash_sendmsg_nokey,
diff --git a/crypto/algif_rng.c b/crypto/algif_rng.c
index 150c2b6480ed..22df3799a17b 100644
--- a/crypto/algif_rng.c
+++ b/crypto/algif_rng.c
@@ -106,7 +106,6 @@ static struct proto_ops algif_rng_ops = {
.bind = sock_no_bind,
.accept = sock_no_accept,
.setsockopt = sock_no_setsockopt,
- .poll = sock_no_poll,
.sendmsg = sock_no_sendmsg,
.sendpage = sock_no_sendpage,
diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c
index c4e885df4564..15cf3c5222e0 100644
--- a/crypto/algif_skcipher.c
+++ b/crypto/algif_skcipher.c
@@ -205,7 +205,7 @@ static struct proto_ops algif_skcipher_ops = {
.sendmsg = skcipher_sendmsg,
.sendpage = af_alg_sendpage,
.recvmsg = skcipher_recvmsg,
- .poll = af_alg_poll,
+ .poll_mask = af_alg_poll_mask,
};
static int skcipher_check_key(struct socket *sock)
@@ -301,7 +301,7 @@ static struct proto_ops algif_skcipher_ops_nokey = {
.sendmsg = skcipher_sendmsg_nokey,
.sendpage = skcipher_sendpage_nokey,
.recvmsg = skcipher_recvmsg_nokey,
- .poll = af_alg_poll,
+ .poll_mask = af_alg_poll_mask,
};
static void *skcipher_bind(const char *name, u32 type, u32 mask)
diff --git a/drivers/char/random.c b/drivers/char/random.c
index cd888d4ee605..a8fb0020ba5c 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -402,8 +402,7 @@ static struct poolinfo {
/*
* Static global variables
*/
-static DECLARE_WAIT_QUEUE_HEAD(random_read_wait);
-static DECLARE_WAIT_QUEUE_HEAD(random_write_wait);
+static DECLARE_WAIT_QUEUE_HEAD(random_wait);
static struct fasync_struct *fasync;
static DEFINE_SPINLOCK(random_ready_list_lock);
@@ -722,8 +721,8 @@ retry:
/* should we wake readers? */
if (entropy_bits >= random_read_wakeup_bits &&
- wq_has_sleeper(&random_read_wait)) {
- wake_up_interruptible(&random_read_wait);
+ wq_has_sleeper(&random_wait)) {
+ wake_up_interruptible_poll(&random_wait, POLLIN);
kill_fasync(&fasync, SIGIO, POLL_IN);
}
/* If the input pool is getting full, send some
@@ -1397,7 +1396,7 @@ retry:
trace_debit_entropy(r->name, 8 * ibytes);
if (ibytes &&
(r->entropy_count >> ENTROPY_SHIFT) < random_write_wakeup_bits) {
- wake_up_interruptible(&random_write_wait);
+ wake_up_interruptible_poll(&random_wait, POLLOUT);
kill_fasync(&fasync, SIGIO, POLL_OUT);
}
@@ -1839,7 +1838,7 @@ _random_read(int nonblock, char __user *buf, size_t nbytes)
if (nonblock)
return -EAGAIN;
- wait_event_interruptible(random_read_wait,
+ wait_event_interruptible(random_wait,
ENTROPY_BITS(&input_pool) >=
random_read_wakeup_bits);
if (signal_pending(current))
@@ -1876,14 +1875,17 @@ urandom_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
return ret;
}
+static struct wait_queue_head *
+random_get_poll_head(struct file *file, __poll_t events)
+{
+ return &random_wait;
+}
+
static __poll_t
-random_poll(struct file *file, poll_table * wait)
+random_poll_mask(struct file *file, __poll_t events)
{
- __poll_t mask;
+ __poll_t mask = 0;
- poll_wait(file, &random_read_wait, wait);
- poll_wait(file, &random_write_wait, wait);
- mask = 0;
if (ENTROPY_BITS(&input_pool) >= random_read_wakeup_bits)
mask |= EPOLLIN | EPOLLRDNORM;
if (ENTROPY_BITS(&input_pool) < random_write_wakeup_bits)
@@ -1990,7 +1992,8 @@ static int random_fasync(int fd, struct file *filp, int on)
const struct file_operations random_fops = {
.read = random_read,
.write = random_write,
- .poll = random_poll,
+ .get_poll_head = random_get_poll_head,
+ .poll_mask = random_poll_mask,
.unlocked_ioctl = random_ioctl,
.fasync = random_fasync,
.llseek = noop_llseek,
@@ -2323,7 +2326,7 @@ void add_hwgenerator_randomness(const char *buffer, size_t count,
* We'll be woken up again once below random_write_wakeup_thresh,
* or when the calling thread is about to terminate.
*/
- wait_event_interruptible(random_write_wait, kthread_should_stop() ||
+ wait_event_interruptible(random_wait, kthread_should_stop() ||
ENTROPY_BITS(&input_pool) <= random_write_wakeup_bits);
mix_pool_bytes(poolp, buffer, count);
credit_entropy_bits(poolp, entropy);
diff --git a/drivers/isdn/mISDN/socket.c b/drivers/isdn/mISDN/socket.c
index 1f8f489b4167..98f90aadd141 100644
--- a/drivers/isdn/mISDN/socket.c
+++ b/drivers/isdn/mISDN/socket.c
@@ -588,7 +588,7 @@ static const struct proto_ops data_sock_ops = {
.getname = data_sock_getname,
.sendmsg = mISDN_sock_sendmsg,
.recvmsg = mISDN_sock_recvmsg,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = data_sock_setsockopt,
@@ -745,7 +745,6 @@ static const struct proto_ops base_sock_ops = {
.getname = sock_no_getname,
.sendmsg = sock_no_sendmsg,
.recvmsg = sock_no_recvmsg,
- .poll = sock_no_poll,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
diff --git a/drivers/net/ppp/pppoe.c b/drivers/net/ppp/pppoe.c
index ce61231e96ea..de51e8f70f44 100644
--- a/drivers/net/ppp/pppoe.c
+++ b/drivers/net/ppp/pppoe.c
@@ -1107,7 +1107,7 @@ static const struct proto_ops pppoe_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = pppoe_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c
index c4267ecefd85..157b67c1bf8e 100644
--- a/drivers/net/ppp/pptp.c
+++ b/drivers/net/ppp/pptp.c
@@ -624,7 +624,6 @@ static const struct proto_ops pptp_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = pptp_getname,
- .poll = sock_no_poll,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
diff --git a/drivers/staging/comedi/drivers/serial2002.c b/drivers/staging/comedi/drivers/serial2002.c
index b3f3b4a201af..5471b2212a62 100644
--- a/drivers/staging/comedi/drivers/serial2002.c
+++ b/drivers/staging/comedi/drivers/serial2002.c
@@ -113,7 +113,7 @@ static void serial2002_tty_read_poll_wait(struct file *f, int timeout)
long elapsed;
__poll_t mask;
- mask = f->f_op->poll(f, &table.pt);
+ mask = vfs_poll(f, &table.pt);
if (mask & (EPOLLRDNORM | EPOLLRDBAND | EPOLLIN |
EPOLLHUP | EPOLLERR)) {
break;
@@ -136,7 +136,7 @@ static int serial2002_tty_read(struct file *f, int timeout)
result = -1;
if (!IS_ERR(f)) {
- if (f->f_op->poll) {
+ if (file_can_poll(f)) {
serial2002_tty_read_poll_wait(f, timeout);
if (kernel_read(f, &ch, 1, &pos) == 1)
diff --git a/drivers/staging/ipx/af_ipx.c b/drivers/staging/ipx/af_ipx.c
index 5703dd176787..208b5c161631 100644
--- a/drivers/staging/ipx/af_ipx.c
+++ b/drivers/staging/ipx/af_ipx.c
@@ -1965,7 +1965,7 @@ static const struct proto_ops ipx_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = ipx_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = ipx_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ipx_compat_ioctl,
diff --git a/drivers/vfio/virqfd.c b/drivers/vfio/virqfd.c
index 085700f1be10..2a1be859ee71 100644
--- a/drivers/vfio/virqfd.c
+++ b/drivers/vfio/virqfd.c
@@ -166,7 +166,7 @@ int vfio_virqfd_enable(void *opaque,
init_waitqueue_func_entry(&virqfd->wait, virqfd_wakeup);
init_poll_funcptr(&virqfd->pt, virqfd_ptable_queue_proc);
- events = irqfd.file->f_op->poll(irqfd.file, &virqfd->pt);
+ events = vfs_poll(irqfd.file, &virqfd->pt);
/*
* Check if there was an event already pending on the eventfd
diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c
index f0be5f35ab28..895eaa25807c 100644
--- a/drivers/vhost/vhost.c
+++ b/drivers/vhost/vhost.c
@@ -208,7 +208,7 @@ int vhost_poll_start(struct vhost_poll *poll, struct file *file)
if (poll->wqh)
return 0;
- mask = file->f_op->poll(file, &poll->table);
+ mask = vfs_poll(file, &poll->table);
if (mask)
vhost_poll_wakeup(&poll->wait, 0, 0, poll_to_key(mask));
if (mask & EPOLLERR) {
diff --git a/fs/aio.c b/fs/aio.c
index 49f53516eef0..b850e92ee0d5 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -5,6 +5,7 @@
* Implements an efficient asynchronous io interface.
*
* Copyright 2000, 2001, 2002 Red Hat, Inc. All Rights Reserved.
+ * Copyright 2018 Christoph Hellwig.
*
* See ../COPYING for licensing terms.
*/
@@ -46,6 +47,8 @@
#include "internal.h"
+#define KIOCB_KEY 0
+
#define AIO_RING_MAGIC 0xa10a10a1
#define AIO_RING_COMPAT_FEATURES 1
#define AIO_RING_INCOMPAT_FEATURES 0
@@ -156,21 +159,29 @@ struct kioctx {
unsigned id;
};
-/*
- * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
- * cancelled or completed (this makes a certain amount of sense because
- * successful cancellation - io_cancel() - does deliver the completion to
- * userspace).
- *
- * And since most things don't implement kiocb cancellation and we'd really like
- * kiocb completion to be lockless when possible, we use ki_cancel to
- * synchronize cancellation and completion - we only set it to KIOCB_CANCELLED
- * with xchg() or cmpxchg(), see batch_complete_aio() and kiocb_cancel().
- */
-#define KIOCB_CANCELLED ((void *) (~0ULL))
+struct fsync_iocb {
+ struct work_struct work;
+ struct file *file;
+ bool datasync;
+};
+
+struct poll_iocb {
+ struct file *file;
+ __poll_t events;
+ struct wait_queue_head *head;
+
+ union {
+ struct wait_queue_entry wait;
+ struct work_struct work;
+ };
+};
struct aio_kiocb {
- struct kiocb common;
+ union {
+ struct kiocb rw;
+ struct fsync_iocb fsync;
+ struct poll_iocb poll;
+ };
struct kioctx *ki_ctx;
kiocb_cancel_fn *ki_cancel;
@@ -264,9 +275,6 @@ static int __init aio_setup(void)
kiocb_cachep = KMEM_CACHE(aio_kiocb, SLAB_HWCACHE_ALIGN|SLAB_PANIC);
kioctx_cachep = KMEM_CACHE(kioctx,SLAB_HWCACHE_ALIGN|SLAB_PANIC);
-
- pr_debug("sizeof(struct page) = %zu\n", sizeof(struct page));
-
return 0;
}
__initcall(aio_setup);
@@ -552,42 +560,20 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
{
- struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, common);
+ struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
struct kioctx *ctx = req->ki_ctx;
unsigned long flags;
- spin_lock_irqsave(&ctx->ctx_lock, flags);
-
- if (!req->ki_list.next)
- list_add(&req->ki_list, &ctx->active_reqs);
+ if (WARN_ON_ONCE(!list_empty(&req->ki_list)))
+ return;
+ spin_lock_irqsave(&ctx->ctx_lock, flags);
+ list_add_tail(&req->ki_list, &ctx->active_reqs);
req->ki_cancel = cancel;
-
spin_unlock_irqrestore(&ctx->ctx_lock, flags);
}
EXPORT_SYMBOL(kiocb_set_cancel_fn);
-static int kiocb_cancel(struct aio_kiocb *kiocb)
-{
- kiocb_cancel_fn *old, *cancel;
-
- /*
- * Don't want to set kiocb->ki_cancel = KIOCB_CANCELLED unless it
- * actually has a cancel function, hence the cmpxchg()
- */
-
- cancel = READ_ONCE(kiocb->ki_cancel);
- do {
- if (!cancel || cancel == KIOCB_CANCELLED)
- return -EINVAL;
-
- old = cancel;
- cancel = cmpxchg(&kiocb->ki_cancel, old, KIOCB_CANCELLED);
- } while (cancel != old);
-
- return cancel(&kiocb->common);
-}
-
/*
* free_ioctx() should be RCU delayed to synchronize against the RCU
* protected lookup_ioctx() and also needs process context to call
@@ -634,7 +620,7 @@ static void free_ioctx_users(struct percpu_ref *ref)
while (!list_empty(&ctx->active_reqs)) {
req = list_first_entry(&ctx->active_reqs,
struct aio_kiocb, ki_list);
- kiocb_cancel(req);
+ req->ki_cancel(&req->rw);
list_del_init(&req->ki_list);
}
@@ -1041,7 +1027,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx)
goto out_put;
percpu_ref_get(&ctx->reqs);
-
+ INIT_LIST_HEAD(&req->ki_list);
req->ki_ctx = ctx;
return req;
out_put:
@@ -1049,15 +1035,6 @@ out_put:
return NULL;
}
-static void kiocb_free(struct aio_kiocb *req)
-{
- if (req->common.ki_filp)
- fput(req->common.ki_filp);
- if (req->ki_eventfd != NULL)
- eventfd_ctx_put(req->ki_eventfd);
- kmem_cache_free(kiocb_cachep, req);
-}
-
static struct kioctx *lookup_ioctx(unsigned long ctx_id)
{
struct aio_ring __user *ring = (void __user *)ctx_id;
@@ -1088,44 +1065,14 @@ out:
/* aio_complete
* Called when the io request on the given iocb is complete.
*/
-static void aio_complete(struct kiocb *kiocb, long res, long res2)
+static void aio_complete(struct aio_kiocb *iocb, long res, long res2)
{
- struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, common);
struct kioctx *ctx = iocb->ki_ctx;
struct aio_ring *ring;
struct io_event *ev_page, *event;
unsigned tail, pos, head;
unsigned long flags;
- if (kiocb->ki_flags & IOCB_WRITE) {
- struct file *file = kiocb->ki_filp;
-
- /*
- * Tell lockdep we inherited freeze protection from submission
- * thread.
- */
- if (S_ISREG(file_inode(file)->i_mode))
- __sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE);
- file_end_write(file);
- }
-
- /*
- * Special case handling for sync iocbs:
- * - events go directly into the iocb for fast handling
- * - the sync task with the iocb in its stack holds the single iocb
- * ref, no other paths have a way to get another ref
- * - the sync task helpfully left a reference to itself in the iocb
- */
- BUG_ON(is_sync_kiocb(kiocb));
-
- if (iocb->ki_list.next) {
- unsigned long flags;
-
- spin_lock_irqsave(&ctx->ctx_lock, flags);
- list_del(&iocb->ki_list);
- spin_unlock_irqrestore(&ctx->ctx_lock, flags);
- }
-
/*
* Add a completion event to the ring buffer. Must be done holding
* ctx->completion_lock to prevent other code from messing with the tail
@@ -1179,11 +1126,12 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2)
* eventfd. The eventfd_signal() function is safe to be called
* from IRQ context.
*/
- if (iocb->ki_eventfd != NULL)
+ if (iocb->ki_eventfd) {
eventfd_signal(iocb->ki_eventfd, 1);
+ eventfd_ctx_put(iocb->ki_eventfd);
+ }
- /* everything turned out well, dispose of the aiocb. */
- kiocb_free(iocb);
+ kmem_cache_free(kiocb_cachep, iocb);
/*
* We have to order our ring_info tail store above and test
@@ -1249,14 +1197,13 @@ static long aio_read_events_ring(struct kioctx *ctx,
if (head == tail)
break;
- avail = min(avail, nr - ret);
- avail = min_t(long, avail, AIO_EVENTS_PER_PAGE -
- ((head + AIO_EVENTS_OFFSET) % AIO_EVENTS_PER_PAGE));
-
pos = head + AIO_EVENTS_OFFSET;
page = ctx->ring_pages[pos / AIO_EVENTS_PER_PAGE];
pos %= AIO_EVENTS_PER_PAGE;
+ avail = min(avail, nr - ret);
+ avail = min_t(long, avail, AIO_EVENTS_PER_PAGE - pos);
+
ev = kmap(page);
copy_ret = copy_to_user(event + ret, ev + pos,
sizeof(*ev) * avail);
@@ -1327,10 +1274,6 @@ static long read_events(struct kioctx *ctx, long min_nr, long nr,
wait_event_interruptible_hrtimeout(ctx->wait,
aio_read_events(ctx, min_nr, nr, event, &ret),
until);
-
- if (!ret && signal_pending(current))
- ret = -EINTR;
-
return ret;
}
@@ -1446,6 +1389,58 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx)
return -EINVAL;
}
+static void aio_remove_iocb(struct aio_kiocb *iocb)
+{
+ struct kioctx *ctx = iocb->ki_ctx;
+ unsigned long flags;
+
+ spin_lock_irqsave(&ctx->ctx_lock, flags);
+ list_del(&iocb->ki_list);
+ spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+}
+
+static void aio_complete_rw(struct kiocb *kiocb, long res, long res2)
+{
+ struct aio_kiocb *iocb = container_of(kiocb, struct aio_kiocb, rw);
+
+ if (!list_empty_careful(&iocb->ki_list))
+ aio_remove_iocb(iocb);
+
+ if (kiocb->ki_flags & IOCB_WRITE) {
+ struct inode *inode = file_inode(kiocb->ki_filp);
+
+ /*
+ * Tell lockdep we inherited freeze protection from submission
+ * thread.
+ */
+ if (S_ISREG(inode->i_mode))
+ __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
+ file_end_write(kiocb->ki_filp);
+ }
+
+ fput(kiocb->ki_filp);
+ aio_complete(iocb, res, res2);
+}
+
+static int aio_prep_rw(struct kiocb *req, struct iocb *iocb)
+{
+ int ret;
+
+ req->ki_filp = fget(iocb->aio_fildes);
+ if (unlikely(!req->ki_filp))
+ return -EBADF;
+ req->ki_complete = aio_complete_rw;
+ req->ki_pos = iocb->aio_offset;
+ req->ki_flags = iocb_flags(req->ki_filp);
+ if (iocb->aio_flags & IOCB_FLAG_RESFD)
+ req->ki_flags |= IOCB_EVENTFD;
+ req->ki_hint = file_write_hint(req->ki_filp);
+ ret = kiocb_set_rw_flags(req, iocb->aio_rw_flags);
+ if (unlikely(ret))
+ fput(req->ki_filp);
+ return ret;
+}
+
static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
bool vectored, bool compat, struct iov_iter *iter)
{
@@ -1465,11 +1460,11 @@ static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec,
return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
}
-static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret)
+static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
{
switch (ret) {
case -EIOCBQUEUED:
- return ret;
+ break;
case -ERESTARTSYS:
case -ERESTARTNOINTR:
case -ERESTARTNOHAND:
@@ -1481,85 +1476,270 @@ static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret)
ret = -EINTR;
/*FALLTHRU*/
default:
- aio_complete(req, ret, 0);
- return 0;
+ aio_complete_rw(req, ret, 0);
}
}
static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored,
bool compat)
{
- struct file *file = req->ki_filp;
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
+ struct file *file;
ssize_t ret;
+ ret = aio_prep_rw(req, iocb);
+ if (ret)
+ return ret;
+ file = req->ki_filp;
+
+ ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_READ)))
- return -EBADF;
+ goto out_fput;
+ ret = -EINVAL;
if (unlikely(!file->f_op->read_iter))
- return -EINVAL;
+ goto out_fput;
ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter);
if (ret)
- return ret;
+ goto out_fput;
ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret)
- ret = aio_ret(req, call_read_iter(file, req, &iter));
+ aio_rw_done(req, call_read_iter(file, req, &iter));
kfree(iovec);
+out_fput:
+ if (unlikely(ret))
+ fput(file);
return ret;
}
static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored,
bool compat)
{
- struct file *file = req->ki_filp;
struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
struct iov_iter iter;
+ struct file *file;
ssize_t ret;
+ ret = aio_prep_rw(req, iocb);
+ if (ret)
+ return ret;
+ file = req->ki_filp;
+
+ ret = -EBADF;
if (unlikely(!(file->f_mode & FMODE_WRITE)))
- return -EBADF;
+ goto out_fput;
+ ret = -EINVAL;
if (unlikely(!file->f_op->write_iter))
- return -EINVAL;
+ goto out_fput;
ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter);
if (ret)
- return ret;
+ goto out_fput;
ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter));
if (!ret) {
- req->ki_flags |= IOCB_WRITE;
- file_start_write(file);
- ret = aio_ret(req, call_write_iter(file, req, &iter));
/*
- * We release freeze protection in aio_complete(). Fool lockdep
- * by telling it the lock got released so that it doesn't
- * complain about held lock when we return to userspace.
+ * Open-code file_start_write here to grab freeze protection,
+ * which will be released by another thread in
+ * aio_complete_rw(). Fool lockdep by telling it the lock got
+ * released so that it doesn't complain about the held lock when
+ * we return to userspace.
*/
- if (S_ISREG(file_inode(file)->i_mode))
+ if (S_ISREG(file_inode(file)->i_mode)) {
+ __sb_start_write(file_inode(file)->i_sb, SB_FREEZE_WRITE, true);
__sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE);
+ }
+ req->ki_flags |= IOCB_WRITE;
+ aio_rw_done(req, call_write_iter(file, req, &iter));
}
kfree(iovec);
+out_fput:
+ if (unlikely(ret))
+ fput(file);
return ret;
}
+static void aio_fsync_work(struct work_struct *work)
+{
+ struct fsync_iocb *req = container_of(work, struct fsync_iocb, work);
+ int ret;
+
+ ret = vfs_fsync(req->file, req->datasync);
+ fput(req->file);
+ aio_complete(container_of(req, struct aio_kiocb, fsync), ret, 0);
+}
+
+static int aio_fsync(struct fsync_iocb *req, struct iocb *iocb, bool datasync)
+{
+ if (unlikely(iocb->aio_buf || iocb->aio_offset || iocb->aio_nbytes ||
+ iocb->aio_rw_flags))
+ return -EINVAL;
+ req->file = fget(iocb->aio_fildes);
+ if (unlikely(!req->file))
+ return -EBADF;
+ if (unlikely(!req->file->f_op->fsync)) {
+ fput(req->file);
+ return -EINVAL;
+ }
+
+ req->datasync = datasync;
+ INIT_WORK(&req->work, aio_fsync_work);
+ schedule_work(&req->work);
+ return 0;
+}
+
+/* need to use list_del_init so we can check if item was present */
+static inline bool __aio_poll_remove(struct poll_iocb *req)
+{
+ if (list_empty(&req->wait.entry))
+ return false;
+ list_del_init(&req->wait.entry);
+ return true;
+}
+
+static inline void __aio_poll_complete(struct aio_kiocb *iocb, __poll_t mask)
+{
+ fput(iocb->poll.file);
+ aio_complete(iocb, mangle_poll(mask), 0);
+}
+
+static void aio_poll_work(struct work_struct *work)
+{
+ struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, poll.work);
+
+ if (!list_empty_careful(&iocb->ki_list))
+ aio_remove_iocb(iocb);
+ __aio_poll_complete(iocb, iocb->poll.events);
+}
+
+static int aio_poll_cancel(struct kiocb *iocb)
+{
+ struct aio_kiocb *aiocb = container_of(iocb, struct aio_kiocb, rw);
+ struct poll_iocb *req = &aiocb->poll;
+ struct wait_queue_head *head = req->head;
+ bool found = false;
+
+ spin_lock(&head->lock);
+ found = __aio_poll_remove(req);
+ spin_unlock(&head->lock);
+
+ if (found) {
+ req->events = 0;
+ INIT_WORK(&req->work, aio_poll_work);
+ schedule_work(&req->work);
+ }
+ return 0;
+}
+
+static int aio_poll_wake(struct wait_queue_entry *wait, unsigned mode, int sync,
+ void *key)
+{
+ struct poll_iocb *req = container_of(wait, struct poll_iocb, wait);
+ struct aio_kiocb *iocb = container_of(req, struct aio_kiocb, poll);
+ struct file *file = req->file;
+ __poll_t mask = key_to_poll(key);
+
+ assert_spin_locked(&req->head->lock);
+
+ /* for instances that support it check for an event match first: */
+ if (mask && !(mask & req->events))
+ return 0;
+
+ mask = file->f_op->poll_mask(file, req->events);
+ if (!mask)
+ return 0;
+
+ __aio_poll_remove(req);
+
+ /*
+ * Try completing without a context switch if we can acquire ctx_lock
+ * without spinning. Otherwise we need to defer to a workqueue to
+ * avoid a deadlock due to the lock order.
+ */
+ if (spin_trylock(&iocb->ki_ctx->ctx_lock)) {
+ list_del_init(&iocb->ki_list);
+ spin_unlock(&iocb->ki_ctx->ctx_lock);
+
+ __aio_poll_complete(iocb, mask);
+ } else {
+ req->events = mask;
+ INIT_WORK(&req->work, aio_poll_work);
+ schedule_work(&req->work);
+ }
+
+ return 1;
+}
+
+static ssize_t aio_poll(struct aio_kiocb *aiocb, struct iocb *iocb)
+{
+ struct kioctx *ctx = aiocb->ki_ctx;
+ struct poll_iocb *req = &aiocb->poll;
+ __poll_t mask;
+
+ /* reject any unknown events outside the normal event mask. */
+ if ((u16)iocb->aio_buf != iocb->aio_buf)
+ return -EINVAL;
+ /* reject fields that are not defined for poll */
+ if (iocb->aio_offset || iocb->aio_nbytes || iocb->aio_rw_flags)
+ return -EINVAL;
+
+ req->events = demangle_poll(iocb->aio_buf) | EPOLLERR | EPOLLHUP;
+ req->file = fget(iocb->aio_fildes);
+ if (unlikely(!req->file))
+ return -EBADF;
+ if (!file_has_poll_mask(req->file))
+ goto out_fail;
+
+ req->head = req->file->f_op->get_poll_head(req->file, req->events);
+ if (!req->head)
+ goto out_fail;
+ if (IS_ERR(req->head)) {
+ mask = EPOLLERR;
+ goto done;
+ }
+
+ init_waitqueue_func_entry(&req->wait, aio_poll_wake);
+ aiocb->ki_cancel = aio_poll_cancel;
+
+ spin_lock_irq(&ctx->ctx_lock);
+ spin_lock(&req->head->lock);
+ mask = req->file->f_op->poll_mask(req->file, req->events);
+ if (!mask) {
+ __add_wait_queue(req->head, &req->wait);
+ list_add_tail(&aiocb->ki_list, &ctx->active_reqs);
+ }
+ spin_unlock(&req->head->lock);
+ spin_unlock_irq(&ctx->ctx_lock);
+done:
+ if (mask)
+ __aio_poll_complete(aiocb, mask);
+ return 0;
+out_fail:
+ fput(req->file);
+ return -EINVAL; /* same as no support for IOCB_CMD_POLL */
+}
+
static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
- struct iocb *iocb, bool compat)
+ bool compat)
{
struct aio_kiocb *req;
- struct file *file;
+ struct iocb iocb;
ssize_t ret;
+ if (unlikely(copy_from_user(&iocb, user_iocb, sizeof(iocb))))
+ return -EFAULT;
+
/* enforce forwards compatibility on users */
- if (unlikely(iocb->aio_reserved2)) {
+ if (unlikely(iocb.aio_reserved2)) {
pr_debug("EINVAL: reserve field set\n");
return -EINVAL;
}
/* prevent overflows */
if (unlikely(
- (iocb->aio_buf != (unsigned long)iocb->aio_buf) ||
- (iocb->aio_nbytes != (size_t)iocb->aio_nbytes) ||
- ((ssize_t)iocb->aio_nbytes < 0)
+ (iocb.aio_buf != (unsigned long)iocb.aio_buf) ||
+ (iocb.aio_nbytes != (size_t)iocb.aio_nbytes) ||
+ ((ssize_t)iocb.aio_nbytes < 0)
)) {
pr_debug("EINVAL: overflow check\n");
return -EINVAL;
@@ -1569,37 +1749,19 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
if (unlikely(!req))
return -EAGAIN;
- req->common.ki_filp = file = fget(iocb->aio_fildes);
- if (unlikely(!req->common.ki_filp)) {
- ret = -EBADF;
- goto out_put_req;
- }
- req->common.ki_pos = iocb->aio_offset;
- req->common.ki_complete = aio_complete;
- req->common.ki_flags = iocb_flags(req->common.ki_filp);
- req->common.ki_hint = file_write_hint(file);
-
- if (iocb->aio_flags & IOCB_FLAG_RESFD) {
+ if (iocb.aio_flags & IOCB_FLAG_RESFD) {
/*
* If the IOCB_FLAG_RESFD flag of aio_flags is set, get an
* instance of the file* now. The file descriptor must be
* an eventfd() fd, and will be signaled for each completed
* event using the eventfd_signal() function.
*/
- req->ki_eventfd = eventfd_ctx_fdget((int) iocb->aio_resfd);
+ req->ki_eventfd = eventfd_ctx_fdget((int) iocb.aio_resfd);
if (IS_ERR(req->ki_eventfd)) {
ret = PTR_ERR(req->ki_eventfd);
req->ki_eventfd = NULL;
goto out_put_req;
}
-
- req->common.ki_flags |= IOCB_EVENTFD;
- }
-
- ret = kiocb_set_rw_flags(&req->common, iocb->aio_rw_flags);
- if (unlikely(ret)) {
- pr_debug("EINVAL: aio_rw_flags\n");
- goto out_put_req;
}
ret = put_user(KIOCB_KEY, &user_iocb->aio_key);
@@ -1609,41 +1771,67 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
}
req->ki_user_iocb = user_iocb;
- req->ki_user_data = iocb->aio_data;
+ req->ki_user_data = iocb.aio_data;
- get_file(file);
- switch (iocb->aio_lio_opcode) {
+ switch (iocb.aio_lio_opcode) {
case IOCB_CMD_PREAD:
- ret = aio_read(&req->common, iocb, false, compat);
+ ret = aio_read(&req->rw, &iocb, false, compat);
break;
case IOCB_CMD_PWRITE:
- ret = aio_write(&req->common, iocb, false, compat);
+ ret = aio_write(&req->rw, &iocb, false, compat);
break;
case IOCB_CMD_PREADV:
- ret = aio_read(&req->common, iocb, true, compat);
+ ret = aio_read(&req->rw, &iocb, true, compat);
break;
case IOCB_CMD_PWRITEV:
- ret = aio_write(&req->common, iocb, true, compat);
+ ret = aio_write(&req->rw, &iocb, true, compat);
+ break;
+ case IOCB_CMD_FSYNC:
+ ret = aio_fsync(&req->fsync, &iocb, false);
+ break;
+ case IOCB_CMD_FDSYNC:
+ ret = aio_fsync(&req->fsync, &iocb, true);
+ break;
+ case IOCB_CMD_POLL:
+ ret = aio_poll(req, &iocb);
break;
default:
- pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode);
+ pr_debug("invalid aio operation %d\n", iocb.aio_lio_opcode);
ret = -EINVAL;
break;
}
- fput(file);
- if (ret && ret != -EIOCBQUEUED)
+ /*
+ * If ret is 0, we'd either done aio_complete() ourselves or have
+ * arranged for that to be done asynchronously. Anything non-zero
+ * means that we need to destroy req ourselves.
+ */
+ if (ret)
goto out_put_req;
return 0;
out_put_req:
put_reqs_available(ctx, 1);
percpu_ref_put(&ctx->reqs);
- kiocb_free(req);
+ if (req->ki_eventfd)
+ eventfd_ctx_put(req->ki_eventfd);
+ kmem_cache_free(kiocb_cachep, req);
return ret;
}
-static long do_io_submit(aio_context_t ctx_id, long nr,
- struct iocb __user *__user *iocbpp, bool compat)
+/* sys_io_submit:
+ * Queue the nr iocbs pointed to by iocbpp for processing. Returns
+ * the number of iocbs queued. May return -EINVAL if the aio_context
+ * specified by ctx_id is invalid, if nr is < 0, if the iocb at
+ * *iocbpp[0] is not properly initialized, if the operation specified
+ * is invalid for the file descriptor in the iocb. May fail with
+ * -EFAULT if any of the data structures point to invalid data. May
+ * fail with -EBADF if the file descriptor specified in the first
+ * iocb is invalid. May fail with -EAGAIN if insufficient resources
+ * are available to queue any iocbs. Will return 0 if nr is 0. Will
+ * fail with -ENOSYS if not implemented.
+ */
+SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
+ struct iocb __user * __user *, iocbpp)
{
struct kioctx *ctx;
long ret = 0;
@@ -1653,39 +1841,25 @@ static long do_io_submit(aio_context_t ctx_id, long nr,
if (unlikely(nr < 0))
return -EINVAL;
- if (unlikely(nr > LONG_MAX/sizeof(*iocbpp)))
- nr = LONG_MAX/sizeof(*iocbpp);
-
- if (unlikely(!access_ok(VERIFY_READ, iocbpp, (nr*sizeof(*iocbpp)))))
- return -EFAULT;
-
ctx = lookup_ioctx(ctx_id);
if (unlikely(!ctx)) {
pr_debug("EINVAL: invalid context id\n");
return -EINVAL;
}
- blk_start_plug(&plug);
+ if (nr > ctx->nr_events)
+ nr = ctx->nr_events;
- /*
- * AKPM: should this return a partial result if some of the IOs were
- * successfully submitted?
- */
- for (i=0; i<nr; i++) {
+ blk_start_plug(&plug);
+ for (i = 0; i < nr; i++) {
struct iocb __user *user_iocb;
- struct iocb tmp;
- if (unlikely(__get_user(user_iocb, iocbpp + i))) {
+ if (unlikely(get_user(user_iocb, iocbpp + i))) {
ret = -EFAULT;
break;
}
- if (unlikely(copy_from_user(&tmp, user_iocb, sizeof(tmp)))) {
- ret = -EFAULT;
- break;
- }
-
- ret = io_submit_one(ctx, user_iocb, &tmp, compat);
+ ret = io_submit_one(ctx, user_iocb, false);
if (ret)
break;
}
@@ -1695,59 +1869,44 @@ static long do_io_submit(aio_context_t ctx_id, long nr,
return i ? i : ret;
}
-/* sys_io_submit:
- * Queue the nr iocbs pointed to by iocbpp for processing. Returns
- * the number of iocbs queued. May return -EINVAL if the aio_context
- * specified by ctx_id is invalid, if nr is < 0, if the iocb at
- * *iocbpp[0] is not properly initialized, if the operation specified
- * is invalid for the file descriptor in the iocb. May fail with
- * -EFAULT if any of the data structures point to invalid data. May
- * fail with -EBADF if the file descriptor specified in the first
- * iocb is invalid. May fail with -EAGAIN if insufficient resources
- * are available to queue any iocbs. Will return 0 if nr is 0. Will
- * fail with -ENOSYS if not implemented.
- */
-SYSCALL_DEFINE3(io_submit, aio_context_t, ctx_id, long, nr,
- struct iocb __user * __user *, iocbpp)
-{
- return do_io_submit(ctx_id, nr, iocbpp, 0);
-}
-
#ifdef CONFIG_COMPAT
-static inline long
-copy_iocb(long nr, u32 __user *ptr32, struct iocb __user * __user *ptr64)
+COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
+ int, nr, compat_uptr_t __user *, iocbpp)
{
- compat_uptr_t uptr;
- int i;
+ struct kioctx *ctx;
+ long ret = 0;
+ int i = 0;
+ struct blk_plug plug;
- for (i = 0; i < nr; ++i) {
- if (get_user(uptr, ptr32 + i))
- return -EFAULT;
- if (put_user(compat_ptr(uptr), ptr64 + i))
- return -EFAULT;
+ if (unlikely(nr < 0))
+ return -EINVAL;
+
+ ctx = lookup_ioctx(ctx_id);
+ if (unlikely(!ctx)) {
+ pr_debug("EINVAL: invalid context id\n");
+ return -EINVAL;
}
- return 0;
-}
-#define MAX_AIO_SUBMITS (PAGE_SIZE/sizeof(struct iocb *))
+ if (nr > ctx->nr_events)
+ nr = ctx->nr_events;
-COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
- int, nr, u32 __user *, iocb)
-{
- struct iocb __user * __user *iocb64;
- long ret;
+ blk_start_plug(&plug);
+ for (i = 0; i < nr; i++) {
+ compat_uptr_t user_iocb;
- if (unlikely(nr < 0))
- return -EINVAL;
+ if (unlikely(get_user(user_iocb, iocbpp + i))) {
+ ret = -EFAULT;
+ break;
+ }
- if (nr > MAX_AIO_SUBMITS)
- nr = MAX_AIO_SUBMITS;
+ ret = io_submit_one(ctx, compat_ptr(user_iocb), true);
+ if (ret)
+ break;
+ }
+ blk_finish_plug(&plug);
- iocb64 = compat_alloc_user_space(nr * sizeof(*iocb64));
- ret = copy_iocb(nr, iocb, iocb64);
- if (!ret)
- ret = do_io_submit(ctx_id, nr, iocb64, 1);
- return ret;
+ percpu_ref_put(&ctx->users);
+ return i ? i : ret;
}
#endif
@@ -1755,15 +1914,12 @@ COMPAT_SYSCALL_DEFINE3(io_submit, compat_aio_context_t, ctx_id,
* Finds a given iocb for cancellation.
*/
static struct aio_kiocb *
-lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb, u32 key)
+lookup_kiocb(struct kioctx *ctx, struct iocb __user *iocb)
{
struct aio_kiocb *kiocb;
assert_spin_locked(&ctx->ctx_lock);
- if (key != KIOCB_KEY)
- return NULL;
-
/* TODO: use a hash or array, this sucks. */
list_for_each_entry(kiocb, &ctx->active_reqs, ki_list) {
if (kiocb->ki_user_iocb == iocb)
@@ -1787,25 +1943,24 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
{
struct kioctx *ctx;
struct aio_kiocb *kiocb;
+ int ret = -EINVAL;
u32 key;
- int ret;
- ret = get_user(key, &iocb->aio_key);
- if (unlikely(ret))
+ if (unlikely(get_user(key, &iocb->aio_key)))
return -EFAULT;
+ if (unlikely(key != KIOCB_KEY))
+ return -EINVAL;
ctx = lookup_ioctx(ctx_id);
if (unlikely(!ctx))
return -EINVAL;
spin_lock_irq(&ctx->ctx_lock);
-
- kiocb = lookup_kiocb(ctx, iocb, key);
- if (kiocb)
- ret = kiocb_cancel(kiocb);
- else
- ret = -EINVAL;
-
+ kiocb = lookup_kiocb(ctx, iocb);
+ if (kiocb) {
+ ret = kiocb->ki_cancel(&kiocb->rw);
+ list_del_init(&kiocb->ki_list);
+ }
spin_unlock_irq(&ctx->ctx_lock);
if (!ret) {
@@ -1860,13 +2015,60 @@ SYSCALL_DEFINE5(io_getevents, aio_context_t, ctx_id,
struct timespec __user *, timeout)
{
struct timespec64 ts;
+ int ret;
+
+ if (timeout && unlikely(get_timespec64(&ts, timeout)))
+ return -EFAULT;
+
+ ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
+ if (!ret && signal_pending(current))
+ ret = -EINTR;
+ return ret;
+}
+
+SYSCALL_DEFINE6(io_pgetevents,
+ aio_context_t, ctx_id,
+ long, min_nr,
+ long, nr,
+ struct io_event __user *, events,
+ struct timespec __user *, timeout,
+ const struct __aio_sigset __user *, usig)
+{
+ struct __aio_sigset ksig = { NULL, };
+ sigset_t ksigmask, sigsaved;
+ struct timespec64 ts;
+ int ret;
+
+ if (timeout && unlikely(get_timespec64(&ts, timeout)))
+ return -EFAULT;
+
+ if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
+ return -EFAULT;
- if (timeout) {
- if (unlikely(get_timespec64(&ts, timeout)))
+ if (ksig.sigmask) {
+ if (ksig.sigsetsize != sizeof(sigset_t))
+ return -EINVAL;
+ if (copy_from_user(&ksigmask, ksig.sigmask, sizeof(ksigmask)))
return -EFAULT;
+ sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+ sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+ }
+
+ ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
+ if (signal_pending(current)) {
+ if (ksig.sigmask) {
+ current->saved_sigmask = sigsaved;
+ set_restore_sigmask();
+ }
+
+ if (!ret)
+ ret = -ERESTARTNOHAND;
+ } else {
+ if (ksig.sigmask)
+ sigprocmask(SIG_SETMASK, &sigsaved, NULL);
}
- return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &ts : NULL);
+ return ret;
}
#ifdef CONFIG_COMPAT
@@ -1877,13 +2079,64 @@ COMPAT_SYSCALL_DEFINE5(io_getevents, compat_aio_context_t, ctx_id,
struct compat_timespec __user *, timeout)
{
struct timespec64 t;
+ int ret;
+
+ if (timeout && compat_get_timespec64(&t, timeout))
+ return -EFAULT;
+
+ ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
+ if (!ret && signal_pending(current))
+ ret = -EINTR;
+ return ret;
+}
+
+
+struct __compat_aio_sigset {
+ compat_sigset_t __user *sigmask;
+ compat_size_t sigsetsize;
+};
+
+COMPAT_SYSCALL_DEFINE6(io_pgetevents,
+ compat_aio_context_t, ctx_id,
+ compat_long_t, min_nr,
+ compat_long_t, nr,
+ struct io_event __user *, events,
+ struct compat_timespec __user *, timeout,
+ const struct __compat_aio_sigset __user *, usig)
+{
+ struct __compat_aio_sigset ksig = { NULL, };
+ sigset_t ksigmask, sigsaved;
+ struct timespec64 t;
+ int ret;
+
+ if (timeout && compat_get_timespec64(&t, timeout))
+ return -EFAULT;
+
+ if (usig && copy_from_user(&ksig, usig, sizeof(ksig)))
+ return -EFAULT;
- if (timeout) {
- if (compat_get_timespec64(&t, timeout))
+ if (ksig.sigmask) {
+ if (ksig.sigsetsize != sizeof(compat_sigset_t))
+ return -EINVAL;
+ if (get_compat_sigset(&ksigmask, ksig.sigmask))
return -EFAULT;
+ sigdelsetmask(&ksigmask, sigmask(SIGKILL) | sigmask(SIGSTOP));
+ sigprocmask(SIG_SETMASK, &ksigmask, &sigsaved);
+ }
+ ret = do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
+ if (signal_pending(current)) {
+ if (ksig.sigmask) {
+ current->saved_sigmask = sigsaved;
+ set_restore_sigmask();
+ }
+ if (!ret)
+ ret = -ERESTARTNOHAND;
+ } else {
+ if (ksig.sigmask)
+ sigprocmask(SIG_SETMASK, &sigsaved, NULL);
}
- return do_io_getevents(ctx_id, min_nr, nr, events, timeout ? &t : NULL);
+ return ret;
}
#endif
diff --git a/fs/eventfd.c b/fs/eventfd.c
index 08d3bd602f73..61c9514da5e9 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -101,14 +101,20 @@ static int eventfd_release(struct inode *inode, struct file *file)
return 0;
}
-static __poll_t eventfd_poll(struct file *file, poll_table *wait)
+static struct wait_queue_head *
+eventfd_get_poll_head(struct file *file, __poll_t events)
+{
+ struct eventfd_ctx *ctx = file->private_data;
+
+ return &ctx->wqh;
+}
+
+static __poll_t eventfd_poll_mask(struct file *file, __poll_t eventmask)
{
struct eventfd_ctx *ctx = file->private_data;
__poll_t events = 0;
u64 count;
- poll_wait(file, &ctx->wqh, wait);
-
/*
* All writes to ctx->count occur within ctx->wqh.lock. This read
* can be done outside ctx->wqh.lock because we know that poll_wait
@@ -305,7 +311,8 @@ static const struct file_operations eventfd_fops = {
.show_fdinfo = eventfd_show_fdinfo,
#endif
.release = eventfd_release,
- .poll = eventfd_poll,
+ .get_poll_head = eventfd_get_poll_head,
+ .poll_mask = eventfd_poll_mask,
.read = eventfd_read,
.write = eventfd_write,
.llseek = noop_llseek,
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 602ca4285b2e..67db22fe99c5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -884,8 +884,7 @@ static __poll_t ep_item_poll(const struct epitem *epi, poll_table *pt,
pt->_key = epi->event.events;
if (!is_file_epoll(epi->ffd.file))
- return epi->ffd.file->f_op->poll(epi->ffd.file, pt) &
- epi->event.events;
+ return vfs_poll(epi->ffd.file, pt) & epi->event.events;
ep = epi->ffd.file->private_data;
poll_wait(epi->ffd.file, &ep->poll_wait, pt);
@@ -2025,7 +2024,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
/* The target file descriptor must support poll */
error = -EPERM;
- if (!tf.file->f_op->poll)
+ if (!file_can_poll(tf.file))
goto error_tgt_fput;
/* Check if EPOLLWAKEUP is allowed */
diff --git a/fs/pipe.c b/fs/pipe.c
index 39d6f431da83..bb0840e234f3 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -509,19 +509,22 @@ static long pipe_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
}
}
-/* No kernel lock held - fine */
-static __poll_t
-pipe_poll(struct file *filp, poll_table *wait)
+static struct wait_queue_head *
+pipe_get_poll_head(struct file *filp, __poll_t events)
{
- __poll_t mask;
struct pipe_inode_info *pipe = filp->private_data;
- int nrbufs;
- poll_wait(filp, &pipe->wait, wait);
+ return &pipe->wait;
+}
+
+/* No kernel lock held - fine */
+static __poll_t pipe_poll_mask(struct file *filp, __poll_t events)
+{
+ struct pipe_inode_info *pipe = filp->private_data;
+ int nrbufs = pipe->nrbufs;
+ __poll_t mask = 0;
/* Reading only -- no need for acquiring the semaphore. */
- nrbufs = pipe->nrbufs;
- mask = 0;
if (filp->f_mode & FMODE_READ) {
mask = (nrbufs > 0) ? EPOLLIN | EPOLLRDNORM : 0;
if (!pipe->writers && filp->f_version != pipe->w_counter)
@@ -1020,7 +1023,8 @@ const struct file_operations pipefifo_fops = {
.llseek = no_llseek,
.read_iter = pipe_read,
.write_iter = pipe_write,
- .poll = pipe_poll,
+ .get_poll_head = pipe_get_poll_head,
+ .poll_mask = pipe_poll_mask,
.unlocked_ioctl = pipe_ioctl,
.release = pipe_release,
.fasync = pipe_fasync,
diff --git a/fs/select.c b/fs/select.c
index ba879c51288f..bc3cc0f98896 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -34,6 +34,29 @@
#include <linux/uaccess.h>
+__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt)
+{
+ if (file->f_op->poll) {
+ return file->f_op->poll(file, pt);
+ } else if (file_has_poll_mask(file)) {
+ unsigned int events = poll_requested_events(pt);
+ struct wait_queue_head *head;
+
+ if (pt && pt->_qproc) {
+ head = file->f_op->get_poll_head(file, events);
+ if (!head)
+ return DEFAULT_POLLMASK;
+ if (IS_ERR(head))
+ return EPOLLERR;
+ pt->_qproc(file, head, pt);
+ }
+
+ return file->f_op->poll_mask(file, events);
+ } else {
+ return DEFAULT_POLLMASK;
+ }
+}
+EXPORT_SYMBOL_GPL(vfs_poll);
/*
* Estimate expected accuracy in ns from a timeval.
@@ -233,7 +256,7 @@ static void __pollwait(struct file *filp, wait_queue_head_t *wait_address,
add_wait_queue(wait_address, &entry->wait);
}
-int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
+static int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
ktime_t *expires, unsigned long slack)
{
int rc = -EINTR;
@@ -258,7 +281,6 @@ int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
return rc;
}
-EXPORT_SYMBOL(poll_schedule_timeout);
/**
* poll_select_set_timeout - helper function to setup the timeout value
@@ -503,14 +525,10 @@ static int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time)
continue;
f = fdget(i);
if (f.file) {
- const struct file_operations *f_op;
- f_op = f.file->f_op;
- mask = DEFAULT_POLLMASK;
- if (f_op->poll) {
- wait_key_set(wait, in, out,
- bit, busy_flag);
- mask = (*f_op->poll)(f.file, wait);
- }
+ wait_key_set(wait, in, out, bit,
+ busy_flag);
+ mask = vfs_poll(f.file, wait);
+
fdput(f);
if ((mask & POLLIN_SET) && (in & bit)) {
res_in |= bit;
@@ -813,34 +831,29 @@ static inline __poll_t do_pollfd(struct pollfd *pollfd, poll_table *pwait,
bool *can_busy_poll,
__poll_t busy_flag)
{
- __poll_t mask;
- int fd;
-
- mask = 0;
- fd = pollfd->fd;
- if (fd >= 0) {
- struct fd f = fdget(fd);
- mask = EPOLLNVAL;
- if (f.file) {
- /* userland u16 ->events contains POLL... bitmap */
- __poll_t filter = demangle_poll(pollfd->events) |
- EPOLLERR | EPOLLHUP;
- mask = DEFAULT_POLLMASK;
- if (f.file->f_op->poll) {
- pwait->_key = filter;
- pwait->_key |= busy_flag;
- mask = f.file->f_op->poll(f.file, pwait);
- if (mask & busy_flag)
- *can_busy_poll = true;
- }
- /* Mask out unneeded events. */
- mask &= filter;
- fdput(f);
- }
- }
+ int fd = pollfd->fd;
+ __poll_t mask = 0, filter;
+ struct fd f;
+
+ if (fd < 0)
+ goto out;
+ mask = EPOLLNVAL;
+ f = fdget(fd);
+ if (!f.file)
+ goto out;
+
+ /* userland u16 ->events contains POLL... bitmap */
+ filter = demangle_poll(pollfd->events) | EPOLLERR | EPOLLHUP;
+ pwait->_key = filter | busy_flag;
+ mask = vfs_poll(f.file, pwait);
+ if (mask & busy_flag)
+ *can_busy_poll = true;
+ mask &= filter; /* Mask out unneeded events. */
+ fdput(f);
+
+out:
/* ... and so does ->revents */
pollfd->revents = mangle_poll(mask);
-
return mask;
}
diff --git a/fs/timerfd.c b/fs/timerfd.c
index cdad49da3ff7..d84a2bee4f82 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -226,21 +226,20 @@ static int timerfd_release(struct inode *inode, struct file *file)
kfree_rcu(ctx, rcu);
return 0;
}
-
-static __poll_t timerfd_poll(struct file *file, poll_table *wait)
+
+static struct wait_queue_head *timerfd_get_poll_head(struct file *file,
+ __poll_t eventmask)
{
struct timerfd_ctx *ctx = file->private_data;
- __poll_t events = 0;
- unsigned long flags;
- poll_wait(file, &ctx->wqh, wait);
+ return &ctx->wqh;
+}
- spin_lock_irqsave(&ctx->wqh.lock, flags);
- if (ctx->ticks)
- events |= EPOLLIN;
- spin_unlock_irqrestore(&ctx->wqh.lock, flags);
+static __poll_t timerfd_poll_mask(struct file *file, __poll_t eventmask)
+{
+ struct timerfd_ctx *ctx = file->private_data;
- return events;
+ return ctx->ticks ? EPOLLIN : 0;
}
static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
@@ -364,7 +363,8 @@ static long timerfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg
static const struct file_operations timerfd_fops = {
.release = timerfd_release,
- .poll = timerfd_poll,
+ .get_poll_head = timerfd_get_poll_head,
+ .poll_mask = timerfd_poll_mask,
.read = timerfd_read,
.llseek = noop_llseek,
.show_fdinfo = timerfd_show,
diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h
index 482461d8931d..cc414db9da0a 100644
--- a/include/crypto/if_alg.h
+++ b/include/crypto/if_alg.h
@@ -245,8 +245,7 @@ ssize_t af_alg_sendpage(struct socket *sock, struct page *page,
int offset, size_t size, int flags);
void af_alg_free_resources(struct af_alg_async_req *areq);
void af_alg_async_cb(struct crypto_async_request *_req, int err);
-__poll_t af_alg_poll(struct file *file, struct socket *sock,
- poll_table *wait);
+__poll_t af_alg_poll_mask(struct socket *sock, __poll_t events);
struct af_alg_async_req *af_alg_alloc_areq(struct sock *sk,
unsigned int areqlen);
int af_alg_get_rsgl(struct sock *sk, struct msghdr *msg, int flags,
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 9d8aabecfe2d..b83e68dd006f 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -8,8 +8,6 @@ struct kioctx;
struct kiocb;
struct mm_struct;
-#define KIOCB_KEY 0
-
typedef int (kiocb_cancel_fn)(struct kiocb *);
/* prototypes */
diff --git a/include/linux/compat.h b/include/linux/compat.h
index 081281ad5772..ad192057b887 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -330,6 +330,7 @@ extern int put_compat_rusage(const struct rusage *,
struct compat_rusage __user *);
struct compat_siginfo;
+struct __compat_aio_sigset;
struct compat_dirent {
u32 d_ino;
@@ -553,6 +554,12 @@ asmlinkage long compat_sys_io_getevents(compat_aio_context_t ctx_id,
compat_long_t nr,
struct io_event __user *events,
struct compat_timespec __user *timeout);
+asmlinkage long compat_sys_io_pgetevents(compat_aio_context_t ctx_id,
+ compat_long_t min_nr,
+ compat_long_t nr,
+ struct io_event __user *events,
+ struct compat_timespec __user *timeout,
+ const struct __compat_aio_sigset __user *usig);
/* fs/cookies.c */
asmlinkage long compat_sys_lookup_dcookie(u32, u32, char __user *, compat_size_t);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 7f86730b67a9..d4c37d371da5 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1711,6 +1711,8 @@ struct file_operations {
int (*iterate) (struct file *, struct dir_context *);
int (*iterate_shared) (struct file *, struct dir_context *);
__poll_t (*poll) (struct file *, struct poll_table_struct *);
+ struct wait_queue_head * (*get_poll_head)(struct file *, __poll_t);
+ __poll_t (*poll_mask) (struct file *, __poll_t);
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
diff --git a/include/linux/net.h b/include/linux/net.h
index 2248a052061d..3fd9d8c16581 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -147,6 +147,7 @@ struct proto_ops {
int (*getname) (struct socket *sock,
struct sockaddr *addr,
int peer);
+ __poll_t (*poll_mask) (struct socket *sock, __poll_t events);
__poll_t (*poll) (struct file *file, struct socket *sock,
struct poll_table_struct *wait);
int (*ioctl) (struct socket *sock, unsigned int cmd,
diff --git a/include/linux/poll.h b/include/linux/poll.h
index f45ebd017eaa..fdf86b4cbc71 100644
--- a/include/linux/poll.h
+++ b/include/linux/poll.h
@@ -74,6 +74,18 @@ static inline void init_poll_funcptr(poll_table *pt, poll_queue_proc qproc)
pt->_key = ~(__poll_t)0; /* all events enabled */
}
+static inline bool file_has_poll_mask(struct file *file)
+{
+ return file->f_op->get_poll_head && file->f_op->poll_mask;
+}
+
+static inline bool file_can_poll(struct file *file)
+{
+ return file->f_op->poll || file_has_poll_mask(file);
+}
+
+__poll_t vfs_poll(struct file *file, struct poll_table_struct *pt);
+
struct poll_table_entry {
struct file *filp;
__poll_t key;
@@ -96,8 +108,6 @@ struct poll_wqueues {
extern void poll_initwait(struct poll_wqueues *pwq);
extern void poll_freewait(struct poll_wqueues *pwq);
-extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state,
- ktime_t *expires, unsigned long slack);
extern u64 select_estimate_accuracy(struct timespec64 *tv);
#define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1)
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 9065477ed255..89198379b39d 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3250,8 +3250,7 @@ struct sk_buff *__skb_recv_datagram(struct sock *sk, unsigned flags,
int *peeked, int *off, int *err);
struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock,
int *err);
-__poll_t datagram_poll(struct file *file, struct socket *sock,
- struct poll_table_struct *wait);
+__poll_t datagram_poll_mask(struct socket *sock, __poll_t events);
int skb_copy_datagram_iter(const struct sk_buff *from, int offset,
struct iov_iter *to, int size);
static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset,
diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
index 70fcda1a9049..811172fcb916 100644
--- a/include/linux/syscalls.h
+++ b/include/linux/syscalls.h
@@ -290,6 +290,12 @@ asmlinkage long sys_io_getevents(aio_context_t ctx_id,
long nr,
struct io_event __user *events,
struct timespec __user *timeout);
+asmlinkage long sys_io_pgetevents(aio_context_t ctx_id,
+ long min_nr,
+ long nr,
+ struct io_event __user *events,
+ struct timespec __user *timeout,
+ const struct __aio_sigset *sig);
/* fs/xattr.c */
asmlinkage long sys_setxattr(const char __user *path, const char __user *name,
diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index ec9d6bc65855..53ce8176c313 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -271,7 +271,7 @@ int bt_sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags);
int bt_sock_stream_recvmsg(struct socket *sock, struct msghdr *msg,
size_t len, int flags);
-__poll_t bt_sock_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events);
int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo);
int bt_sock_wait_ready(struct sock *sk, unsigned long flags);
diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h
index 71c72a939bf8..c5187438af38 100644
--- a/include/net/busy_poll.h
+++ b/include/net/busy_poll.h
@@ -121,6 +121,21 @@ static inline void sk_busy_loop(struct sock *sk, int nonblock)
#endif
}
+static inline void sock_poll_busy_loop(struct socket *sock, __poll_t events)
+{
+ if (sk_can_busy_loop(sock->sk) &&
+ events && (events & POLL_BUSY_LOOP)) {
+ /* once, only if requested by syscall */
+ sk_busy_loop(sock->sk, 1);
+ }
+}
+
+/* if this socket can poll_ll, tell the system call */
+static inline __poll_t sock_poll_busy_flag(struct socket *sock)
+{
+ return sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0;
+}
+
/* used in the NIC receive handler to mark the skb */
static inline void skb_mark_napi_id(struct sk_buff *skb,
struct napi_struct *napi)
diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h
index f4c21b5a1242..b0eaeb02d46d 100644
--- a/include/net/iucv/af_iucv.h
+++ b/include/net/iucv/af_iucv.h
@@ -153,8 +153,6 @@ struct iucv_sock_list {
atomic_t autobind_name;
};
-__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
- poll_table *wait);
void iucv_sock_link(struct iucv_sock_list *l, struct sock *s);
void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s);
void iucv_accept_enqueue(struct sock *parent, struct sock *sk);
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 35498e613ff5..e6d349b2a791 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -109,8 +109,7 @@ int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb);
int sctp_inet_listen(struct socket *sock, int backlog);
void sctp_write_space(struct sock *sk);
void sctp_data_ready(struct sock *sk);
-__poll_t sctp_poll(struct file *file, struct socket *sock,
- poll_table *wait);
+__poll_t sctp_poll_mask(struct socket *sock, __poll_t events);
void sctp_sock_rfree(struct sk_buff *skb);
void sctp_copy_sock(struct sock *newsk, struct sock *sk,
struct sctp_association *asoc);
diff --git a/include/net/sock.h b/include/net/sock.h
index 74d725fdbe0f..4d2e8ad98985 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1591,8 +1591,6 @@ int sock_no_connect(struct socket *, struct sockaddr *, int, int);
int sock_no_socketpair(struct socket *, struct socket *);
int sock_no_accept(struct socket *, struct socket *, int, bool);
int sock_no_getname(struct socket *, struct sockaddr *, int);
-__poll_t sock_no_poll(struct file *, struct socket *,
- struct poll_table_struct *);
int sock_no_ioctl(struct socket *, unsigned int, unsigned long);
int sock_no_listen(struct socket *, int);
int sock_no_shutdown(struct socket *, int);
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 51dc7a26a2fa..f88f8a2cab0d 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -388,8 +388,7 @@ bool tcp_peer_is_proven(struct request_sock *req, struct dst_entry *dst);
void tcp_close(struct sock *sk, long timeout);
void tcp_init_sock(struct sock *sk);
void tcp_init_transfer(struct sock *sk, int bpf_op);
-__poll_t tcp_poll(struct file *file, struct socket *sock,
- struct poll_table_struct *wait);
+__poll_t tcp_poll_mask(struct socket *sock, __poll_t events);
int tcp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen);
int tcp_setsockopt(struct sock *sk, int level, int optname,
diff --git a/include/net/udp.h b/include/net/udp.h
index 621778b80e3d..d8ca3b26964d 100644
--- a/include/net/udp.h
+++ b/include/net/udp.h
@@ -276,7 +276,7 @@ int udp_init_sock(struct sock *sk);
int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
int __udp_disconnect(struct sock *sk, int flags);
int udp_disconnect(struct sock *sk, int flags);
-__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t udp_poll_mask(struct socket *sock, __poll_t events);
struct sk_buff *skb_udp_tunnel_segment(struct sk_buff *skb,
netdev_features_t features,
bool is_ipv6);
diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h
index 8bcb186c6f67..42990676a55e 100644
--- a/include/uapi/asm-generic/unistd.h
+++ b/include/uapi/asm-generic/unistd.h
@@ -732,9 +732,11 @@ __SYSCALL(__NR_pkey_alloc, sys_pkey_alloc)
__SYSCALL(__NR_pkey_free, sys_pkey_free)
#define __NR_statx 291
__SYSCALL(__NR_statx, sys_statx)
+#define __NR_io_pgetevents 292
+__SC_COMP(__NR_io_pgetevents, sys_io_pgetevents, compat_sys_io_pgetevents)
#undef __NR_syscalls
-#define __NR_syscalls 292
+#define __NR_syscalls 293
/*
* 32 bit systems traditionally used different
diff --git a/include/uapi/linux/aio_abi.h b/include/uapi/linux/aio_abi.h
index a04adbc70ddf..ed0185945bb2 100644
--- a/include/uapi/linux/aio_abi.h
+++ b/include/uapi/linux/aio_abi.h
@@ -29,6 +29,7 @@
#include <linux/types.h>
#include <linux/fs.h>
+#include <linux/signal.h>
#include <asm/byteorder.h>
typedef __kernel_ulong_t aio_context_t;
@@ -38,10 +39,8 @@ enum {
IOCB_CMD_PWRITE = 1,
IOCB_CMD_FSYNC = 2,
IOCB_CMD_FDSYNC = 3,
- /* These two are experimental.
- * IOCB_CMD_PREADX = 4,
- * IOCB_CMD_POLL = 5,
- */
+ /* 4 was the experimental IOCB_CMD_PREADX */
+ IOCB_CMD_POLL = 5,
IOCB_CMD_NOOP = 6,
IOCB_CMD_PREADV = 7,
IOCB_CMD_PWRITEV = 8,
@@ -108,5 +107,10 @@ struct iocb {
#undef IFBIG
#undef IFLITTLE
+struct __aio_sigset {
+ sigset_t __user *sigmask;
+ size_t sigsetsize;
+};
+
#endif /* __LINUX__AIO_ABI_H */
diff --git a/include/uapi/linux/types.h b/include/uapi/linux/types.h
index cd4f0b897a48..2fce8b6876e9 100644
--- a/include/uapi/linux/types.h
+++ b/include/uapi/linux/types.h
@@ -49,11 +49,7 @@ typedef __u32 __bitwise __wsum;
#define __aligned_be64 __be64 __attribute__((aligned(8)))
#define __aligned_le64 __le64 __attribute__((aligned(8)))
-#ifdef __CHECK_POLL
typedef unsigned __bitwise __poll_t;
-#else
-typedef unsigned __poll_t;
-#endif
#endif /* __ASSEMBLY__ */
#endif /* _UAPI_LINUX_TYPES_H */
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
index 9791364925dc..183169c2a75b 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
@@ -43,7 +43,9 @@ COND_SYSCALL(io_submit);
COND_SYSCALL_COMPAT(io_submit);
COND_SYSCALL(io_cancel);
COND_SYSCALL(io_getevents);
+COND_SYSCALL(io_pgetevents);
COND_SYSCALL_COMPAT(io_getevents);
+COND_SYSCALL_COMPAT(io_pgetevents);
/* fs/xattr.c */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 2bd3df3d101a..1695f38630f1 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3849,7 +3849,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
if (ret)
goto out_put_css;
- efile.file->f_op->poll(efile.file, &event->pt);
+ vfs_poll(efile.file, &event->pt);
spin_lock(&memcg->event_list_lock);
list_add(&event->list, &memcg->event_list);
diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c
index 848969fe7979..588bf88c3305 100644
--- a/net/9p/trans_fd.c
+++ b/net/9p/trans_fd.c
@@ -231,7 +231,7 @@ static void p9_conn_cancel(struct p9_conn *m, int err)
static __poll_t
p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
{
- __poll_t ret, n;
+ __poll_t ret;
struct p9_trans_fd *ts = NULL;
if (client && client->status == Connected)
@@ -243,19 +243,9 @@ p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err)
return EPOLLERR;
}
- if (!ts->rd->f_op->poll)
- ret = DEFAULT_POLLMASK;
- else
- ret = ts->rd->f_op->poll(ts->rd, pt);
-
- if (ts->rd != ts->wr) {
- if (!ts->wr->f_op->poll)
- n = DEFAULT_POLLMASK;
- else
- n = ts->wr->f_op->poll(ts->wr, pt);
- ret = (ret & ~EPOLLOUT) | (n & ~EPOLLIN);
- }
-
+ ret = vfs_poll(ts->rd, pt);
+ if (ts->rd != ts->wr)
+ ret = (ret & ~EPOLLOUT) | (vfs_poll(ts->wr, pt) & ~EPOLLIN);
return ret;
}
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 9b6bc5abe946..55fdba05d7d9 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1869,7 +1869,7 @@ static const struct proto_ops atalk_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = atalk_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = atalk_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = atalk_compat_ioctl,
diff --git a/net/atm/common.c b/net/atm/common.c
index fc78a0508ae1..1f2af59935db 100644
--- a/net/atm/common.c
+++ b/net/atm/common.c
@@ -648,16 +648,11 @@ out:
return error;
}
-__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t vcc_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
- struct atm_vcc *vcc;
- __poll_t mask;
-
- sock_poll_wait(file, sk_sleep(sk), wait);
- mask = 0;
-
- vcc = ATM_SD(sock);
+ struct atm_vcc *vcc = ATM_SD(sock);
+ __poll_t mask = 0;
/* exceptional events */
if (sk->sk_err)
diff --git a/net/atm/common.h b/net/atm/common.h
index 5850649068bb..526796ad230f 100644
--- a/net/atm/common.h
+++ b/net/atm/common.h
@@ -17,7 +17,7 @@ int vcc_connect(struct socket *sock, int itf, short vpi, int vci);
int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
int flags);
int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len);
-__poll_t vcc_poll(struct file *file, struct socket *sock, poll_table *wait);
+__poll_t vcc_poll_mask(struct socket *sock, __poll_t events);
int vcc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int vcc_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
int vcc_setsockopt(struct socket *sock, int level, int optname,
diff --git a/net/atm/pvc.c b/net/atm/pvc.c
index 2cb10af16afc..9f75092fe778 100644
--- a/net/atm/pvc.c
+++ b/net/atm/pvc.c
@@ -113,7 +113,7 @@ static const struct proto_ops pvc_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = pvc_getname,
- .poll = vcc_poll,
+ .poll_mask = vcc_poll_mask,
.ioctl = vcc_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = vcc_compat_ioctl,
diff --git a/net/atm/svc.c b/net/atm/svc.c
index 2f91b766ac42..53f4ad7087b1 100644
--- a/net/atm/svc.c
+++ b/net/atm/svc.c
@@ -636,7 +636,7 @@ static const struct proto_ops svc_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = svc_accept,
.getname = svc_getname,
- .poll = vcc_poll,
+ .poll_mask = vcc_poll_mask,
.ioctl = svc_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = svc_compat_ioctl,
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index c603d33d5410..d1d2442ce573 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -1941,7 +1941,7 @@ static const struct proto_ops ax25_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = ax25_accept,
.getname = ax25_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = ax25_ioctl,
.listen = ax25_listen,
.shutdown = ax25_shutdown,
diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c
index 3264e1873219..510ab4f55df5 100644
--- a/net/bluetooth/af_bluetooth.c
+++ b/net/bluetooth/af_bluetooth.c
@@ -437,16 +437,13 @@ static inline __poll_t bt_accept_poll(struct sock *parent)
return 0;
}
-__poll_t bt_sock_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+__poll_t bt_sock_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
__poll_t mask = 0;
BT_DBG("sock %p, sk %p", sock, sk);
- poll_wait(file, sk_sleep(sk), wait);
-
if (sk->sk_state == BT_LISTEN)
return bt_accept_poll(sk);
@@ -478,7 +475,7 @@ __poll_t bt_sock_poll(struct file *file, struct socket *sock,
return mask;
}
-EXPORT_SYMBOL(bt_sock_poll);
+EXPORT_SYMBOL(bt_sock_poll_mask);
int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
diff --git a/net/bluetooth/bnep/sock.c b/net/bluetooth/bnep/sock.c
index b5116fa9835e..00deacdcb51c 100644
--- a/net/bluetooth/bnep/sock.c
+++ b/net/bluetooth/bnep/sock.c
@@ -175,7 +175,6 @@ static const struct proto_ops bnep_sock_ops = {
.getname = sock_no_getname,
.sendmsg = sock_no_sendmsg,
.recvmsg = sock_no_recvmsg,
- .poll = sock_no_poll,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
diff --git a/net/bluetooth/cmtp/sock.c b/net/bluetooth/cmtp/sock.c
index ce86a7bae844..e08f28fadd65 100644
--- a/net/bluetooth/cmtp/sock.c
+++ b/net/bluetooth/cmtp/sock.c
@@ -178,7 +178,6 @@ static const struct proto_ops cmtp_sock_ops = {
.getname = sock_no_getname,
.sendmsg = sock_no_sendmsg,
.recvmsg = sock_no_recvmsg,
- .poll = sock_no_poll,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 1506e1632394..d6c099861538 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1975,7 +1975,7 @@ static const struct proto_ops hci_sock_ops = {
.sendmsg = hci_sock_sendmsg,
.recvmsg = hci_sock_recvmsg,
.ioctl = hci_sock_ioctl,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = hci_sock_setsockopt,
diff --git a/net/bluetooth/hidp/sock.c b/net/bluetooth/hidp/sock.c
index 008ba439bd62..1eaac01f85de 100644
--- a/net/bluetooth/hidp/sock.c
+++ b/net/bluetooth/hidp/sock.c
@@ -208,7 +208,6 @@ static const struct proto_ops hidp_sock_ops = {
.getname = sock_no_getname,
.sendmsg = sock_no_sendmsg,
.recvmsg = sock_no_recvmsg,
- .poll = sock_no_poll,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 686bdc6b35b0..742a190034e6 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1653,7 +1653,7 @@ static const struct proto_ops l2cap_sock_ops = {
.getname = l2cap_sock_getname,
.sendmsg = l2cap_sock_sendmsg,
.recvmsg = l2cap_sock_recvmsg,
- .poll = bt_sock_poll,
+ .poll_mask = bt_sock_poll_mask,
.ioctl = bt_sock_ioctl,
.mmap = sock_no_mmap,
.socketpair = sock_no_socketpair,
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index d606e9212291..1cf57622473a 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -1049,7 +1049,7 @@ static const struct proto_ops rfcomm_sock_ops = {
.setsockopt = rfcomm_sock_setsockopt,
.getsockopt = rfcomm_sock_getsockopt,
.ioctl = rfcomm_sock_ioctl,
- .poll = bt_sock_poll,
+ .poll_mask = bt_sock_poll_mask,
.socketpair = sock_no_socketpair,
.mmap = sock_no_mmap
};
diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c
index 413b8ee49fec..d60dbc61d170 100644
--- a/net/bluetooth/sco.c
+++ b/net/bluetooth/sco.c
@@ -1197,7 +1197,7 @@ static const struct proto_ops sco_sock_ops = {
.getname = sco_sock_getname,
.sendmsg = sco_sock_sendmsg,
.recvmsg = sco_sock_recvmsg,
- .poll = bt_sock_poll,
+ .poll_mask = bt_sock_poll_mask,
.ioctl = bt_sock_ioctl,
.mmap = sock_no_mmap,
.socketpair = sock_no_socketpair,
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index a6fb1b3bcad9..c7991867d622 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -934,15 +934,11 @@ static int caif_release(struct socket *sock)
}
/* Copied from af_unix.c:unix_poll(), added CAIF tx_flow handling */
-static __poll_t caif_poll(struct file *file,
- struct socket *sock, poll_table *wait)
+static __poll_t caif_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
- __poll_t mask;
struct caifsock *cf_sk = container_of(sk, struct caifsock, sk);
-
- sock_poll_wait(file, sk_sleep(sk), wait);
- mask = 0;
+ __poll_t mask = 0;
/* exceptional events? */
if (sk->sk_err)
@@ -976,7 +972,7 @@ static const struct proto_ops caif_seqpacket_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = caif_poll,
+ .poll_mask = caif_poll_mask,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -997,7 +993,7 @@ static const struct proto_ops caif_stream_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = caif_poll,
+ .poll_mask = caif_poll_mask,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/can/bcm.c b/net/can/bcm.c
index 6ad89f49b341..97fedff3f0c4 100644
--- a/net/can/bcm.c
+++ b/net/can/bcm.c
@@ -1657,7 +1657,7 @@ static const struct proto_ops bcm_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/can/raw.c b/net/can/raw.c
index 1051eee82581..fd7e2f49ea6a 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -843,7 +843,7 @@ static const struct proto_ops raw_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = raw_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = can_ioctl, /* use can_ioctl() from af_can.c */
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 9938952c5c78..f19bf3dc2bd6 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -819,9 +819,8 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
/**
* datagram_poll - generic datagram poll
- * @file: file struct
* @sock: socket
- * @wait: poll table
+ * @events to wait for
*
* Datagram poll: Again totally generic. This also handles
* sequenced packet sockets providing the socket receive queue
@@ -831,14 +830,10 @@ EXPORT_SYMBOL(skb_copy_and_csum_datagram_msg);
* and you use a different write policy from sock_writeable()
* then please supply your own write_space callback.
*/
-__poll_t datagram_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+__poll_t datagram_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
- __poll_t mask;
-
- sock_poll_wait(file, sk_sleep(sk), wait);
- mask = 0;
+ __poll_t mask = 0;
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
@@ -871,4 +866,4 @@ __poll_t datagram_poll(struct file *file, struct socket *sock,
return mask;
}
-EXPORT_SYMBOL(datagram_poll);
+EXPORT_SYMBOL(datagram_poll_mask);
diff --git a/net/core/sock.c b/net/core/sock.c
index 815770333d91..2aed99a541d5 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2567,12 +2567,6 @@ int sock_no_getname(struct socket *sock, struct sockaddr *saddr,
}
EXPORT_SYMBOL(sock_no_getname);
-__poll_t sock_no_poll(struct file *file, struct socket *sock, poll_table *pt)
-{
- return 0;
-}
-EXPORT_SYMBOL(sock_no_poll);
-
int sock_no_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
return -EOPNOTSUPP;
diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index f91e3816806b..0ea2ee56ac1b 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -316,8 +316,7 @@ int dccp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock,
int flags, int *addr_len);
void dccp_shutdown(struct sock *sk, int how);
int inet_dccp_listen(struct socket *sock, int backlog);
-__poll_t dccp_poll(struct file *file, struct socket *sock,
- poll_table *wait);
+__poll_t dccp_poll_mask(struct socket *sock, __poll_t events);
int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len);
void dccp_req_err(struct sock *sk, u64 seq);
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index b08feb219b44..a9e478cd3787 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -984,7 +984,7 @@ static const struct proto_ops inet_dccp_ops = {
.accept = inet_accept,
.getname = inet_getname,
/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
- .poll = dccp_poll,
+ .poll_mask = dccp_poll_mask,
.ioctl = inet_ioctl,
/* FIXME: work on inet_listen to rename it to sock_common_listen */
.listen = inet_dccp_listen,
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 6344f1b18a6a..17fc4e0166ba 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1070,7 +1070,7 @@ static const struct proto_ops inet6_dccp_ops = {
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet6_getname,
- .poll = dccp_poll,
+ .poll_mask = dccp_poll_mask,
.ioctl = inet6_ioctl,
.listen = inet_dccp_listen,
.shutdown = inet_shutdown,
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 0d56e36a6db7..ca21c1c76da0 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -312,20 +312,11 @@ int dccp_disconnect(struct sock *sk, int flags)
EXPORT_SYMBOL_GPL(dccp_disconnect);
-/*
- * Wait for a DCCP event.
- *
- * Note that we don't need to lock the socket, as the upper poll layers
- * take care of normal races (between the test and the event) and we don't
- * go look at any of the socket buffers directly.
- */
-__poll_t dccp_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+__poll_t dccp_poll_mask(struct socket *sock, __poll_t events)
{
__poll_t mask;
struct sock *sk = sock->sk;
- sock_poll_wait(file, sk_sleep(sk), wait);
if (sk->sk_state == DCCP_LISTEN)
return inet_csk_listen_poll(sk);
@@ -367,7 +358,7 @@ __poll_t dccp_poll(struct file *file, struct socket *sock,
return mask;
}
-EXPORT_SYMBOL_GPL(dccp_poll);
+EXPORT_SYMBOL_GPL(dccp_poll_mask);
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c
index 7d6ff983ba2c..9a686d890bfa 100644
--- a/net/decnet/af_decnet.c
+++ b/net/decnet/af_decnet.c
@@ -1207,11 +1207,11 @@ static int dn_getname(struct socket *sock, struct sockaddr *uaddr,int peer)
}
-static __poll_t dn_poll(struct file *file, struct socket *sock, poll_table *wait)
+static __poll_t dn_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
struct dn_scp *scp = DN_SK(sk);
- __poll_t mask = datagram_poll(file, sock, wait);
+ __poll_t mask = datagram_poll_mask(sock, events);
if (!skb_queue_empty(&scp->other_receive_queue))
mask |= EPOLLRDBAND;
@@ -2331,7 +2331,7 @@ static const struct proto_ops dn_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = dn_accept,
.getname = dn_getname,
- .poll = dn_poll,
+ .poll_mask = dn_poll_mask,
.ioctl = dn_ioctl,
.listen = dn_listen,
.shutdown = dn_shutdown,
diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c
index a60658c85a9a..a0768d2759b8 100644
--- a/net/ieee802154/socket.c
+++ b/net/ieee802154/socket.c
@@ -423,7 +423,7 @@ static const struct proto_ops ieee802154_raw_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = ieee802154_sock_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -969,7 +969,7 @@ static const struct proto_ops ieee802154_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = ieee802154_sock_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index eaed0367e669..8a59428e63ab 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -986,7 +986,7 @@ const struct proto_ops inet_stream_ops = {
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet_getname,
- .poll = tcp_poll,
+ .poll_mask = tcp_poll_mask,
.ioctl = inet_ioctl,
.listen = inet_listen,
.shutdown = inet_shutdown,
@@ -1018,7 +1018,7 @@ const struct proto_ops inet_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = inet_getname,
- .poll = udp_poll,
+ .poll_mask = udp_poll_mask,
.ioctl = inet_ioctl,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
@@ -1039,7 +1039,7 @@ EXPORT_SYMBOL(inet_dgram_ops);
/*
* For SOCK_RAW sockets; should be the same as inet_dgram_ops but without
- * udp_poll
+ * udp_poll_mask
*/
static const struct proto_ops inet_sockraw_ops = {
.family = PF_INET,
@@ -1050,7 +1050,7 @@ static const struct proto_ops inet_sockraw_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = inet_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = inet_ioctl,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c9d00ef54dec..dec47e6789e7 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -494,32 +494,21 @@ static inline bool tcp_stream_is_readable(const struct tcp_sock *tp,
}
/*
- * Wait for a TCP event.
- *
- * Note that we don't need to lock the socket, as the upper poll layers
- * take care of normal races (between the test and the event) and we don't
- * go look at any of the socket buffers directly.
+ * Socket is not locked. We are protected from async events by poll logic and
+ * correct handling of state changes made by other threads is impossible in
+ * any case.
*/
-__poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t tcp_poll_mask(struct socket *sock, __poll_t events)
{
- __poll_t mask;
struct sock *sk = sock->sk;
const struct tcp_sock *tp = tcp_sk(sk);
+ __poll_t mask = 0;
int state;
- sock_poll_wait(file, sk_sleep(sk), wait);
-
state = inet_sk_state_load(sk);
if (state == TCP_LISTEN)
return inet_csk_listen_poll(sk);
- /* Socket is not locked. We are protected from async events
- * by poll logic and correct handling of state changes
- * made by other threads is impossible in any case.
- */
-
- mask = 0;
-
/*
* EPOLLHUP is certainly not done right. But poll() doesn't
* have a notion of HUP in just one direction, and for a
@@ -600,7 +589,7 @@ __poll_t tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
return mask;
}
-EXPORT_SYMBOL(tcp_poll);
+EXPORT_SYMBOL(tcp_poll_mask);
int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 051a43ff3fb8..675433eb53a8 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2501,7 +2501,7 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname,
* udp_poll - wait for a UDP event.
* @file - file struct
* @sock - socket
- * @wait - poll table
+ * @events - events to wait for
*
* This is same as datagram poll, except for the special case of
* blocking sockets. If application is using a blocking fd
@@ -2510,23 +2510,23 @@ int compat_udp_getsockopt(struct sock *sk, int level, int optname,
* but then block when reading it. Add special case code
* to work around these arguably broken applications.
*/
-__poll_t udp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t udp_poll_mask(struct socket *sock, __poll_t events)
{
- __poll_t mask = datagram_poll(file, sock, wait);
+ __poll_t mask = datagram_poll_mask(sock, events);
struct sock *sk = sock->sk;
if (!skb_queue_empty(&udp_sk(sk)->reader_queue))
mask |= EPOLLIN | EPOLLRDNORM;
/* Check for false positives due to checksum errors */
- if ((mask & EPOLLRDNORM) && !(file->f_flags & O_NONBLOCK) &&
+ if ((mask & EPOLLRDNORM) && !(sock->file->f_flags & O_NONBLOCK) &&
!(sk->sk_shutdown & RCV_SHUTDOWN) && first_packet_length(sk) == -1)
mask &= ~(EPOLLIN | EPOLLRDNORM);
return mask;
}
-EXPORT_SYMBOL(udp_poll);
+EXPORT_SYMBOL(udp_poll_mask);
int udp_abort(struct sock *sk, int err)
{
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 8da0b513f188..d443c18b45fe 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -571,7 +571,7 @@ const struct proto_ops inet6_stream_ops = {
.socketpair = sock_no_socketpair, /* a do nothing */
.accept = inet_accept, /* ok */
.getname = inet6_getname,
- .poll = tcp_poll, /* ok */
+ .poll_mask = tcp_poll_mask, /* ok */
.ioctl = inet6_ioctl, /* must change */
.listen = inet_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
@@ -601,7 +601,7 @@ const struct proto_ops inet6_dgram_ops = {
.socketpair = sock_no_socketpair, /* a do nothing */
.accept = sock_no_accept, /* a do nothing */
.getname = inet6_getname,
- .poll = udp_poll, /* ok */
+ .poll_mask = udp_poll_mask, /* ok */
.ioctl = inet6_ioctl, /* must change */
.listen = sock_no_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index afc307c89d1a..ce6f0d15b5dd 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1334,7 +1334,7 @@ void raw6_proc_exit(void)
}
#endif /* CONFIG_PROC_FS */
-/* Same as inet6_dgram_ops, sans udp_poll. */
+/* Same as inet6_dgram_ops, sans udp_poll_mask. */
const struct proto_ops inet6_sockraw_ops = {
.family = PF_INET6,
.owner = THIS_MODULE,
@@ -1344,7 +1344,7 @@ const struct proto_ops inet6_sockraw_ops = {
.socketpair = sock_no_socketpair, /* a do nothing */
.accept = sock_no_accept, /* a do nothing */
.getname = inet6_getname,
- .poll = datagram_poll, /* ok */
+ .poll_mask = datagram_poll_mask, /* ok */
.ioctl = inet6_ioctl, /* must change */
.listen = sock_no_listen, /* ok */
.shutdown = inet_shutdown, /* ok */
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 893a022f9620..68e86257a549 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -1488,14 +1488,11 @@ static inline __poll_t iucv_accept_poll(struct sock *parent)
return 0;
}
-__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t iucv_sock_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
__poll_t mask = 0;
- sock_poll_wait(file, sk_sleep(sk), wait);
-
if (sk->sk_state == IUCV_LISTEN)
return iucv_accept_poll(sk);
@@ -2388,7 +2385,7 @@ static const struct proto_ops iucv_sock_ops = {
.getname = iucv_sock_getname,
.sendmsg = iucv_sock_sendmsg,
.recvmsg = iucv_sock_recvmsg,
- .poll = iucv_sock_poll,
+ .poll_mask = iucv_sock_poll_mask,
.ioctl = sock_no_ioctl,
.mmap = sock_no_mmap,
.socketpair = sock_no_socketpair,
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index d3601d421571..84b7d5c6fec8 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1336,9 +1336,9 @@ static void init_kcm_sock(struct kcm_sock *kcm, struct kcm_mux *mux)
struct list_head *head;
int index = 0;
- /* For SOCK_SEQPACKET sock type, datagram_poll checks the sk_state, so
- * we set sk_state, otherwise epoll_wait always returns right away with
- * EPOLLHUP
+ /* For SOCK_SEQPACKET sock type, datagram_poll_mask checks the sk_state,
+ * so we set sk_state, otherwise epoll_wait always returns right away
+ * with EPOLLHUP
*/
kcm->sk.sk_state = TCP_ESTABLISHED;
@@ -1903,7 +1903,7 @@ static const struct proto_ops kcm_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = kcm_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -1924,7 +1924,7 @@ static const struct proto_ops kcm_seqpacket_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = kcm_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 5e1d2946ffbf..8bdc1cbe490a 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -3751,7 +3751,7 @@ static const struct proto_ops pfkey_ops = {
/* Now the operations that really occur. */
.release = pfkey_release,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.sendmsg = pfkey_sendmsg,
.recvmsg = pfkey_recvmsg,
};
diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c
index a9c05b2bc1b0..181073bf6925 100644
--- a/net/l2tp/l2tp_ip.c
+++ b/net/l2tp/l2tp_ip.c
@@ -613,7 +613,7 @@ static const struct proto_ops l2tp_ip_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = l2tp_ip_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = inet_ioctl,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index 957369192ca1..336e4c00abbc 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -754,7 +754,7 @@ static const struct proto_ops l2tp_ip6_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = l2tp_ip6_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = inet6_ioctl,
.listen = sock_no_listen,
.shutdown = inet_shutdown,
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index 830469766c1f..3d8ca1231f8f 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -1788,7 +1788,7 @@ static const struct proto_ops pppol2tp_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = pppol2tp_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
.setsockopt = pppol2tp_setsockopt,
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 1beeea9549fa..804de8490186 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -1192,7 +1192,7 @@ static const struct proto_ops llc_ui_ops = {
.socketpair = sock_no_socketpair,
.accept = llc_ui_accept,
.getname = llc_ui_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = llc_ui_ioctl,
.listen = llc_ui_listen,
.shutdown = llc_ui_shutdown,
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 393573a99a5a..1189b84413d5 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2658,7 +2658,7 @@ static const struct proto_ops netlink_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = netlink_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = netlink_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index c2888c78d4c1..b97eb766a1d5 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -1355,7 +1355,7 @@ static const struct proto_ops nr_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = nr_accept,
.getname = nr_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = nr_ioctl,
.listen = nr_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c
index ea0c0c6f1874..ab5bb14b49af 100644
--- a/net/nfc/llcp_sock.c
+++ b/net/nfc/llcp_sock.c
@@ -548,16 +548,13 @@ static inline __poll_t llcp_accept_poll(struct sock *parent)
return 0;
}
-static __poll_t llcp_sock_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t llcp_sock_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
__poll_t mask = 0;
pr_debug("%p\n", sk);
- sock_poll_wait(file, sk_sleep(sk), wait);
-
if (sk->sk_state == LLCP_LISTEN)
return llcp_accept_poll(sk);
@@ -899,7 +896,7 @@ static const struct proto_ops llcp_sock_ops = {
.socketpair = sock_no_socketpair,
.accept = llcp_sock_accept,
.getname = llcp_sock_getname,
- .poll = llcp_sock_poll,
+ .poll_mask = llcp_sock_poll_mask,
.ioctl = sock_no_ioctl,
.listen = llcp_sock_listen,
.shutdown = sock_no_shutdown,
@@ -919,7 +916,7 @@ static const struct proto_ops llcp_rawsock_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = llcp_sock_getname,
- .poll = llcp_sock_poll,
+ .poll_mask = llcp_sock_poll_mask,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/nfc/rawsock.c b/net/nfc/rawsock.c
index e2188deb08dc..60c322531c49 100644
--- a/net/nfc/rawsock.c
+++ b/net/nfc/rawsock.c
@@ -284,7 +284,7 @@ static const struct proto_ops rawsock_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -304,7 +304,7 @@ static const struct proto_ops rawsock_raw_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index f9cdd27a7f6f..674390b1f084 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -4110,12 +4110,11 @@ static int packet_ioctl(struct socket *sock, unsigned int cmd,
return 0;
}
-static __poll_t packet_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t packet_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
- __poll_t mask = datagram_poll(file, sock, wait);
+ __poll_t mask = datagram_poll_mask(sock, events);
spin_lock_bh(&sk->sk_receive_queue.lock);
if (po->rx_ring.pg_vec) {
@@ -4457,7 +4456,7 @@ static const struct proto_ops packet_ops_spkt = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = packet_getname_spkt,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = packet_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -4478,7 +4477,7 @@ static const struct proto_ops packet_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = packet_getname,
- .poll = packet_poll,
+ .poll_mask = packet_poll_mask,
.ioctl = packet_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/phonet/socket.c b/net/phonet/socket.c
index 30187990257f..c295c4e20f01 100644
--- a/net/phonet/socket.c
+++ b/net/phonet/socket.c
@@ -340,15 +340,12 @@ static int pn_socket_getname(struct socket *sock, struct sockaddr *addr,
return sizeof(struct sockaddr_pn);
}
-static __poll_t pn_socket_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t pn_socket_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
struct pep_sock *pn = pep_sk(sk);
__poll_t mask = 0;
- poll_wait(file, sk_sleep(sk), wait);
-
if (sk->sk_state == TCP_CLOSE)
return EPOLLERR;
if (!skb_queue_empty(&sk->sk_receive_queue))
@@ -448,7 +445,7 @@ const struct proto_ops phonet_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = pn_socket_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = pn_socket_ioctl,
.listen = sock_no_listen,
.shutdown = sock_no_shutdown,
@@ -473,7 +470,7 @@ const struct proto_ops phonet_stream_ops = {
.socketpair = sock_no_socketpair,
.accept = pn_socket_accept,
.getname = pn_socket_getname,
- .poll = pn_socket_poll,
+ .poll_mask = pn_socket_poll_mask,
.ioctl = pn_socket_ioctl,
.listen = pn_socket_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c
index 2aa07b547b16..1b5025ea5b04 100644
--- a/net/qrtr/qrtr.c
+++ b/net/qrtr/qrtr.c
@@ -1023,7 +1023,7 @@ static const struct proto_ops qrtr_proto_ops = {
.recvmsg = qrtr_recvmsg,
.getname = qrtr_getname,
.ioctl = qrtr_ioctl,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.shutdown = sock_no_shutdown,
.setsockopt = sock_no_setsockopt,
.getsockopt = sock_no_getsockopt,
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 22a7f2b413ac..5b73fea849df 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -1470,7 +1470,7 @@ static const struct proto_ops rose_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = rose_accept,
.getname = rose_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = rose_ioctl,
.listen = rose_listen,
.shutdown = sock_no_shutdown,
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 2b463047dd7b..3b1ac93efee2 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -734,15 +734,11 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname,
/*
* permit an RxRPC socket to be polled
*/
-static __poll_t rxrpc_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t rxrpc_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
struct rxrpc_sock *rx = rxrpc_sk(sk);
- __poll_t mask;
-
- sock_poll_wait(file, sk_sleep(sk), wait);
- mask = 0;
+ __poll_t mask = 0;
/* the socket is readable if there are any messages waiting on the Rx
* queue */
@@ -949,7 +945,7 @@ static const struct proto_ops rxrpc_rpc_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = sock_no_getname,
- .poll = rxrpc_poll,
+ .poll_mask = rxrpc_poll_mask,
.ioctl = sock_no_ioctl,
.listen = rxrpc_listen,
.shutdown = rxrpc_shutdown,
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 0cd2e764f47f..7339918a805d 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -1010,7 +1010,7 @@ static const struct proto_ops inet6_seqpacket_ops = {
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = sctp_getname,
- .poll = sctp_poll,
+ .poll_mask = sctp_poll_mask,
.ioctl = inet6_ioctl,
.listen = sctp_inet_listen,
.shutdown = inet_shutdown,
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 6bf0a9971888..11d93377ba5e 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -1016,7 +1016,7 @@ static const struct proto_ops inet_seqpacket_ops = {
.socketpair = sock_no_socketpair,
.accept = inet_accept,
.getname = inet_getname, /* Semantics are different. */
- .poll = sctp_poll,
+ .poll_mask = sctp_poll_mask,
.ioctl = inet_ioctl,
.listen = sctp_inet_listen,
.shutdown = inet_shutdown, /* Looks harmless. */
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index ae7e7c606f72..bf747094d26b 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -7722,14 +7722,12 @@ out:
* here, again, by modeling the current TCP/UDP code. We don't have
* a good way to test with it yet.
*/
-__poll_t sctp_poll(struct file *file, struct socket *sock, poll_table *wait)
+__poll_t sctp_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
struct sctp_sock *sp = sctp_sk(sk);
__poll_t mask;
- poll_wait(file, sk_sleep(sk), wait);
-
sock_rps_record_flow(sk);
/* A TCP-style listening socket becomes readable when the accept queue
diff --git a/net/socket.c b/net/socket.c
index f10f1d947c78..2d752e9eb3f9 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -117,8 +117,10 @@ static ssize_t sock_write_iter(struct kiocb *iocb, struct iov_iter *from);
static int sock_mmap(struct file *file, struct vm_area_struct *vma);
static int sock_close(struct inode *inode, struct file *file);
-static __poll_t sock_poll(struct file *file,
- struct poll_table_struct *wait);
+static struct wait_queue_head *sock_get_poll_head(struct file *file,
+ __poll_t events);
+static __poll_t sock_poll_mask(struct file *file, __poll_t);
+static __poll_t sock_poll(struct file *file, struct poll_table_struct *wait);
static long sock_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
#ifdef CONFIG_COMPAT
static long compat_sock_ioctl(struct file *file,
@@ -141,6 +143,8 @@ static const struct file_operations socket_file_ops = {
.llseek = no_llseek,
.read_iter = sock_read_iter,
.write_iter = sock_write_iter,
+ .get_poll_head = sock_get_poll_head,
+ .poll_mask = sock_poll_mask,
.poll = sock_poll,
.unlocked_ioctl = sock_ioctl,
#ifdef CONFIG_COMPAT
@@ -1114,27 +1118,48 @@ out_release:
}
EXPORT_SYMBOL(sock_create_lite);
-/* No kernel lock held - perfect */
-static __poll_t sock_poll(struct file *file, poll_table *wait)
+static struct wait_queue_head *sock_get_poll_head(struct file *file,
+ __poll_t events)
{
- __poll_t busy_flag = 0;
- struct socket *sock;
+ struct socket *sock = file->private_data;
+
+ if (!sock->ops->poll_mask)
+ return NULL;
+ sock_poll_busy_loop(sock, events);
+ return sk_sleep(sock->sk);
+}
+
+static __poll_t sock_poll_mask(struct file *file, __poll_t events)
+{
+ struct socket *sock = file->private_data;
/*
- * We can't return errors to poll, so it's either yes or no.
+ * We need to be sure we are in sync with the socket flags modification.
+ *
+ * This memory barrier is paired in the wq_has_sleeper.
*/
- sock = file->private_data;
+ smp_mb();
+
+ /* this socket can poll_ll so tell the system call */
+ return sock->ops->poll_mask(sock, events) |
+ (sk_can_busy_loop(sock->sk) ? POLL_BUSY_LOOP : 0);
+}
- if (sk_can_busy_loop(sock->sk)) {
- /* this socket can poll_ll so tell the system call */
- busy_flag = POLL_BUSY_LOOP;
+/* No kernel lock held - perfect */
+static __poll_t sock_poll(struct file *file, poll_table *wait)
+{
+ struct socket *sock = file->private_data;
+ __poll_t events = poll_requested_events(wait), mask = 0;
- /* once, only if requested by syscall */
- if (wait && (wait->_key & POLL_BUSY_LOOP))
- sk_busy_loop(sock->sk, 1);
+ if (sock->ops->poll) {
+ sock_poll_busy_loop(sock, events);
+ mask = sock->ops->poll(file, sock, wait);
+ } else if (sock->ops->poll_mask) {
+ sock_poll_wait(file, sock_get_poll_head(file, events), wait);
+ mask = sock->ops->poll_mask(sock, events);
}
- return busy_flag | sock->ops->poll(file, sock, wait);
+ return mask | sock_poll_busy_flag(sock);
}
static int sock_mmap(struct file *file, struct vm_area_struct *vma)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 6be21575503a..3bb45042e833 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -692,10 +692,9 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
}
/**
- * tipc_poll - read and possibly block on pollmask
+ * tipc_poll - read pollmask
* @file: file structure associated with the socket
* @sock: socket for which to calculate the poll bits
- * @wait: ???
*
* Returns pollmask value
*
@@ -709,15 +708,12 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr,
* imply that the operation will succeed, merely that it should be performed
* and will not block.
*/
-static __poll_t tipc_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t tipc_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
struct tipc_sock *tsk = tipc_sk(sk);
__poll_t revents = 0;
- sock_poll_wait(file, sk_sleep(sk), wait);
-
if (sk->sk_shutdown & RCV_SHUTDOWN)
revents |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
if (sk->sk_shutdown == SHUTDOWN_MASK)
@@ -3028,7 +3024,7 @@ static const struct proto_ops msg_ops = {
.socketpair = tipc_socketpair,
.accept = sock_no_accept,
.getname = tipc_getname,
- .poll = tipc_poll,
+ .poll_mask = tipc_poll_mask,
.ioctl = tipc_ioctl,
.listen = sock_no_listen,
.shutdown = tipc_shutdown,
@@ -3049,7 +3045,7 @@ static const struct proto_ops packet_ops = {
.socketpair = tipc_socketpair,
.accept = tipc_accept,
.getname = tipc_getname,
- .poll = tipc_poll,
+ .poll_mask = tipc_poll_mask,
.ioctl = tipc_ioctl,
.listen = tipc_listen,
.shutdown = tipc_shutdown,
@@ -3070,7 +3066,7 @@ static const struct proto_ops stream_ops = {
.socketpair = tipc_socketpair,
.accept = tipc_accept,
.getname = tipc_getname,
- .poll = tipc_poll,
+ .poll_mask = tipc_poll_mask,
.ioctl = tipc_ioctl,
.listen = tipc_listen,
.shutdown = tipc_shutdown,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index e5473c03d667..95b02a71fd47 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -638,9 +638,8 @@ static int unix_stream_connect(struct socket *, struct sockaddr *,
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int, bool);
static int unix_getname(struct socket *, struct sockaddr *, int);
-static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
-static __poll_t unix_dgram_poll(struct file *, struct socket *,
- poll_table *);
+static __poll_t unix_poll_mask(struct socket *, __poll_t);
+static __poll_t unix_dgram_poll_mask(struct socket *, __poll_t);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
@@ -681,7 +680,7 @@ static const struct proto_ops unix_stream_ops = {
.socketpair = unix_socketpair,
.accept = unix_accept,
.getname = unix_getname,
- .poll = unix_poll,
+ .poll_mask = unix_poll_mask,
.ioctl = unix_ioctl,
.listen = unix_listen,
.shutdown = unix_shutdown,
@@ -704,7 +703,7 @@ static const struct proto_ops unix_dgram_ops = {
.socketpair = unix_socketpair,
.accept = sock_no_accept,
.getname = unix_getname,
- .poll = unix_dgram_poll,
+ .poll_mask = unix_dgram_poll_mask,
.ioctl = unix_ioctl,
.listen = sock_no_listen,
.shutdown = unix_shutdown,
@@ -726,7 +725,7 @@ static const struct proto_ops unix_seqpacket_ops = {
.socketpair = unix_socketpair,
.accept = unix_accept,
.getname = unix_getname,
- .poll = unix_dgram_poll,
+ .poll_mask = unix_dgram_poll_mask,
.ioctl = unix_ioctl,
.listen = unix_listen,
.shutdown = unix_shutdown,
@@ -2630,13 +2629,10 @@ static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
return err;
}
-static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
+static __poll_t unix_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk;
- __poll_t mask;
-
- sock_poll_wait(file, sk_sleep(sk), wait);
- mask = 0;
+ __poll_t mask = 0;
/* exceptional events? */
if (sk->sk_err)
@@ -2665,15 +2661,11 @@ static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wa
return mask;
}
-static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t unix_dgram_poll_mask(struct socket *sock, __poll_t events)
{
struct sock *sk = sock->sk, *other;
- unsigned int writable;
- __poll_t mask;
-
- sock_poll_wait(file, sk_sleep(sk), wait);
- mask = 0;
+ int writable;
+ __poll_t mask = 0;
/* exceptional events? */
if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
@@ -2699,7 +2691,7 @@ static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
}
/* No write status requested, avoid expensive OUT tests. */
- if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
+ if (!(events & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
return mask;
writable = unix_writable(sk);
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index c1076c19b858..bb5d5fa68c35 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -850,18 +850,11 @@ static int vsock_shutdown(struct socket *sock, int mode)
return err;
}
-static __poll_t vsock_poll(struct file *file, struct socket *sock,
- poll_table *wait)
+static __poll_t vsock_poll_mask(struct socket *sock, __poll_t events)
{
- struct sock *sk;
- __poll_t mask;
- struct vsock_sock *vsk;
-
- sk = sock->sk;
- vsk = vsock_sk(sk);
-
- poll_wait(file, sk_sleep(sk), wait);
- mask = 0;
+ struct sock *sk = sock->sk;
+ struct vsock_sock *vsk = vsock_sk(sk);
+ __poll_t mask = 0;
if (sk->sk_err)
/* Signify that there has been an error on this socket. */
@@ -1091,7 +1084,7 @@ static const struct proto_ops vsock_dgram_ops = {
.socketpair = sock_no_socketpair,
.accept = sock_no_accept,
.getname = vsock_getname,
- .poll = vsock_poll,
+ .poll_mask = vsock_poll_mask,
.ioctl = sock_no_ioctl,
.listen = sock_no_listen,
.shutdown = vsock_shutdown,
@@ -1849,7 +1842,7 @@ static const struct proto_ops vsock_stream_ops = {
.socketpair = sock_no_socketpair,
.accept = vsock_accept,
.getname = vsock_getname,
- .poll = vsock_poll,
+ .poll_mask = vsock_poll_mask,
.ioctl = sock_no_ioctl,
.listen = vsock_listen,
.shutdown = vsock_shutdown,
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index d49aa79b7997..f93365ae0fdd 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -1750,7 +1750,7 @@ static const struct proto_ops x25_proto_ops = {
.socketpair = sock_no_socketpair,
.accept = x25_accept,
.getname = x25_getname,
- .poll = datagram_poll,
+ .poll_mask = datagram_poll_mask,
.ioctl = x25_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = compat_x25_ioctl,
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 6e865e8b5b10..90d30fbe95ae 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -397,7 +397,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args)
* Check if there was an event already pending on the eventfd
* before we registered, and trigger it as if we didn't miss it.
*/
- events = f.file->f_op->poll(f.file, &irqfd->pt);
+ events = vfs_poll(f.file, &irqfd->pt);
if (events & EPOLLIN)
schedule_work(&irqfd->inject);