summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-11-30 14:12:13 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-11-30 14:12:13 -0800
commit6a965666b7e7475c2f8c8e724703db58b8a8a445 (patch)
tree0fcd268af15fb52cd32343572f5eaee7bf52e0d8 /include
parent32ef9553635ab1236c33951a8bd9b5af1c3b1646 (diff)
parent3c0edea9b29f9be6c093f236f762202b30ac9431 (diff)
downloadlinux-6a965666b7e7475c2f8c8e724703db58b8a8a445.tar.bz2
Merge tag 'notifications-pipe-prep-20191115' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
Pull pipe rework from David Howells: "This is my set of preparatory patches for building a general notification queue on top of pipes. It makes a number of significant changes: - It removes the nr_exclusive argument from __wake_up_sync_key() as this is always 1. This prepares for the next step: - Adds wake_up_interruptible_sync_poll_locked() so that poll can be woken up from a function that's holding the poll waitqueue spinlock. - Change the pipe buffer ring to be managed in terms of unbounded head and tail indices rather than bounded index and length. This means that reading the pipe only needs to modify one index, not two. - A selection of helper functions are provided to query the state of the pipe buffer, plus a couple to apply updates to the pipe indices. - The pipe ring is allowed to have kernel-reserved slots. This allows many notification messages to be spliced in by the kernel without allowing userspace to pin too many pages if it writes to the same pipe. - Advance the head and tail indices inside the pipe waitqueue lock and use wake_up_interruptible_sync_poll_locked() to poke poll without having to take the lock twice. - Rearrange pipe_write() to preallocate the buffer it is going to write into and then drop the spinlock. This allows kernel notifications to then be added the ring whilst it is filling the buffer it allocated. The read side is stalled because the pipe mutex is still held. - Don't wake up readers on a pipe if there was already data in it when we added more. - Don't wake up writers on a pipe if the ring wasn't full before we removed a buffer" * tag 'notifications-pipe-prep-20191115' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs: pipe: Remove sync on wake_ups pipe: Increase the writer-wakeup threshold to reduce context-switch count pipe: Check for ring full inside of the spinlock in pipe_write() pipe: Remove redundant wakeup from pipe_write() pipe: Rearrange sequence in pipe_write() to preallocate slot pipe: Conditionalise wakeup in pipe_read() pipe: Advance tail pointer inside of wait spinlock in pipe_read() pipe: Allow pipes to have kernel-reserved slots pipe: Use head and tail pointers for the ring, not cursor and length Add wake_up_interruptible_sync_poll_locked() Remove the nr_exclusive argument from __wake_up_sync_key() pipe: Reduce #inclusion of pipe_fs_i.h
Diffstat (limited to 'include')
-rw-r--r--include/linux/pipe_fs_i.h64
-rw-r--r--include/linux/uio.h4
-rw-r--r--include/linux/wait.h11
3 files changed, 69 insertions, 10 deletions
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 5c626fdc10db..44f2245debda 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -30,9 +30,10 @@ struct pipe_buffer {
* struct pipe_inode_info - a linux kernel pipe
* @mutex: mutex protecting the whole thing
* @wait: reader/writer wait point in case of empty/full pipe
- * @nrbufs: the number of non-empty pipe buffers in this pipe
- * @buffers: total number of buffers (should be a power of 2)
- * @curbuf: the current pipe buffer entry
+ * @head: The point of buffer production
+ * @tail: The point of buffer consumption
+ * @max_usage: The maximum number of slots that may be used in the ring
+ * @ring_size: total number of buffers (should be a power of 2)
* @tmp_page: cached released page
* @readers: number of current readers of this pipe
* @writers: number of current writers of this pipe
@@ -48,7 +49,10 @@ struct pipe_buffer {
struct pipe_inode_info {
struct mutex mutex;
wait_queue_head_t wait;
- unsigned int nrbufs, curbuf, buffers;
+ unsigned int head;
+ unsigned int tail;
+ unsigned int max_usage;
+ unsigned int ring_size;
unsigned int readers;
unsigned int writers;
unsigned int files;
@@ -105,6 +109,58 @@ struct pipe_buf_operations {
};
/**
+ * pipe_empty - Return true if the pipe is empty
+ * @head: The pipe ring head pointer
+ * @tail: The pipe ring tail pointer
+ */
+static inline bool pipe_empty(unsigned int head, unsigned int tail)
+{
+ return head == tail;
+}
+
+/**
+ * pipe_occupancy - Return number of slots used in the pipe
+ * @head: The pipe ring head pointer
+ * @tail: The pipe ring tail pointer
+ */
+static inline unsigned int pipe_occupancy(unsigned int head, unsigned int tail)
+{
+ return head - tail;
+}
+
+/**
+ * pipe_full - Return true if the pipe is full
+ * @head: The pipe ring head pointer
+ * @tail: The pipe ring tail pointer
+ * @limit: The maximum amount of slots available.
+ */
+static inline bool pipe_full(unsigned int head, unsigned int tail,
+ unsigned int limit)
+{
+ return pipe_occupancy(head, tail) >= limit;
+}
+
+/**
+ * pipe_space_for_user - Return number of slots available to userspace
+ * @head: The pipe ring head pointer
+ * @tail: The pipe ring tail pointer
+ * @pipe: The pipe info structure
+ */
+static inline unsigned int pipe_space_for_user(unsigned int head, unsigned int tail,
+ struct pipe_inode_info *pipe)
+{
+ unsigned int p_occupancy, p_space;
+
+ p_occupancy = pipe_occupancy(head, tail);
+ if (p_occupancy >= pipe->max_usage)
+ return 0;
+ p_space = pipe->ring_size - p_occupancy;
+ if (p_space > pipe->max_usage)
+ p_space = pipe->max_usage;
+ return p_space;
+}
+
+/**
* pipe_buf_get - get a reference to a pipe_buffer
* @pipe: the pipe that the buffer belongs to
* @buf: the buffer to get a reference to
diff --git a/include/linux/uio.h b/include/linux/uio.h
index ab5f523bc0df..9576fd8158d7 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -45,8 +45,8 @@ struct iov_iter {
union {
unsigned long nr_segs;
struct {
- int idx;
- int start_idx;
+ unsigned int head;
+ unsigned int start_head;
};
};
};
diff --git a/include/linux/wait.h b/include/linux/wait.h
index 3eb7cae8206c..3283c8d02137 100644
--- a/include/linux/wait.h
+++ b/include/linux/wait.h
@@ -201,9 +201,10 @@ void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void
void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head,
unsigned int mode, void *key, wait_queue_entry_t *bookmark);
-void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key);
+void __wake_up_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
+void __wake_up_locked_sync_key(struct wait_queue_head *wq_head, unsigned int mode, void *key);
void __wake_up_locked(struct wait_queue_head *wq_head, unsigned int mode, int nr);
-void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr);
+void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode);
#define wake_up(x) __wake_up(x, TASK_NORMAL, 1, NULL)
#define wake_up_nr(x, nr) __wake_up(x, TASK_NORMAL, nr, NULL)
@@ -214,7 +215,7 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr);
#define wake_up_interruptible(x) __wake_up(x, TASK_INTERRUPTIBLE, 1, NULL)
#define wake_up_interruptible_nr(x, nr) __wake_up(x, TASK_INTERRUPTIBLE, nr, NULL)
#define wake_up_interruptible_all(x) __wake_up(x, TASK_INTERRUPTIBLE, 0, NULL)
-#define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE, 1)
+#define wake_up_interruptible_sync(x) __wake_up_sync((x), TASK_INTERRUPTIBLE)
/*
* Wakeup macros to be used to report events to the targets.
@@ -228,7 +229,9 @@ void __wake_up_sync(struct wait_queue_head *wq_head, unsigned int mode, int nr);
#define wake_up_interruptible_poll(x, m) \
__wake_up(x, TASK_INTERRUPTIBLE, 1, poll_to_key(m))
#define wake_up_interruptible_sync_poll(x, m) \
- __wake_up_sync_key((x), TASK_INTERRUPTIBLE, 1, poll_to_key(m))
+ __wake_up_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m))
+#define wake_up_interruptible_sync_poll_locked(x, m) \
+ __wake_up_locked_sync_key((x), TASK_INTERRUPTIBLE, poll_to_key(m))
#define ___wait_cond_timeout(condition) \
({ \