From 8cefc107ca54c8b06438b7dc9cc08bc0a11d5b98 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 15 Nov 2019 13:30:32 +0000 Subject: pipe: Use head and tail pointers for the ring, not cursor and length Convert pipes to use head and tail pointers for the buffer ring rather than cursor and length, as the latter requires two atomic ops to update (or a combined op) whereas the former only requires one. (1) The head pointer is the point at which production occurs and points to the slot in which the next buffer will be placed. This is equivalent to pipe->curbuf + pipe->nrbufs. The head pointer belongs to the write-side. (2) The tail pointer is the point at which consumption occurs. It points to the next slot to be consumed. This is equivalent to pipe->curbuf. The tail pointer belongs to the read-side. (3) head and tail are allowed to run to UINT_MAX and wrap naturally. They are only masked off when the array is being accessed, e.g.: pipe->bufs[head & mask] This means that it is not necessary to have a dead slot in the ring as head == tail isn't ambiguous. (4) The ring is empty if "head == tail". A helper, pipe_empty(), is provided for this. (5) The occupancy of the ring is "head - tail". A helper, pipe_occupancy(), is provided for this. (6) The number of free slots in the ring is "pipe->ring_size - occupancy". A helper, pipe_space_for_user(), is provided to indicate how many slots userspace may use. (7) The ring is full if "head - tail >= pipe->ring_size". A helper, pipe_full(), is provided for this. 
Signed-off-by: David Howells --- lib/iov_iter.c | 269 ++++++++++++++++++++++++++++++++------------------------- 1 file changed, 151 insertions(+), 118 deletions(-) (limited to 'lib') diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 639d5e7014c1..957f8828378a 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -325,28 +325,33 @@ done: static bool sanity(const struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; - int idx = i->idx; - int next = pipe->curbuf + pipe->nrbufs; + unsigned int p_head = pipe->head; + unsigned int p_tail = pipe->tail; + unsigned int p_mask = pipe->ring_size - 1; + unsigned int p_occupancy = pipe_occupancy(p_head, p_tail); + unsigned int i_head = i->head; + unsigned int idx; + if (i->iov_offset) { struct pipe_buffer *p; - if (unlikely(!pipe->nrbufs)) + if (unlikely(p_occupancy == 0)) goto Bad; // pipe must be non-empty - if (unlikely(idx != ((next - 1) & (pipe->buffers - 1)))) + if (unlikely(i_head != p_head - 1)) goto Bad; // must be at the last buffer... - p = &pipe->bufs[idx]; + p = &pipe->bufs[i_head & p_mask]; if (unlikely(p->offset + p->len != i->iov_offset)) goto Bad; // ... 
at the end of segment } else { - if (idx != (next & (pipe->buffers - 1))) + if (i_head != p_head) goto Bad; // must be right after the last buffer } return true; Bad: - printk(KERN_ERR "idx = %d, offset = %zd\n", i->idx, i->iov_offset); - printk(KERN_ERR "curbuf = %d, nrbufs = %d, buffers = %d\n", - pipe->curbuf, pipe->nrbufs, pipe->buffers); - for (idx = 0; idx < pipe->buffers; idx++) + printk(KERN_ERR "idx = %d, offset = %zd\n", i_head, i->iov_offset); + printk(KERN_ERR "head = %d, tail = %d, buffers = %d\n", + p_head, p_tail, pipe->ring_size); + for (idx = 0; idx < pipe->ring_size; idx++) printk(KERN_ERR "[%p %p %d %d]\n", pipe->bufs[idx].ops, pipe->bufs[idx].page, @@ -359,18 +364,15 @@ Bad: #define sanity(i) true #endif -static inline int next_idx(int idx, struct pipe_inode_info *pipe) -{ - return (idx + 1) & (pipe->buffers - 1); -} - static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; struct pipe_buffer *buf; + unsigned int p_tail = pipe->tail; + unsigned int p_mask = pipe->ring_size - 1; + unsigned int i_head = i->head; size_t off; - int idx; if (unlikely(bytes > i->count)) bytes = i->count; @@ -382,8 +384,7 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by return 0; off = i->iov_offset; - idx = i->idx; - buf = &pipe->bufs[idx]; + buf = &pipe->bufs[i_head & p_mask]; if (off) { if (offset == off && buf->page == page) { /* merge with the last one */ @@ -391,18 +392,21 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by i->iov_offset += bytes; goto out; } - idx = next_idx(idx, pipe); - buf = &pipe->bufs[idx]; + i_head++; + buf = &pipe->bufs[i_head & p_mask]; } - if (idx == pipe->curbuf && pipe->nrbufs) + if (pipe_full(i_head, p_tail, pipe->ring_size)) return 0; - pipe->nrbufs++; + buf->ops = &page_cache_pipe_buf_ops; - get_page(buf->page = page); + get_page(page); + buf->page = page; buf->offset = 
offset; buf->len = bytes; + + pipe->head = i_head + 1; i->iov_offset = offset + bytes; - i->idx = idx; + i->head = i_head; out: i->count -= bytes; return bytes; @@ -480,24 +484,30 @@ static inline bool allocated(struct pipe_buffer *buf) return buf->ops == &default_pipe_buf_ops; } -static inline void data_start(const struct iov_iter *i, int *idxp, size_t *offp) +static inline void data_start(const struct iov_iter *i, + unsigned int *iter_headp, size_t *offp) { + unsigned int p_mask = i->pipe->ring_size - 1; + unsigned int iter_head = i->head; size_t off = i->iov_offset; - int idx = i->idx; - if (off && (!allocated(&i->pipe->bufs[idx]) || off == PAGE_SIZE)) { - idx = next_idx(idx, i->pipe); + + if (off && (!allocated(&i->pipe->bufs[iter_head & p_mask]) || + off == PAGE_SIZE)) { + iter_head++; off = 0; } - *idxp = idx; + *iter_headp = iter_head; *offp = off; } static size_t push_pipe(struct iov_iter *i, size_t size, - int *idxp, size_t *offp) + int *iter_headp, size_t *offp) { struct pipe_inode_info *pipe = i->pipe; + unsigned int p_tail = pipe->tail; + unsigned int p_mask = pipe->ring_size - 1; + unsigned int iter_head; size_t off; - int idx; ssize_t left; if (unlikely(size > i->count)) @@ -506,33 +516,34 @@ static size_t push_pipe(struct iov_iter *i, size_t size, return 0; left = size; - data_start(i, &idx, &off); - *idxp = idx; + data_start(i, &iter_head, &off); + *iter_headp = iter_head; *offp = off; if (off) { left -= PAGE_SIZE - off; if (left <= 0) { - pipe->bufs[idx].len += size; + pipe->bufs[iter_head & p_mask].len += size; return size; } - pipe->bufs[idx].len = PAGE_SIZE; - idx = next_idx(idx, pipe); + pipe->bufs[iter_head & p_mask].len = PAGE_SIZE; + iter_head++; } - while (idx != pipe->curbuf || !pipe->nrbufs) { + while (!pipe_full(iter_head, p_tail, pipe->ring_size)) { + struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask]; struct page *page = alloc_page(GFP_USER); if (!page) break; - pipe->nrbufs++; - pipe->bufs[idx].ops = &default_pipe_buf_ops; - 
pipe->bufs[idx].page = page; - pipe->bufs[idx].offset = 0; - if (left <= PAGE_SIZE) { - pipe->bufs[idx].len = left; + + buf->ops = &default_pipe_buf_ops; + buf->page = page; + buf->offset = 0; + buf->len = min_t(ssize_t, left, PAGE_SIZE); + left -= buf->len; + iter_head++; + pipe->head = iter_head; + + if (left == 0) return size; - } - pipe->bufs[idx].len = PAGE_SIZE; - left -= PAGE_SIZE; - idx = next_idx(idx, pipe); } return size - left; } @@ -541,23 +552,26 @@ static size_t copy_pipe_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; + unsigned int p_mask = pipe->ring_size - 1; + unsigned int i_head; size_t n, off; - int idx; if (!sanity(i)) return 0; - bytes = n = push_pipe(i, bytes, &idx, &off); + bytes = n = push_pipe(i, bytes, &i_head, &off); if (unlikely(!n)) return 0; - for ( ; n; idx = next_idx(idx, pipe), off = 0) { + do { size_t chunk = min_t(size_t, n, PAGE_SIZE - off); - memcpy_to_page(pipe->bufs[idx].page, off, addr, chunk); - i->idx = idx; + memcpy_to_page(pipe->bufs[i_head & p_mask].page, off, addr, chunk); + i->head = i_head; i->iov_offset = off + chunk; n -= chunk; addr += chunk; - } + off = 0; + i_head++; + } while (n); i->count -= bytes; return bytes; } @@ -573,28 +587,31 @@ static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes, __wsum *csum, struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; + unsigned int p_mask = pipe->ring_size - 1; + unsigned int i_head; size_t n, r; size_t off = 0; __wsum sum = *csum; - int idx; if (!sanity(i)) return 0; - bytes = n = push_pipe(i, bytes, &idx, &r); + bytes = n = push_pipe(i, bytes, &i_head, &r); if (unlikely(!n)) return 0; - for ( ; n; idx = next_idx(idx, pipe), r = 0) { + do { size_t chunk = min_t(size_t, n, PAGE_SIZE - r); - char *p = kmap_atomic(pipe->bufs[idx].page); + char *p = kmap_atomic(pipe->bufs[i_head & p_mask].page); sum = csum_and_memcpy(p + r, addr, chunk, sum, off); kunmap_atomic(p); - i->idx = idx; + i->head = 
i_head; i->iov_offset = r + chunk; n -= chunk; off += chunk; addr += chunk; - } + r = 0; + i_head++; + } while (n); i->count -= bytes; *csum = sum; return bytes; @@ -645,29 +662,32 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; + unsigned int p_mask = pipe->ring_size - 1; + unsigned int i_head; size_t n, off, xfer = 0; - int idx; if (!sanity(i)) return 0; - bytes = n = push_pipe(i, bytes, &idx, &off); + bytes = n = push_pipe(i, bytes, &i_head, &off); if (unlikely(!n)) return 0; - for ( ; n; idx = next_idx(idx, pipe), off = 0) { + do { size_t chunk = min_t(size_t, n, PAGE_SIZE - off); unsigned long rem; - rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr, - chunk); - i->idx = idx; + rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page, + off, addr, chunk); + i->head = i_head; i->iov_offset = off + chunk - rem; xfer += chunk - rem; if (rem) break; n -= chunk; addr += chunk; - } + off = 0; + i_head++; + } while (n); i->count -= xfer; return xfer; } @@ -925,23 +945,26 @@ EXPORT_SYMBOL(copy_page_from_iter); static size_t pipe_zero(size_t bytes, struct iov_iter *i) { struct pipe_inode_info *pipe = i->pipe; + unsigned int p_mask = pipe->ring_size - 1; + unsigned int i_head; size_t n, off; - int idx; if (!sanity(i)) return 0; - bytes = n = push_pipe(i, bytes, &idx, &off); + bytes = n = push_pipe(i, bytes, &i_head, &off); if (unlikely(!n)) return 0; - for ( ; n; idx = next_idx(idx, pipe), off = 0) { + do { size_t chunk = min_t(size_t, n, PAGE_SIZE - off); - memzero_page(pipe->bufs[idx].page, off, chunk); - i->idx = idx; + memzero_page(pipe->bufs[i_head & p_mask].page, off, chunk); + i->head = i_head; i->iov_offset = off + chunk; n -= chunk; - } + off = 0; + i_head++; + } while (n); i->count -= bytes; return bytes; } @@ -987,20 +1010,26 @@ EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); static inline void pipe_truncate(struct iov_iter *i) { struct pipe_inode_info *pipe = 
i->pipe; - if (pipe->nrbufs) { + unsigned int p_tail = pipe->tail; + unsigned int p_head = pipe->head; + unsigned int p_mask = pipe->ring_size - 1; + + if (!pipe_empty(p_head, p_tail)) { + struct pipe_buffer *buf; + unsigned int i_head = i->head; size_t off = i->iov_offset; - int idx = i->idx; - int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1); + if (off) { - pipe->bufs[idx].len = off - pipe->bufs[idx].offset; - idx = next_idx(idx, pipe); - nrbufs++; + buf = &pipe->bufs[i_head & p_mask]; + buf->len = off - buf->offset; + i_head++; } - while (pipe->nrbufs > nrbufs) { - pipe_buf_release(pipe, &pipe->bufs[idx]); - idx = next_idx(idx, pipe); - pipe->nrbufs--; + while (p_head != i_head) { + p_head--; + pipe_buf_release(pipe, &pipe->bufs[p_head & p_mask]); } + + pipe->head = p_head; } } @@ -1011,18 +1040,20 @@ static void pipe_advance(struct iov_iter *i, size_t size) size = i->count; if (size) { struct pipe_buffer *buf; + unsigned int p_mask = pipe->ring_size - 1; + unsigned int i_head = i->head; size_t off = i->iov_offset, left = size; - int idx = i->idx; + if (off) /* make it relative to the beginning of buffer */ - left += off - pipe->bufs[idx].offset; + left += off - pipe->bufs[i_head & p_mask].offset; while (1) { - buf = &pipe->bufs[idx]; + buf = &pipe->bufs[i_head & p_mask]; if (left <= buf->len) break; left -= buf->len; - idx = next_idx(idx, pipe); + i_head++; } - i->idx = idx; + i->head = i_head; i->iov_offset = buf->offset + left; } i->count -= size; @@ -1053,25 +1084,27 @@ void iov_iter_revert(struct iov_iter *i, size_t unroll) i->count += unroll; if (unlikely(iov_iter_is_pipe(i))) { struct pipe_inode_info *pipe = i->pipe; - int idx = i->idx; + unsigned int p_mask = pipe->ring_size - 1; + unsigned int i_head = i->head; size_t off = i->iov_offset; while (1) { - size_t n = off - pipe->bufs[idx].offset; + struct pipe_buffer *b = &pipe->bufs[i_head & p_mask]; + size_t n = off - b->offset; if (unroll < n) { off -= unroll; break; } unroll -= n; - if (!unroll 
&& idx == i->start_idx) { + if (!unroll && i_head == i->start_head) { off = 0; break; } - if (!idx--) - idx = pipe->buffers - 1; - off = pipe->bufs[idx].offset + pipe->bufs[idx].len; + i_head--; + b = &pipe->bufs[i_head & p_mask]; + off = b->offset + b->len; } i->iov_offset = off; - i->idx = idx; + i->head = i_head; pipe_truncate(i); return; } @@ -1159,13 +1192,13 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction, size_t count) { BUG_ON(direction != READ); - WARN_ON(pipe->nrbufs == pipe->buffers); + WARN_ON(pipe_full(pipe->head, pipe->tail, pipe->ring_size)); i->type = ITER_PIPE | READ; i->pipe = pipe; - i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); + i->head = pipe->head; i->iov_offset = 0; i->count = count; - i->start_idx = i->idx; + i->start_head = i->head; } EXPORT_SYMBOL(iov_iter_pipe); @@ -1189,11 +1222,12 @@ EXPORT_SYMBOL(iov_iter_discard); unsigned long iov_iter_alignment(const struct iov_iter *i) { + unsigned int p_mask = i->pipe->ring_size - 1; unsigned long res = 0; size_t size = i->count; if (unlikely(iov_iter_is_pipe(i))) { - if (size && i->iov_offset && allocated(&i->pipe->bufs[i->idx])) + if (size && i->iov_offset && allocated(&i->pipe->bufs[i->head & p_mask])) return size | i->iov_offset; return size; } @@ -1231,19 +1265,20 @@ EXPORT_SYMBOL(iov_iter_gap_alignment); static inline ssize_t __pipe_get_pages(struct iov_iter *i, size_t maxsize, struct page **pages, - int idx, + int iter_head, size_t *start) { struct pipe_inode_info *pipe = i->pipe; - ssize_t n = push_pipe(i, maxsize, &idx, start); + unsigned int p_mask = pipe->ring_size - 1; + ssize_t n = push_pipe(i, maxsize, &iter_head, start); if (!n) return -EFAULT; maxsize = n; n += *start; while (n > 0) { - get_page(*pages++ = pipe->bufs[idx].page); - idx = next_idx(idx, pipe); + get_page(*pages++ = pipe->bufs[iter_head & p_mask].page); + iter_head++; n -= PAGE_SIZE; } @@ -1254,9 +1289,8 @@ static ssize_t pipe_get_pages(struct iov_iter *i, struct page **pages, size_t 
maxsize, unsigned maxpages, size_t *start) { - unsigned npages; + unsigned int iter_head, npages; size_t capacity; - int idx; if (!maxsize) return 0; @@ -1264,12 +1298,12 @@ static ssize_t pipe_get_pages(struct iov_iter *i, if (!sanity(i)) return -EFAULT; - data_start(i, &idx, start); - /* some of this one + all after this one */ - npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; - capacity = min(npages,maxpages) * PAGE_SIZE - *start; + data_start(i, &iter_head, start); + /* Amount of free space: some of this one + all after this one */ + npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); + capacity = min(npages, maxpages) * PAGE_SIZE - *start; - return __pipe_get_pages(i, min(maxsize, capacity), pages, idx, start); + return __pipe_get_pages(i, min(maxsize, capacity), pages, iter_head, start); } ssize_t iov_iter_get_pages(struct iov_iter *i, @@ -1323,9 +1357,8 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, size_t *start) { struct page **p; + unsigned int iter_head, npages; ssize_t n; - int idx; - int npages; if (!maxsize) return 0; @@ -1333,9 +1366,9 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, if (!sanity(i)) return -EFAULT; - data_start(i, &idx, start); - /* some of this one + all after this one */ - npages = ((i->pipe->curbuf - idx - 1) & (i->pipe->buffers - 1)) + 1; + data_start(i, &iter_head, start); + /* Amount of free space: some of this one + all after this one */ + npages = pipe_space_for_user(iter_head, i->pipe->tail, i->pipe); n = npages * PAGE_SIZE - *start; if (maxsize > n) maxsize = n; @@ -1344,7 +1377,7 @@ static ssize_t pipe_get_pages_alloc(struct iov_iter *i, p = get_pages_array(npages); if (!p) return -ENOMEM; - n = __pipe_get_pages(i, maxsize, p, idx, start); + n = __pipe_get_pages(i, maxsize, p, iter_head, start); if (n > 0) *pages = p; else @@ -1560,15 +1593,15 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) if (unlikely(iov_iter_is_pipe(i))) { struct pipe_inode_info 
*pipe = i->pipe; + unsigned int iter_head; size_t off; - int idx; if (!sanity(i)) return 0; - data_start(i, &idx, &off); + data_start(i, &iter_head, &off); /* some of this one + all after this one */ - npages = ((pipe->curbuf - idx - 1) & (pipe->buffers - 1)) + 1; + npages = pipe_space_for_user(iter_head, pipe->tail, pipe); if (npages >= maxpages) return maxpages; } else iterate_all_kinds(i, size, v, ({ -- cgit v1.2.3 From 6718b6f855a0b4962d54bd625be2718cb820cec6 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 16 Oct 2019 16:47:32 +0100 Subject: pipe: Allow pipes to have kernel-reserved slots Split pipe->ring_size into two numbers: (1) pipe->ring_size - indicates the hard size of the pipe ring. (2) pipe->max_usage - indicates the maximum number of pipe ring slots that userspace orchestrated events can fill. This allows for a pipe that is both writable by the general kernel notification facility and by userspace, allowing plenty of ring space for notifications to be added whilst preventing userspace from being able to pin too much unswappable kernel space. 
Signed-off-by: David Howells --- fs/fuse/dev.c | 8 ++++---- fs/pipe.c | 10 ++++++---- fs/splice.c | 26 +++++++++++++------------- include/linux/pipe_fs_i.h | 6 +++++- lib/iov_iter.c | 4 ++-- 5 files changed, 30 insertions(+), 24 deletions(-) (limited to 'lib') diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index c56011f95a87..423b6c657bf0 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -703,7 +703,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs) cs->pipebufs++; cs->nr_segs--; } else { - if (cs->nr_segs >= cs->pipe->ring_size) + if (cs->nr_segs >= cs->pipe->max_usage) return -EIO; page = alloc_page(GFP_HIGHUSER); @@ -879,7 +879,7 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page, struct pipe_buffer *buf; int err; - if (cs->nr_segs >= cs->pipe->ring_size) + if (cs->nr_segs >= cs->pipe->max_usage) return -EIO; err = unlock_request(cs->req); @@ -1341,7 +1341,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, if (!fud) return -EPERM; - bufs = kvmalloc_array(pipe->ring_size, sizeof(struct pipe_buffer), + bufs = kvmalloc_array(pipe->max_usage, sizeof(struct pipe_buffer), GFP_KERNEL); if (!bufs) return -ENOMEM; @@ -1353,7 +1353,7 @@ static ssize_t fuse_dev_splice_read(struct file *in, loff_t *ppos, if (ret < 0) goto out; - if (pipe_occupancy(pipe->head, pipe->tail) + cs.nr_segs > pipe->ring_size) { + if (pipe_occupancy(pipe->head, pipe->tail) + cs.nr_segs > pipe->max_usage) { ret = -EIO; goto out; } diff --git a/fs/pipe.c b/fs/pipe.c index e9b361cb093e..69afeab8a73a 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -404,7 +404,7 @@ pipe_write(struct kiocb *iocb, struct iov_iter *from) tail = pipe->tail; head = pipe->head; - max_usage = pipe->ring_size; + max_usage = pipe->max_usage; mask = pipe->ring_size - 1; /* We try to merge small writes */ @@ -571,7 +571,7 @@ pipe_poll(struct file *filp, poll_table *wait) } if (filp->f_mode & FMODE_WRITE) { - if (!pipe_full(head, tail, pipe->ring_size)) + if (!pipe_full(head, tail, 
pipe->max_usage)) mask |= EPOLLOUT | EPOLLWRNORM; /* * Most Unices do not set EPOLLERR for FIFOs but on Linux they @@ -696,6 +696,7 @@ struct pipe_inode_info *alloc_pipe_info(void) if (pipe->bufs) { init_waitqueue_head(&pipe->wait); pipe->r_counter = pipe->w_counter = 1; + pipe->max_usage = pipe_bufs; pipe->ring_size = pipe_bufs; pipe->user = user; mutex_init(&pipe->mutex); @@ -1150,9 +1151,10 @@ static long pipe_set_size(struct pipe_inode_info *pipe, unsigned long arg) kfree(pipe->bufs); pipe->bufs = bufs; pipe->ring_size = nr_slots; + pipe->max_usage = nr_slots; pipe->tail = tail; pipe->head = head; - return pipe->ring_size * PAGE_SIZE; + return pipe->max_usage * PAGE_SIZE; out_revert_acct: (void) account_pipe_buffers(pipe->user, nr_slots, pipe->ring_size); @@ -1185,7 +1187,7 @@ long pipe_fcntl(struct file *file, unsigned int cmd, unsigned long arg) ret = pipe_set_size(pipe, arg); break; case F_GETPIPE_SZ: - ret = pipe->ring_size * PAGE_SIZE; + ret = pipe->max_usage * PAGE_SIZE; break; default: ret = -EINVAL; diff --git a/fs/splice.c b/fs/splice.c index 22b0a47a35c0..c521090a0469 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -199,7 +199,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, goto out; } - while (!pipe_full(head, tail, pipe->ring_size)) { + while (!pipe_full(head, tail, pipe->max_usage)) { struct pipe_buffer *buf = &pipe->bufs[head & mask]; buf->page = spd->pages[page_nr]; @@ -239,7 +239,7 @@ ssize_t add_to_pipe(struct pipe_inode_info *pipe, struct pipe_buffer *buf) if (unlikely(!pipe->readers)) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; - } else if (pipe_full(head, tail, pipe->ring_size)) { + } else if (pipe_full(head, tail, pipe->max_usage)) { ret = -EAGAIN; } else { pipe->bufs[head & mask] = *buf; @@ -257,7 +257,7 @@ EXPORT_SYMBOL(add_to_pipe); */ int splice_grow_spd(const struct pipe_inode_info *pipe, struct splice_pipe_desc *spd) { - unsigned int max_usage = READ_ONCE(pipe->ring_size); + unsigned int max_usage = 
READ_ONCE(pipe->max_usage); spd->nr_pages_max = max_usage; if (max_usage <= PIPE_DEF_BUFFERS) @@ -381,7 +381,7 @@ static ssize_t default_file_splice_read(struct file *in, loff_t *ppos, ssize_t res; int i; - if (pipe_full(pipe->head, pipe->tail, pipe->ring_size)) + if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) return -EAGAIN; /* @@ -698,7 +698,7 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, .pos = *ppos, .u.file = out, }; - int nbufs = pipe->ring_size; + int nbufs = pipe->max_usage; struct bio_vec *array = kcalloc(nbufs, sizeof(struct bio_vec), GFP_KERNEL); ssize_t ret; @@ -721,9 +721,9 @@ iter_file_splice_write(struct pipe_inode_info *pipe, struct file *out, if (ret <= 0) break; - if (unlikely(nbufs < pipe->ring_size)) { + if (unlikely(nbufs < pipe->max_usage)) { kfree(array); - nbufs = pipe->ring_size; + nbufs = pipe->max_usage; array = kcalloc(nbufs, sizeof(struct bio_vec), GFP_KERNEL); if (!array) { @@ -963,7 +963,7 @@ ssize_t splice_direct_to_actor(struct file *in, struct splice_desc *sd, loff_t pos = sd->pos, prev_pos = pos; /* Don't try to read more the pipe has space for. */ - p_space = pipe->ring_size - + p_space = pipe->max_usage - pipe_occupancy(pipe->head, pipe->tail); read_len = min_t(size_t, len, p_space << PAGE_SHIFT); ret = do_splice_to(in, &pos, pipe, read_len, flags); @@ -1090,7 +1090,7 @@ static int wait_for_space(struct pipe_inode_info *pipe, unsigned flags) send_sig(SIGPIPE, current, 0); return -EPIPE; } - if (!pipe_full(pipe->head, pipe->tail, pipe->ring_size)) + if (!pipe_full(pipe->head, pipe->tail, pipe->max_usage)) return 0; if (flags & SPLICE_F_NONBLOCK) return -EAGAIN; @@ -1498,13 +1498,13 @@ static int opipe_prep(struct pipe_inode_info *pipe, unsigned int flags) * Check pipe occupancy without the inode lock first. This function * is speculative anyways, so missing one is ok. 
*/ - if (pipe_full(pipe->head, pipe->tail, pipe->ring_size)) + if (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) return 0; ret = 0; pipe_lock(pipe); - while (pipe_full(pipe->head, pipe->tail, pipe->ring_size)) { + while (pipe_full(pipe->head, pipe->tail, pipe->max_usage)) { if (!pipe->readers) { send_sig(SIGPIPE, current, 0); ret = -EPIPE; @@ -1584,7 +1584,7 @@ retry: * pipe is empty or the output pipe is full. */ if (pipe_empty(i_head, i_tail) || - pipe_full(o_head, o_tail, opipe->ring_size)) { + pipe_full(o_head, o_tail, opipe->max_usage)) { /* Already processed some buffers, break */ if (ret) break; @@ -1706,7 +1706,7 @@ static int link_pipe(struct pipe_inode_info *ipipe, * output room, break. */ if (pipe_empty(i_head, i_tail) || - pipe_full(o_head, o_tail, opipe->ring_size)) + pipe_full(o_head, o_tail, opipe->max_usage)) break; ibuf = &ipipe->bufs[i_tail & i_mask]; diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 96158ca80456..44f2245debda 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -32,6 +32,7 @@ struct pipe_buffer { * @wait: reader/writer wait point in case of empty/full pipe * @head: The point of buffer production * @tail: The point of buffer consumption + * @max_usage: The maximum number of slots that may be used in the ring * @ring_size: total number of buffers (should be a power of 2) * @tmp_page: cached released page * @readers: number of current readers of this pipe @@ -50,6 +51,7 @@ struct pipe_inode_info { wait_queue_head_t wait; unsigned int head; unsigned int tail; + unsigned int max_usage; unsigned int ring_size; unsigned int readers; unsigned int writers; @@ -150,9 +152,11 @@ static inline unsigned int pipe_space_for_user(unsigned int head, unsigned int t unsigned int p_occupancy, p_space; p_occupancy = pipe_occupancy(head, tail); - if (p_occupancy >= pipe->ring_size) + if (p_occupancy >= pipe->max_usage) return 0; p_space = pipe->ring_size - p_occupancy; + if (p_space > pipe->max_usage) 
+ p_space = pipe->max_usage; return p_space; } diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 957f8828378a..be2fc5793ddd 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -395,7 +395,7 @@ static size_t copy_page_to_iter_pipe(struct page *page, size_t offset, size_t by i_head++; buf = &pipe->bufs[i_head & p_mask]; } - if (pipe_full(i_head, p_tail, pipe->ring_size)) + if (pipe_full(i_head, p_tail, pipe->max_usage)) return 0; buf->ops = &page_cache_pipe_buf_ops; @@ -528,7 +528,7 @@ static size_t push_pipe(struct iov_iter *i, size_t size, pipe->bufs[iter_head & p_mask].len = PAGE_SIZE; iter_head++; } - while (!pipe_full(iter_head, p_tail, pipe->ring_size)) { + while (!pipe_full(iter_head, p_tail, pipe->max_usage)) { struct pipe_buffer *buf = &pipe->bufs[iter_head & p_mask]; struct page *page = alloc_page(GFP_USER); if (!page) -- cgit v1.2.3