diff options
author | Al Viro <viro@zeniv.linux.org.uk> | 2014-04-04 23:12:29 -0400 |
---|---|---|
committer | Al Viro <viro@zeniv.linux.org.uk> | 2014-05-06 17:39:45 -0400 |
commit | 62a8067a7f35dba2de501c9cb00e4cf36da90bc0 (patch) | |
tree | bb008456891c13b9d8a25825a10074efb861cd88 | |
parent | 81055e584f9d743cb13dc7944923d817c20f089d (diff) | |
download | linux-62a8067a7f35dba2de501c9cb00e4cf36da90bc0.tar.bz2 |
bio_vec-backed iov_iter
New variant of iov_iter - ITER_BVEC in iter->type, backed with
bio_vec array instead of iovec one. Primitives taught to deal
with such beasts, __swap_write() switched to using that kind
of iov_iter.
Note that bio_vec is just a <page, offset, length> triple - there's
nothing block-specific about it. I've left the definition where it
was, but took it from under ifdef CONFIG_BLOCK.
Next target: ->splice_write()...
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
-rw-r--r-- | fs/fuse/file.c | 2 | ||||
-rw-r--r-- | include/linux/blk_types.h | 4 | ||||
-rw-r--r-- | include/linux/uio.h | 14 | ||||
-rw-r--r-- | mm/iov_iter.c | 390 | ||||
-rw-r--r-- | mm/page_io.c | 19 |
5 files changed, 385 insertions, 44 deletions
diff --git a/fs/fuse/file.c b/fs/fuse/file.c index 7fbc803cf51d..b2dae9d1437c 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1288,7 +1288,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii, size_t nbytes = 0; /* # bytes already packed in req */ /* Special case for kernel I/O: can copy directly into the buffer */ - if (ii->type & REQ_KERNEL) { + if (ii->type & ITER_KVEC) { unsigned long user_addr = fuse_get_user_addr(ii); size_t frag_size = fuse_get_frag_size(ii, *nbytesp); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index aa0eaa2d0bd8..86df13b97160 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -5,8 +5,6 @@ #ifndef __LINUX_BLK_TYPES_H #define __LINUX_BLK_TYPES_H -#ifdef CONFIG_BLOCK - #include <linux/types.h> struct bio_set; @@ -28,6 +26,8 @@ struct bio_vec { unsigned int bv_offset; }; +#ifdef CONFIG_BLOCK + struct bvec_iter { sector_t bi_sector; /* device address in 512 byte sectors */ diff --git a/include/linux/uio.h b/include/linux/uio.h index e8a109a75de1..e2231e47cec1 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -19,12 +19,21 @@ struct kvec { size_t iov_len; }; +enum { + ITER_IOVEC = 0, + ITER_KVEC = 2, + ITER_BVEC = 4, +}; + struct iov_iter { int type; - const struct iovec *iov; - unsigned long nr_segs; size_t iov_offset; size_t count; + union { + const struct iovec *iov; + const struct bio_vec *bvec; + }; + unsigned long nr_segs; }; /* @@ -54,6 +63,7 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) } #define iov_for_each(iov, iter, start) \ + if (!((start).type & ITER_BVEC)) \ for (iter = (start); \ (iter).count && \ ((iov = iov_iter_iovec(&(iter))), 1); \ diff --git a/mm/iov_iter.c b/mm/iov_iter.c index fcdaaab438b6..7b5dbd1517b5 100644 --- a/mm/iov_iter.c +++ b/mm/iov_iter.c @@ -4,7 +4,7 @@ #include <linux/slab.h> #include <linux/vmalloc.h> -size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, +static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { size_t skip, copy, left, wanted; @@ -84,9 +84,8 @@ done: i->iov_offset = skip; return wanted - bytes; } -EXPORT_SYMBOL(copy_page_to_iter); -size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, +static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { size_t skip, copy, left, wanted; @@ -166,7 +165,6 @@ done: i->iov_offset = skip; return wanted - bytes; } -EXPORT_SYMBOL(copy_page_from_iter); static size_t __iovec_copy_from_user_inatomic(char *vaddr, const struct iovec *iov, size_t base, size_t bytes) @@ -195,7 +193,7 @@ static size_t __iovec_copy_from_user_inatomic(char *vaddr, * were successfully copied. If a fault is encountered then return the number of * bytes which were copied. */ -size_t iov_iter_copy_from_user_atomic(struct page *page, +static size_t copy_from_user_atomic_iovec(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes) { char *kaddr; @@ -215,9 +213,8 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, return copied; } -EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); -void iov_iter_advance(struct iov_iter *i, size_t bytes) +static void advance_iovec(struct iov_iter *i, size_t bytes) { BUG_ON(i->count < bytes); @@ -252,7 +249,6 @@ void iov_iter_advance(struct iov_iter *i, size_t bytes) i->nr_segs = nr_segs; } } -EXPORT_SYMBOL(iov_iter_advance); /* * Fault in the first iovec of the given iov_iter, to a maximum length @@ -265,26 +261,16 @@ EXPORT_SYMBOL(iov_iter_advance); */ int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) { - char __user *buf = i->iov->iov_base + i->iov_offset; - bytes = min(bytes, i->iov->iov_len - i->iov_offset); - return fault_in_pages_readable(buf, bytes); + if (!(i->type & ITER_BVEC)) { + char __user *buf = i->iov->iov_base + i->iov_offset; + bytes = min(bytes, i->iov->iov_len - i->iov_offset); + return fault_in_pages_readable(buf, bytes); + } + return 0; } EXPORT_SYMBOL(iov_iter_fault_in_readable); -/* - * Return the count of just the current iov_iter segment. - */ -size_t iov_iter_single_seg_count(const struct iov_iter *i) -{ - const struct iovec *iov = i->iov; - if (i->nr_segs == 1) - return i->count; - else - return min(i->count, iov->iov_len - i->iov_offset); -} -EXPORT_SYMBOL(iov_iter_single_seg_count); - -unsigned long iov_iter_alignment(const struct iov_iter *i) +static unsigned long alignment_iovec(const struct iov_iter *i) { const struct iovec *iov = i->iov; unsigned long res; @@ -307,7 +293,6 @@ unsigned long iov_iter_alignment(const struct iov_iter *i) res |= (unsigned long)iov->iov_base | size; return res; } -EXPORT_SYMBOL(iov_iter_alignment); void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov, unsigned long nr_segs, @@ -315,7 +300,7 @@ void iov_iter_init(struct iov_iter *i, int direction, { /* It will get better. Eventually... */ if (segment_eq(get_fs(), KERNEL_DS)) - direction |= REQ_KERNEL; + direction |= ITER_KVEC; i->type = direction; i->iov = iov; i->nr_segs = nr_segs; @@ -324,7 +309,7 @@ void iov_iter_init(struct iov_iter *i, int direction, } EXPORT_SYMBOL(iov_iter_init); -ssize_t iov_iter_get_pages(struct iov_iter *i, +static ssize_t get_pages_iovec(struct iov_iter *i, struct page **pages, size_t maxsize, size_t *start) { @@ -349,9 +334,8 @@ ssize_t iov_iter_get_pages(struct iov_iter *i, return res; return (res == n ? len : res * PAGE_SIZE) - *start; } -EXPORT_SYMBOL(iov_iter_get_pages); -ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, +static ssize_t get_pages_alloc_iovec(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start) { @@ -387,9 +371,8 @@ ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, *pages = p; return (res == n ? len : res * PAGE_SIZE) - *start; } -EXPORT_SYMBOL(iov_iter_get_pages_alloc); -int iov_iter_npages(const struct iov_iter *i, int maxpages) +static int iov_iter_npages_iovec(const struct iov_iter *i, int maxpages) { size_t offset = i->iov_offset; size_t size = i->count; @@ -414,4 +397,347 @@ int iov_iter_npages(const struct iov_iter *i, int maxpages) } return min(npages, maxpages); } + +static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) +{ + char *from = kmap_atomic(page); + memcpy(to, from + offset, len); + kunmap_atomic(from); +} + +static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len) +{ + char *to = kmap_atomic(page); + memcpy(to + offset, from, len); + kunmap_atomic(to); +} + +static size_t copy_page_to_iter_bvec(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + size_t skip, copy, wanted; + const struct bio_vec *bvec; + void *kaddr, *from; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + bvec = i->bvec; + skip = i->iov_offset; + copy = min_t(size_t, bytes, bvec->bv_len - skip); + + kaddr = kmap_atomic(page); + from = kaddr + offset; + memcpy_to_page(bvec->bv_page, skip + bvec->bv_offset, from, copy); + skip += copy; + from += copy; + bytes -= copy; + while (bytes) { + bvec++; + copy = min(bytes, (size_t)bvec->bv_len); + memcpy_to_page(bvec->bv_page, bvec->bv_offset, from, copy); + skip = copy; + from += copy; + bytes -= copy; + } + kunmap_atomic(kaddr); + if (skip == bvec->bv_len) { + bvec++; + skip = 0; + } + i->count -= wanted - bytes; + i->nr_segs -= bvec - i->bvec; + i->bvec = bvec; + i->iov_offset = skip; + return wanted - bytes; +} + +static size_t copy_page_from_iter_bvec(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + size_t skip, copy, wanted; + const struct bio_vec *bvec; + void *kaddr, *to; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + bvec = i->bvec; + skip = i->iov_offset; + + kaddr = kmap_atomic(page); + + to = kaddr + offset; + + copy = min(bytes, bvec->bv_len - skip); + + memcpy_from_page(to, bvec->bv_page, bvec->bv_offset + skip, copy); + + to += copy; + skip += copy; + bytes -= copy; + + while (bytes) { + bvec++; + copy = min(bytes, (size_t)bvec->bv_len); + memcpy_from_page(to, bvec->bv_page, bvec->bv_offset, copy); + skip = copy; + to += copy; + bytes -= copy; + } + kunmap_atomic(kaddr); + if (skip == bvec->bv_len) { + bvec++; + skip = 0; + } + i->count -= wanted; + i->nr_segs -= bvec - i->bvec; + i->bvec = bvec; + i->iov_offset = skip; + return wanted; +} + +static size_t copy_from_user_bvec(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes) +{ + char *kaddr; + size_t left; + const struct bio_vec *bvec; + size_t base = i->iov_offset; + + kaddr = kmap_atomic(page); + for (left = bytes, bvec = i->bvec; left; bvec++, base = 0) { + size_t copy = min(left, bvec->bv_len - base); + if (!bvec->bv_len) + continue; + memcpy_from_page(kaddr + offset, bvec->bv_page, + bvec->bv_offset + base, copy); + offset += copy; + left -= copy; + } + kunmap_atomic(kaddr); + return bytes; +} + +static void advance_bvec(struct iov_iter *i, size_t bytes) +{ + BUG_ON(i->count < bytes); + + if (likely(i->nr_segs == 1)) { + i->iov_offset += bytes; + i->count -= bytes; + } else { + const struct bio_vec *bvec = i->bvec; + size_t base = i->iov_offset; + unsigned long nr_segs = i->nr_segs; + + /* + * The !iov->iov_len check ensures we skip over unlikely + * zero-length segments (without overruning the iovec). + */ + while (bytes || unlikely(i->count && !bvec->bv_len)) { + int copy; + + copy = min(bytes, bvec->bv_len - base); + BUG_ON(!i->count || i->count < copy); + i->count -= copy; + bytes -= copy; + base += copy; + if (bvec->bv_len == base) { + bvec++; + nr_segs--; + base = 0; + } + } + i->bvec = bvec; + i->iov_offset = base; + i->nr_segs = nr_segs; + } +} + +static unsigned long alignment_bvec(const struct iov_iter *i) +{ + const struct bio_vec *bvec = i->bvec; + unsigned long res; + size_t size = i->count; + size_t n; + + if (!size) + return 0; + + res = bvec->bv_offset + i->iov_offset; + n = bvec->bv_len - i->iov_offset; + if (n >= size) + return res | size; + size -= n; + res |= n; + while (size > (++bvec)->bv_len) { + res |= bvec->bv_offset | bvec->bv_len; + size -= bvec->bv_len; + } + res |= bvec->bv_offset | size; + return res; +} + +static ssize_t get_pages_bvec(struct iov_iter *i, + struct page **pages, size_t maxsize, + size_t *start) +{ + const struct bio_vec *bvec = i->bvec; + size_t len = bvec->bv_len - i->iov_offset; + if (len > i->count) + len = i->count; + if (len > maxsize) + len = maxsize; + *start = bvec->bv_offset + i->iov_offset; + + get_page(*pages = bvec->bv_page); + + return len; +} + +static ssize_t get_pages_alloc_bvec(struct iov_iter *i, + struct page ***pages, size_t maxsize, + size_t *start) +{ + const struct bio_vec *bvec = i->bvec; + size_t len = bvec->bv_len - i->iov_offset; + if (len > i->count) + len = i->count; + if (len > maxsize) + len = maxsize; + *start = bvec->bv_offset + i->iov_offset; + + *pages = kmalloc(sizeof(struct page *), GFP_KERNEL); + if (!*pages) + return -ENOMEM; + + get_page(**pages = bvec->bv_page); + + return len; +} + +static int iov_iter_npages_bvec(const struct iov_iter *i, int maxpages) +{ + size_t offset = i->iov_offset; + size_t size = i->count; + const struct bio_vec *bvec = i->bvec; + int npages = 0; + int n; + + for (n = 0; size && n < i->nr_segs; n++, bvec++) { + size_t len = bvec->bv_len - offset; + offset = 0; + if (unlikely(!len)) /* empty segment */ + continue; + if (len > size) + len = size; + npages++; + if (npages >= maxpages) /* don't bother going further */ + return maxpages; + size -= len; + offset = 0; + } + return min(npages, maxpages); +} + +size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + if (i->type & ITER_BVEC) + return copy_page_to_iter_bvec(page, offset, bytes, i); + else + return copy_page_to_iter_iovec(page, offset, bytes, i); +} +EXPORT_SYMBOL(copy_page_to_iter); + +size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + if (i->type & ITER_BVEC) + return copy_page_from_iter_bvec(page, offset, bytes, i); + else + return copy_page_from_iter_iovec(page, offset, bytes, i); +} +EXPORT_SYMBOL(copy_page_from_iter); + +size_t iov_iter_copy_from_user_atomic(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes) +{ + if (i->type & ITER_BVEC) + return copy_from_user_bvec(page, i, offset, bytes); + else + return copy_from_user_atomic_iovec(page, i, offset, bytes); +} +EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); + +void iov_iter_advance(struct iov_iter *i, size_t size) +{ + if (i->type & ITER_BVEC) + advance_bvec(i, size); + else + advance_iovec(i, size); +} +EXPORT_SYMBOL(iov_iter_advance); + +/* + * Return the count of just the current iov_iter segment. + */ +size_t iov_iter_single_seg_count(const struct iov_iter *i) +{ + if (i->nr_segs == 1) + return i->count; + else if (i->type & ITER_BVEC) + return min(i->count, i->iov->iov_len - i->iov_offset); + else + return min(i->count, i->bvec->bv_len - i->iov_offset); +} +EXPORT_SYMBOL(iov_iter_single_seg_count); + +unsigned long iov_iter_alignment(const struct iov_iter *i) +{ + if (i->type & ITER_BVEC) + return alignment_bvec(i); + else + return alignment_iovec(i); +} +EXPORT_SYMBOL(iov_iter_alignment); + +ssize_t iov_iter_get_pages(struct iov_iter *i, + struct page **pages, size_t maxsize, + size_t *start) +{ + if (i->type & ITER_BVEC) + return get_pages_bvec(i, pages, maxsize, start); + else + return get_pages_iovec(i, pages, maxsize, start); +} +EXPORT_SYMBOL(iov_iter_get_pages); + +ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, + struct page ***pages, size_t maxsize, + size_t *start) +{ + if (i->type & ITER_BVEC) + return get_pages_alloc_bvec(i, pages, maxsize, start); + else + return get_pages_alloc_iovec(i, pages, maxsize, start); +} +EXPORT_SYMBOL(iov_iter_get_pages_alloc); + +int iov_iter_npages(const struct iov_iter *i, int maxpages) +{ + if (i->type & ITER_BVEC) + return iov_iter_npages_bvec(i, maxpages); + else + return iov_iter_npages_iovec(i, maxpages); +} EXPORT_SYMBOL(iov_iter_npages); diff --git a/mm/page_io.c b/mm/page_io.c index 313bfedb75d1..33bb38c4aad7 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -259,23 +259,28 @@ int __swap_writepage(struct page *page, struct writeback_control *wbc, struct kiocb kiocb; struct file *swap_file = sis->swap_file; struct address_space *mapping = swap_file->f_mapping; - struct iovec iov = { - .iov_base = kmap(page), - .iov_len = PAGE_SIZE, + struct bio_vec bv = { + .bv_page = page, + .bv_len = PAGE_SIZE, + .bv_offset = 0 + }; + struct iov_iter from = { + .type = ITER_BVEC | WRITE, + .count = PAGE_SIZE, + .iov_offset = 0, + .nr_segs = 1, + .bvec = &bv }; - struct iov_iter from; init_sync_kiocb(&kiocb, swap_file); kiocb.ki_pos = page_file_offset(page); kiocb.ki_nbytes = PAGE_SIZE; - iov_iter_init(&from, KERNEL_WRITE, &iov, 1, PAGE_SIZE); set_page_writeback(page); unlock_page(page); - ret = mapping->a_ops->direct_IO(KERNEL_WRITE, + ret = mapping->a_ops->direct_IO(ITER_BVEC | WRITE, &kiocb, &from, kiocb.ki_pos); - kunmap(page); if (ret == PAGE_SIZE) { count_vm_event(PSWPOUT); ret = 0; |