author    | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-06 20:49:49 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2017-09-06 20:49:49 -0700
commit    | d34fc1adf01ff87026da85fb972dc259dc347540
tree      | 27356073d423187157b7cdb69da32b53102fb9e7 /drivers
parent    | 1c9fe4409ce3e9c78b1ed96ee8ed699d4f03bf33
parent    | d2cd9ede6e193dd7d88b6d27399e96229a551b19
Merge branch 'akpm' (patches from Andrew)
Merge updates from Andrew Morton:
- various misc bits
- DAX updates
- OCFS2
- most of MM
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (119 commits)
mm,fork: introduce MADV_WIPEONFORK (usage sketch after this list)
x86,mpx: make mpx depend on x86-64 to free up VMA flag
mm: add /proc/pid/smaps_rollup
mm: hugetlb: clear target sub-page last when clearing huge page
mm: oom: let oom_reap_task and exit_mmap run concurrently
swap: choose swap device according to numa node
mm: replace TIF_MEMDIE checks by tsk_is_oom_victim
mm, oom: do not rely on TIF_MEMDIE for memory reserves access
z3fold: use per-cpu unbuddied lists
mm, swap: don't use VMA based swap readahead if HDD is used as swap
mm, swap: add sysfs interface for VMA based swap readahead
mm, swap: VMA based swap readahead
mm, swap: fix swap readahead marking
mm, swap: add swap readahead hit statistics
mm/vmalloc.c: don't reinvent the wheel but use existing llist API
mm/vmstat.c: fix wrong comment
selftests/memfd: add memfd_create hugetlbfs selftest
mm/shmem: add hugetlbfs support to memfd_create() (usage sketch after this list)
mm, devm_memremap_pages: use multi-order radix for ZONE_DEVICE lookups
mm/vmalloc.c: halve the number of comparisons performed in pcpu_get_vm_areas()
...
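
For the MADV_WIPEONFORK entry in the list above, a minimal usage sketch follows; it is not taken from the patch itself. It assumes the flag is visible through <sys/mman.h> (falling back to the UAPI value otherwise) and relies only on the documented behaviour: a private anonymous mapping marked with the flag is handed to the child zero-filled across fork(), which is aimed at state such as key material or PRNG seeds that must not leak into forked children.

```c
#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef MADV_WIPEONFORK
#define MADV_WIPEONFORK 18	/* value from the UAPI headers added by this series */
#endif

int main(void)
{
	size_t len = 4096;
	/* Private anonymous mapping: the only kind MADV_WIPEONFORK applies to. */
	char *secret = mmap(NULL, len, PROT_READ | PROT_WRITE,
			    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (secret == MAP_FAILED)
		return 1;

	strcpy(secret, "parent-only key material");

	/* Ask the kernel to give any forked child a zero-filled copy instead. */
	if (madvise(secret, len, MADV_WIPEONFORK))
		perror("madvise");	/* e.g. EINVAL on kernels without this patch */

	pid_t pid = fork();
	if (pid == 0) {
		/* Child: the mapping reads back as zeroes. */
		printf("child sees: \"%s\"\n", secret);
		_exit(0);
	}
	waitpid(pid, NULL, 0);
	printf("parent still sees: \"%s\"\n", secret);
	return 0;
}
```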
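
Likewise, the memfd_create() hugetlbfs patch listed above adds an MFD_HUGETLB flag. The sketch below is illustrative only: it invokes the syscall directly so no particular libc wrapper is assumed, presumes the default 2 MiB huge page size, and expects some huge pages to be reserved (vm.nr_hugepages); otherwise the mmap() simply fails.

```c
#include <stdio.h>
#include <sys/mman.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef MFD_HUGETLB
#define MFD_HUGETLB 0x0004U	/* flag added alongside this patch series */
#endif

int main(void)
{
	size_t huge = 2UL * 1024 * 1024;	/* assumes 2 MiB default huge pages */

	/* memfd_create(2) with MFD_HUGETLB gives a hugetlbfs-backed anonymous file. */
	int fd = syscall(SYS_memfd_create, "hugetlb-example", MFD_HUGETLB);
	if (fd < 0) {
		perror("memfd_create");
		return 1;
	}
	if (ftruncate(fd, huge)) {		/* size must be a huge-page multiple */
		perror("ftruncate");
		return 1;
	}

	void *p = mmap(NULL, huge, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
	if (p == MAP_FAILED) {
		perror("mmap");			/* fails if no huge pages are reserved */
		return 1;
	}
	printf("hugetlb-backed memfd mapped at %p\n", p);
	munmap(p, huge);
	close(fd);
	return 0;
}
```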
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/base/memory.c                    |  30
-rw-r--r-- | drivers/block/brd.c                      |   6
-rw-r--r-- | drivers/block/zram/Kconfig               |  12
-rw-r--r-- | drivers/block/zram/zram_drv.c            | 540
-rw-r--r-- | drivers/block/zram/zram_drv.h            |  11
-rw-r--r-- | drivers/gpu/drm/i915/i915_debugfs.c      |   4
-rw-r--r-- | drivers/gpu/drm/i915/i915_drv.h          |   1
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem.c          |   4
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_gtt.c      |   2
-rw-r--r-- | drivers/gpu/drm/i915/i915_gem_shrinker.c |  24
-rw-r--r-- | drivers/nvdimm/btt.c                     |   4
-rw-r--r-- | drivers/nvdimm/pmem.c                    |  41
12 files changed, 563 insertions(+), 116 deletions(-)
diff --git a/drivers/base/memory.c b/drivers/base/memory.c index c7c4e0325cdb..4e3b61cda520 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -388,6 +388,19 @@ static ssize_t show_phys_device(struct device *dev, } #ifdef CONFIG_MEMORY_HOTREMOVE +static void print_allowed_zone(char *buf, int nid, unsigned long start_pfn, + unsigned long nr_pages, int online_type, + struct zone *default_zone) +{ + struct zone *zone; + + zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages); + if (zone != default_zone) { + strcat(buf, " "); + strcat(buf, zone->name); + } +} + static ssize_t show_valid_zones(struct device *dev, struct device_attribute *attr, char *buf) { @@ -395,7 +408,7 @@ static ssize_t show_valid_zones(struct device *dev, unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; unsigned long valid_start_pfn, valid_end_pfn; - bool append = false; + struct zone *default_zone; int nid; /* @@ -418,16 +431,13 @@ static ssize_t show_valid_zones(struct device *dev, } nid = pfn_to_nid(start_pfn); - if (allow_online_pfn_range(nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL)) { - strcat(buf, default_zone_for_pfn(nid, start_pfn, nr_pages)->name); - append = true; - } + default_zone = zone_for_pfn_range(MMOP_ONLINE_KEEP, nid, start_pfn, nr_pages); + strcat(buf, default_zone->name); - if (allow_online_pfn_range(nid, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE)) { - if (append) - strcat(buf, " "); - strcat(buf, NODE_DATA(nid)->node_zones[ZONE_MOVABLE].name); - } + print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL, + default_zone); + print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE, + default_zone); out: strcat(buf, "\n"); diff --git a/drivers/block/brd.c b/drivers/block/brd.c index 104b71c0490d..5d9ed0616413 100644 --- a/drivers/block/brd.c +++ b/drivers/block/brd.c @@ -326,7 +326,11 @@ static int brd_rw_page(struct block_device *bdev, sector_t sector, struct page *page, bool is_write) { struct brd_device *brd = bdev->bd_disk->private_data; - int err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector); + int err; + + if (PageTransHuge(page)) + return -ENOTSUPP; + err = brd_do_bvec(brd, page, PAGE_SIZE, 0, is_write, sector); page_endio(page, is_write, err); return err; } diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig index b8ecba6dcd3b..7cd4a8ec3c8f 100644 --- a/drivers/block/zram/Kconfig +++ b/drivers/block/zram/Kconfig @@ -13,3 +13,15 @@ config ZRAM disks and maybe many more. See zram.txt for more information. + +config ZRAM_WRITEBACK + bool "Write back incompressible page to backing device" + depends on ZRAM + default n + help + With incompressible page, there is no memory saving to keep it + in memory. Instead, write it out to backing device. + For this feature, admin should set up backing device via + /sys/block/zramX/backing_dev. + + See zram.txt for more infomration. 
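
The ZRAM_WRITEBACK option introduced in the Kconfig hunk above is driven entirely from sysfs: before the zram device is initialized, an admin writes a block-device path to /sys/block/zramX/backing_dev, and pages that compress poorly (comp_len > max_zpage_size) are then written out to that device instead of being kept uncompressed in memory, as write_to_bdev() in the zram_drv.c hunk below does. A minimal sketch of that setup step, with the device names purely illustrative:

```c
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/*
 * Illustrative only: point zram0 at a hypothetical spare partition as its
 * writeback device. This must happen before disksize is set, because
 * backing_dev_store() returns -EBUSY once the device is initialized, and
 * the target must be a block device (anything else gets -ENOTBLK).
 */
int main(void)
{
	const char *attr = "/sys/block/zram0/backing_dev";
	const char *bdev = "/dev/nvme0n1p3";	/* hypothetical backing partition */

	int fd = open(attr, O_WRONLY);
	if (fd < 0) {
		perror(attr);	/* needs CONFIG_ZRAM_WRITEBACK and root */
		return 1;
	}
	if (write(fd, bdev, strlen(bdev)) < 0)
		perror("write");
	close(fd);
	return 0;
}
```

Once the backing device has been accepted, disksize can be set and the device used as usual; the ordering matters only because of the -EBUSY check above.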
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c index 3b1b6340ba13..4a0438c4ef2a 100644 --- a/drivers/block/zram/zram_drv.c +++ b/drivers/block/zram/zram_drv.c @@ -270,6 +270,349 @@ static ssize_t mem_used_max_store(struct device *dev, return len; } +#ifdef CONFIG_ZRAM_WRITEBACK +static bool zram_wb_enabled(struct zram *zram) +{ + return zram->backing_dev; +} + +static void reset_bdev(struct zram *zram) +{ + struct block_device *bdev; + + if (!zram_wb_enabled(zram)) + return; + + bdev = zram->bdev; + if (zram->old_block_size) + set_blocksize(bdev, zram->old_block_size); + blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL); + /* hope filp_close flush all of IO */ + filp_close(zram->backing_dev, NULL); + zram->backing_dev = NULL; + zram->old_block_size = 0; + zram->bdev = NULL; + + kvfree(zram->bitmap); + zram->bitmap = NULL; +} + +static ssize_t backing_dev_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct zram *zram = dev_to_zram(dev); + struct file *file = zram->backing_dev; + char *p; + ssize_t ret; + + down_read(&zram->init_lock); + if (!zram_wb_enabled(zram)) { + memcpy(buf, "none\n", 5); + up_read(&zram->init_lock); + return 5; + } + + p = file_path(file, buf, PAGE_SIZE - 1); + if (IS_ERR(p)) { + ret = PTR_ERR(p); + goto out; + } + + ret = strlen(p); + memmove(buf, p, ret); + buf[ret++] = '\n'; +out: + up_read(&zram->init_lock); + return ret; +} + +static ssize_t backing_dev_store(struct device *dev, + struct device_attribute *attr, const char *buf, size_t len) +{ + char *file_name; + struct file *backing_dev = NULL; + struct inode *inode; + struct address_space *mapping; + unsigned int bitmap_sz, old_block_size = 0; + unsigned long nr_pages, *bitmap = NULL; + struct block_device *bdev = NULL; + int err; + struct zram *zram = dev_to_zram(dev); + + file_name = kmalloc(PATH_MAX, GFP_KERNEL); + if (!file_name) + return -ENOMEM; + + down_write(&zram->init_lock); + if (init_done(zram)) { + pr_info("Can't setup backing device for initialized device\n"); + err = -EBUSY; + goto out; + } + + strlcpy(file_name, buf, len); + + backing_dev = filp_open(file_name, O_RDWR|O_LARGEFILE, 0); + if (IS_ERR(backing_dev)) { + err = PTR_ERR(backing_dev); + backing_dev = NULL; + goto out; + } + + mapping = backing_dev->f_mapping; + inode = mapping->host; + + /* Support only block device in this moment */ + if (!S_ISBLK(inode->i_mode)) { + err = -ENOTBLK; + goto out; + } + + bdev = bdgrab(I_BDEV(inode)); + err = blkdev_get(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL, zram); + if (err < 0) + goto out; + + nr_pages = i_size_read(inode) >> PAGE_SHIFT; + bitmap_sz = BITS_TO_LONGS(nr_pages) * sizeof(long); + bitmap = kvzalloc(bitmap_sz, GFP_KERNEL); + if (!bitmap) { + err = -ENOMEM; + goto out; + } + + old_block_size = block_size(bdev); + err = set_blocksize(bdev, PAGE_SIZE); + if (err) + goto out; + + reset_bdev(zram); + spin_lock_init(&zram->bitmap_lock); + + zram->old_block_size = old_block_size; + zram->bdev = bdev; + zram->backing_dev = backing_dev; + zram->bitmap = bitmap; + zram->nr_pages = nr_pages; + up_write(&zram->init_lock); + + pr_info("setup backing device %s\n", file_name); + kfree(file_name); + + return len; +out: + if (bitmap) + kvfree(bitmap); + + if (bdev) + blkdev_put(bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL); + + if (backing_dev) + filp_close(backing_dev, NULL); + + up_write(&zram->init_lock); + + kfree(file_name); + + return err; +} + +static unsigned long get_entry_bdev(struct zram *zram) +{ + unsigned long entry; + + 
spin_lock(&zram->bitmap_lock); + /* skip 0 bit to confuse zram.handle = 0 */ + entry = find_next_zero_bit(zram->bitmap, zram->nr_pages, 1); + if (entry == zram->nr_pages) { + spin_unlock(&zram->bitmap_lock); + return 0; + } + + set_bit(entry, zram->bitmap); + spin_unlock(&zram->bitmap_lock); + + return entry; +} + +static void put_entry_bdev(struct zram *zram, unsigned long entry) +{ + int was_set; + + spin_lock(&zram->bitmap_lock); + was_set = test_and_clear_bit(entry, zram->bitmap); + spin_unlock(&zram->bitmap_lock); + WARN_ON_ONCE(!was_set); +} + +void zram_page_end_io(struct bio *bio) +{ + struct page *page = bio->bi_io_vec[0].bv_page; + + page_endio(page, op_is_write(bio_op(bio)), + blk_status_to_errno(bio->bi_status)); + bio_put(bio); +} + +/* + * Returns 1 if the submission is successful. + */ +static int read_from_bdev_async(struct zram *zram, struct bio_vec *bvec, + unsigned long entry, struct bio *parent) +{ + struct bio *bio; + + bio = bio_alloc(GFP_ATOMIC, 1); + if (!bio) + return -ENOMEM; + + bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); + bio->bi_bdev = zram->bdev; + if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, bvec->bv_offset)) { + bio_put(bio); + return -EIO; + } + + if (!parent) { + bio->bi_opf = REQ_OP_READ; + bio->bi_end_io = zram_page_end_io; + } else { + bio->bi_opf = parent->bi_opf; + bio_chain(bio, parent); + } + + submit_bio(bio); + return 1; +} + +struct zram_work { + struct work_struct work; + struct zram *zram; + unsigned long entry; + struct bio *bio; +}; + +#if PAGE_SIZE != 4096 +static void zram_sync_read(struct work_struct *work) +{ + struct bio_vec bvec; + struct zram_work *zw = container_of(work, struct zram_work, work); + struct zram *zram = zw->zram; + unsigned long entry = zw->entry; + struct bio *bio = zw->bio; + + read_from_bdev_async(zram, &bvec, entry, bio); +} + +/* + * Block layer want one ->make_request_fn to be active at a time + * so if we use chained IO with parent IO in same context, + * it's a deadlock. To avoid, it, it uses worker thread context. 
+ */ +static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, + unsigned long entry, struct bio *bio) +{ + struct zram_work work; + + work.zram = zram; + work.entry = entry; + work.bio = bio; + + INIT_WORK_ONSTACK(&work.work, zram_sync_read); + queue_work(system_unbound_wq, &work.work); + flush_work(&work.work); + destroy_work_on_stack(&work.work); + + return 1; +} +#else +static int read_from_bdev_sync(struct zram *zram, struct bio_vec *bvec, + unsigned long entry, struct bio *bio) +{ + WARN_ON(1); + return -EIO; +} +#endif + +static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, + unsigned long entry, struct bio *parent, bool sync) +{ + if (sync) + return read_from_bdev_sync(zram, bvec, entry, parent); + else + return read_from_bdev_async(zram, bvec, entry, parent); +} + +static int write_to_bdev(struct zram *zram, struct bio_vec *bvec, + u32 index, struct bio *parent, + unsigned long *pentry) +{ + struct bio *bio; + unsigned long entry; + + bio = bio_alloc(GFP_ATOMIC, 1); + if (!bio) + return -ENOMEM; + + entry = get_entry_bdev(zram); + if (!entry) { + bio_put(bio); + return -ENOSPC; + } + + bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); + bio->bi_bdev = zram->bdev; + if (!bio_add_page(bio, bvec->bv_page, bvec->bv_len, + bvec->bv_offset)) { + bio_put(bio); + put_entry_bdev(zram, entry); + return -EIO; + } + + if (!parent) { + bio->bi_opf = REQ_OP_WRITE | REQ_SYNC; + bio->bi_end_io = zram_page_end_io; + } else { + bio->bi_opf = parent->bi_opf; + bio_chain(bio, parent); + } + + submit_bio(bio); + *pentry = entry; + + return 0; +} + +static void zram_wb_clear(struct zram *zram, u32 index) +{ + unsigned long entry; + + zram_clear_flag(zram, index, ZRAM_WB); + entry = zram_get_element(zram, index); + zram_set_element(zram, index, 0); + put_entry_bdev(zram, entry); +} + +#else +static bool zram_wb_enabled(struct zram *zram) { return false; } +static inline void reset_bdev(struct zram *zram) {}; +static int write_to_bdev(struct zram *zram, struct bio_vec *bvec, + u32 index, struct bio *parent, + unsigned long *pentry) + +{ + return -EIO; +} + +static int read_from_bdev(struct zram *zram, struct bio_vec *bvec, + unsigned long entry, struct bio *parent, bool sync) +{ + return -EIO; +} +static void zram_wb_clear(struct zram *zram, u32 index) {} +#endif + + /* * We switched to per-cpu streams and this attr is not needed anymore. * However, we will keep it around for some time, because: @@ -453,30 +796,6 @@ static bool zram_same_page_read(struct zram *zram, u32 index, return false; } -static bool zram_same_page_write(struct zram *zram, u32 index, - struct page *page) -{ - unsigned long element; - void *mem = kmap_atomic(page); - - if (page_same_filled(mem, &element)) { - kunmap_atomic(mem); - /* Free memory associated with this sector now. 
*/ - zram_slot_lock(zram, index); - zram_free_page(zram, index); - zram_set_flag(zram, index, ZRAM_SAME); - zram_set_element(zram, index, element); - zram_slot_unlock(zram, index); - - atomic64_inc(&zram->stats.same_pages); - atomic64_inc(&zram->stats.pages_stored); - return true; - } - kunmap_atomic(mem); - - return false; -} - static void zram_meta_free(struct zram *zram, u64 disksize) { size_t num_pages = disksize >> PAGE_SHIFT; @@ -515,7 +834,13 @@ static bool zram_meta_alloc(struct zram *zram, u64 disksize) */ static void zram_free_page(struct zram *zram, size_t index) { - unsigned long handle = zram_get_handle(zram, index); + unsigned long handle; + + if (zram_wb_enabled(zram) && zram_test_flag(zram, index, ZRAM_WB)) { + zram_wb_clear(zram, index); + atomic64_dec(&zram->stats.pages_stored); + return; + } /* * No memory is allocated for same element filled pages. @@ -529,6 +854,7 @@ static void zram_free_page(struct zram *zram, size_t index) return; } + handle = zram_get_handle(zram, index); if (!handle) return; @@ -542,13 +868,31 @@ static void zram_free_page(struct zram *zram, size_t index) zram_set_obj_size(zram, index, 0); } -static int zram_decompress_page(struct zram *zram, struct page *page, u32 index) +static int __zram_bvec_read(struct zram *zram, struct page *page, u32 index, + struct bio *bio, bool partial_io) { int ret; unsigned long handle; unsigned int size; void *src, *dst; + if (zram_wb_enabled(zram)) { + zram_slot_lock(zram, index); + if (zram_test_flag(zram, index, ZRAM_WB)) { + struct bio_vec bvec; + + zram_slot_unlock(zram, index); + + bvec.bv_page = page; + bvec.bv_len = PAGE_SIZE; + bvec.bv_offset = 0; + return read_from_bdev(zram, &bvec, + zram_get_element(zram, index), + bio, partial_io); + } + zram_slot_unlock(zram, index); + } + if (zram_same_page_read(zram, index, page, 0, PAGE_SIZE)) return 0; @@ -581,7 +925,7 @@ static int zram_decompress_page(struct zram *zram, struct page *page, u32 index) } static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset) + u32 index, int offset, struct bio *bio) { int ret; struct page *page; @@ -594,7 +938,7 @@ static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec, return -ENOMEM; } - ret = zram_decompress_page(zram, page, index); + ret = __zram_bvec_read(zram, page, index, bio, is_partial_io(bvec)); if (unlikely(ret)) goto out; @@ -613,30 +957,57 @@ out: return ret; } -static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm, - struct page *page, - unsigned long *out_handle, unsigned int *out_comp_len) +static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, + u32 index, struct bio *bio) { - int ret; - unsigned int comp_len; - void *src; + int ret = 0; unsigned long alloced_pages; unsigned long handle = 0; + unsigned int comp_len = 0; + void *src, *dst, *mem; + struct zcomp_strm *zstrm; + struct page *page = bvec->bv_page; + unsigned long element = 0; + enum zram_pageflags flags = 0; + bool allow_wb = true; + + mem = kmap_atomic(page); + if (page_same_filled(mem, &element)) { + kunmap_atomic(mem); + /* Free memory associated with this sector now. */ + flags = ZRAM_SAME; + atomic64_inc(&zram->stats.same_pages); + goto out; + } + kunmap_atomic(mem); compress_again: + zstrm = zcomp_stream_get(zram->comp); src = kmap_atomic(page); - ret = zcomp_compress(*zstrm, src, &comp_len); + ret = zcomp_compress(zstrm, src, &comp_len); kunmap_atomic(src); if (unlikely(ret)) { + zcomp_stream_put(zram->comp); pr_err("Compression failed! 
err=%d\n", ret); - if (handle) - zs_free(zram->mem_pool, handle); + zs_free(zram->mem_pool, handle); return ret; } - if (unlikely(comp_len > max_zpage_size)) + if (unlikely(comp_len > max_zpage_size)) { + if (zram_wb_enabled(zram) && allow_wb) { + zcomp_stream_put(zram->comp); + ret = write_to_bdev(zram, bvec, index, bio, &element); + if (!ret) { + flags = ZRAM_WB; + ret = 1; + goto out; + } + allow_wb = false; + goto compress_again; + } comp_len = PAGE_SIZE; + } /* * handle allocation has 2 paths: @@ -663,7 +1034,6 @@ compress_again: handle = zs_malloc(zram->mem_pool, comp_len, GFP_NOIO | __GFP_HIGHMEM | __GFP_MOVABLE); - *zstrm = zcomp_stream_get(zram->comp); if (handle) goto compress_again; return -ENOMEM; @@ -673,34 +1043,11 @@ compress_again: update_used_max(zram, alloced_pages); if (zram->limit_pages && alloced_pages > zram->limit_pages) { + zcomp_stream_put(zram->comp); zs_free(zram->mem_pool, handle); return -ENOMEM; } - *out_handle = handle; - *out_comp_len = comp_len; - return 0; -} - -static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index) -{ - int ret; - unsigned long handle; - unsigned int comp_len; - void *src, *dst; - struct zcomp_strm *zstrm; - struct page *page = bvec->bv_page; - - if (zram_same_page_write(zram, index, page)) - return 0; - - zstrm = zcomp_stream_get(zram->comp); - ret = zram_compress(zram, &zstrm, page, &handle, &comp_len); - if (ret) { - zcomp_stream_put(zram->comp); - return ret; - } - dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO); src = zstrm->buffer; @@ -712,25 +1059,31 @@ static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index) zcomp_stream_put(zram->comp); zs_unmap_object(zram->mem_pool, handle); - + atomic64_add(comp_len, &zram->stats.compr_data_size); +out: /* * Free memory associated with this sector * before overwriting unused sectors. */ zram_slot_lock(zram, index); zram_free_page(zram, index); - zram_set_handle(zram, index, handle); - zram_set_obj_size(zram, index, comp_len); + + if (flags) { + zram_set_flag(zram, index, flags); + zram_set_element(zram, index, element); + } else { + zram_set_handle(zram, index, handle); + zram_set_obj_size(zram, index, comp_len); + } zram_slot_unlock(zram, index); /* Update stats */ - atomic64_add(comp_len, &zram->stats.compr_data_size); atomic64_inc(&zram->stats.pages_stored); - return 0; + return ret; } static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, - u32 index, int offset) + u32 index, int offset, struct bio *bio) { int ret; struct page *page = NULL; @@ -748,7 +1101,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, if (!page) return -ENOMEM; - ret = zram_decompress_page(zram, page, index); + ret = __zram_bvec_read(zram, page, index, bio, true); if (ret) goto out; @@ -763,7 +1116,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, vec.bv_offset = 0; } - ret = __zram_bvec_write(zram, &vec, index); + ret = __zram_bvec_write(zram, &vec, index, bio); out: if (is_partial_io(bvec)) __free_page(page); @@ -808,8 +1161,13 @@ static void zram_bio_discard(struct zram *zram, u32 index, } } +/* + * Returns errno if it has some problem. Otherwise return 0 or 1. + * Returns 0 if IO request was done synchronously + * Returns 1 if IO request was successfully submitted. + */ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, - int offset, bool is_write) + int offset, bool is_write, struct bio *bio) { unsigned long start_time = jiffies; int rw_acct = is_write ? 
REQ_OP_WRITE : REQ_OP_READ; @@ -820,16 +1178,16 @@ static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index, if (!is_write) { atomic64_inc(&zram->stats.num_reads); - ret = zram_bvec_read(zram, bvec, index, offset); + ret = zram_bvec_read(zram, bvec, index, offset, bio); flush_dcache_page(bvec->bv_page); } else { atomic64_inc(&zram->stats.num_writes); - ret = zram_bvec_write(zram, bvec, index, offset); + ret = zram_bvec_write(zram, bvec, index, offset, bio); } generic_end_io_acct(rw_acct, &zram->disk->part0, start_time); - if (unlikely(ret)) { + if (unlikely(ret < 0)) { if (!is_write) atomic64_inc(&zram->stats.failed_reads); else @@ -868,7 +1226,7 @@ static void __zram_make_request(struct zram *zram, struct bio *bio) bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset, unwritten); if (zram_bvec_rw(zram, &bv, index, offset, - op_is_write(bio_op(bio))) < 0) + op_is_write(bio_op(bio)), bio) < 0) goto out; bv.bv_offset += bv.bv_len; @@ -922,16 +1280,18 @@ static void zram_slot_free_notify(struct block_device *bdev, static int zram_rw_page(struct block_device *bdev, sector_t sector, struct page *page, bool is_write) { - int offset, err = -EIO; + int offset, ret; u32 index; struct zram *zram; struct bio_vec bv; + if (PageTransHuge(page)) + return -ENOTSUPP; zram = bdev->bd_disk->private_data; if (!valid_io_request(zram, sector, PAGE_SIZE)) { atomic64_inc(&zram->stats.invalid_io); - err = -EINVAL; + ret = -EINVAL; goto out; } @@ -942,7 +1302,7 @@ static int zram_rw_page(struct block_device *bdev, sector_t sector, bv.bv_len = PAGE_SIZE; bv.bv_offset = 0; - err = zram_bvec_rw(zram, &bv, index, offset, is_write); + ret = zram_bvec_rw(zram, &bv, index, offset, is_write, NULL); out: /* * If I/O fails, just return error(ie, non-zero) without @@ -952,9 +1312,20 @@ out: * bio->bi_end_io does things to handle the error * (e.g., SetPageError, set_page_dirty and extra works). 
*/ - if (err == 0) + if (unlikely(ret < 0)) + return ret; + + switch (ret) { + case 0: page_endio(page, is_write, 0); - return err; + break; + case 1: + ret = 0; + break; + default: + WARN_ON(1); + } + return ret; } static void zram_reset_device(struct zram *zram) @@ -983,6 +1354,7 @@ static void zram_reset_device(struct zram *zram) zram_meta_free(zram, disksize); memset(&zram->stats, 0, sizeof(zram->stats)); zcomp_destroy(comp); + reset_bdev(zram); } static ssize_t disksize_store(struct device *dev, @@ -1108,6 +1480,9 @@ static DEVICE_ATTR_WO(mem_limit); static DEVICE_ATTR_WO(mem_used_max); static DEVICE_ATTR_RW(max_comp_streams); static DEVICE_ATTR_RW(comp_algorithm); +#ifdef CONFIG_ZRAM_WRITEBACK +static DEVICE_ATTR_RW(backing_dev); +#endif static struct attribute *zram_disk_attrs[] = { &dev_attr_disksize.attr, @@ -1118,6 +1493,9 @@ static struct attribute *zram_disk_attrs[] = { &dev_attr_mem_used_max.attr, &dev_attr_max_comp_streams.attr, &dev_attr_comp_algorithm.attr, +#ifdef CONFIG_ZRAM_WRITEBACK + &dev_attr_backing_dev.attr, +#endif &dev_attr_io_stat.attr, &dev_attr_mm_stat.attr, &dev_attr_debug_stat.attr, diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h index e34e44d02e3e..31762db861e3 100644 --- a/drivers/block/zram/zram_drv.h +++ b/drivers/block/zram/zram_drv.h @@ -60,9 +60,10 @@ static const size_t max_zpage_size = PAGE_SIZE / 4 * 3; /* Flags for zram pages (table[page_no].value) */ enum zram_pageflags { - /* Page consists entirely of zeros */ + /* Page consists the same element */ ZRAM_SAME = ZRAM_FLAG_SHIFT, ZRAM_ACCESS, /* page is now accessed */ + ZRAM_WB, /* page is stored on backing_device */ __NR_ZRAM_PAGEFLAGS, }; @@ -115,5 +116,13 @@ struct zram { * zram is claimed so open request will be failed */ bool claim; /* Protected by bdev->bd_mutex */ +#ifdef CONFIG_ZRAM_WRITEBACK + struct file *backing_dev; + struct block_device *bdev; + unsigned int old_block_size; + unsigned long *bitmap; + unsigned long nr_pages; + spinlock_t bitmap_lock; +#endif }; #endif diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index a36216bd2a84..e4d4b6b41e26 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -4308,10 +4308,10 @@ i915_drop_caches_set(void *data, u64 val) fs_reclaim_acquire(GFP_KERNEL); if (val & DROP_BOUND) - i915_gem_shrink(dev_priv, LONG_MAX, I915_SHRINK_BOUND); + i915_gem_shrink(dev_priv, LONG_MAX, NULL, I915_SHRINK_BOUND); if (val & DROP_UNBOUND) - i915_gem_shrink(dev_priv, LONG_MAX, I915_SHRINK_UNBOUND); + i915_gem_shrink(dev_priv, LONG_MAX, NULL, I915_SHRINK_UNBOUND); if (val & DROP_SHRINK_ALL) i915_gem_shrink_all(dev_priv); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 60267e375e88..bd74641ab7f6 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -3742,6 +3742,7 @@ i915_gem_object_create_internal(struct drm_i915_private *dev_priv, /* i915_gem_shrinker.c */ unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv, unsigned long target, + unsigned long *nr_scanned, unsigned flags); #define I915_SHRINK_PURGEABLE 0x1 #define I915_SHRINK_UNBOUND 0x2 diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index b9e8e0d6e97b..287c6ead95b3 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -2354,7 +2354,7 @@ rebuild_st: goto err_sg; } - i915_gem_shrink(dev_priv, 2 * page_count, *s++); + i915_gem_shrink(dev_priv, 2 * page_count, NULL, *s++); 
cond_resched(); /* We've tried hard to allocate the memory by reaping @@ -5015,7 +5015,7 @@ int i915_gem_freeze_late(struct drm_i915_private *dev_priv) * the objects as well, see i915_gem_freeze() */ - i915_gem_shrink(dev_priv, -1UL, I915_SHRINK_UNBOUND); + i915_gem_shrink(dev_priv, -1UL, NULL, I915_SHRINK_UNBOUND); i915_gem_drain_freed_objects(dev_priv); mutex_lock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index d60f38adc4c4..6c6b8e8592aa 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2062,7 +2062,7 @@ int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj, */ GEM_BUG_ON(obj->mm.pages == pages); } while (i915_gem_shrink(to_i915(obj->base.dev), - obj->base.size >> PAGE_SHIFT, + obj->base.size >> PAGE_SHIFT, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_ACTIVE)); diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c index 77fb39808131..74002b2d1b6f 100644 --- a/drivers/gpu/drm/i915/i915_gem_shrinker.c +++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c @@ -136,6 +136,7 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) * i915_gem_shrink - Shrink buffer object caches * @dev_priv: i915 device * @target: amount of memory to make available, in pages + * @nr_scanned: optional output for number of pages scanned (incremental) * @flags: control flags for selecting cache types * * This function is the main interface to the shrinker. It will try to release @@ -158,7 +159,9 @@ static bool unsafe_drop_pages(struct drm_i915_gem_object *obj) */ unsigned long i915_gem_shrink(struct drm_i915_private *dev_priv, - unsigned long target, unsigned flags) + unsigned long target, + unsigned long *nr_scanned, + unsigned flags) { const struct { struct list_head *list; @@ -169,6 +172,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, { NULL, 0 }, }, *phase; unsigned long count = 0; + unsigned long scanned = 0; bool unlock; if (!shrinker_lock(dev_priv, &unlock)) @@ -249,6 +253,7 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, count += obj->base.size >> PAGE_SHIFT; } mutex_unlock(&obj->mm.lock); + scanned += obj->base.size >> PAGE_SHIFT; } } list_splice_tail(&still_in_list, phase->list); @@ -261,6 +266,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv, shrinker_unlock(dev_priv, unlock); + if (nr_scanned) + *nr_scanned += scanned; return count; } @@ -283,7 +290,7 @@ unsigned long i915_gem_shrink_all(struct drm_i915_private *dev_priv) unsigned long freed; intel_runtime_pm_get(dev_priv); - freed = i915_gem_shrink(dev_priv, -1UL, + freed = i915_gem_shrink(dev_priv, -1UL, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_ACTIVE); @@ -329,23 +336,28 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) unsigned long freed; bool unlock; + sc->nr_scanned = 0; + if (!shrinker_lock(dev_priv, &unlock)) return SHRINK_STOP; freed = i915_gem_shrink(dev_priv, sc->nr_to_scan, + &sc->nr_scanned, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_PURGEABLE); if (freed < sc->nr_to_scan) freed += i915_gem_shrink(dev_priv, - sc->nr_to_scan - freed, + sc->nr_to_scan - sc->nr_scanned, + &sc->nr_scanned, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND); if (freed < sc->nr_to_scan && current_is_kswapd()) { intel_runtime_pm_get(dev_priv); freed += i915_gem_shrink(dev_priv, - sc->nr_to_scan - freed, + sc->nr_to_scan - sc->nr_scanned, + &sc->nr_scanned, I915_SHRINK_ACTIVE | I915_SHRINK_BOUND | 
I915_SHRINK_UNBOUND); @@ -354,7 +366,7 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc) shrinker_unlock(dev_priv, unlock); - return freed; + return sc->nr_scanned ? freed : SHRINK_STOP; } static bool @@ -453,7 +465,7 @@ i915_gem_shrinker_vmap(struct notifier_block *nb, unsigned long event, void *ptr goto out; intel_runtime_pm_get(dev_priv); - freed_pages += i915_gem_shrink(dev_priv, -1UL, + freed_pages += i915_gem_shrink(dev_priv, -1UL, NULL, I915_SHRINK_BOUND | I915_SHRINK_UNBOUND | I915_SHRINK_ACTIVE | diff --git a/drivers/nvdimm/btt.c b/drivers/nvdimm/btt.c index 14323faf8bd9..60491641a8d6 100644 --- a/drivers/nvdimm/btt.c +++ b/drivers/nvdimm/btt.c @@ -1241,8 +1241,10 @@ static int btt_rw_page(struct block_device *bdev, sector_t sector, { struct btt *btt = bdev->bd_disk->private_data; int rc; + unsigned int len; - rc = btt_do_bvec(btt, NULL, page, PAGE_SIZE, 0, is_write, sector); + len = hpage_nr_pages(page) * PAGE_SIZE; + rc = btt_do_bvec(btt, NULL, page, len, 0, is_write, sector); if (rc == 0) page_endio(page, is_write, 0); diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index f7099adaabc0..e9aa453da50c 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -80,22 +80,40 @@ static blk_status_t pmem_clear_poison(struct pmem_device *pmem, static void write_pmem(void *pmem_addr, struct page *page, unsigned int off, unsigned int len) { - void *mem = kmap_atomic(page); - - memcpy_flushcache(pmem_addr, mem + off, len); - kunmap_atomic(mem); + unsigned int chunk; + void *mem; + + while (len) { + mem = kmap_atomic(page); + chunk = min_t(unsigned int, len, PAGE_SIZE); + memcpy_flushcache(pmem_addr, mem + off, chunk); + kunmap_atomic(mem); + len -= chunk; + off = 0; + page++; + pmem_addr += PAGE_SIZE; + } } static blk_status_t read_pmem(struct page *page, unsigned int off, void *pmem_addr, unsigned int len) { + unsigned int chunk; int rc; - void *mem = kmap_atomic(page); - - rc = memcpy_mcsafe(mem + off, pmem_addr, len); - kunmap_atomic(mem); - if (rc) - return BLK_STS_IOERR; + void *mem; + + while (len) { + mem = kmap_atomic(page); + chunk = min_t(unsigned int, len, PAGE_SIZE); + rc = memcpy_mcsafe(mem + off, pmem_addr, chunk); + kunmap_atomic(mem); + if (rc) + return BLK_STS_IOERR; + len -= chunk; + off = 0; + page++; + pmem_addr += PAGE_SIZE; + } return BLK_STS_OK; } @@ -188,7 +206,8 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, struct pmem_device *pmem = bdev->bd_queue->queuedata; blk_status_t rc; - rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, is_write, sector); + rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE, + 0, is_write, sector); /* * The ->rw_page interface is subtle and tricky. The core |