diff options
author | Dan Williams <dan.j.williams@intel.com> | 2020-04-02 19:55:17 -0700 |
---|---|---|
committer | Dan Williams <dan.j.williams@intel.com> | 2020-04-02 19:55:17 -0700 |
commit | f6d2b802f80d0ca89ee1f51c1781b3f79cdb25d5 (patch) | |
tree | fe4dfcaafe847b71ebd040f348c0a2ee8206b38a /drivers | |
parent | d3b88655c0a157c11370b8faf50e82ecb1c17d54 (diff) | |
parent | 4e4ced93794acb42adb19484132966defba8f3a6 (diff) | |
download | linux-f6d2b802f80d0ca89ee1f51c1781b3f79cdb25d5.tar.bz2 |
Merge branch 'for-5.7/libnvdimm' into libnvdimm-for-next
- Introduce 'zero_page_range' as a dax operation. This facilitates
filesystem-dax operation without a block-device.
- Advertise a persistence-domain for of_pmem and papr_scm. The
persistence domain indicates where cpu-store cycles need to reach in
the platform-memory subsystem before the platform will consider them
power-fail protected.
- Fixup some flexible-array declarations.
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/acpi/nfit/nfit.h | 12 | ||||
-rw-r--r-- | drivers/dax/bus.c | 4 | ||||
-rw-r--r-- | drivers/dax/super.c | 28 | ||||
-rw-r--r-- | drivers/md/dm-linear.c | 18 | ||||
-rw-r--r-- | drivers/md/dm-log-writes.c | 17 | ||||
-rw-r--r-- | drivers/md/dm-stripe.c | 23 | ||||
-rw-r--r-- | drivers/md/dm.c | 32 | ||||
-rw-r--r-- | drivers/nvdimm/label.h | 2 | ||||
-rw-r--r-- | drivers/nvdimm/nd.h | 4 | ||||
-rw-r--r-- | drivers/nvdimm/of_pmem.c | 4 | ||||
-rw-r--r-- | drivers/nvdimm/pmem.c | 101 | ||||
-rw-r--r-- | drivers/s390/block/dcssblk.c | 20 |
12 files changed, 211 insertions, 54 deletions
diff --git a/drivers/acpi/nfit/nfit.h b/drivers/acpi/nfit/nfit.h index b317f4043705..f5525f8bb770 100644 --- a/drivers/acpi/nfit/nfit.h +++ b/drivers/acpi/nfit/nfit.h @@ -145,32 +145,32 @@ struct nfit_spa { unsigned long ars_state; u32 clear_err_unit; u32 max_ars; - struct acpi_nfit_system_address spa[0]; + struct acpi_nfit_system_address spa[]; }; struct nfit_dcr { struct list_head list; - struct acpi_nfit_control_region dcr[0]; + struct acpi_nfit_control_region dcr[]; }; struct nfit_bdw { struct list_head list; - struct acpi_nfit_data_region bdw[0]; + struct acpi_nfit_data_region bdw[]; }; struct nfit_idt { struct list_head list; - struct acpi_nfit_interleave idt[0]; + struct acpi_nfit_interleave idt[]; }; struct nfit_flush { struct list_head list; - struct acpi_nfit_flush_address flush[0]; + struct acpi_nfit_flush_address flush[]; }; struct nfit_memdev { struct list_head list; - struct acpi_nfit_memory_map memdev[0]; + struct acpi_nfit_memory_map memdev[]; }; enum nfit_mem_flags { diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c index 46e46047a1f7..df238c8b6ef2 100644 --- a/drivers/dax/bus.c +++ b/drivers/dax/bus.c @@ -421,8 +421,10 @@ struct dev_dax *__devm_create_dev_dax(struct dax_region *dax_region, int id, * device outside of mmap of the resulting character device. */ dax_dev = alloc_dax(dev_dax, NULL, NULL, DAXDEV_F_SYNC); - if (!dax_dev) + if (IS_ERR(dax_dev)) { + rc = PTR_ERR(dax_dev); goto err; + } /* a device_dax instance is dead while the driver is not attached */ kill_dax(dax_dev); diff --git a/drivers/dax/super.c b/drivers/dax/super.c index 0aa4b6bc5101..8e32345be0f7 100644 --- a/drivers/dax/super.c +++ b/drivers/dax/super.c @@ -344,6 +344,23 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, void *addr, } EXPORT_SYMBOL_GPL(dax_copy_to_iter); +int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, + size_t nr_pages) +{ + if (!dax_alive(dax_dev)) + return -ENXIO; + /* + * There are no callers that want to zero more than one page as of now. + * Once users are there, this check can be removed after the + * device mapper code has been updated to split ranges across targets. + */ + if (nr_pages != 1) + return -EIO; + + return dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages); +} +EXPORT_SYMBOL_GPL(dax_zero_page_range); + #ifdef CONFIG_ARCH_HAS_PMEM_API void arch_wb_cache_pmem(void *addr, size_t size); void dax_flush(struct dax_device *dax_dev, void *addr, size_t size) @@ -551,9 +568,16 @@ struct dax_device *alloc_dax(void *private, const char *__host, dev_t devt; int minor; + if (ops && !ops->zero_page_range) { + pr_debug("%s: error: device does not provide dax" + " operation zero_page_range()\n", + __host ? __host : "Unknown"); + return ERR_PTR(-EINVAL); + } + host = kstrdup(__host, GFP_KERNEL); if (__host && !host) - return NULL; + return ERR_PTR(-ENOMEM); minor = ida_simple_get(&dax_minor_ida, 0, MINORMASK+1, GFP_KERNEL); if (minor < 0) @@ -576,7 +600,7 @@ struct dax_device *alloc_dax(void *private, const char *__host, ida_simple_remove(&dax_minor_ida, minor); err_minor: kfree(host); - return NULL; + return ERR_PTR(-ENOMEM); } EXPORT_SYMBOL_GPL(alloc_dax); diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 8d07fdf63a47..e1db43446327 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -201,10 +201,27 @@ static size_t linear_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); } +static int linear_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, + size_t nr_pages) +{ + int ret; + struct linear_c *lc = ti->private; + struct block_device *bdev = lc->dev->bdev; + struct dax_device *dax_dev = lc->dev->dax_dev; + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + + dev_sector = linear_map_sector(ti, sector); + ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, &pgoff); + if (ret) + return ret; + return dax_zero_page_range(dax_dev, pgoff, nr_pages); +} + #else #define linear_dax_direct_access NULL #define linear_dax_copy_from_iter NULL #define linear_dax_copy_to_iter NULL +#define linear_dax_zero_page_range NULL #endif static struct target_type linear_target = { @@ -226,6 +243,7 @@ static struct target_type linear_target = { .direct_access = linear_dax_direct_access, .dax_copy_from_iter = linear_dax_copy_from_iter, .dax_copy_to_iter = linear_dax_copy_to_iter, + .dax_zero_page_range = linear_dax_zero_page_range, }; int __init dm_linear_init(void) diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 99721c76225d..8ea20b56b4d6 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -994,10 +994,26 @@ static size_t log_writes_dax_copy_to_iter(struct dm_target *ti, return dax_copy_to_iter(lc->dev->dax_dev, pgoff, addr, bytes, i); } +static int log_writes_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, + size_t nr_pages) +{ + int ret; + struct log_writes_c *lc = ti->private; + sector_t sector = pgoff * PAGE_SECTORS; + + ret = bdev_dax_pgoff(lc->dev->bdev, sector, nr_pages << PAGE_SHIFT, + &pgoff); + if (ret) + return ret; + return dax_zero_page_range(lc->dev->dax_dev, pgoff, + nr_pages << PAGE_SHIFT); +} + #else #define log_writes_dax_direct_access NULL #define log_writes_dax_copy_from_iter NULL #define log_writes_dax_copy_to_iter NULL +#define log_writes_dax_zero_page_range NULL #endif static struct target_type log_writes_target = { @@ -1016,6 +1032,7 @@ static struct target_type log_writes_target = { .direct_access = log_writes_dax_direct_access, .dax_copy_from_iter = log_writes_dax_copy_from_iter, .dax_copy_to_iter = log_writes_dax_copy_to_iter, + .dax_zero_page_range = log_writes_dax_zero_page_range, }; static int __init dm_log_writes_init(void) diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index 63bbcc20f49a..fa813c0f993d 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -360,10 +360,32 @@ static size_t stripe_dax_copy_to_iter(struct dm_target *ti, pgoff_t pgoff, return dax_copy_to_iter(dax_dev, pgoff, addr, bytes, i); } +static int stripe_dax_zero_page_range(struct dm_target *ti, pgoff_t pgoff, + size_t nr_pages) +{ + int ret; + sector_t dev_sector, sector = pgoff * PAGE_SECTORS; + struct stripe_c *sc = ti->private; + struct dax_device *dax_dev; + struct block_device *bdev; + uint32_t stripe; + + stripe_map_sector(sc, sector, &stripe, &dev_sector); + dev_sector += sc->stripe[stripe].physical_start; + dax_dev = sc->stripe[stripe].dev->dax_dev; + bdev = sc->stripe[stripe].dev->bdev; + + ret = bdev_dax_pgoff(bdev, dev_sector, nr_pages << PAGE_SHIFT, &pgoff); + if (ret) + return ret; + return dax_zero_page_range(dax_dev, pgoff, nr_pages); +} + #else #define stripe_dax_direct_access NULL #define stripe_dax_copy_from_iter NULL #define stripe_dax_copy_to_iter NULL +#define stripe_dax_zero_page_range NULL #endif /* @@ -486,6 +508,7 @@ static struct target_type stripe_target = { .direct_access = stripe_dax_direct_access, .dax_copy_from_iter = stripe_dax_copy_from_iter, .dax_copy_to_iter = stripe_dax_copy_to_iter, + .dax_zero_page_range = stripe_dax_zero_page_range, }; int __init dm_stripe_init(void) diff --git a/drivers/md/dm.c b/drivers/md/dm.c index b89f07ee2eff..6504f0a358e5 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1198,6 +1198,35 @@ static size_t dm_dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff, return ret; } +static int dm_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, + size_t nr_pages) +{ + struct mapped_device *md = dax_get_private(dax_dev); + sector_t sector = pgoff * PAGE_SECTORS; + struct dm_target *ti; + int ret = -EIO; + int srcu_idx; + + ti = dm_dax_get_live_target(md, sector, &srcu_idx); + + if (!ti) + goto out; + if (WARN_ON(!ti->type->dax_zero_page_range)) { + /* + * ->zero_page_range() is mandatory dax operation. If we are + * here, something is wrong. + */ + dm_put_live_table(md, srcu_idx); + goto out; + } + ret = ti->type->dax_zero_page_range(ti, pgoff, nr_pages); + + out: + dm_put_live_table(md, srcu_idx); + + return ret; +} + /* * A target may call dm_accept_partial_bio only from the map routine. It is * allowed for all bio types except REQ_PREFLUSH, REQ_OP_ZONE_RESET, @@ -1976,7 +2005,7 @@ static struct mapped_device *alloc_dev(int minor) if (IS_ENABLED(CONFIG_DAX_DRIVER)) { md->dax_dev = alloc_dax(md, md->disk->disk_name, &dm_dax_ops, 0); - if (!md->dax_dev) + if (IS_ERR(md->dax_dev)) goto bad; } @@ -3199,6 +3228,7 @@ static const struct dax_operations dm_dax_ops = { .dax_supported = dm_dax_supported, .copy_from_iter = dm_dax_copy_from_iter, .copy_to_iter = dm_dax_copy_to_iter, + .zero_page_range = dm_dax_zero_page_range, }; /* diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h index 4c7b775c2811..956b6d1bd8cc 100644 --- a/drivers/nvdimm/label.h +++ b/drivers/nvdimm/label.h @@ -62,7 +62,7 @@ struct nd_namespace_index { __le16 major; __le16 minor; __le64 checksum; - u8 free[0]; + u8 free[]; }; /** diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h index c4d69c1cce55..85dbb2a322b9 100644 --- a/drivers/nvdimm/nd.h +++ b/drivers/nvdimm/nd.h @@ -39,7 +39,7 @@ struct nd_region_data { int ns_count; int ns_active; unsigned int hints_shift; - void __iomem *flush_wpq[0]; + void __iomem *flush_wpq[]; }; static inline void __iomem *ndrd_get_flush_wpq(struct nd_region_data *ndrd, @@ -157,7 +157,7 @@ struct nd_region { struct nd_interleave_set *nd_set; struct nd_percpu_lane __percpu *lane; int (*flush)(struct nd_region *nd_region, struct bio *bio); - struct nd_mapping mapping[0]; + struct nd_mapping mapping[]; }; struct nd_blk_region { diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c index 8224d1431ea9..6826a274a1f1 100644 --- a/drivers/nvdimm/of_pmem.c +++ b/drivers/nvdimm/of_pmem.c @@ -62,8 +62,10 @@ static int of_pmem_region_probe(struct platform_device *pdev) if (is_volatile) region = nvdimm_volatile_region_create(bus, &ndr_desc); - else + else { + set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags); region = nvdimm_pmem_region_create(bus, &ndr_desc); + } if (!region) dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n", diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c index 4eae441f86c9..715cb0696525 100644 --- a/drivers/nvdimm/pmem.c +++ b/drivers/nvdimm/pmem.c @@ -136,9 +136,25 @@ static blk_status_t read_pmem(struct page *page, unsigned int off, return BLK_STS_OK; } -static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, - unsigned int len, unsigned int off, unsigned int op, - sector_t sector) +static blk_status_t pmem_do_read(struct pmem_device *pmem, + struct page *page, unsigned int page_off, + sector_t sector, unsigned int len) +{ + blk_status_t rc; + phys_addr_t pmem_off = sector * 512 + pmem->data_offset; + void *pmem_addr = pmem->virt_addr + pmem_off; + + if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) + return BLK_STS_IOERR; + + rc = read_pmem(page, page_off, pmem_addr, len); + flush_dcache_page(page); + return rc; +} + +static blk_status_t pmem_do_write(struct pmem_device *pmem, + struct page *page, unsigned int page_off, + sector_t sector, unsigned int len) { blk_status_t rc = BLK_STS_OK; bool bad_pmem = false; @@ -148,34 +164,25 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page, if (unlikely(is_bad_pmem(&pmem->bb, sector, len))) bad_pmem = true; - if (!op_is_write(op)) { - if (unlikely(bad_pmem)) - rc = BLK_STS_IOERR; - else { - rc = read_pmem(page, off, pmem_addr, len); - flush_dcache_page(page); - } - } else { - /* - * Note that we write the data both before and after - * clearing poison. The write before clear poison - * handles situations where the latest written data is - * preserved and the clear poison operation simply marks - * the address range as valid without changing the data. - * In this case application software can assume that an - * interrupted write will either return the new good - * data or an error. - * - * However, if pmem_clear_poison() leaves the data in an - * indeterminate state we need to perform the write - * after clear poison. - */ - flush_dcache_page(page); - write_pmem(pmem_addr, page, off, len); - if (unlikely(bad_pmem)) { - rc = pmem_clear_poison(pmem, pmem_off, len); - write_pmem(pmem_addr, page, off, len); - } + /* + * Note that we write the data both before and after + * clearing poison. The write before clear poison + * handles situations where the latest written data is + * preserved and the clear poison operation simply marks + * the address range as valid without changing the data. + * In this case application software can assume that an + * interrupted write will either return the new good + * data or an error. + * + * However, if pmem_clear_poison() leaves the data in an + * indeterminate state we need to perform the write + * after clear poison. + */ + flush_dcache_page(page); + write_pmem(pmem_addr, page, page_off, len); + if (unlikely(bad_pmem)) { + rc = pmem_clear_poison(pmem, pmem_off, len); + write_pmem(pmem_addr, page, page_off, len); } return rc; @@ -197,8 +204,12 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio) do_acct = nd_iostat_start(bio, &start); bio_for_each_segment(bvec, bio, iter) { - rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, - bvec.bv_offset, bio_op(bio), iter.bi_sector); + if (op_is_write(bio_op(bio))) + rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset, + iter.bi_sector, bvec.bv_len); + else + rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset, + iter.bi_sector, bvec.bv_len); if (rc) { bio->bi_status = rc; break; @@ -223,9 +234,12 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector, struct pmem_device *pmem = bdev->bd_queue->queuedata; blk_status_t rc; - rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE, - 0, op, sector); - + if (op_is_write(op)) + rc = pmem_do_write(pmem, page, 0, sector, + hpage_nr_pages(page) * PAGE_SIZE); + else + rc = pmem_do_read(pmem, page, 0, sector, + hpage_nr_pages(page) * PAGE_SIZE); /* * The ->rw_page interface is subtle and tricky. The core * retries on any error, so we can only invoke page_endio() in @@ -268,6 +282,16 @@ static const struct block_device_operations pmem_fops = { .revalidate_disk = nvdimm_revalidate_disk, }; +static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff, + size_t nr_pages) +{ + struct pmem_device *pmem = dax_get_private(dax_dev); + + return blk_status_to_errno(pmem_do_write(pmem, ZERO_PAGE(0), 0, + PFN_PHYS(pgoff) >> SECTOR_SHIFT, + PAGE_SIZE)); +} + static long pmem_dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn) { @@ -299,6 +323,7 @@ static const struct dax_operations pmem_dax_ops = { .dax_supported = generic_fsdax_supported, .copy_from_iter = pmem_copy_from_iter, .copy_to_iter = pmem_copy_to_iter, + .zero_page_range = pmem_dax_zero_page_range, }; static const struct attribute_group *pmem_attribute_groups[] = { @@ -462,9 +487,9 @@ static int pmem_attach_disk(struct device *dev, if (is_nvdimm_sync(nd_region)) flags = DAXDEV_F_SYNC; dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags); - if (!dax_dev) { + if (IS_ERR(dax_dev)) { put_disk(disk); - return -ENOMEM; + return PTR_ERR(dax_dev); } dax_write_cache(dax_dev, nvdimm_has_cache(nd_region)); pmem->dax_dev = dax_dev; diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c index 63502ca537eb..9b05cf1a25c4 100644 --- a/drivers/s390/block/dcssblk.c +++ b/drivers/s390/block/dcssblk.c @@ -57,11 +57,26 @@ static size_t dcssblk_dax_copy_to_iter(struct dax_device *dax_dev, return copy_to_iter(addr, bytes, i); } +static int dcssblk_dax_zero_page_range(struct dax_device *dax_dev, + pgoff_t pgoff, size_t nr_pages) +{ + long rc; + void *kaddr; + + rc = dax_direct_access(dax_dev, pgoff, nr_pages, &kaddr, NULL); + if (rc < 0) + return rc; + memset(kaddr, 0, nr_pages << PAGE_SHIFT); + dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT); + return 0; +} + static const struct dax_operations dcssblk_dax_ops = { .direct_access = dcssblk_dax_direct_access, .dax_supported = generic_fsdax_supported, .copy_from_iter = dcssblk_dax_copy_from_iter, .copy_to_iter = dcssblk_dax_copy_to_iter, + .zero_page_range = dcssblk_dax_zero_page_range, }; struct dcssblk_dev_info { @@ -680,8 +695,9 @@ dcssblk_add_store(struct device *dev, struct device_attribute *attr, const char dev_info->dax_dev = alloc_dax(dev_info, dev_info->gd->disk_name, &dcssblk_dax_ops, DAXDEV_F_SYNC); - if (!dev_info->dax_dev) { - rc = -ENOMEM; + if (IS_ERR(dev_info->dax_dev)) { + rc = PTR_ERR(dev_info->dax_dev); + dev_info->dax_dev = NULL; goto put_dev; } |