commit f6d2b802f80d0ca89ee1f51c1781b3f79cdb25d5
author:    Dan Williams <dan.j.williams@intel.com>  2020-04-02 19:55:17 -0700
committer: Dan Williams <dan.j.williams@intel.com>  2020-04-02 19:55:17 -0700
tree:      fe4dfcaafe847b71ebd040f348c0a2ee8206b38a  /drivers/nvdimm
parent:    d3b88655c0a157c11370b8faf50e82ecb1c17d54
parent:    4e4ced93794acb42adb19484132966defba8f3a6
Merge branch 'for-5.7/libnvdimm' into libnvdimm-for-next
- Introduce 'zero_page_range' as a dax operation. This facilitates
  filesystem-dax operation without a block device (a hypothetical
  driver-side sketch follows this list).
- Advertise a persistence domain for of_pmem and papr_scm. The
  persistence domain indicates where CPU store cycles need to reach in
  the platform-memory subsystem before the platform will consider them
  power-fail protected (see the second sketch below).
- Fix up some flexible-array declarations (see the third sketch below).
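On the first item: a dax driver opts in by populating the new zero_page_range
callback in its struct dax_operations. Below is a minimal, hypothetical sketch;
the foo_* names and the direct-mapped virt_addr field are invented for
illustration, and the in-tree pmem implementation appears in the diff further
down.

```c
/* Hypothetical sketch: a dax driver wiring up the new callback.
 * The foo_* names and foo_device layout are invented; see the
 * pmem.c hunk below for the in-tree implementation. */
static int foo_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
				   size_t nr_pages)
{
	struct foo_device *foo = dax_get_private(dax_dev);

	/* Zero nr_pages pages of direct-mapped memory at pgoff. */
	memset(foo->virt_addr + PFN_PHYS(pgoff), 0, nr_pages * PAGE_SIZE);
	return 0;
}

static const struct dax_operations foo_dax_ops = {
	.direct_access	 = foo_dax_direct_access,
	.copy_from_iter	 = foo_dax_copy_from_iter,
	.copy_to_iter	 = foo_dax_copy_to_iter,
	.zero_page_range = foo_dax_zero_page_range,
};
```

On the second item: a region driver declares its persistence domain by setting
a flag on the region descriptor before creating the region, which is exactly
what the of_pmem.c hunk below does. A condensed sketch, with the surrounding
descriptor setup abridged:

```c
/* Condensed from the of_pmem.c hunk below; setup of ndr_desc omitted.
 * ND_REGION_PERSIST_MEMCTRL: stores are power-fail protected once they
 * reach the memory controller. (ND_REGION_PERSIST_CACHE is the stronger
 * claim that contents of the CPU cache are also protected.) */
struct nd_region_desc ndr_desc = { /* ... res, numa_node, ... */ };
struct nd_region *region;

set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
region = nvdimm_pmem_region_create(bus, &ndr_desc);
if (!region)
	dev_warn(&pdev->dev, "region registration failed\n");
```

On the third item: the cleanup converts zero-length arrays (a GCC extension,
member[0]) to C99 flexible array members (member[]), which lets the compiler
flag accidental misuse; behavior is unchanged. Allocation sizing typically
pairs with struct_size(). A generic sketch with an invented struct name:

```c
#include <linux/overflow.h>
#include <linux/slab.h>

/* Invented example struct; mirrors the label.h/nd.h conversions below. */
struct example {
	u16	count;
	u8	data[];		/* was: u8 data[0]; */
};

/* struct_size(e, data, n) == sizeof(*e) + n * sizeof(e->data[0]),
 * with integer-overflow checking. */
struct example *example_alloc(size_t n)
{
	struct example *e = kzalloc(struct_size(e, data, n), GFP_KERNEL);

	if (e)
		e->count = n;
	return e;
}
```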
Diffstat (limited to 'drivers/nvdimm')
-rw-r--r--  drivers/nvdimm/label.h   |   2
-rw-r--r--  drivers/nvdimm/nd.h      |   4
-rw-r--r--  drivers/nvdimm/of_pmem.c |   4
-rw-r--r--  drivers/nvdimm/pmem.c    | 101
4 files changed, 69 insertions(+), 42 deletions(-)
diff --git a/drivers/nvdimm/label.h b/drivers/nvdimm/label.h
index 4c7b775c2811..956b6d1bd8cc 100644
--- a/drivers/nvdimm/label.h
+++ b/drivers/nvdimm/label.h
@@ -62,7 +62,7 @@ struct nd_namespace_index {
 	__le16 major;
 	__le16 minor;
 	__le64 checksum;
-	u8 free[0];
+	u8 free[];
 };
 
 /**
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index c4d69c1cce55..85dbb2a322b9 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -39,7 +39,7 @@ struct nd_region_data {
 	int ns_count;
 	int ns_active;
 	unsigned int hints_shift;
-	void __iomem *flush_wpq[0];
+	void __iomem *flush_wpq[];
 };
 
 static inline void __iomem *ndrd_get_flush_wpq(struct nd_region_data *ndrd,
@@ -157,7 +157,7 @@ struct nd_region {
 	struct nd_interleave_set *nd_set;
 	struct nd_percpu_lane __percpu *lane;
 	int (*flush)(struct nd_region *nd_region, struct bio *bio);
-	struct nd_mapping mapping[0];
+	struct nd_mapping mapping[];
 };
 
 struct nd_blk_region {
diff --git a/drivers/nvdimm/of_pmem.c b/drivers/nvdimm/of_pmem.c
index 8224d1431ea9..6826a274a1f1 100644
--- a/drivers/nvdimm/of_pmem.c
+++ b/drivers/nvdimm/of_pmem.c
@@ -62,8 +62,10 @@ static int of_pmem_region_probe(struct platform_device *pdev)
 
 		if (is_volatile)
 			region = nvdimm_volatile_region_create(bus, &ndr_desc);
-		else
+		else {
+			set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);
 			region = nvdimm_pmem_region_create(bus, &ndr_desc);
+		}
 
 		if (!region)
 			dev_warn(&pdev->dev, "Unable to register region %pR from %pOF\n",
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 4eae441f86c9..715cb0696525 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -136,9 +136,25 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
 	return BLK_STS_OK;
 }
 
-static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
-			unsigned int len, unsigned int off, unsigned int op,
-			sector_t sector)
+static blk_status_t pmem_do_read(struct pmem_device *pmem,
+			struct page *page, unsigned int page_off,
+			sector_t sector, unsigned int len)
+{
+	blk_status_t rc;
+	phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
+	void *pmem_addr = pmem->virt_addr + pmem_off;
+
+	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
+		return BLK_STS_IOERR;
+
+	rc = read_pmem(page, page_off, pmem_addr, len);
+	flush_dcache_page(page);
+	return rc;
+}
+
+static blk_status_t pmem_do_write(struct pmem_device *pmem,
+			struct page *page, unsigned int page_off,
+			sector_t sector, unsigned int len)
 {
 	blk_status_t rc = BLK_STS_OK;
 	bool bad_pmem = false;
@@ -148,34 +164,25 @@ static blk_status_t pmem_do_bvec(struct pmem_device *pmem, struct page *page,
 	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
 		bad_pmem = true;
 
-	if (!op_is_write(op)) {
-		if (unlikely(bad_pmem))
-			rc = BLK_STS_IOERR;
-		else {
-			rc = read_pmem(page, off, pmem_addr, len);
-			flush_dcache_page(page);
-		}
-	} else {
-		/*
-		 * Note that we write the data both before and after
-		 * clearing poison.  The write before clear poison
-		 * handles situations where the latest written data is
-		 * preserved and the clear poison operation simply marks
-		 * the address range as valid without changing the data.
-		 * In this case application software can assume that an
-		 * interrupted write will either return the new good
-		 * data or an error.
-		 *
-		 * However, if pmem_clear_poison() leaves the data in an
-		 * indeterminate state we need to perform the write
-		 * after clear poison.
-		 */
-		flush_dcache_page(page);
-		write_pmem(pmem_addr, page, off, len);
-		if (unlikely(bad_pmem)) {
-			rc = pmem_clear_poison(pmem, pmem_off, len);
-			write_pmem(pmem_addr, page, off, len);
-		}
+	/*
+	 * Note that we write the data both before and after
+	 * clearing poison.  The write before clear poison
+	 * handles situations where the latest written data is
+	 * preserved and the clear poison operation simply marks
+	 * the address range as valid without changing the data.
+	 * In this case application software can assume that an
+	 * interrupted write will either return the new good
+	 * data or an error.
+	 *
+	 * However, if pmem_clear_poison() leaves the data in an
+	 * indeterminate state we need to perform the write
+	 * after clear poison.
+	 */
+	flush_dcache_page(page);
+	write_pmem(pmem_addr, page, page_off, len);
+	if (unlikely(bad_pmem)) {
+		rc = pmem_clear_poison(pmem, pmem_off, len);
+		write_pmem(pmem_addr, page, page_off, len);
 	}
 
 	return rc;
@@ -197,8 +204,12 @@ static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
 
 	do_acct = nd_iostat_start(bio, &start);
 	bio_for_each_segment(bvec, bio, iter) {
-		rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
-				bvec.bv_offset, bio_op(bio), iter.bi_sector);
+		if (op_is_write(bio_op(bio)))
+			rc = pmem_do_write(pmem, bvec.bv_page, bvec.bv_offset,
+				iter.bi_sector, bvec.bv_len);
+		else
+			rc = pmem_do_read(pmem, bvec.bv_page, bvec.bv_offset,
+				iter.bi_sector, bvec.bv_len);
 		if (rc) {
 			bio->bi_status = rc;
 			break;
@@ -223,9 +234,12 @@ static int pmem_rw_page(struct block_device *bdev, sector_t sector,
 	struct pmem_device *pmem = bdev->bd_queue->queuedata;
 	blk_status_t rc;
 
-	rc = pmem_do_bvec(pmem, page, hpage_nr_pages(page) * PAGE_SIZE,
-			  0, op, sector);
-
+	if (op_is_write(op))
+		rc = pmem_do_write(pmem, page, 0, sector,
+				   hpage_nr_pages(page) * PAGE_SIZE);
+	else
+		rc = pmem_do_read(pmem, page, 0, sector,
+				   hpage_nr_pages(page) * PAGE_SIZE);
 	/*
 	 * The ->rw_page interface is subtle and tricky.  The core
 	 * retries on any error, so we can only invoke page_endio() in
@@ -268,6 +282,16 @@ static const struct block_device_operations pmem_fops = {
 	.revalidate_disk =	nvdimm_revalidate_disk,
 };
 
+static int pmem_dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
+				    size_t nr_pages)
+{
+	struct pmem_device *pmem = dax_get_private(dax_dev);
+
+	return blk_status_to_errno(pmem_do_write(pmem, ZERO_PAGE(0), 0,
+				   PFN_PHYS(pgoff) >> SECTOR_SHIFT,
+				   PAGE_SIZE));
+}
+
 static long pmem_dax_direct_access(struct dax_device *dax_dev,
 		pgoff_t pgoff, long nr_pages, void **kaddr, pfn_t *pfn)
 {
@@ -299,6 +323,7 @@ static const struct dax_operations pmem_dax_ops = {
 	.dax_supported = generic_fsdax_supported,
 	.copy_from_iter = pmem_copy_from_iter,
 	.copy_to_iter = pmem_copy_to_iter,
+	.zero_page_range = pmem_dax_zero_page_range,
 };
 
 static const struct attribute_group *pmem_attribute_groups[] = {
@@ -462,9 +487,9 @@ static int pmem_attach_disk(struct device *dev,
 	if (is_nvdimm_sync(nd_region))
 		flags = DAXDEV_F_SYNC;
 	dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops, flags);
-	if (!dax_dev) {
+	if (IS_ERR(dax_dev)) {
 		put_disk(disk);
-		return -ENOMEM;
+		return PTR_ERR(dax_dev);
 	}
 	dax_write_cache(dax_dev, nvdimm_has_cache(nd_region));
 	pmem->dax_dev = dax_dev;
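For completeness, consumers are expected to reach the new operation through
the dax core rather than calling the driver op directly. A hedged caller-side
sketch, assuming the v5.7-era dax_zero_page_range() helper (which at this
point in the series handled one page per call); the wrapper function name is
invented:

```c
#include <linux/dax.h>

/* Sketch: zero one DAX page at page offset pgoff via the dax core,
 * which dispatches to the driver's ->zero_page_range()
 * (pmem_dax_zero_page_range() above, in the pmem case). */
int zero_one_dax_page(struct dax_device *dax_dev, pgoff_t pgoff)
{
	return dax_zero_page_range(dax_dev, pgoff, 1);
}
```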