summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/powerpc/Kconfig1
-rw-r--r--arch/powerpc/mm/ioremap.c21
-rw-r--r--arch/powerpc/platforms/pseries/papr_scm.c2
-rw-r--r--drivers/acpi/nfit/core.c4
-rw-r--r--drivers/nvdimm/dimm.c2
-rw-r--r--drivers/nvdimm/dimm_devs.c95
-rw-r--r--drivers/nvdimm/namespace_devs.c28
-rw-r--r--drivers/nvdimm/nd.h3
-rw-r--r--drivers/nvdimm/pfn.h12
-rw-r--r--drivers/nvdimm/pfn_devs.c40
-rw-r--r--drivers/nvdimm/region_devs.c130
-rw-r--r--include/linux/io.h2
-rw-r--r--include/linux/libnvdimm.h2
-rw-r--r--include/linux/memremap.h8
-rw-r--r--include/linux/mmzone.h1
-rw-r--r--lib/Kconfig3
-rw-r--r--mm/memremap.c23
17 files changed, 328 insertions, 49 deletions
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 497b7d0b2d7e..e6ffe905e2b9 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -122,6 +122,7 @@ config PPC
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_KCOV
select ARCH_HAS_HUGEPD if HUGETLB_PAGE
+ select ARCH_HAS_MEMREMAP_COMPAT_ALIGN
select ARCH_HAS_MMIOWB if PPC64
select ARCH_HAS_PHYS_TO_DMA
select ARCH_HAS_PMEM_API
diff --git a/arch/powerpc/mm/ioremap.c b/arch/powerpc/mm/ioremap.c
index fc669643ce6a..b1a0aebe8c48 100644
--- a/arch/powerpc/mm/ioremap.c
+++ b/arch/powerpc/mm/ioremap.c
@@ -2,6 +2,7 @@
#include <linux/io.h>
#include <linux/slab.h>
+#include <linux/mmzone.h>
#include <linux/vmalloc.h>
#include <asm/io-workarounds.h>
@@ -97,3 +98,23 @@ void __iomem *do_ioremap(phys_addr_t pa, phys_addr_t offset, unsigned long size,
return NULL;
}
+
+#ifdef CONFIG_ZONE_DEVICE
+/*
+ * Override the generic version in mm/memremap.c.
+ *
+ * With hash translation, the direct-map range is mapped with just one
+ * page size selected by htab_init_page_sizes(). Consult
+ * mmu_psize_defs[] to determine the minimum page size alignment.
+*/
+unsigned long memremap_compat_align(void)
+{
+ unsigned int shift = mmu_psize_defs[mmu_linear_psize].shift;
+
+ if (radix_enabled())
+ return SUBSECTION_SIZE;
+ return max(SUBSECTION_SIZE, 1UL << shift);
+
+}
+EXPORT_SYMBOL_GPL(memremap_compat_align);
+#endif
diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c
index 0b4467e378e5..589858cb3203 100644
--- a/arch/powerpc/platforms/pseries/papr_scm.c
+++ b/arch/powerpc/platforms/pseries/papr_scm.c
@@ -328,7 +328,7 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p)
}
dimm_flags = 0;
- set_bit(NDD_ALIASING, &dimm_flags);
+ set_bit(NDD_LABELING, &dimm_flags);
p->nvdimm = nvdimm_create(p->bus, p, NULL, dimm_flags,
PAPR_SCM_DIMM_CMD_MASK, 0, NULL);
diff --git a/drivers/acpi/nfit/core.c b/drivers/acpi/nfit/core.c
index d0090f71585c..fa4500f9cfd1 100644
--- a/drivers/acpi/nfit/core.c
+++ b/drivers/acpi/nfit/core.c
@@ -2027,8 +2027,10 @@ static int acpi_nfit_register_dimms(struct acpi_nfit_desc *acpi_desc)
continue;
}
- if (nfit_mem->bdw && nfit_mem->memdev_pmem)
+ if (nfit_mem->bdw && nfit_mem->memdev_pmem) {
set_bit(NDD_ALIASING, &flags);
+ set_bit(NDD_LABELING, &flags);
+ }
/* collate flags across all memdevs for this dimm */
list_for_each_entry(nfit_memdev, &acpi_desc->memdevs, list) {
diff --git a/drivers/nvdimm/dimm.c b/drivers/nvdimm/dimm.c
index 64776ed15bb3..7d4ddc4d9322 100644
--- a/drivers/nvdimm/dimm.c
+++ b/drivers/nvdimm/dimm.c
@@ -99,7 +99,7 @@ static int nvdimm_probe(struct device *dev)
if (ndd->ns_current >= 0) {
rc = nd_label_reserve_dpa(ndd);
if (rc == 0)
- nvdimm_set_aliasing(dev);
+ nvdimm_set_labeling(dev);
}
nvdimm_bus_unlock(dev);
diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
index 94ea6dba6b4f..b7b77e8d9027 100644
--- a/drivers/nvdimm/dimm_devs.c
+++ b/drivers/nvdimm/dimm_devs.c
@@ -32,7 +32,7 @@ int nvdimm_check_config_data(struct device *dev)
if (!nvdimm->cmd_mask ||
!test_bit(ND_CMD_GET_CONFIG_DATA, &nvdimm->cmd_mask)) {
- if (test_bit(NDD_ALIASING, &nvdimm->flags))
+ if (test_bit(NDD_LABELING, &nvdimm->flags))
return -ENXIO;
else
return -ENOTTY;
@@ -173,11 +173,11 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
return rc;
}
-void nvdimm_set_aliasing(struct device *dev)
+void nvdimm_set_labeling(struct device *dev)
{
struct nvdimm *nvdimm = to_nvdimm(dev);
- set_bit(NDD_ALIASING, &nvdimm->flags);
+ set_bit(NDD_LABELING, &nvdimm->flags);
}
void nvdimm_set_locked(struct device *dev)
@@ -312,8 +312,9 @@ static ssize_t flags_show(struct device *dev,
{
struct nvdimm *nvdimm = to_nvdimm(dev);
- return sprintf(buf, "%s%s\n",
+ return sprintf(buf, "%s%s%s\n",
test_bit(NDD_ALIASING, &nvdimm->flags) ? "alias " : "",
+ test_bit(NDD_LABELING, &nvdimm->flags) ? "label " : "",
test_bit(NDD_LOCKED, &nvdimm->flags) ? "lock " : "");
}
static DEVICE_ATTR_RO(flags);
@@ -562,6 +563,21 @@ int nvdimm_security_freeze(struct nvdimm *nvdimm)
return rc;
}
+static unsigned long dpa_align(struct nd_region *nd_region)
+{
+ struct device *dev = &nd_region->dev;
+
+ if (dev_WARN_ONCE(dev, !is_nvdimm_bus_locked(dev),
+ "bus lock required for capacity provision\n"))
+ return 0;
+ if (dev_WARN_ONCE(dev, !nd_region->ndr_mappings || nd_region->align
+ % nd_region->ndr_mappings,
+ "invalid region align %#lx mappings: %d\n",
+ nd_region->align, nd_region->ndr_mappings))
+ return 0;
+ return nd_region->align / nd_region->ndr_mappings;
+}
+
int alias_dpa_busy(struct device *dev, void *data)
{
resource_size_t map_end, blk_start, new;
@@ -570,6 +586,7 @@ int alias_dpa_busy(struct device *dev, void *data)
struct nd_region *nd_region;
struct nvdimm_drvdata *ndd;
struct resource *res;
+ unsigned long align;
int i;
if (!is_memory(dev))
@@ -607,13 +624,21 @@ int alias_dpa_busy(struct device *dev, void *data)
* Find the free dpa from the end of the last pmem allocation to
* the end of the interleave-set mapping.
*/
+ align = dpa_align(nd_region);
+ if (!align)
+ return 0;
+
for_each_dpa_resource(ndd, res) {
+ resource_size_t start, end;
+
if (strncmp(res->name, "pmem", 4) != 0)
continue;
- if ((res->start >= blk_start && res->start < map_end)
- || (res->end >= blk_start
- && res->end <= map_end)) {
- new = max(blk_start, min(map_end + 1, res->end + 1));
+
+ start = ALIGN_DOWN(res->start, align);
+ end = ALIGN(res->end + 1, align) - 1;
+ if ((start >= blk_start && start < map_end)
+ || (end >= blk_start && end <= map_end)) {
+ new = max(blk_start, min(map_end, end) + 1);
if (new != blk_start) {
blk_start = new;
goto retry;
@@ -653,6 +678,7 @@ resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
.res = NULL,
};
struct resource *res;
+ unsigned long align;
if (!ndd)
return 0;
@@ -660,10 +686,20 @@ resource_size_t nd_blk_available_dpa(struct nd_region *nd_region)
device_for_each_child(&nvdimm_bus->dev, &info, alias_dpa_busy);
/* now account for busy blk allocations in unaliased dpa */
+ align = dpa_align(nd_region);
+ if (!align)
+ return 0;
for_each_dpa_resource(ndd, res) {
+ resource_size_t start, end, size;
+
if (strncmp(res->name, "blk", 3) != 0)
continue;
- info.available -= resource_size(res);
+ start = ALIGN_DOWN(res->start, align);
+ end = ALIGN(res->end + 1, align) - 1;
+ size = end - start + 1;
+ if (size >= info.available)
+ return 0;
+ info.available -= size;
}
return info.available;
@@ -682,19 +718,31 @@ resource_size_t nd_pmem_max_contiguous_dpa(struct nd_region *nd_region,
struct nvdimm_bus *nvdimm_bus;
resource_size_t max = 0;
struct resource *res;
+ unsigned long align;
/* if a dimm is disabled the available capacity is zero */
if (!ndd)
return 0;
+ align = dpa_align(nd_region);
+ if (!align)
+ return 0;
+
nvdimm_bus = walk_to_nvdimm_bus(ndd->dev);
if (__reserve_free_pmem(&nd_region->dev, nd_mapping->nvdimm))
return 0;
for_each_dpa_resource(ndd, res) {
+ resource_size_t start, end;
+
if (strcmp(res->name, "pmem-reserve") != 0)
continue;
- if (resource_size(res) > max)
- max = resource_size(res);
+ /* trim free space relative to current alignment setting */
+ start = ALIGN(res->start, align);
+ end = ALIGN_DOWN(res->end + 1, align) - 1;
+ if (end < start)
+ continue;
+ if (end - start + 1 > max)
+ max = end - start + 1;
}
release_free_pmem(nvdimm_bus, nd_mapping);
return max;
@@ -722,24 +770,33 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
struct nvdimm_drvdata *ndd = to_ndd(nd_mapping);
struct resource *res;
const char *reason;
+ unsigned long align;
if (!ndd)
return 0;
+ align = dpa_align(nd_region);
+ if (!align)
+ return 0;
+
map_start = nd_mapping->start;
map_end = map_start + nd_mapping->size - 1;
blk_start = max(map_start, map_end + 1 - *overlap);
for_each_dpa_resource(ndd, res) {
- if (res->start >= map_start && res->start < map_end) {
+ resource_size_t start, end;
+
+ start = ALIGN_DOWN(res->start, align);
+ end = ALIGN(res->end + 1, align) - 1;
+ if (start >= map_start && start < map_end) {
if (strncmp(res->name, "blk", 3) == 0)
blk_start = min(blk_start,
- max(map_start, res->start));
- else if (res->end > map_end) {
+ max(map_start, start));
+ else if (end > map_end) {
reason = "misaligned to iset";
goto err;
} else
- busy += resource_size(res);
- } else if (res->end >= map_start && res->end <= map_end) {
+ busy += end - start + 1;
+ } else if (end >= map_start && end <= map_end) {
if (strncmp(res->name, "blk", 3) == 0) {
/*
* If a BLK allocation overlaps the start of
@@ -748,8 +805,8 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
*/
blk_start = map_start;
} else
- busy += resource_size(res);
- } else if (map_start > res->start && map_start < res->end) {
+ busy += end - start + 1;
+ } else if (map_start > start && map_start < end) {
/* total eclipse of the mapping */
busy += nd_mapping->size;
blk_start = map_start;
@@ -759,7 +816,7 @@ resource_size_t nd_pmem_available_dpa(struct nd_region *nd_region,
*overlap = map_end + 1 - blk_start;
available = blk_start - map_start;
if (busy < available)
- return available - busy;
+ return ALIGN_DOWN(available - busy, align);
return 0;
err:
diff --git a/drivers/nvdimm/namespace_devs.c b/drivers/nvdimm/namespace_devs.c
index 032dc61725ff..ae155e860fdc 100644
--- a/drivers/nvdimm/namespace_devs.c
+++ b/drivers/nvdimm/namespace_devs.c
@@ -10,6 +10,7 @@
#include <linux/nd.h>
#include "nd-core.h"
#include "pmem.h"
+#include "pfn.h"
#include "nd.h"
static void namespace_io_release(struct device *dev)
@@ -541,6 +542,11 @@ static void space_valid(struct nd_region *nd_region, struct nvdimm_drvdata *ndd,
{
bool is_reserve = strcmp(label_id->id, "pmem-reserve") == 0;
bool is_pmem = strncmp(label_id->id, "pmem", 4) == 0;
+ unsigned long align;
+
+ align = nd_region->align / nd_region->ndr_mappings;
+ valid->start = ALIGN(valid->start, align);
+ valid->end = ALIGN_DOWN(valid->end + 1, align) - 1;
if (valid->start >= valid->end)
goto invalid;
@@ -980,10 +986,10 @@ static ssize_t __size_store(struct device *dev, unsigned long long val)
return -ENXIO;
}
- div_u64_rem(val, PAGE_SIZE * nd_region->ndr_mappings, &remainder);
+ div_u64_rem(val, nd_region->align, &remainder);
if (remainder) {
dev_dbg(dev, "%llu is not %ldK aligned\n", val,
- (PAGE_SIZE * nd_region->ndr_mappings) / SZ_1K);
+ nd_region->align / SZ_1K);
return -EINVAL;
}
@@ -1739,6 +1745,22 @@ struct nd_namespace_common *nvdimm_namespace_common_probe(struct device *dev)
return ERR_PTR(-ENODEV);
}
+ /*
+ * Note, alignment validation for fsdax and devdax mode
+ * namespaces happens in nd_pfn_validate() where infoblock
+ * padding parameters can be applied.
+ */
+ if (pmem_should_map_pages(dev)) {
+ struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+ struct resource *res = &nsio->res;
+
+ if (!IS_ALIGNED(res->start | (res->end + 1),
+ memremap_compat_align())) {
+ dev_err(&ndns->dev, "%pr misaligned, unable to map\n", res);
+ return ERR_PTR(-EOPNOTSUPP);
+ }
+ }
+
if (is_namespace_pmem(&ndns->dev)) {
struct nd_namespace_pmem *nspm;
@@ -2521,7 +2543,7 @@ static int init_active_labels(struct nd_region *nd_region)
if (!ndd) {
if (test_bit(NDD_LOCKED, &nvdimm->flags))
/* fail, label data may be unreadable */;
- else if (test_bit(NDD_ALIASING, &nvdimm->flags))
+ else if (test_bit(NDD_LABELING, &nvdimm->flags))
/* fail, labels needed to disambiguate dpa */;
else
return 0;
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index c9f6a5b5253a..c4d69c1cce55 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -146,6 +146,7 @@ struct nd_region {
struct device *btt_seed;
struct device *pfn_seed;
struct device *dax_seed;
+ unsigned long align;
u16 ndr_mappings;
u64 ndr_size;
u64 ndr_start;
@@ -252,7 +253,7 @@ int nvdimm_set_config_data(struct nvdimm_drvdata *ndd, size_t offset,
void *buf, size_t len);
long nvdimm_clear_poison(struct device *dev, phys_addr_t phys,
unsigned int len);
-void nvdimm_set_aliasing(struct device *dev);
+void nvdimm_set_labeling(struct device *dev);
void nvdimm_set_locked(struct device *dev);
void nvdimm_clear_locked(struct device *dev);
int nvdimm_security_setup_events(struct device *dev);
diff --git a/drivers/nvdimm/pfn.h b/drivers/nvdimm/pfn.h
index acb19517f678..37cb1b8a2a39 100644
--- a/drivers/nvdimm/pfn.h
+++ b/drivers/nvdimm/pfn.h
@@ -24,6 +24,18 @@ struct nd_pfn_sb {
__le64 npfns;
__le32 mode;
/* minor-version-1 additions for section alignment */
+ /**
+ * @start_pad: Deprecated attribute to pad start-misaligned namespaces
+ *
+ * start_pad is deprecated because the original definition did
+ * not comprehend that dataoff is relative to the base address
+ * of the namespace not the start_pad adjusted base. The result
+ * is that the dax path is broken, but the block-I/O path is
+ * not. The kernel will no longer create namespaces using start
+ * padding, but it still supports block-I/O for legacy
+ * configurations mainly to allow a backup, reconfigure the
+ * namespace, and restore flow to repair dax operation.
+ */
__le32 start_pad;
__le32 end_trunc;
/* minor-version-2 record the base alignment of the mapping */
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index b94f7a7e94b8..34db557dbad1 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -446,6 +446,7 @@ static bool nd_supported_alignment(unsigned long align)
int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
{
u64 checksum, offset;
+ struct resource *res;
enum nd_pfn_mode mode;
struct nd_namespace_io *nsio;
unsigned long align, start_pad;
@@ -561,14 +562,14 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n",
nd_pfn->align, align, nd_pfn->mode,
mode);
- return -EINVAL;
+ return -EOPNOTSUPP;
}
}
if (align > nvdimm_namespace_capacity(ndns)) {
dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n",
align, nvdimm_namespace_capacity(ndns));
- return -EINVAL;
+ return -EOPNOTSUPP;
}
/*
@@ -578,18 +579,31 @@ int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
* established.
*/
nsio = to_nd_namespace_io(&ndns->dev);
- if (offset >= resource_size(&nsio->res)) {
+ res = &nsio->res;
+ if (offset >= resource_size(res)) {
dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n",
dev_name(&ndns->dev));
- return -EBUSY;
+ return -EOPNOTSUPP;
}
- if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align))
+ if ((align && !IS_ALIGNED(res->start + offset + start_pad, align))
|| !IS_ALIGNED(offset, PAGE_SIZE)) {
dev_err(&nd_pfn->dev,
"bad offset: %#llx dax disabled align: %#lx\n",
offset, align);
- return -ENXIO;
+ return -EOPNOTSUPP;
+ }
+
+ if (!IS_ALIGNED(res->start + le32_to_cpu(pfn_sb->start_pad),
+ memremap_compat_align())) {
+ dev_err(&nd_pfn->dev, "resource start misaligned\n");
+ return -EOPNOTSUPP;
+ }
+
+ if (!IS_ALIGNED(res->end + 1 - le32_to_cpu(pfn_sb->end_trunc),
+ memremap_compat_align())) {
+ dev_err(&nd_pfn->dev, "resource end misaligned\n");
+ return -EOPNOTSUPP;
}
return 0;
@@ -750,7 +764,19 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
start = nsio->res.start;
size = resource_size(&nsio->res);
npfns = PHYS_PFN(size - SZ_8K);
- align = max(nd_pfn->align, (1UL << SUBSECTION_SHIFT));
+ align = max(nd_pfn->align, memremap_compat_align());
+
+ /*
+ * When @start is misaligned fail namespace creation. See
+ * the 'struct nd_pfn_sb' commentary on why ->start_pad is not
+ * an option.
+ */
+ if (!IS_ALIGNED(start, memremap_compat_align())) {
+ dev_err(&nd_pfn->dev, "%s: start %pa misaligned to %#lx\n",
+ dev_name(&ndns->dev), &start,
+ memremap_compat_align());
+ return -EINVAL;
+ }
end_trunc = start + size - ALIGN_DOWN(start + size, align);
if (nd_pfn->mode == PFN_MODE_PMEM) {
/*
diff --git a/drivers/nvdimm/region_devs.c b/drivers/nvdimm/region_devs.c
index a19e535830d9..ccbb5b43b8b2 100644
--- a/drivers/nvdimm/region_devs.c
+++ b/drivers/nvdimm/region_devs.c
@@ -195,16 +195,16 @@ EXPORT_SYMBOL_GPL(nd_blk_region_set_provider_data);
int nd_region_to_nstype(struct nd_region *nd_region)
{
if (is_memory(&nd_region->dev)) {
- u16 i, alias;
+ u16 i, label;
- for (i = 0, alias = 0; i < nd_region->ndr_mappings; i++) {
+ for (i = 0, label = 0; i < nd_region->ndr_mappings; i++) {
struct nd_mapping *nd_mapping = &nd_region->mapping[i];
struct nvdimm *nvdimm = nd_mapping->nvdimm;
- if (test_bit(NDD_ALIASING, &nvdimm->flags))
- alias++;
+ if (test_bit(NDD_LABELING, &nvdimm->flags))
+ label++;
}
- if (alias)
+ if (label)
return ND_DEVICE_NAMESPACE_PMEM;
else
return ND_DEVICE_NAMESPACE_IO;
@@ -216,21 +216,25 @@ int nd_region_to_nstype(struct nd_region *nd_region)
}
EXPORT_SYMBOL(nd_region_to_nstype);
-static ssize_t size_show(struct device *dev,
- struct device_attribute *attr, char *buf)
+static unsigned long long region_size(struct nd_region *nd_region)
{
- struct nd_region *nd_region = to_nd_region(dev);
- unsigned long long size = 0;
-
- if (is_memory(dev)) {
- size = nd_region->ndr_size;
+ if (is_memory(&nd_region->dev)) {
+ return nd_region->ndr_size;
} else if (nd_region->ndr_mappings == 1) {
struct nd_mapping *nd_mapping = &nd_region->mapping[0];
- size = nd_mapping->size;
+ return nd_mapping->size;
}
- return sprintf(buf, "%llu\n", size);
+ return 0;
+}
+
+static ssize_t size_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nd_region *nd_region = to_nd_region(dev);
+
+ return sprintf(buf, "%llu\n", region_size(nd_region));
}
static DEVICE_ATTR_RO(size);
@@ -529,6 +533,54 @@ static ssize_t read_only_store(struct device *dev,
}
static DEVICE_ATTR_RW(read_only);
+static ssize_t align_show(struct device *dev,
+ struct device_attribute *attr, char *buf)
+{
+ struct nd_region *nd_region = to_nd_region(dev);
+
+ return sprintf(buf, "%#lx\n", nd_region->align);
+}
+
+static ssize_t align_store(struct device *dev,
+ struct device_attribute *attr, const char *buf, size_t len)
+{
+ struct nd_region *nd_region = to_nd_region(dev);
+ unsigned long val, dpa;
+ u32 remainder;
+ int rc;
+
+ rc = kstrtoul(buf, 0, &val);
+ if (rc)
+ return rc;
+
+ if (!nd_region->ndr_mappings)
+ return -ENXIO;
+
+ /*
+ * Ensure space-align is evenly divisible by the region
+ * interleave-width because the kernel typically has no facility
+ * to determine which DIMM(s), dimm-physical-addresses, would
+ * contribute to the tail capacity in system-physical-address
+ * space for the namespace.
+ */
+ dpa = div_u64_rem(val, nd_region->ndr_mappings, &remainder);
+ if (!is_power_of_2(dpa) || dpa < PAGE_SIZE
+ || val > region_size(nd_region) || remainder)
+ return -EINVAL;
+
+ /*
+ * Given that space allocation consults this value multiple
+ * times ensure it does not change for the duration of the
+ * allocation.
+ */
+ nvdimm_bus_lock(dev);
+ nd_region->align = val;
+ nvdimm_bus_unlock(dev);
+
+ return len;
+}
+static DEVICE_ATTR_RW(align);
+
static ssize_t region_badblocks_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@@ -571,6 +623,7 @@ static DEVICE_ATTR_RO(persistence_domain);
static struct attribute *nd_region_attributes[] = {
&dev_attr_size.attr,
+ &dev_attr_align.attr,
&dev_attr_nstype.attr,
&dev_attr_mappings.attr,
&dev_attr_btt_seed.attr,
@@ -626,6 +679,19 @@ static umode_t region_visible(struct kobject *kobj, struct attribute *a, int n)
return a->mode;
}
+ if (a == &dev_attr_align.attr) {
+ int i;
+
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+ struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+ if (test_bit(NDD_LABELING, &nvdimm->flags))
+ return a->mode;
+ }
+ return 0;
+ }
+
if (a != &dev_attr_set_cookie.attr
&& a != &dev_attr_available_size.attr)
return a->mode;
@@ -935,6 +1001,41 @@ void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane)
}
EXPORT_SYMBOL(nd_region_release_lane);
+/*
+ * PowerPC requires this alignment for memremap_pages(). All other archs
+ * should be ok with SUBSECTION_SIZE (see memremap_compat_align()).
+ */
+#define MEMREMAP_COMPAT_ALIGN_MAX SZ_16M
+
+static unsigned long default_align(struct nd_region *nd_region)
+{
+ unsigned long align;
+ int i, mappings;
+ u32 remainder;
+
+ if (is_nd_blk(&nd_region->dev))
+ align = PAGE_SIZE;
+ else
+ align = MEMREMAP_COMPAT_ALIGN_MAX;
+
+ for (i = 0; i < nd_region->ndr_mappings; i++) {
+ struct nd_mapping *nd_mapping = &nd_region->mapping[i];
+ struct nvdimm *nvdimm = nd_mapping->nvdimm;
+
+ if (test_bit(NDD_ALIASING, &nvdimm->flags)) {
+ align = MEMREMAP_COMPAT_ALIGN_MAX;
+ break;
+ }
+ }
+
+ mappings = max_t(u16, 1, nd_region->ndr_mappings);
+ div_u64_rem(align, mappings, &remainder);
+ if (remainder)
+ align *= mappings;
+
+ return align;
+}
+
static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
struct nd_region_desc *ndr_desc,
const struct device_type *dev_type, const char *caller)
@@ -1039,6 +1140,7 @@ static struct nd_region *nd_region_create(struct nvdimm_bus *nvdimm_bus,
dev->of_node = ndr_desc->of_node;
nd_region->ndr_size = resource_size(ndr_desc->res);
nd_region->ndr_start = ndr_desc->res->start;
+ nd_region->align = default_align(nd_region);
if (ndr_desc->flush)
nd_region->flush = ndr_desc->flush;
else
diff --git a/include/linux/io.h b/include/linux/io.h
index b1c44bb4b2d7..8394c56babc2 100644
--- a/include/linux/io.h
+++ b/include/linux/io.h
@@ -77,8 +77,6 @@ void *devm_memremap(struct device *dev, resource_size_t offset,
size_t size, unsigned long flags);
void devm_memunmap(struct device *dev, void *addr);
-void *__devm_memremap_pages(struct device *dev, struct resource *res);
-
#ifdef CONFIG_PCI
/*
* The PCI specifications (Rev 3.0, 3.2.5 "Transaction Ordering and
diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h
index 9df091bd30ba..18da4059be09 100644
--- a/include/linux/libnvdimm.h
+++ b/include/linux/libnvdimm.h
@@ -37,6 +37,8 @@ enum {
NDD_WORK_PENDING = 4,
/* ignore / filter NSLABEL_FLAG_LOCAL for this DIMM, i.e. no aliasing */
NDD_NOBLK = 5,
+ /* dimm supports namespace labels */
+ NDD_LABELING = 6,
/* need to set a limit somewhere, but yes, this is likely overkill */
ND_IOCTL_MAX_BUFLEN = SZ_4M,
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 6fefb09af7c3..8af1cbd8f293 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -132,6 +132,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
+unsigned long memremap_compat_align(void);
#else
static inline void *devm_memremap_pages(struct device *dev,
struct dev_pagemap *pgmap)
@@ -165,6 +166,12 @@ static inline void vmem_altmap_free(struct vmem_altmap *altmap,
unsigned long nr_pfns)
{
}
+
+/* when memremap_pages() is disabled all archs can remap a single page */
+static inline unsigned long memremap_compat_align(void)
+{
+ return PAGE_SIZE;
+}
#endif /* CONFIG_ZONE_DEVICE */
static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
@@ -172,4 +179,5 @@ static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
if (pgmap)
percpu_ref_put(pgmap->ref);
}
+
#endif /* _LINUX_MEMREMAP_H_ */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 462f6873905a..6b77f7239af5 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -1170,6 +1170,7 @@ static inline unsigned long section_nr_to_pfn(unsigned long sec)
#define SECTION_ALIGN_DOWN(pfn) ((pfn) & PAGE_SECTION_MASK)
#define SUBSECTION_SHIFT 21
+#define SUBSECTION_SIZE (1UL << SUBSECTION_SHIFT)
#define PFN_SUBSECTION_SHIFT (SUBSECTION_SHIFT - PAGE_SHIFT)
#define PAGES_PER_SUBSECTION (1UL << PFN_SUBSECTION_SHIFT)
diff --git a/lib/Kconfig b/lib/Kconfig
index bc7e56370129..5d53f9609c25 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -615,6 +615,9 @@ config ARCH_HAS_PMEM_API
config MEMREGION
bool
+config ARCH_HAS_MEMREMAP_COMPAT_ALIGN
+ bool
+
# use memcpy to implement user copies for nommu architectures
config UACCESS_MEMCPY
bool
diff --git a/mm/memremap.c b/mm/memremap.c
index 09b5b7adc773..3e7afaf05639 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -7,6 +7,7 @@
#include <linux/mm.h>
#include <linux/pfn_t.h>
#include <linux/swap.h>
+#include <linux/mmzone.h>
#include <linux/swapops.h>
#include <linux/types.h>
#include <linux/wait_bit.h>
@@ -14,6 +15,28 @@
static DEFINE_XARRAY(pgmap_array);
+/*
+ * The memremap() and memremap_pages() interfaces are alternately used
+ * to map persistent memory namespaces. These interfaces place different
+ * constraints on the alignment and size of the mapping (namespace).
+ * memremap() can map individual PAGE_SIZE pages. memremap_pages() can
+ * only map subsections (2MB), and at least one architecture (PowerPC)
+ * the minimum mapping granularity of memremap_pages() is 16MB.
+ *
+ * The role of memremap_compat_align() is to communicate the minimum
+ * arch supported alignment of a namespace such that it can freely
+ * switch modes without violating the arch constraint. Namely, do not
+ * allow a namespace to be PAGE_SIZE aligned since that namespace may be
+ * reconfigured into a mode that requires SUBSECTION_SIZE alignment.
+ */
+#ifndef CONFIG_ARCH_HAS_MEMREMAP_COMPAT_ALIGN
+unsigned long memremap_compat_align(void)
+{
+ return SUBSECTION_SIZE;
+}
+EXPORT_SYMBOL_GPL(memremap_compat_align);
+#endif
+
#ifdef CONFIG_DEV_PAGEMAP_OPS
DEFINE_STATIC_KEY_FALSE(devmap_managed_key);
EXPORT_SYMBOL(devmap_managed_key);