From 5cd401ace914dc68556c6d2fcae0c349444d5f86 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 25 Feb 2019 10:57:30 -0800 Subject: mm/resource: Return real error codes from walk failures walk_system_ram_range() can return an error code either becuase *it* failed, or because the 'func' that it calls returned an error. The memory hotplug does the following: ret = walk_system_ram_range(..., func); if (ret) return ret; and 'ret' makes it out to userspace, eventually. The problem s, walk_system_ram_range() failues that result from *it* failing (as opposed to 'func') return -1. That leads to a very odd -EPERM (-1) return code out to userspace. Make walk_system_ram_range() return -EINVAL for internal failures to keep userspace less confused. This return code is compatible with all the callers that I audited. Signed-off-by: Dave Hansen Reviewed-by: Bjorn Helgaas Acked-by: Michael Ellerman (powerpc) Cc: Dan Williams Cc: Dave Jiang Cc: Ross Zwisler Cc: Vishal Verma Cc: Tom Lendacky Cc: Andrew Morton Cc: Michal Hocko Cc: linux-nvdimm@lists.01.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Cc: Huang Ying Cc: Fengguang Wu Cc: Borislav Petkov Cc: Yaowei Bai Cc: Takashi Iwai Cc: Jerome Glisse Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: linuxppc-dev@lists.ozlabs.org Cc: Keith Busch Signed-off-by: Dan Williams --- kernel/resource.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index 915c02e8e5dd..ca7ed5158cff 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -382,7 +382,7 @@ static int __walk_iomem_res_desc(resource_size_t start, resource_size_t end, int (*func)(struct resource *, void *)) { struct resource res; - int ret = -1; + int ret = -EINVAL; while (start < end && !find_next_iomem_res(start, end, flags, desc, first_lvl, &res)) { @@ -462,7 +462,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, unsigned long flags; struct resource res; unsigned long pfn, end_pfn; - int ret = -1; + int ret = -EINVAL; start = (u64) start_pfn << PAGE_SHIFT; end = ((u64)(start_pfn + nr_pages) << PAGE_SHIFT) - 1; -- cgit v1.2.3 From b926b7f3baecb2a855db629e6822e1a85212e91c Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 25 Feb 2019 10:57:33 -0800 Subject: mm/resource: Move HMM pr_debug() deeper into resource code HMM consumes physical address space for its own use, even though nothing is mapped or accessible there. It uses a special resource description (IORES_DESC_DEVICE_PRIVATE_MEMORY) to uniquely identify these areas. When HMM consumes address space, it makes a best guess about what to consume. However, it is possible that a future memory or device hotplug can collide with the reserved area. In the case of these conflicts, there is an error message in register_memory_resource(). Later patches in this series move register_memory_resource() from using request_resource_conflict() to __request_region(). Unfortunately, __request_region() does not return the conflict like the previous function did, which makes it impossible to check for IORES_DESC_DEVICE_PRIVATE_MEMORY in a conflicting resource. Instead of warning in register_memory_resource(), move the check into the core resource code itself (__request_region()) where the conflicting resource _is_ available. This has the added bonus of producing a warning in case of HMM conflicts with devices *or* RAM address space, as opposed to the RAM- only warnings that were there previously. Signed-off-by: Dave Hansen Reviewed-by: Jerome Glisse Cc: Dan Williams Cc: Dave Jiang Cc: Ross Zwisler Cc: Vishal Verma Cc: Tom Lendacky Cc: Andrew Morton Cc: Michal Hocko Cc: linux-nvdimm@lists.01.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Cc: Huang Ying Cc: Fengguang Wu Cc: Keith Busch Signed-off-by: Dan Williams --- kernel/resource.c | 9 +++++++++ mm/memory_hotplug.c | 5 ----- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index ca7ed5158cff..35fe105d581e 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1132,6 +1132,15 @@ struct resource * __request_region(struct resource *parent, conflict = __request_resource(parent, res); if (!conflict) break; + /* + * mm/hmm.c reserves physical addresses which then + * become unavailable to other users. Conflicts are + * not expected. Warn to aid debugging if encountered. + */ + if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) { + pr_warn("Unaddressable device %s %pR conflicts with %pR", + conflict->name, conflict, res); + } if (conflict != parent) { if (!(conflict->flags & IORESOURCE_BUSY)) { parent = conflict; diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b9a667d36c55..e198974c968d 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -110,11 +110,6 @@ static struct resource *register_memory_resource(u64 start, u64 size) res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; conflict = request_resource_conflict(&iomem_resource, res); if (conflict) { - if (conflict->desc == IORES_DESC_DEVICE_PRIVATE_MEMORY) { - pr_debug("Device unaddressable memory block " - "memory hotplug at %#010llx !\n", - (unsigned long long)start); - } pr_debug("System RAM resource %pR cannot be added\n", res); kfree(res); return ERR_PTR(-EEXIST); -- cgit v1.2.3 From 2b539aefe9e48e3908cff02699aa63a8b9bd268e Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 25 Feb 2019 10:57:38 -0800 Subject: mm/resource: Let walk_system_ram_range() search child resources In the process of onlining memory, we use walk_system_ram_range() to find the actual RAM areas inside of the area being onlined. However, it currently only finds memory resources which are "top-level" iomem_resources. Children are not currently searched which causes it to skip System RAM in areas like this (in the format of /proc/iomem): a0000000-bfffffff : Persistent Memory (legacy) a0000000-afffffff : System RAM Changing the true->false here allows children to be searched as well. We need this because we add a new "System RAM" resource underneath the "persistent memory" resource when we use persistent memory in a volatile mode. Signed-off-by: Dave Hansen Cc: Keith Busch Cc: Dan Williams Cc: Dave Jiang Cc: Ross Zwisler Cc: Vishal Verma Cc: Tom Lendacky Cc: Andrew Morton Cc: Michal Hocko Cc: linux-nvdimm@lists.01.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Cc: Huang Ying Cc: Fengguang Wu Cc: Borislav Petkov Cc: Bjorn Helgaas Cc: Yaowei Bai Cc: Takashi Iwai Cc: Jerome Glisse Signed-off-by: Dan Williams --- kernel/resource.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index 35fe105d581e..e7f9d2a5db25 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -454,6 +454,9 @@ int walk_mem_res(u64 start, u64 end, void *arg, * This function calls the @func callback against all memory ranges of type * System RAM which are marked as IORESOURCE_SYSTEM_RAM and IORESOUCE_BUSY. * It is to be used only for System RAM. + * + * This will find System RAM ranges that are children of top-level resources + * in addition to top-level System RAM resources. */ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)) @@ -469,7 +472,7 @@ int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; while (start < end && !find_next_iomem_res(start, end, flags, IORES_DESC_NONE, - true, &res)) { + false, &res)) { pfn = (res.start + PAGE_SIZE - 1) >> PAGE_SHIFT; end_pfn = (res.end + 1) >> PAGE_SHIFT; if (end_pfn > pfn) -- cgit v1.2.3