summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorMuchun Song <songmuchun@bytedance.com>2022-06-17 21:56:50 +0800
committerakpm <akpm@linux-foundation.org>2022-07-03 18:08:49 -0700
commit66361095129b3b5d065e6c09cf0c085ef4a8c40f (patch)
tree59f536104b945c2d0097f2411f84769472f013a4 /mm
parented7802dd48f7a507213cbb95bb4c6f1fe134eb5d (diff)
downloadlinux-66361095129b3b5d065e6c09cf0c085ef4a8c40f.tar.bz2
mm: memory_hotplug: make hugetlb_optimize_vmemmap compatible with memmap_on_memory
For now, the feature of hugetlb_free_vmemmap is not compatible with the feature of memory_hotplug.memmap_on_memory, and hugetlb_free_vmemmap takes precedence over memory_hotplug.memmap_on_memory. However, someone wants to make memory_hotplug.memmap_on_memory takes precedence over hugetlb_free_vmemmap since memmap_on_memory makes it more likely to succeed memory hotplug in close-to-OOM situations. So the decision of making hugetlb_free_vmemmap take precedence is not wise and elegant. The proper approach is to have hugetlb_vmemmap.c do the check whether the section which the HugeTLB pages belong to can be optimized. If the section's vmemmap pages are allocated from the added memory block itself, hugetlb_free_vmemmap should refuse to optimize the vmemmap, otherwise, do the optimization. Then both kernel parameters are compatible. So this patch introduces VmemmapSelfHosted to mask any non-optimizable vmemmap pages. The hugetlb_vmemmap can use this flag to detect if a vmemmap page can be optimized. [songmuchun@bytedance.com: walk vmemmap page tables to avoid false-positive] Link: https://lkml.kernel.org/r/20220620110616.12056-3-songmuchun@bytedance.com Link: https://lkml.kernel.org/r/20220617135650.74901-3-songmuchun@bytedance.com Signed-off-by: Muchun Song <songmuchun@bytedance.com> Co-developed-by: Oscar Salvador <osalvador@suse.de> Signed-off-by: Oscar Salvador <osalvador@suse.de> Acked-by: David Hildenbrand <david@redhat.com> Cc: Jonathan Corbet <corbet@lwn.net> Cc: Mike Kravetz <mike.kravetz@oracle.com> Cc: Paul E. McKenney <paulmck@kernel.org> Cc: Xiongchun Duan <duanxiongchun@bytedance.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/hugetlb_vmemmap.c66
-rw-r--r--mm/memory_hotplug.c27
2 files changed, 69 insertions, 24 deletions
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index ba29c15c53d6..1362feb3c6c9 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -10,7 +10,7 @@
*/
#define pr_fmt(fmt) "HugeTLB: " fmt
-#include <linux/memory_hotplug.h>
+#include <linux/memory.h>
#include "hugetlb_vmemmap.h"
/*
@@ -97,18 +97,68 @@ int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head)
return ret;
}
+static unsigned int vmemmap_optimizable_pages(struct hstate *h,
+ struct page *head)
+{
+ if (READ_ONCE(vmemmap_optimize_mode) == VMEMMAP_OPTIMIZE_OFF)
+ return 0;
+
+ if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) {
+ pmd_t *pmdp, pmd;
+ struct page *vmemmap_page;
+ unsigned long vaddr = (unsigned long)head;
+
+ /*
+ * Only the vmemmap page's vmemmap page can be self-hosted.
+ * Walking the page tables to find the backing page of the
+ * vmemmap page.
+ */
+ pmdp = pmd_off_k(vaddr);
+ /*
+ * The READ_ONCE() is used to stabilize *pmdp in a register or
+ * on the stack so that it will stop changing under the code.
+ * The only concurrent operation where it can be changed is
+ * split_vmemmap_huge_pmd() (*pmdp will be stable after this
+ * operation).
+ */
+ pmd = READ_ONCE(*pmdp);
+ if (pmd_leaf(pmd))
+ vmemmap_page = pmd_page(pmd) + pte_index(vaddr);
+ else
+ vmemmap_page = pte_page(*pte_offset_kernel(pmdp, vaddr));
+ /*
+ * Due to HugeTLB alignment requirements and the vmemmap pages
+ * being at the start of the hotplugged memory region in
+ * memory_hotplug.memmap_on_memory case. Checking any vmemmap
+ * page's vmemmap page if it is marked as VmemmapSelfHosted is
+ * sufficient.
+ *
+ * [ hotplugged memory ]
+ * [ section ][...][ section ]
+ * [ vmemmap ][ usable memory ]
+ * ^ | | |
+ * +---+ | |
+ * ^ | |
+ * +-------+ |
+ * ^ |
+ * +-------------------------------------------+
+ */
+ if (PageVmemmapSelfHosted(vmemmap_page))
+ return 0;
+ }
+
+ return hugetlb_optimize_vmemmap_pages(h);
+}
+
void hugetlb_vmemmap_free(struct hstate *h, struct page *head)
{
unsigned long vmemmap_addr = (unsigned long)head;
unsigned long vmemmap_end, vmemmap_reuse, vmemmap_pages;
- vmemmap_pages = hugetlb_optimize_vmemmap_pages(h);
+ vmemmap_pages = vmemmap_optimizable_pages(h, head);
if (!vmemmap_pages)
return;
- if (READ_ONCE(vmemmap_optimize_mode) == VMEMMAP_OPTIMIZE_OFF)
- return;
-
static_branch_inc(&hugetlb_optimize_vmemmap_key);
vmemmap_addr += RESERVE_VMEMMAP_SIZE;
@@ -199,10 +249,10 @@ static struct ctl_table hugetlb_vmemmap_sysctls[] = {
static __init int hugetlb_vmemmap_sysctls_init(void)
{
/*
- * If "memory_hotplug.memmap_on_memory" is enabled or "struct page"
- * crosses page boundaries, the vmemmap pages cannot be optimized.
+ * If "struct page" crosses page boundaries, the vmemmap pages cannot
+ * be optimized.
*/
- if (!mhp_memmap_on_memory() && is_power_of_2(sizeof(struct page)))
+ if (is_power_of_2(sizeof(struct page)))
register_sysctl_init("vm", hugetlb_vmemmap_sysctls);
return 0;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a2a6d280054f..99ecb2b3ff53 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -43,30 +43,22 @@
#include "shuffle.h"
#ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
-static int memmap_on_memory_set(const char *val, const struct kernel_param *kp)
-{
- if (hugetlb_optimize_vmemmap_enabled())
- return 0;
- return param_set_bool(val, kp);
-}
-
-static const struct kernel_param_ops memmap_on_memory_ops = {
- .flags = KERNEL_PARAM_OPS_FL_NOARG,
- .set = memmap_on_memory_set,
- .get = param_get_bool,
-};
-
/*
* memory_hotplug.memmap_on_memory parameter
*/
static bool memmap_on_memory __ro_after_init;
-module_param_cb(memmap_on_memory, &memmap_on_memory_ops, &memmap_on_memory, 0444);
+module_param(memmap_on_memory, bool, 0444);
MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug");
-bool mhp_memmap_on_memory(void)
+static inline bool mhp_memmap_on_memory(void)
{
return memmap_on_memory;
}
+#else
+static inline bool mhp_memmap_on_memory(void)
+{
+ return false;
+}
#endif
enum {
@@ -1035,7 +1027,7 @@ int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
struct zone *zone)
{
unsigned long end_pfn = pfn + nr_pages;
- int ret;
+ int ret, i;
ret = kasan_add_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
if (ret)
@@ -1043,6 +1035,9 @@ int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_UNMOVABLE);
+ for (i = 0; i < nr_pages; i++)
+ SetPageVmemmapSelfHosted(pfn_to_page(pfn + i));
+
/*
* It might be that the vmemmap_pages fully span sections. If that is
* the case, mark those sections online here as otherwise they will be