summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt22
-rw-r--r--Documentation/admin-guide/sysctl/vm.rst5
-rw-r--r--include/linux/memory_hotplug.h9
-rw-r--r--include/linux/page-flags.h11
-rw-r--r--mm/hugetlb_vmemmap.c66
-rw-r--r--mm/memory_hotplug.c27
6 files changed, 93 insertions, 47 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 8c0ea6b6c6a9..2cacd4f8deb7 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1722,9 +1722,11 @@
Built with CONFIG_HUGETLB_PAGE_OPTIMIZE_VMEMMAP_DEFAULT_ON=y,
the default is on.
- This is not compatible with memory_hotplug.memmap_on_memory.
- If both parameters are enabled, hugetlb_free_vmemmap takes
- precedence over memory_hotplug.memmap_on_memory.
+ Note that the vmemmap pages may be allocated from the added
+ memory block itself when memory_hotplug.memmap_on_memory is
+ enabled, those vmemmap pages cannot be optimized even if this
+ feature is enabled. Other vmemmap pages not allocated from
+ the added memory block itself do not be affected.
hung_task_panic=
[KNL] Should the hung task detector generate panics.
@@ -3068,10 +3070,12 @@
[KNL,X86,ARM] Boolean flag to enable this feature.
Format: {on | off (default)}
When enabled, runtime hotplugged memory will
- allocate its internal metadata (struct pages)
- from the hotadded memory which will allow to
- hotadd a lot of memory without requiring
- additional memory to do so.
+ allocate its internal metadata (struct pages,
+ those vmemmap pages cannot be optimized even
+ if hugetlb_free_vmemmap is enabled) from the
+ hotadded memory which will allow to hotadd a
+ lot of memory without requiring additional
+ memory to do so.
This feature is disabled by default because it
has some implication on large (e.g. GB)
allocations in some configurations (e.g. small
@@ -3081,10 +3085,6 @@
Note that even when enabled, there are a few cases where
the feature is not effective.
- This is not compatible with hugetlb_free_vmemmap. If
- both parameters are enabled, hugetlb_free_vmemmap takes
- precedence over memory_hotplug.memmap_on_memory.
-
memtest= [KNL,X86,ARM,M68K,PPC,RISCV] Enable memtest
Format: <integer>
default : 0 <disable>
diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst
index 4a440a7cfeb0..f74f722ad702 100644
--- a/Documentation/admin-guide/sysctl/vm.rst
+++ b/Documentation/admin-guide/sysctl/vm.rst
@@ -565,9 +565,8 @@ See Documentation/admin-guide/mm/hugetlbpage.rst
hugetlb_optimize_vmemmap
========================
-This knob is not available when memory_hotplug.memmap_on_memory (kernel parameter)
-is configured or the size of 'struct page' (a structure defined in
-include/linux/mm_types.h) is not power of two (an unusual system config could
+This knob is not available when the size of 'struct page' (a structure defined
+in include/linux/mm_types.h) is not power of two (an unusual system config could
result in this).
Enable (set to 1) or disable (set to 0) the feature of optimizing vmemmap pages
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index 20d7edf62a6a..e0b2209ab71c 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -351,13 +351,4 @@ void arch_remove_linear_mapping(u64 start, u64 size);
extern bool mhp_supports_memmap_on_memory(unsigned long size);
#endif /* CONFIG_MEMORY_HOTPLUG */
-#ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
-bool mhp_memmap_on_memory(void);
-#else
-static inline bool mhp_memmap_on_memory(void)
-{
- return false;
-}
-#endif
-
#endif /* __LINUX_MEMORY_HOTPLUG_H */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index f32aade2a6e0..82719d33c0f1 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -193,6 +193,11 @@ enum pageflags {
/* Only valid for buddy pages. Used to track pages that are reported */
PG_reported = PG_uptodate,
+
+#ifdef CONFIG_MEMORY_HOTPLUG
+ /* For self-hosted memmap pages */
+ PG_vmemmap_self_hosted = PG_owner_priv_1,
+#endif
};
#define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1)
@@ -628,6 +633,12 @@ PAGEFLAG_FALSE(SkipKASanPoison, skip_kasan_poison)
*/
__PAGEFLAG(Reported, reported, PF_NO_COMPOUND)
+#ifdef CONFIG_MEMORY_HOTPLUG
+PAGEFLAG(VmemmapSelfHosted, vmemmap_self_hosted, PF_ANY)
+#else
+PAGEFLAG_FALSE(VmemmapSelfHosted, vmemmap_self_hosted)
+#endif
+
/*
* On an anonymous page mapped into a user virtual memory area,
* page->mapping points to its anon_vma, not to a struct address_space;
diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c
index ba29c15c53d6..1362feb3c6c9 100644
--- a/mm/hugetlb_vmemmap.c
+++ b/mm/hugetlb_vmemmap.c
@@ -10,7 +10,7 @@
*/
#define pr_fmt(fmt) "HugeTLB: " fmt
-#include <linux/memory_hotplug.h>
+#include <linux/memory.h>
#include "hugetlb_vmemmap.h"
/*
@@ -97,18 +97,68 @@ int hugetlb_vmemmap_alloc(struct hstate *h, struct page *head)
return ret;
}
+static unsigned int vmemmap_optimizable_pages(struct hstate *h,
+ struct page *head)
+{
+ if (READ_ONCE(vmemmap_optimize_mode) == VMEMMAP_OPTIMIZE_OFF)
+ return 0;
+
+ if (IS_ENABLED(CONFIG_MEMORY_HOTPLUG)) {
+ pmd_t *pmdp, pmd;
+ struct page *vmemmap_page;
+ unsigned long vaddr = (unsigned long)head;
+
+ /*
+ * Only the vmemmap page's vmemmap page can be self-hosted.
+ * Walking the page tables to find the backing page of the
+ * vmemmap page.
+ */
+ pmdp = pmd_off_k(vaddr);
+ /*
+ * The READ_ONCE() is used to stabilize *pmdp in a register or
+ * on the stack so that it will stop changing under the code.
+ * The only concurrent operation where it can be changed is
+ * split_vmemmap_huge_pmd() (*pmdp will be stable after this
+ * operation).
+ */
+ pmd = READ_ONCE(*pmdp);
+ if (pmd_leaf(pmd))
+ vmemmap_page = pmd_page(pmd) + pte_index(vaddr);
+ else
+ vmemmap_page = pte_page(*pte_offset_kernel(pmdp, vaddr));
+ /*
+ * Due to HugeTLB alignment requirements and the vmemmap pages
+ * being at the start of the hotplugged memory region in
+ * memory_hotplug.memmap_on_memory case. Checking any vmemmap
+ * page's vmemmap page if it is marked as VmemmapSelfHosted is
+ * sufficient.
+ *
+ * [ hotplugged memory ]
+ * [ section ][...][ section ]
+ * [ vmemmap ][ usable memory ]
+ * ^ | | |
+ * +---+ | |
+ * ^ | |
+ * +-------+ |
+ * ^ |
+ * +-------------------------------------------+
+ */
+ if (PageVmemmapSelfHosted(vmemmap_page))
+ return 0;
+ }
+
+ return hugetlb_optimize_vmemmap_pages(h);
+}
+
void hugetlb_vmemmap_free(struct hstate *h, struct page *head)
{
unsigned long vmemmap_addr = (unsigned long)head;
unsigned long vmemmap_end, vmemmap_reuse, vmemmap_pages;
- vmemmap_pages = hugetlb_optimize_vmemmap_pages(h);
+ vmemmap_pages = vmemmap_optimizable_pages(h, head);
if (!vmemmap_pages)
return;
- if (READ_ONCE(vmemmap_optimize_mode) == VMEMMAP_OPTIMIZE_OFF)
- return;
-
static_branch_inc(&hugetlb_optimize_vmemmap_key);
vmemmap_addr += RESERVE_VMEMMAP_SIZE;
@@ -199,10 +249,10 @@ static struct ctl_table hugetlb_vmemmap_sysctls[] = {
static __init int hugetlb_vmemmap_sysctls_init(void)
{
/*
- * If "memory_hotplug.memmap_on_memory" is enabled or "struct page"
- * crosses page boundaries, the vmemmap pages cannot be optimized.
+ * If "struct page" crosses page boundaries, the vmemmap pages cannot
+ * be optimized.
*/
- if (!mhp_memmap_on_memory() && is_power_of_2(sizeof(struct page)))
+ if (is_power_of_2(sizeof(struct page)))
register_sysctl_init("vm", hugetlb_vmemmap_sysctls);
return 0;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index a2a6d280054f..99ecb2b3ff53 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -43,30 +43,22 @@
#include "shuffle.h"
#ifdef CONFIG_MHP_MEMMAP_ON_MEMORY
-static int memmap_on_memory_set(const char *val, const struct kernel_param *kp)
-{
- if (hugetlb_optimize_vmemmap_enabled())
- return 0;
- return param_set_bool(val, kp);
-}
-
-static const struct kernel_param_ops memmap_on_memory_ops = {
- .flags = KERNEL_PARAM_OPS_FL_NOARG,
- .set = memmap_on_memory_set,
- .get = param_get_bool,
-};
-
/*
* memory_hotplug.memmap_on_memory parameter
*/
static bool memmap_on_memory __ro_after_init;
-module_param_cb(memmap_on_memory, &memmap_on_memory_ops, &memmap_on_memory, 0444);
+module_param(memmap_on_memory, bool, 0444);
MODULE_PARM_DESC(memmap_on_memory, "Enable memmap on memory for memory hotplug");
-bool mhp_memmap_on_memory(void)
+static inline bool mhp_memmap_on_memory(void)
{
return memmap_on_memory;
}
+#else
+static inline bool mhp_memmap_on_memory(void)
+{
+ return false;
+}
#endif
enum {
@@ -1035,7 +1027,7 @@ int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
struct zone *zone)
{
unsigned long end_pfn = pfn + nr_pages;
- int ret;
+ int ret, i;
ret = kasan_add_zero_shadow(__va(PFN_PHYS(pfn)), PFN_PHYS(nr_pages));
if (ret)
@@ -1043,6 +1035,9 @@ int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
move_pfn_range_to_zone(zone, pfn, nr_pages, NULL, MIGRATE_UNMOVABLE);
+ for (i = 0; i < nr_pages; i++)
+ SetPageVmemmapSelfHosted(pfn_to_page(pfn + i));
+
/*
* It might be that the vmemmap_pages fully span sections. If that is
* the case, mark those sections online here as otherwise they will be