summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/gfp.h8
-rw-r--r--init/main.c2
-rw-r--r--mm/internal.h24
-rw-r--r--mm/page_alloc.c46
-rw-r--r--mm/vmscan.c6
5 files changed, 49 insertions, 37 deletions
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 6ba7cf23748f..ad35f300b9a4 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -384,6 +384,14 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
void drain_all_pages(struct zone *zone);
void drain_local_pages(struct zone *zone);
+#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
+void page_alloc_init_late(void);
+#else
+static inline void page_alloc_init_late(void)
+{
+}
+#endif
+
/*
* gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what
* GFP flags are used before interrupts are enabled. Once interrupts are
diff --git a/init/main.c b/init/main.c
index c599aea23bb1..c5d5626289ce 100644
--- a/init/main.c
+++ b/init/main.c
@@ -1004,6 +1004,8 @@ static noinline void __init kernel_init_freeable(void)
smp_init();
sched_init_smp();
+ page_alloc_init_late();
+
do_basic_setup();
/* Open the /dev/console on the rootfs, this should never fail */
diff --git a/mm/internal.h b/mm/internal.h
index a48cbefde8ca..36b23f1e2ca6 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -379,30 +379,6 @@ static inline void mminit_verify_zonelist(void)
}
#endif /* CONFIG_DEBUG_MEMORY_INIT */
-/*
- * Deferred struct page initialisation requires init functions that are freed
- * before kswapd is available. Reuse the memory hotplug section annotation
- * to mark the required code.
- *
- * __defermem_init is code that always exists but is annotated __meminit to
- * avoid section warnings.
- * __defer_init code gets marked __meminit when deferring struct page
- * initialistion but is otherwise in the init section.
- */
-#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-#define __defermem_init __meminit
-#define __defer_init __meminit
-
-void deferred_init_memmap(int nid);
-#else
-#define __defermem_init
-#define __defer_init __init
-
-static inline void deferred_init_memmap(int nid)
-{
-}
-#endif
-
/* mminit_validate_memmodel_limits is independent of CONFIG_DEBUG_MEMORY_INIT */
#if defined(CONFIG_SPARSEMEM)
extern void mminit_validate_memmodel_limits(unsigned long *start_pfn,
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5a38e39b30d1..506eac8b38af 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -18,6 +18,7 @@
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/interrupt.h>
+#include <linux/rwsem.h>
#include <linux/pagemap.h>
#include <linux/jiffies.h>
#include <linux/bootmem.h>
@@ -61,6 +62,7 @@
#include <linux/hugetlb.h>
#include <linux/sched/rt.h>
#include <linux/page_owner.h>
+#include <linux/kthread.h>
#include <asm/sections.h>
#include <asm/tlbflush.h>
@@ -242,7 +244,7 @@ static inline void reset_deferred_meminit(pg_data_t *pgdat)
}
/* Returns true if the struct page for the pfn is uninitialised */
-static inline bool __defermem_init early_page_uninitialised(unsigned long pfn)
+static inline bool __meminit early_page_uninitialised(unsigned long pfn)
{
int nid = early_pfn_to_nid(pfn);
@@ -958,7 +960,7 @@ static void __free_pages_ok(struct page *page, unsigned int order)
local_irq_restore(flags);
}
-static void __defer_init __free_pages_boot_core(struct page *page,
+static void __init __free_pages_boot_core(struct page *page,
unsigned long pfn, unsigned int order)
{
unsigned int nr_pages = 1 << order;
@@ -1031,7 +1033,7 @@ static inline bool __meminit meminit_pfn_in_nid(unsigned long pfn, int node,
#endif
-void __defer_init __free_pages_bootmem(struct page *page, unsigned long pfn,
+void __init __free_pages_bootmem(struct page *page, unsigned long pfn,
unsigned int order)
{
if (early_page_uninitialised(pfn))
@@ -1040,7 +1042,7 @@ void __defer_init __free_pages_bootmem(struct page *page, unsigned long pfn,
}
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-static void __defermem_init deferred_free_range(struct page *page,
+static void __init deferred_free_range(struct page *page,
unsigned long pfn, int nr_pages)
{
int i;
@@ -1060,20 +1062,30 @@ static void __defermem_init deferred_free_range(struct page *page,
__free_pages_boot_core(page, pfn, 0);
}
+static __initdata DECLARE_RWSEM(pgdat_init_rwsem);
+
/* Initialise remaining memory on a node */
-void __defermem_init deferred_init_memmap(int nid)
+static int __init deferred_init_memmap(void *data)
{
+ pg_data_t *pgdat = data;
+ int nid = pgdat->node_id;
struct mminit_pfnnid_cache nid_init_state = { };
unsigned long start = jiffies;
unsigned long nr_pages = 0;
unsigned long walk_start, walk_end;
int i, zid;
struct zone *zone;
- pg_data_t *pgdat = NODE_DATA(nid);
unsigned long first_init_pfn = pgdat->first_deferred_pfn;
+ const struct cpumask *cpumask = cpumask_of_node(pgdat->node_id);
- if (first_init_pfn == ULONG_MAX)
- return;
+ if (first_init_pfn == ULONG_MAX) {
+ up_read(&pgdat_init_rwsem);
+ return 0;
+ }
+
+ /* Bind memory initialisation thread to a local node if possible */
+ if (!cpumask_empty(cpumask))
+ set_cpus_allowed_ptr(current, cpumask);
/* Sanity check boundaries */
BUG_ON(pgdat->first_deferred_pfn < pgdat->node_start_pfn);
@@ -1165,8 +1177,24 @@ free_range:
/* Sanity check that the next zone really is unpopulated */
WARN_ON(++zid < MAX_NR_ZONES && populated_zone(++zone));
- pr_info("kswapd %d initialised %lu pages in %ums\n", nid, nr_pages,
+ pr_info("node %d initialised, %lu pages in %ums\n", nid, nr_pages,
jiffies_to_msecs(jiffies - start));
+ up_read(&pgdat_init_rwsem);
+ return 0;
+}
+
+void __init page_alloc_init_late(void)
+{
+ int nid;
+
+ for_each_node_state(nid, N_MEMORY) {
+ down_read(&pgdat_init_rwsem);
+ kthread_run(deferred_init_memmap, NODE_DATA(nid), "pgdatinit%d", nid);
+ }
+
+ /* Block until all are initialised */
+ down_write(&pgdat_init_rwsem);
+ up_write(&pgdat_init_rwsem);
}
#endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */
diff --git a/mm/vmscan.c b/mm/vmscan.c
index f4a487110764..e61445dce04e 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -3386,7 +3386,7 @@ static void kswapd_try_to_sleep(pg_data_t *pgdat, int order, int classzone_idx)
* If there are applications that are active memory-allocators
* (most normal use), this basically shouldn't matter.
*/
-static int __defermem_init kswapd(void *p)
+static int kswapd(void *p)
{
unsigned long order, new_order;
unsigned balanced_order;
@@ -3421,8 +3421,6 @@ static int __defermem_init kswapd(void *p)
tsk->flags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
set_freezable();
- deferred_init_memmap(pgdat->node_id);
-
order = new_order = 0;
balanced_order = 0;
classzone_idx = new_classzone_idx = pgdat->nr_zones - 1;
@@ -3578,7 +3576,7 @@ static int cpu_callback(struct notifier_block *nfb, unsigned long action,
* This kswapd start function will be called by init and node-hot-add.
* On node-hot-add, kswapd will moved to proper cpus if cpus are hot-added.
*/
-int __defermem_init kswapd_run(int nid)
+int kswapd_run(int nid)
{
pg_data_t *pgdat = NODE_DATA(nid);
int ret = 0;