summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-06-29 17:29:11 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-06-29 17:29:11 -0700
commit65090f30ab791810a3dc840317e57df05018559c (patch)
treef417526656da37109777e89613e140ffc59228bc /include
parent349a2d52ffe59b7a0c5876fa7ee9f3eaf188b830 (diff)
parent0ed950d1f28142ccd9a9453c60df87853530d778 (diff)
downloadlinux-65090f30ab791810a3dc840317e57df05018559c.tar.bz2
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton: "191 patches. Subsystems affected by this patch series: kthread, ia64, scripts, ntfs, squashfs, ocfs2, kernel/watchdog, and mm (gup, pagealloc, slab, slub, kmemleak, dax, debug, pagecache, gup, swap, memcg, pagemap, mprotect, bootmem, dma, tracing, vmalloc, kasan, initialization, pagealloc, and memory-failure)" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (191 commits) mm,hwpoison: make get_hwpoison_page() call get_any_page() mm,hwpoison: send SIGBUS with error virutal address mm/page_alloc: split pcp->high across all online CPUs for cpuless nodes mm/page_alloc: allow high-order pages to be stored on the per-cpu lists mm: replace CONFIG_FLAT_NODE_MEM_MAP with CONFIG_FLATMEM mm: replace CONFIG_NEED_MULTIPLE_NODES with CONFIG_NUMA docs: remove description of DISCONTIGMEM arch, mm: remove stale mentions of DISCONIGMEM mm: remove CONFIG_DISCONTIGMEM m68k: remove support for DISCONTIGMEM arc: remove support for DISCONTIGMEM arc: update comment about HIGHMEM implementation alpha: remove DISCONTIGMEM and NUMA mm/page_alloc: move free_the_page mm/page_alloc: fix counting of managed_pages mm/page_alloc: improve memmap_pages dbg msg mm: drop SECTION_SHIFT in code comments mm/page_alloc: introduce vm.percpu_pagelist_high_fraction mm/page_alloc: limit the number of pages on PCP lists when reclaim is active mm/page_alloc: scale the number of pages that are batch freed ...
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/memory_model.h37
-rw-r--r--include/asm-generic/pgtable-nop4d.h1
-rw-r--r--include/asm-generic/topology.h2
-rw-r--r--include/kunit/test.h5
-rw-r--r--include/linux/backing-dev-defs.h20
-rw-r--r--include/linux/cpuhotplug.h2
-rw-r--r--include/linux/fs.h6
-rw-r--r--include/linux/gfp.h13
-rw-r--r--include/linux/iomap.h1
-rw-r--r--include/linux/kasan.h7
-rw-r--r--include/linux/kernel.h2
-rw-r--r--include/linux/kthread.h2
-rw-r--r--include/linux/memblock.h6
-rw-r--r--include/linux/memcontrol.h54
-rw-r--r--include/linux/mm.h53
-rw-r--r--include/linux/mm_types.h10
-rw-r--r--include/linux/mman.h2
-rw-r--r--include/linux/mmdebug.h3
-rw-r--r--include/linux/mmzone.h90
-rw-r--r--include/linux/page-flags.h10
-rw-r--r--include/linux/page_owner.h6
-rw-r--r--include/linux/page_ref.h4
-rw-r--r--include/linux/page_reporting.h3
-rw-r--r--include/linux/pageblock-flags.h2
-rw-r--r--include/linux/pagemap.h4
-rw-r--r--include/linux/pgtable.h22
-rw-r--r--include/linux/printk.h5
-rw-r--r--include/linux/sched/coredump.h8
-rw-r--r--include/linux/slab.h59
-rw-r--r--include/linux/swap.h19
-rw-r--r--include/linux/swapops.h5
-rw-r--r--include/linux/vmstat.h65
-rw-r--r--include/linux/writeback.h1
-rw-r--r--include/trace/events/cma.h4
-rw-r--r--include/trace/events/filemap.h2
-rw-r--r--include/trace/events/kmem.h12
-rw-r--r--include/trace/events/page_pool.h4
-rw-r--r--include/trace/events/pagemap.h4
-rw-r--r--include/trace/events/vmscan.h2
39 files changed, 342 insertions, 215 deletions
diff --git a/include/asm-generic/memory_model.h b/include/asm-generic/memory_model.h
index 7637fb46ba4f..a2c8ed60233a 100644
--- a/include/asm-generic/memory_model.h
+++ b/include/asm-generic/memory_model.h
@@ -6,47 +6,18 @@
#ifndef __ASSEMBLY__
+/*
+ * supports 3 memory models.
+ */
#if defined(CONFIG_FLATMEM)
#ifndef ARCH_PFN_OFFSET
#define ARCH_PFN_OFFSET (0UL)
#endif
-#elif defined(CONFIG_DISCONTIGMEM)
-
-#ifndef arch_pfn_to_nid
-#define arch_pfn_to_nid(pfn) pfn_to_nid(pfn)
-#endif
-
-#ifndef arch_local_page_offset
-#define arch_local_page_offset(pfn, nid) \
- ((pfn) - NODE_DATA(nid)->node_start_pfn)
-#endif
-
-#endif /* CONFIG_DISCONTIGMEM */
-
-/*
- * supports 3 memory models.
- */
-#if defined(CONFIG_FLATMEM)
-
#define __pfn_to_page(pfn) (mem_map + ((pfn) - ARCH_PFN_OFFSET))
#define __page_to_pfn(page) ((unsigned long)((page) - mem_map) + \
ARCH_PFN_OFFSET)
-#elif defined(CONFIG_DISCONTIGMEM)
-
-#define __pfn_to_page(pfn) \
-({ unsigned long __pfn = (pfn); \
- unsigned long __nid = arch_pfn_to_nid(__pfn); \
- NODE_DATA(__nid)->node_mem_map + arch_local_page_offset(__pfn, __nid);\
-})
-
-#define __page_to_pfn(pg) \
-({ const struct page *__pg = (pg); \
- struct pglist_data *__pgdat = NODE_DATA(page_to_nid(__pg)); \
- (unsigned long)(__pg - __pgdat->node_mem_map) + \
- __pgdat->node_start_pfn; \
-})
#elif defined(CONFIG_SPARSEMEM_VMEMMAP)
@@ -70,7 +41,7 @@
struct mem_section *__sec = __pfn_to_section(__pfn); \
__section_mem_map_addr(__sec) + __pfn; \
})
-#endif /* CONFIG_FLATMEM/DISCONTIGMEM/SPARSEMEM */
+#endif /* CONFIG_FLATMEM/SPARSEMEM */
/*
* Convert a physical address to a Page Frame Number and back
diff --git a/include/asm-generic/pgtable-nop4d.h b/include/asm-generic/pgtable-nop4d.h
index ce2cbb3c380f..2f6b1befb129 100644
--- a/include/asm-generic/pgtable-nop4d.h
+++ b/include/asm-generic/pgtable-nop4d.h
@@ -9,7 +9,6 @@
typedef struct { pgd_t pgd; } p4d_t;
#define P4D_SHIFT PGDIR_SHIFT
-#define MAX_PTRS_PER_P4D 1
#define PTRS_PER_P4D 1
#define P4D_SIZE (1UL << P4D_SHIFT)
#define P4D_MASK (~(P4D_SIZE-1))
diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h
index 5aa8705df87e..4dbe715be65b 100644
--- a/include/asm-generic/topology.h
+++ b/include/asm-generic/topology.h
@@ -45,7 +45,7 @@
#endif
#ifndef cpumask_of_node
- #ifdef CONFIG_NEED_MULTIPLE_NODES
+ #ifdef CONFIG_NUMA
#define cpumask_of_node(node) ((node) == 0 ? cpu_online_mask : cpu_none_mask)
#else
#define cpumask_of_node(node) ((void)(node), cpu_online_mask)
diff --git a/include/kunit/test.h b/include/kunit/test.h
index 49601c4b98b8..524d4789af22 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -515,8 +515,9 @@ kunit_find_resource(struct kunit *test,
void *match_data)
{
struct kunit_resource *res, *found = NULL;
+ unsigned long flags;
- spin_lock(&test->lock);
+ spin_lock_irqsave(&test->lock, flags);
list_for_each_entry_reverse(res, &test->resources, node) {
if (match(test, res, (void *)match_data)) {
@@ -526,7 +527,7 @@ kunit_find_resource(struct kunit *test,
}
}
- spin_unlock(&test->lock);
+ spin_unlock_irqrestore(&test->lock, flags);
return found;
}
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index fff9367a6348..1d7edad9914f 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -154,6 +154,8 @@ struct bdi_writeback {
struct cgroup_subsys_state *blkcg_css; /* and blkcg */
struct list_head memcg_node; /* anchored at memcg->cgwb_list */
struct list_head blkcg_node; /* anchored at blkcg->cgwb_list */
+ struct list_head b_attached; /* attached inodes, protected by list_lock */
+ struct list_head offline_node; /* anchored at offline_cgwbs */
union {
struct work_struct release_work;
@@ -239,8 +241,9 @@ static inline void wb_get(struct bdi_writeback *wb)
/**
* wb_put - decrement a wb's refcount
* @wb: bdi_writeback to put
+ * @nr: number of references to put
*/
-static inline void wb_put(struct bdi_writeback *wb)
+static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
{
if (WARN_ON_ONCE(!wb->bdi)) {
/*
@@ -251,7 +254,16 @@ static inline void wb_put(struct bdi_writeback *wb)
}
if (wb != &wb->bdi->wb)
- percpu_ref_put(&wb->refcnt);
+ percpu_ref_put_many(&wb->refcnt, nr);
+}
+
+/**
+ * wb_put - decrement a wb's refcount
+ * @wb: bdi_writeback to put
+ */
+static inline void wb_put(struct bdi_writeback *wb)
+{
+ wb_put_many(wb, 1);
}
/**
@@ -280,6 +292,10 @@ static inline void wb_put(struct bdi_writeback *wb)
{
}
+static inline void wb_put_many(struct bdi_writeback *wb, unsigned long nr)
+{
+}
+
static inline bool wb_dying(struct bdi_writeback *wb)
{
return false;
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index 4a62b3980642..47e13582d9fc 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -54,7 +54,7 @@ enum cpuhp_state {
CPUHP_MM_MEMCQ_DEAD,
CPUHP_PERCPU_CNT_DEAD,
CPUHP_RADIX_DEAD,
- CPUHP_PAGE_ALLOC_DEAD,
+ CPUHP_PAGE_ALLOC,
CPUHP_NET_DEV_DEAD,
CPUHP_PCI_XGENE_DEAD,
CPUHP_IOMMU_IOVA_DEAD,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index c3c88fdb9b2a..fad6663cd1b0 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3417,18 +3417,14 @@ extern int simple_rename(struct user_namespace *, struct inode *,
extern void simple_recursive_removal(struct dentry *,
void (*callback)(struct dentry *));
extern int noop_fsync(struct file *, loff_t, loff_t, int);
-extern int noop_set_page_dirty(struct page *page);
extern void noop_invalidatepage(struct page *page, unsigned int offset,
unsigned int length);
extern ssize_t noop_direct_IO(struct kiocb *iocb, struct iov_iter *iter);
extern int simple_empty(struct dentry *);
-extern int simple_readpage(struct file *file, struct page *page);
extern int simple_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata);
-extern int simple_write_end(struct file *file, struct address_space *mapping,
- loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata);
+extern const struct address_space_operations ram_aops;
extern int always_delete_dentry(const struct dentry *);
extern struct inode *alloc_anon_inode(struct super_block *);
extern int simple_nosetlease(struct file *, long, struct file_lock **, void **);
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index e6102dfa4faa..55b2ec1f965a 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -506,8 +506,8 @@ static inline int gfp_zonelist(gfp_t flags)
* There are two zonelists per node, one for all zones with memory and
* one containing just zones from the node the zonelist belongs to.
*
- * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets
- * optimized to &contig_page_data at compile-time.
+ * For the case of non-NUMA systems the NODE_DATA() gets optimized to
+ * &contig_page_data at compile-time.
*/
static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
{
@@ -548,6 +548,15 @@ alloc_pages_bulk_array(gfp_t gfp, unsigned long nr_pages, struct page **page_arr
return __alloc_pages_bulk(gfp, numa_mem_id(), NULL, nr_pages, NULL, page_array);
}
+static inline unsigned long
+alloc_pages_bulk_array_node(gfp_t gfp, int nid, unsigned long nr_pages, struct page **page_array)
+{
+ if (nid == NUMA_NO_NODE)
+ nid = numa_mem_id();
+
+ return __alloc_pages_bulk(gfp, nid, NULL, nr_pages, NULL, page_array);
+}
+
/*
* Allocate pages, preferring the node given as nid. The node must be valid and
* online. For more general interface, see alloc_pages_node().
diff --git a/include/linux/iomap.h b/include/linux/iomap.h
index c87d0cb0de6d..479c1da3e221 100644
--- a/include/linux/iomap.h
+++ b/include/linux/iomap.h
@@ -159,7 +159,6 @@ ssize_t iomap_file_buffered_write(struct kiocb *iocb, struct iov_iter *from,
const struct iomap_ops *ops);
int iomap_readpage(struct page *page, const struct iomap_ops *ops);
void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops);
-int iomap_set_page_dirty(struct page *page);
int iomap_is_partially_uptodate(struct page *page, unsigned long from,
unsigned long count);
int iomap_releasepage(struct page *page, gfp_t gfp_mask);
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index a1c7ce5f3e4f..5310e217bd74 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -18,7 +18,6 @@ struct task_struct;
/* kasan_data struct is used in KUnit tests for KASAN expected failures */
struct kunit_kasan_expectation {
- bool report_expected;
bool report_found;
};
@@ -42,9 +41,9 @@ struct kunit_kasan_expectation {
#endif
extern unsigned char kasan_early_shadow_page[PAGE_SIZE];
-extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE + PTE_HWTABLE_PTRS];
-extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD];
-extern pud_t kasan_early_shadow_pud[PTRS_PER_PUD];
+extern pte_t kasan_early_shadow_pte[MAX_PTRS_PER_PTE + PTE_HWTABLE_PTRS];
+extern pmd_t kasan_early_shadow_pmd[MAX_PTRS_PER_PMD];
+extern pud_t kasan_early_shadow_pud[MAX_PTRS_PER_PUD];
extern p4d_t kasan_early_shadow_p4d[MAX_PTRS_PER_P4D];
int kasan_populate_early_shadow(const void *shadow_start,
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 15d8bad3d2f2..bf950621febf 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -357,6 +357,8 @@ int sscanf(const char *, const char *, ...);
extern __scanf(2, 0)
int vsscanf(const char *, const char *, va_list);
+extern int no_hash_pointers_enable(char *str);
+
extern int get_option(char **str, int *pint);
extern char *get_options(const char *str, int nints, int *ints);
extern unsigned long long memparse(const char *ptr, char **retptr);
diff --git a/include/linux/kthread.h b/include/linux/kthread.h
index d9133d6db308..346b0f269161 100644
--- a/include/linux/kthread.h
+++ b/include/linux/kthread.h
@@ -18,7 +18,7 @@ struct task_struct *kthread_create_on_node(int (*threadfn)(void *data),
* @threadfn: the function to run in the thread
* @data: data pointer for @threadfn()
* @namefmt: printf-style format string for the thread name
- * @arg...: arguments for @namefmt.
+ * @arg: arguments for @namefmt.
*
* This macro will create a kthread on the current node, leaving it in
* the stopped state. This is just a helper for kthread_create_on_node();
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 5984fff3f175..552309342c38 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -50,7 +50,7 @@ struct memblock_region {
phys_addr_t base;
phys_addr_t size;
enum memblock_flags flags;
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
int nid;
#endif
};
@@ -347,7 +347,7 @@ int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask);
int memblock_set_node(phys_addr_t base, phys_addr_t size,
struct memblock_type *type, int nid);
-#ifdef CONFIG_NEED_MULTIPLE_NODES
+#ifdef CONFIG_NUMA
static inline void memblock_set_region_node(struct memblock_region *r, int nid)
{
r->nid = nid;
@@ -366,7 +366,7 @@ static inline int memblock_get_region_node(const struct memblock_region *r)
{
return 0;
}
-#endif /* CONFIG_NEED_MULTIPLE_NODES */
+#endif /* CONFIG_NUMA */
/* Flags for memblock allocation APIs */
#define MEMBLOCK_ALLOC_ANYWHERE (~(phys_addr_t)0)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index c193be760709..6d66037be646 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -192,7 +192,7 @@ enum memcg_kmem_state {
struct memcg_padding {
char x[0];
} ____cacheline_internodealigned_in_smp;
-#define MEMCG_PADDING(name) struct memcg_padding name;
+#define MEMCG_PADDING(name) struct memcg_padding name
#else
#define MEMCG_PADDING(name)
#endif
@@ -349,8 +349,7 @@ struct mem_cgroup {
struct deferred_split deferred_split_queue;
#endif
- struct mem_cgroup_per_node *nodeinfo[0];
- /* WARNING: nodeinfo must be the last member here */
+ struct mem_cgroup_per_node *nodeinfo[];
};
/*
@@ -743,35 +742,18 @@ out:
/**
* mem_cgroup_page_lruvec - return lruvec for isolating/putting an LRU page
* @page: the page
- * @pgdat: pgdat of the page
*
* This function relies on page->mem_cgroup being stable.
*/
-static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
- struct pglist_data *pgdat)
+static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
{
+ pg_data_t *pgdat = page_pgdat(page);
struct mem_cgroup *memcg = page_memcg(page);
VM_WARN_ON_ONCE_PAGE(!memcg && !mem_cgroup_disabled(), page);
return mem_cgroup_lruvec(memcg, pgdat);
}
-static inline bool lruvec_holds_page_lru_lock(struct page *page,
- struct lruvec *lruvec)
-{
- pg_data_t *pgdat = page_pgdat(page);
- const struct mem_cgroup *memcg;
- struct mem_cgroup_per_node *mz;
-
- if (mem_cgroup_disabled())
- return lruvec == &pgdat->__lruvec;
-
- mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
- memcg = page_memcg(page) ? : root_mem_cgroup;
-
- return lruvec->pgdat == pgdat && mz->memcg == memcg;
-}
-
struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
@@ -1221,18 +1203,11 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
return &pgdat->__lruvec;
}
-static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
- struct pglist_data *pgdat)
-{
- return &pgdat->__lruvec;
-}
-
-static inline bool lruvec_holds_page_lru_lock(struct page *page,
- struct lruvec *lruvec)
+static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page)
{
pg_data_t *pgdat = page_pgdat(page);
- return lruvec == &pgdat->__lruvec;
+ return &pgdat->__lruvec;
}
static inline void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
@@ -1255,6 +1230,12 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
return NULL;
}
+static inline
+struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css)
+{
+ return NULL;
+}
+
static inline void mem_cgroup_put(struct mem_cgroup *memcg)
{
}
@@ -1516,12 +1497,19 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
spin_unlock_irqrestore(&lruvec->lru_lock, flags);
}
+/* Test requires a stable page->memcg binding, see page_memcg() */
+static inline bool page_matches_lruvec(struct page *page, struct lruvec *lruvec)
+{
+ return lruvec_pgdat(lruvec) == page_pgdat(page) &&
+ lruvec_memcg(lruvec) == page_memcg(page);
+}
+
/* Don't lock again iff page's lruvec locked */
static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
struct lruvec *locked_lruvec)
{
if (locked_lruvec) {
- if (lruvec_holds_page_lru_lock(page, locked_lruvec))
+ if (page_matches_lruvec(page, locked_lruvec))
return locked_lruvec;
unlock_page_lruvec_irq(locked_lruvec);
@@ -1535,7 +1523,7 @@ static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page,
struct lruvec *locked_lruvec, unsigned long *flags)
{
if (locked_lruvec) {
- if (lruvec_holds_page_lru_lock(page, locked_lruvec))
+ if (page_matches_lruvec(page, locked_lruvec))
return locked_lruvec;
unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 01ecf9e3603c..6d0f827ca4eb 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -46,7 +46,7 @@ extern int sysctl_page_lock_unfairness;
void init_mm_internals(void);
-#ifndef CONFIG_NEED_MULTIPLE_NODES /* Don't use mapnrs, do it properly */
+#ifndef CONFIG_NUMA /* Don't use mapnrs, do it properly */
extern unsigned long max_mapnr;
static inline void set_max_mapnr(unsigned long limit)
@@ -234,7 +234,11 @@ int overcommit_policy_handler(struct ctl_table *, int, void *, size_t *,
int __add_to_page_cache_locked(struct page *page, struct address_space *mapping,
pgoff_t index, gfp_t gfp, void **shadowp);
+#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
#define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
+#else
+#define nth_page(page,n) ((page) + (n))
+#endif
/* to align the pointer to the (next) page boundary */
#define PAGE_ALIGN(addr) ALIGN(addr, PAGE_SIZE)
@@ -1341,7 +1345,7 @@ static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
if (!is_cow_mapping(vma->vm_flags))
return false;
- if (!atomic_read(&vma->vm_mm->has_pinned))
+ if (!test_bit(MMF_HAS_PINNED, &vma->vm_mm->flags))
return false;
return page_maybe_dma_pinned(page);
@@ -1850,12 +1854,8 @@ extern int try_to_release_page(struct page * page, gfp_t gfp_mask);
extern void do_invalidatepage(struct page *page, unsigned int offset,
unsigned int length);
-void __set_page_dirty(struct page *, struct address_space *, int warn);
-int __set_page_dirty_nobuffers(struct page *page);
-int __set_page_dirty_no_writeback(struct page *page);
int redirty_page_for_writepage(struct writeback_control *wbc,
struct page *page);
-void account_page_dirtied(struct page *page, struct address_space *mapping);
void account_page_cleaned(struct page *page, struct address_space *mapping,
struct bdi_writeback *wb);
int set_page_dirty(struct page *page);
@@ -2420,7 +2420,7 @@ static inline unsigned long free_initmem_default(int poison)
extern char __init_begin[], __init_end[];
return free_reserved_area(&__init_begin, &__init_end,
- poison, "unused kernel");
+ poison, "unused kernel image (initmem)");
}
static inline unsigned long get_num_physpages(void)
@@ -2460,7 +2460,7 @@ extern void get_pfn_range_for_nid(unsigned int nid,
unsigned long *start_pfn, unsigned long *end_pfn);
extern unsigned long find_min_pfn_with_active_regions(void);
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
static inline int early_pfn_to_nid(unsigned long pfn)
{
return 0;
@@ -2474,7 +2474,6 @@ extern void set_dma_reserve(unsigned long new_dma_reserve);
extern void memmap_init_range(unsigned long, int, unsigned long,
unsigned long, unsigned long, enum meminit_context,
struct vmem_altmap *, int migratetype);
-extern void memmap_init_zone(struct zone *zone);
extern void setup_per_zone_wmarks(void);
extern int __meminit init_per_zone_wmark_min(void);
extern void mem_init(void);
@@ -2681,17 +2680,45 @@ extern struct vm_area_struct * find_vma(struct mm_struct * mm, unsigned long add
extern struct vm_area_struct * find_vma_prev(struct mm_struct * mm, unsigned long addr,
struct vm_area_struct **pprev);
-/* Look up the first VMA which intersects the interval start_addr..end_addr-1,
- NULL if none. Assume start_addr < end_addr. */
-static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr)
+/**
+ * find_vma_intersection() - Look up the first VMA which intersects the interval
+ * @mm: The process address space.
+ * @start_addr: The inclusive start user address.
+ * @end_addr: The exclusive end user address.
+ *
+ * Returns: The first VMA within the provided range, %NULL otherwise. Assumes
+ * start_addr < end_addr.
+ */
+static inline
+struct vm_area_struct *find_vma_intersection(struct mm_struct *mm,
+ unsigned long start_addr,
+ unsigned long end_addr)
{
- struct vm_area_struct * vma = find_vma(mm,start_addr);
+ struct vm_area_struct *vma = find_vma(mm, start_addr);
if (vma && end_addr <= vma->vm_start)
vma = NULL;
return vma;
}
+/**
+ * vma_lookup() - Find a VMA at a specific address
+ * @mm: The process address space.
+ * @addr: The user address.
+ *
+ * Return: The vm_area_struct at the given address, %NULL otherwise.
+ */
+static inline
+struct vm_area_struct *vma_lookup(struct mm_struct *mm, unsigned long addr)
+{
+ struct vm_area_struct *vma = find_vma(mm, addr);
+
+ if (vma && addr < vma->vm_start)
+ vma = NULL;
+
+ return vma;
+}
+
static inline unsigned long vm_start_gap(struct vm_area_struct *vma)
{
unsigned long vm_start = vma->vm_start;
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 8f0fb62e8975..b66d0225414e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -435,16 +435,6 @@ struct mm_struct {
*/
atomic_t mm_count;
- /**
- * @has_pinned: Whether this mm has pinned any pages. This can
- * be either replaced in the future by @pinned_vm when it
- * becomes stable, or grow into a counter on its own. We're
- * aggresive on this bit now - even if the pinned pages were
- * unpinned later on, we'll still keep this bit set for the
- * lifecycle of this mm just for simplicity.
- */
- atomic_t has_pinned;
-
#ifdef CONFIG_MMU
atomic_long_t pgtables_bytes; /* PTE page table pages */
#endif
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 629cefc4ecba..ebb09a964272 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -31,6 +31,8 @@
/*
* The historical set of flags that all mmap implementations implicitly
* support when a ->mmap_validate() op is not provided in file_operations.
+ *
+ * MAP_EXECUTABLE is completely ignored throughout the kernel.
*/
#define LEGACY_MAP_MASK (MAP_SHARED \
| MAP_PRIVATE \
diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h
index 5d0767cb424a..1935d4c72d10 100644
--- a/include/linux/mmdebug.h
+++ b/include/linux/mmdebug.h
@@ -9,8 +9,7 @@ struct page;
struct vm_area_struct;
struct mm_struct;
-extern void dump_page(struct page *page, const char *reason);
-extern void __dump_page(struct page *page, const char *reason);
+void dump_page(struct page *page, const char *reason);
void dump_vma(const struct vm_area_struct *vma);
void dump_mm(const struct mm_struct *mm);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 0d53eba1c383..265a32e1ff74 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -20,6 +20,7 @@
#include <linux/atomic.h>
#include <linux/mm_types.h>
#include <linux/page-flags.h>
+#include <linux/local_lock.h>
#include <asm/page.h>
/* Free memory management - zoned buddy allocator. */
@@ -134,10 +135,10 @@ enum numa_stat_item {
NUMA_INTERLEAVE_HIT, /* interleaver preferred this zone */
NUMA_LOCAL, /* allocation from local node */
NUMA_OTHER, /* allocation from other node */
- NR_VM_NUMA_STAT_ITEMS
+ NR_VM_NUMA_EVENT_ITEMS
};
#else
-#define NR_VM_NUMA_STAT_ITEMS 0
+#define NR_VM_NUMA_EVENT_ITEMS 0
#endif
enum zone_stat_item {
@@ -332,29 +333,55 @@ enum zone_watermarks {
NR_WMARK
};
+/*
+ * One per migratetype for each PAGE_ALLOC_COSTLY_ORDER plus one additional
+ * for pageblock size for THP if configured.
+ */
+#ifdef CONFIG_TRANSPARENT_HUGEPAGE
+#define NR_PCP_THP 1
+#else
+#define NR_PCP_THP 0
+#endif
+#define NR_PCP_LISTS (MIGRATE_PCPTYPES * (PAGE_ALLOC_COSTLY_ORDER + 1 + NR_PCP_THP))
+
+/*
+ * Shift to encode migratetype and order in the same integer, with order
+ * in the least significant bits.
+ */
+#define NR_PCP_ORDER_WIDTH 8
+#define NR_PCP_ORDER_MASK ((1<<NR_PCP_ORDER_WIDTH) - 1)
+
#define min_wmark_pages(z) (z->_watermark[WMARK_MIN] + z->watermark_boost)
#define low_wmark_pages(z) (z->_watermark[WMARK_LOW] + z->watermark_boost)
#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
#define wmark_pages(z, i) (z->_watermark[i] + z->watermark_boost)
+/* Fields and list protected by pagesets local_lock in page_alloc.c */
struct per_cpu_pages {
int count; /* number of pages in the list */
int high; /* high watermark, emptying needed */
int batch; /* chunk size for buddy add/remove */
+ short free_factor; /* batch scaling factor during free */
+#ifdef CONFIG_NUMA
+ short expire; /* When 0, remote pagesets are drained */
+#endif
/* Lists of pages, one per migrate type stored on the pcp-lists */
- struct list_head lists[MIGRATE_PCPTYPES];
+ struct list_head lists[NR_PCP_LISTS];
};
-struct per_cpu_pageset {
- struct per_cpu_pages pcp;
-#ifdef CONFIG_NUMA
- s8 expire;
- u16 vm_numa_stat_diff[NR_VM_NUMA_STAT_ITEMS];
-#endif
+struct per_cpu_zonestat {
#ifdef CONFIG_SMP
- s8 stat_threshold;
s8 vm_stat_diff[NR_VM_ZONE_STAT_ITEMS];
+ s8 stat_threshold;
+#endif
+#ifdef CONFIG_NUMA
+ /*
+ * Low priority inaccurate counters that are only folded
+ * on demand. Use a large type to avoid the overhead of
+ * folding during refresh_cpu_vm_stats.
+ */
+ unsigned long vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
#endif
};
@@ -484,7 +511,8 @@ struct zone {
int node;
#endif
struct pglist_data *zone_pgdat;
- struct per_cpu_pageset __percpu *pageset;
+ struct per_cpu_pages __percpu *per_cpu_pageset;
+ struct per_cpu_zonestat __percpu *per_cpu_zonestats;
/*
* the high and batch values are copied to individual pagesets for
* faster access
@@ -619,7 +647,7 @@ struct zone {
ZONE_PADDING(_pad3_)
/* Zone statistics */
atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS];
- atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
+ atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
} ____cacheline_internodealigned_in_smp;
enum pgdat_flags {
@@ -637,6 +665,7 @@ enum zone_flags {
ZONE_BOOSTED_WATERMARK, /* zone recently boosted watermarks.
* Cleared when kswapd is woken.
*/
+ ZONE_RECLAIM_ACTIVE, /* kswapd may be scanning the zone. */
};
static inline unsigned long zone_managed_pages(struct zone *zone)
@@ -738,10 +767,12 @@ struct zonelist {
struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
};
-#ifndef CONFIG_DISCONTIGMEM
-/* The array of struct pages - for discontigmem use pgdat->lmem_map */
+/*
+ * The array of struct pages for flatmem.
+ * It must be declared for SPARSEMEM as well because there are configurations
+ * that rely on that.
+ */
extern struct page *mem_map;
-#endif
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
struct deferred_split {
@@ -775,7 +806,7 @@ typedef struct pglist_data {
struct zonelist node_zonelists[MAX_ZONELISTS];
int nr_zones; /* number of populated zones in this node */
-#ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */
+#ifdef CONFIG_FLATMEM /* means !SPARSEMEM */
struct page *node_mem_map;
#ifdef CONFIG_PAGE_EXTENSION
struct page_ext *node_page_ext;
@@ -865,7 +896,7 @@ typedef struct pglist_data {
#define node_present_pages(nid) (NODE_DATA(nid)->node_present_pages)
#define node_spanned_pages(nid) (NODE_DATA(nid)->node_spanned_pages)
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
+#ifdef CONFIG_FLATMEM
#define pgdat_page_nr(pgdat, pagenr) ((pgdat)->node_mem_map + (pagenr))
#else
#define pgdat_page_nr(pgdat, pagenr) pfn_to_page((pgdat)->node_start_pfn + (pagenr))
@@ -982,22 +1013,11 @@ static inline void zone_set_nid(struct zone *zone, int nid) {}
extern int movable_zone;
-#ifdef CONFIG_HIGHMEM
-static inline int zone_movable_is_highmem(void)
-{
-#ifdef CONFIG_NEED_MULTIPLE_NODES
- return movable_zone == ZONE_HIGHMEM;
-#else
- return (ZONE_MOVABLE - 1) == ZONE_HIGHMEM;
-#endif
-}
-#endif
-
static inline int is_highmem_idx(enum zone_type idx)
{
#ifdef CONFIG_HIGHMEM
return (idx == ZONE_HIGHMEM ||
- (idx == ZONE_MOVABLE && zone_movable_is_highmem()));
+ (idx == ZONE_MOVABLE && movable_zone == ZONE_HIGHMEM));
#else
return 0;
#endif
@@ -1029,7 +1049,7 @@ int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *,
extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *,
size_t *, loff_t *);
-int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
+int percpu_pagelist_high_fraction_sysctl_handler(struct ctl_table *, int,
void *, size_t *, loff_t *);
int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
void *, size_t *, loff_t *);
@@ -1037,21 +1057,21 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
void *, size_t *, loff_t *);
int numa_zonelist_order_handler(struct ctl_table *, int,
void *, size_t *, loff_t *);
-extern int percpu_pagelist_fraction;
+extern int percpu_pagelist_high_fraction;
extern char numa_zonelist_order[];
#define NUMA_ZONELIST_ORDER_LEN 16
-#ifndef CONFIG_NEED_MULTIPLE_NODES
+#ifndef CONFIG_NUMA
extern struct pglist_data contig_page_data;
#define NODE_DATA(nid) (&contig_page_data)
#define NODE_MEM_MAP(nid) mem_map
-#else /* CONFIG_NEED_MULTIPLE_NODES */
+#else /* CONFIG_NUMA */
#include <asm/mmzone.h>
-#endif /* !CONFIG_NEED_MULTIPLE_NODES */
+#endif /* !CONFIG_NUMA */
extern struct pglist_data *first_online_pgdat(void);
extern struct pglist_data *next_online_pgdat(struct pglist_data *pgdat);
@@ -1200,8 +1220,6 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist,
#ifdef CONFIG_SPARSEMEM
/*
- * SECTION_SHIFT #bits space required to store a section #
- *
* PA_SECTION_SHIFT physical address to/from section number
* PFN_SECTION_SHIFT pfn to/from section number
*/
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 40e2c5000585..458696550028 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -180,17 +180,17 @@ enum pageflags {
#ifndef __GENERATING_BOUNDS_H
-struct page; /* forward declaration */
-
-static inline struct page *compound_head(struct page *page)
+static inline unsigned long _compound_head(const struct page *page)
{
unsigned long head = READ_ONCE(page->compound_head);
if (unlikely(head & 1))
- return (struct page *) (head - 1);
- return page;
+ return head - 1;
+ return (unsigned long)page;
}
+#define compound_head(page) ((typeof(page))_compound_head(page))
+
static __always_inline int PageTail(struct page *page)
{
return READ_ONCE(page->compound_head) & 1;
diff --git a/include/linux/page_owner.h b/include/linux/page_owner.h
index 3468794f83d2..719bfe5108c5 100644
--- a/include/linux/page_owner.h
+++ b/include/linux/page_owner.h
@@ -14,7 +14,7 @@ extern void __set_page_owner(struct page *page,
extern void __split_page_owner(struct page *page, unsigned int nr);
extern void __copy_page_owner(struct page *oldpage, struct page *newpage);
extern void __set_page_owner_migrate_reason(struct page *page, int reason);
-extern void __dump_page_owner(struct page *page);
+extern void __dump_page_owner(const struct page *page);
extern void pagetypeinfo_showmixedcount_print(struct seq_file *m,
pg_data_t *pgdat, struct zone *zone);
@@ -46,7 +46,7 @@ static inline void set_page_owner_migrate_reason(struct page *page, int reason)
if (static_branch_unlikely(&page_owner_inited))
__set_page_owner_migrate_reason(page, reason);
}
-static inline void dump_page_owner(struct page *page)
+static inline void dump_page_owner(const struct page *page)
{
if (static_branch_unlikely(&page_owner_inited))
__dump_page_owner(page);
@@ -69,7 +69,7 @@ static inline void copy_page_owner(struct page *oldpage, struct page *newpage)
static inline void set_page_owner_migrate_reason(struct page *page, int reason)
{
}
-static inline void dump_page_owner(struct page *page)
+static inline void dump_page_owner(const struct page *page)
{
}
#endif /* CONFIG_PAGE_OWNER */
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index f3318f34fc54..7ad46f45df39 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -62,12 +62,12 @@ static inline void __page_ref_unfreeze(struct page *page, int v)
#endif
-static inline int page_ref_count(struct page *page)
+static inline int page_ref_count(const struct page *page)
{
return atomic_read(&page->_refcount);
}
-static inline int page_count(struct page *page)
+static inline int page_count(const struct page *page)
{
return atomic_read(&compound_head(page)->_refcount);
}
diff --git a/include/linux/page_reporting.h b/include/linux/page_reporting.h
index 3b99e0ec24f2..fe648dfa3a7c 100644
--- a/include/linux/page_reporting.h
+++ b/include/linux/page_reporting.h
@@ -18,6 +18,9 @@ struct page_reporting_dev_info {
/* Current state of page reporting */
atomic_t state;
+
+ /* Minimal order of page reporting */
+ unsigned int order;
};
/* Tear-down and bring-up for page reporting devices */
diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h
index fff52ad370c1..973fd731a520 100644
--- a/include/linux/pageblock-flags.h
+++ b/include/linux/pageblock-flags.h
@@ -54,7 +54,7 @@ extern unsigned int pageblock_order;
/* Forward declaration */
struct page;
-unsigned long get_pfnblock_flags_mask(struct page *page,
+unsigned long get_pfnblock_flags_mask(const struct page *page,
unsigned long pfn,
unsigned long mask);
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 0f1b34dbf3a2..ed02aa522263 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -702,6 +702,10 @@ int wait_on_page_writeback_killable(struct page *page);
extern void end_page_writeback(struct page *page);
void wait_for_stable_page(struct page *page);
+void __set_page_dirty(struct page *, struct address_space *, int warn);
+int __set_page_dirty_nobuffers(struct page *page);
+int __set_page_dirty_no_writeback(struct page *page);
+
void page_endio(struct page *page, bool is_write, int err);
/**
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index a43047b1030d..c32600c9e1ad 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -1592,4 +1592,26 @@ typedef unsigned int pgtbl_mod_mask;
#define pte_leaf_size(x) PAGE_SIZE
#endif
+/*
+ * Some architectures have MMUs that are configurable or selectable at boot
+ * time. These lead to variable PTRS_PER_x. For statically allocated arrays it
+ * helps to have a static maximum value.
+ */
+
+#ifndef MAX_PTRS_PER_PTE
+#define MAX_PTRS_PER_PTE PTRS_PER_PTE
+#endif
+
+#ifndef MAX_PTRS_PER_PMD
+#define MAX_PTRS_PER_PMD PTRS_PER_PMD
+#endif
+
+#ifndef MAX_PTRS_PER_PUD
+#define MAX_PTRS_PER_PUD PTRS_PER_PUD
+#endif
+
+#ifndef MAX_PTRS_PER_P4D
+#define MAX_PTRS_PER_P4D PTRS_PER_P4D
+#endif
+
#endif /* _LINUX_PGTABLE_H */
diff --git a/include/linux/printk.h b/include/linux/printk.h
index 1790a5521fd9..d796183f26c9 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -206,6 +206,7 @@ void __init setup_log_buf(int early);
__printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...);
void dump_stack_print_info(const char *log_lvl);
void show_regs_print_info(const char *log_lvl);
+extern asmlinkage void dump_stack_lvl(const char *log_lvl) __cold;
extern asmlinkage void dump_stack(void) __cold;
extern void printk_safe_flush(void);
extern void printk_safe_flush_on_panic(void);
@@ -269,6 +270,10 @@ static inline void show_regs_print_info(const char *log_lvl)
{
}
+static inline void dump_stack_lvl(const char *log_lvl)
+{
+}
+
static inline void dump_stack(void)
{
}
diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h
index dfd82eab2902..4d9e3a656875 100644
--- a/include/linux/sched/coredump.h
+++ b/include/linux/sched/coredump.h
@@ -73,6 +73,14 @@ static inline int get_dumpable(struct mm_struct *mm)
#define MMF_OOM_VICTIM 25 /* mm is the oom victim */
#define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */
#define MMF_MULTIPROCESS 27 /* mm is shared between processes */
+/*
+ * MMF_HAS_PINNED: Whether this mm has pinned any pages. This can be either
+ * replaced in the future by mm.pinned_vm when it becomes stable, or grow into
+ * a counter on its own. We're aggresive on this bit for now: even if the
+ * pinned pages were unpinned later on, we'll still keep this bit set for the
+ * lifecycle of this mm, just for simplicity.
+ */
+#define MMF_HAS_PINNED 28 /* FOLL_PIN has run, never cleared */
#define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP)
#define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\
diff --git a/include/linux/slab.h b/include/linux/slab.h
index 0c97d788762c..083f3ce550bc 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -305,9 +305,21 @@ static inline void __check_heap_object(const void *ptr, unsigned long n,
/*
* Whenever changing this, take care of that kmalloc_type() and
* create_kmalloc_caches() still work as intended.
+ *
+ * KMALLOC_NORMAL can contain only unaccounted objects whereas KMALLOC_CGROUP
+ * is for accounted but unreclaimable and non-dma objects. All the other
+ * kmem caches can have both accounted and unaccounted objects.
*/
enum kmalloc_cache_type {
KMALLOC_NORMAL = 0,
+#ifndef CONFIG_ZONE_DMA
+ KMALLOC_DMA = KMALLOC_NORMAL,
+#endif
+#ifndef CONFIG_MEMCG_KMEM
+ KMALLOC_CGROUP = KMALLOC_NORMAL,
+#else
+ KMALLOC_CGROUP,
+#endif
KMALLOC_RECLAIM,
#ifdef CONFIG_ZONE_DMA
KMALLOC_DMA,
@@ -319,24 +331,36 @@ enum kmalloc_cache_type {
extern struct kmem_cache *
kmalloc_caches[NR_KMALLOC_TYPES][KMALLOC_SHIFT_HIGH + 1];
+/*
+ * Define gfp bits that should not be set for KMALLOC_NORMAL.
+ */
+#define KMALLOC_NOT_NORMAL_BITS \
+ (__GFP_RECLAIMABLE | \
+ (IS_ENABLED(CONFIG_ZONE_DMA) ? __GFP_DMA : 0) | \
+ (IS_ENABLED(CONFIG_MEMCG_KMEM) ? __GFP_ACCOUNT : 0))
+
static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags)
{
-#ifdef CONFIG_ZONE_DMA
/*
* The most common case is KMALLOC_NORMAL, so test for it
- * with a single branch for both flags.
+ * with a single branch for all the relevant flags.
*/
- if (likely((flags & (__GFP_DMA | __GFP_RECLAIMABLE)) == 0))
+ if (likely((flags & KMALLOC_NOT_NORMAL_BITS) == 0))
return KMALLOC_NORMAL;
/*
- * At least one of the flags has to be set. If both are, __GFP_DMA
- * is more important.
+ * At least one of the flags has to be set. Their priorities in
+ * decreasing order are:
+ * 1) __GFP_DMA
+ * 2) __GFP_RECLAIMABLE
+ * 3) __GFP_ACCOUNT
*/
- return flags & __GFP_DMA ? KMALLOC_DMA : KMALLOC_RECLAIM;
-#else
- return flags & __GFP_RECLAIMABLE ? KMALLOC_RECLAIM : KMALLOC_NORMAL;
-#endif
+ if (IS_ENABLED(CONFIG_ZONE_DMA) && (flags & __GFP_DMA))
+ return KMALLOC_DMA;
+ if (!IS_ENABLED(CONFIG_MEMCG_KMEM) || (flags & __GFP_RECLAIMABLE))
+ return KMALLOC_RECLAIM;
+ else
+ return KMALLOC_CGROUP;
}
/*
@@ -346,8 +370,14 @@ static __always_inline enum kmalloc_cache_type kmalloc_type(gfp_t flags)
* 1 = 65 .. 96 bytes
* 2 = 129 .. 192 bytes
* n = 2^(n-1)+1 .. 2^n
+ *
+ * Note: __kmalloc_index() is compile-time optimized, and not runtime optimized;
+ * typical usage is via kmalloc_index() and therefore evaluated at compile-time.
+ * Callers where !size_is_constant should only be test modules, where runtime
+ * overheads of __kmalloc_index() can be tolerated. Also see kmalloc_slab().
*/
-static __always_inline unsigned int kmalloc_index(size_t size)
+static __always_inline unsigned int __kmalloc_index(size_t size,
+ bool size_is_constant)
{
if (!size)
return 0;
@@ -382,12 +412,17 @@ static __always_inline unsigned int kmalloc_index(size_t size)
if (size <= 8 * 1024 * 1024) return 23;
if (size <= 16 * 1024 * 1024) return 24;
if (size <= 32 * 1024 * 1024) return 25;
- if (size <= 64 * 1024 * 1024) return 26;
- BUG();
+
+ if ((IS_ENABLED(CONFIG_CC_IS_GCC) || CONFIG_CLANG_VERSION >= 110000)
+ && !IS_ENABLED(CONFIG_PROFILE_ALL_BRANCHES) && size_is_constant)
+ BUILD_BUG_ON_MSG(1, "unexpected size in kmalloc_index()");
+ else
+ BUG();
/* Will never be reached. Needed because the compiler may complain */
return -1;
}
+#define kmalloc_index(s) __kmalloc_index(s, true)
#endif /* !CONFIG_SLOB */
void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment __malloc;
diff --git a/include/linux/swap.h b/include/linux/swap.h
index 144727041e78..49b1dd2c100b 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -177,7 +177,6 @@ enum {
SWP_PAGE_DISCARD = (1 << 10), /* freed swap page-cluster discards */
SWP_STABLE_WRITES = (1 << 11), /* no overwrite PG_writeback pages */
SWP_SYNCHRONOUS_IO = (1 << 12), /* synchronous IO is efficient */
- SWP_VALID = (1 << 13), /* swap is valid to be operated on? */
/* add others here before... */
SWP_SCANNING = (1 << 14), /* refcount in scan_swap_map */
};
@@ -240,6 +239,7 @@ struct swap_cluster_list {
* The in-memory structure used to track swap areas.
*/
struct swap_info_struct {
+ struct percpu_ref users; /* indicate and keep swap device valid. */
unsigned long flags; /* SWP_USED etc: see above */
signed short prio; /* swap priority of this type */
struct plist_node list; /* entry in swap_active_head */
@@ -260,6 +260,7 @@ struct swap_info_struct {
struct block_device *bdev; /* swap device or bdev of swap file */
struct file *swap_file; /* seldom referenced */
unsigned int old_block_size; /* seldom referenced */
+ struct completion comp; /* seldom referenced */
#ifdef CONFIG_FRONTSWAP
unsigned long *frontswap_map; /* frontswap in-use, one bit per page */
atomic_t frontswap_pages; /* frontswap pages in-use counter */
@@ -445,6 +446,7 @@ extern void __delete_from_swap_cache(struct page *page,
extern void delete_from_swap_cache(struct page *);
extern void clear_shadow_from_swap_cache(int type, unsigned long begin,
unsigned long end);
+extern void free_swap_cache(struct page *);
extern void free_page_and_swap_cache(struct page *);
extern void free_pages_and_swap_cache(struct page **, int);
extern struct page *lookup_swap_cache(swp_entry_t entry,
@@ -511,7 +513,7 @@ sector_t swap_page_sector(struct page *page);
static inline void put_swap_device(struct swap_info_struct *si)
{
- rcu_read_unlock();
+ percpu_ref_put(&si->users);
}
#else /* CONFIG_SWAP */
@@ -526,6 +528,15 @@ static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry)
return NULL;
}
+static inline struct swap_info_struct *get_swap_device(swp_entry_t entry)
+{
+ return NULL;
+}
+
+static inline void put_swap_device(struct swap_info_struct *si)
+{
+}
+
#define swap_address_space(entry) (NULL)
#define get_nr_swap_pages() 0L
#define total_swap_pages 0L
@@ -541,6 +552,10 @@ static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry)
#define free_pages_and_swap_cache(pages, nr) \
release_pages((pages), (nr));
+static inline void free_swap_cache(struct page *page)
+{
+}
+
static inline void show_swap_cache_info(void)
{
}
diff --git a/include/linux/swapops.h b/include/linux/swapops.h
index 6430a94c6981..5907205c712c 100644
--- a/include/linux/swapops.h
+++ b/include/linux/swapops.h
@@ -330,6 +330,11 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
return swp_type(entry) == SWP_HWPOISON;
}
+static inline unsigned long hwpoison_entry_to_pfn(swp_entry_t entry)
+{
+ return swp_offset(entry);
+}
+
static inline void num_poisoned_pages_inc(void)
{
atomic_long_inc(&num_poisoned_pages);
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 3299cd69e4ca..d6a6cf53b127 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -138,34 +138,27 @@ static inline void vm_events_fold_cpu(int cpu)
* Zone and node-based page accounting with per cpu differentials.
*/
extern atomic_long_t vm_zone_stat[NR_VM_ZONE_STAT_ITEMS];
-extern atomic_long_t vm_numa_stat[NR_VM_NUMA_STAT_ITEMS];
extern atomic_long_t vm_node_stat[NR_VM_NODE_STAT_ITEMS];
+extern atomic_long_t vm_numa_event[NR_VM_NUMA_EVENT_ITEMS];
#ifdef CONFIG_NUMA
-static inline void zone_numa_state_add(long x, struct zone *zone,
- enum numa_stat_item item)
+static inline void zone_numa_event_add(long x, struct zone *zone,
+ enum numa_stat_item item)
{
- atomic_long_add(x, &zone->vm_numa_stat[item]);
- atomic_long_add(x, &vm_numa_stat[item]);
+ atomic_long_add(x, &zone->vm_numa_event[item]);
+ atomic_long_add(x, &vm_numa_event[item]);
}
-static inline unsigned long global_numa_state(enum numa_stat_item item)
+static inline unsigned long zone_numa_event_state(struct zone *zone,
+ enum numa_stat_item item)
{
- long x = atomic_long_read(&vm_numa_stat[item]);
-
- return x;
+ return atomic_long_read(&zone->vm_numa_event[item]);
}
-static inline unsigned long zone_numa_state_snapshot(struct zone *zone,
- enum numa_stat_item item)
+static inline unsigned long
+global_numa_event_state(enum numa_stat_item item)
{
- long x = atomic_long_read(&zone->vm_numa_stat[item]);
- int cpu;
-
- for_each_online_cpu(cpu)
- x += per_cpu_ptr(zone->pageset, cpu)->vm_numa_stat_diff[item];
-
- return x;
+ return atomic_long_read(&vm_numa_event[item]);
}
#endif /* CONFIG_NUMA */
@@ -236,7 +229,7 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
#ifdef CONFIG_SMP
int cpu;
for_each_online_cpu(cpu)
- x += per_cpu_ptr(zone->pageset, cpu)->vm_stat_diff[item];
+ x += per_cpu_ptr(zone->per_cpu_zonestats, cpu)->vm_stat_diff[item];
if (x < 0)
x = 0;
@@ -245,18 +238,38 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone,
}
#ifdef CONFIG_NUMA
-extern void __inc_numa_state(struct zone *zone, enum numa_stat_item item);
+/* See __count_vm_event comment on why raw_cpu_inc is used. */
+static inline void
+__count_numa_event(struct zone *zone, enum numa_stat_item item)
+{
+ struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
+
+ raw_cpu_inc(pzstats->vm_numa_event[item]);
+}
+
+static inline void
+__count_numa_events(struct zone *zone, enum numa_stat_item item, long delta)
+{
+ struct per_cpu_zonestat __percpu *pzstats = zone->per_cpu_zonestats;
+
+ raw_cpu_add(pzstats->vm_numa_event[item], delta);
+}
+
extern unsigned long sum_zone_node_page_state(int node,
enum zone_stat_item item);
-extern unsigned long sum_zone_numa_state(int node, enum numa_stat_item item);
+extern unsigned long sum_zone_numa_event_state(int node, enum numa_stat_item item);
extern unsigned long node_page_state(struct pglist_data *pgdat,
enum node_stat_item item);
extern unsigned long node_page_state_pages(struct pglist_data *pgdat,
enum node_stat_item item);
+extern void fold_vm_numa_events(void);
#else
#define sum_zone_node_page_state(node, item) global_zone_page_state(item)
#define node_page_state(node, item) global_node_page_state(item)
#define node_page_state_pages(node, item) global_node_page_state_pages(item)
+static inline void fold_vm_numa_events(void)
+{
+}
#endif /* CONFIG_NUMA */
#ifdef CONFIG_SMP
@@ -291,7 +304,7 @@ struct ctl_table;
int vmstat_refresh(struct ctl_table *, int write, void *buffer, size_t *lenp,
loff_t *ppos);
-void drain_zonestat(struct zone *zone, struct per_cpu_pageset *);
+void drain_zonestat(struct zone *zone, struct per_cpu_zonestat *);
int calculate_pressure_threshold(struct zone *zone);
int calculate_normal_threshold(struct zone *zone);
@@ -399,7 +412,7 @@ static inline void cpu_vm_stats_fold(int cpu) { }
static inline void quiet_vmstat(void) { }
static inline void drain_zonestat(struct zone *zone,
- struct per_cpu_pageset *pset) { }
+ struct per_cpu_zonestat *pzstats) { }
#endif /* CONFIG_SMP */
static inline void __mod_zone_freepage_state(struct zone *zone, int nr_pages,
@@ -428,7 +441,7 @@ static inline const char *numa_stat_name(enum numa_stat_item item)
static inline const char *node_stat_name(enum node_stat_item item)
{
return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
- NR_VM_NUMA_STAT_ITEMS +
+ NR_VM_NUMA_EVENT_ITEMS +
item];
}
@@ -440,7 +453,7 @@ static inline const char *lru_list_name(enum lru_list lru)
static inline const char *writeback_stat_name(enum writeback_stat_item item)
{
return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
- NR_VM_NUMA_STAT_ITEMS +
+ NR_VM_NUMA_EVENT_ITEMS +
NR_VM_NODE_STAT_ITEMS +
item];
}
@@ -449,7 +462,7 @@ static inline const char *writeback_stat_name(enum writeback_stat_item item)
static inline const char *vm_event_name(enum vm_event_item item)
{
return vmstat_text[NR_VM_ZONE_STAT_ITEMS +
- NR_VM_NUMA_STAT_ITEMS +
+ NR_VM_NUMA_EVENT_ITEMS +
NR_VM_NODE_STAT_ITEMS +
NR_VM_WRITEBACK_STAT_ITEMS +
item];
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index 8e5c5bb16e2d..95de51c10248 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -221,6 +221,7 @@ void wbc_account_cgroup_owner(struct writeback_control *wbc, struct page *page,
int cgroup_writeback_by_id(u64 bdi_id, int memcg_id, unsigned long nr_pages,
enum wb_reason reason, struct wb_completion *done);
void cgroup_writeback_umount(void);
+bool cleanup_offline_cgwb(struct bdi_writeback *wb);
/**
* inode_attach_wb - associate an inode with its wb
diff --git a/include/trace/events/cma.h b/include/trace/events/cma.h
index c3d354702cb0..3d708dae1542 100644
--- a/include/trace/events/cma.h
+++ b/include/trace/events/cma.h
@@ -31,7 +31,7 @@ DECLARE_EVENT_CLASS(cma_alloc_class,
__entry->align = align;
),
- TP_printk("name=%s pfn=%lx page=%p count=%lu align=%u",
+ TP_printk("name=%s pfn=0x%lx page=%p count=%lu align=%u",
__get_str(name),
__entry->pfn,
__entry->page,
@@ -60,7 +60,7 @@ TRACE_EVENT(cma_release,
__entry->count = count;
),
- TP_printk("name=%s pfn=%lx page=%p count=%lu",
+ TP_printk("name=%s pfn=0x%lx page=%p count=%lu",
__get_str(name),
__entry->pfn,
__entry->page,
diff --git a/include/trace/events/filemap.h b/include/trace/events/filemap.h
index 796053e162d2..c47b63db124e 100644
--- a/include/trace/events/filemap.h
+++ b/include/trace/events/filemap.h
@@ -36,7 +36,7 @@ DECLARE_EVENT_CLASS(mm_filemap_op_page_cache,
__entry->s_dev = page->mapping->host->i_rdev;
),
- TP_printk("dev %d:%d ino %lx page=%p pfn=%lu ofs=%lu",
+ TP_printk("dev %d:%d ino %lx page=%p pfn=0x%lx ofs=%lu",
MAJOR(__entry->s_dev), MINOR(__entry->s_dev),
__entry->i_ino,
pfn_to_page(__entry->pfn),
diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h
index 829a75692cc0..ddc8c944f417 100644
--- a/include/trace/events/kmem.h
+++ b/include/trace/events/kmem.h
@@ -173,7 +173,7 @@ TRACE_EVENT(mm_page_free,
__entry->order = order;
),
- TP_printk("page=%p pfn=%lu order=%d",
+ TP_printk("page=%p pfn=0x%lx order=%d",
pfn_to_page(__entry->pfn),
__entry->pfn,
__entry->order)
@@ -193,7 +193,7 @@ TRACE_EVENT(mm_page_free_batched,
__entry->pfn = page_to_pfn(page);
),
- TP_printk("page=%p pfn=%lu order=0",
+ TP_printk("page=%p pfn=0x%lx order=0",
pfn_to_page(__entry->pfn),
__entry->pfn)
);
@@ -219,7 +219,7 @@ TRACE_EVENT(mm_page_alloc,
__entry->migratetype = migratetype;
),
- TP_printk("page=%p pfn=%lu order=%d migratetype=%d gfp_flags=%s",
+ TP_printk("page=%p pfn=0x%lx order=%d migratetype=%d gfp_flags=%s",
__entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL,
__entry->pfn != -1UL ? __entry->pfn : 0,
__entry->order,
@@ -245,7 +245,7 @@ DECLARE_EVENT_CLASS(mm_page,
__entry->migratetype = migratetype;
),
- TP_printk("page=%p pfn=%lu order=%u migratetype=%d percpu_refill=%d",
+ TP_printk("page=%p pfn=0x%lx order=%u migratetype=%d percpu_refill=%d",
__entry->pfn != -1UL ? pfn_to_page(__entry->pfn) : NULL,
__entry->pfn != -1UL ? __entry->pfn : 0,
__entry->order,
@@ -278,7 +278,7 @@ TRACE_EVENT(mm_page_pcpu_drain,
__entry->migratetype = migratetype;
),
- TP_printk("page=%p pfn=%lu order=%d migratetype=%d",
+ TP_printk("page=%p pfn=0x%lx order=%d migratetype=%d",
pfn_to_page(__entry->pfn), __entry->pfn,
__entry->order, __entry->migratetype)
);
@@ -312,7 +312,7 @@ TRACE_EVENT(mm_page_alloc_extfrag,
get_pageblock_migratetype(page));
),
- TP_printk("page=%p pfn=%lu alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d",
+ TP_printk("page=%p pfn=0x%lx alloc_order=%d fallback_order=%d pageblock_order=%d alloc_migratetype=%d fallback_migratetype=%d fragmenting=%d change_ownership=%d",
pfn_to_page(__entry->pfn),
__entry->pfn,
__entry->alloc_order,
diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h
index ad0aa7f31675..ca534501158b 100644
--- a/include/trace/events/page_pool.h
+++ b/include/trace/events/page_pool.h
@@ -60,7 +60,7 @@ TRACE_EVENT(page_pool_state_release,
__entry->pfn = page_to_pfn(page);
),
- TP_printk("page_pool=%p page=%p pfn=%lu release=%u",
+ TP_printk("page_pool=%p page=%p pfn=0x%lx release=%u",
__entry->pool, __entry->page, __entry->pfn, __entry->release)
);
@@ -85,7 +85,7 @@ TRACE_EVENT(page_pool_state_hold,
__entry->pfn = page_to_pfn(page);
),
- TP_printk("page_pool=%p page=%p pfn=%lu hold=%u",
+ TP_printk("page_pool=%p page=%p pfn=0x%lx hold=%u",
__entry->pool, __entry->page, __entry->pfn, __entry->hold)
);
diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h
index e1735fe7c76a..1d28431e85bd 100644
--- a/include/trace/events/pagemap.h
+++ b/include/trace/events/pagemap.h
@@ -46,7 +46,7 @@ TRACE_EVENT(mm_lru_insertion,
),
/* Flag format is based on page-types.c formatting for pagemap */
- TP_printk("page=%p pfn=%lu lru=%d flags=%s%s%s%s%s%s",
+ TP_printk("page=%p pfn=0x%lx lru=%d flags=%s%s%s%s%s%s",
__entry->page,
__entry->pfn,
__entry->lru,
@@ -75,7 +75,7 @@ TRACE_EVENT(mm_lru_activate,
),
/* Flag format is based on page-types.c formatting for pagemap */
- TP_printk("page=%p pfn=%lu", __entry->page, __entry->pfn)
+ TP_printk("page=%p pfn=0x%lx", __entry->page, __entry->pfn)
);
diff --git a/include/trace/events/vmscan.h b/include/trace/events/vmscan.h
index 2070df64958e..00d1180527d8 100644
--- a/include/trace/events/vmscan.h
+++ b/include/trace/events/vmscan.h
@@ -330,7 +330,7 @@ TRACE_EVENT(mm_vmscan_writepage,
page_is_file_lru(page));
),
- TP_printk("page=%p pfn=%lu flags=%s",
+ TP_printk("page=%p pfn=0x%lx flags=%s",
pfn_to_page(__entry->pfn),
__entry->pfn,
show_reclaim_flags(__entry->reclaim_flags))