From cea86fe246b694a191804b47378eb9d77aefabec Mon Sep 17 00:00:00 2001
From: Hugh Dickins
Date: Mon, 14 Feb 2022 18:26:39 -0800
Subject: mm/munlock: rmap call mlock_vma_page() munlock_vma_page()

Add vma argument to mlock_vma_page() and munlock_vma_page(), make them
inline functions which check (vma->vm_flags & VM_LOCKED) before calling
mlock_page() and munlock_page() in mm/mlock.c.

Add bool compound to mlock_vma_page() and munlock_vma_page(): this is
because we have understandable difficulty in accounting pte maps of THPs,
and if passed a PageHead page, mlock_page() and munlock_page() cannot
tell whether it's a pmd map to be counted or a pte map to be ignored.

Add vma arg to page_add_file_rmap() and page_remove_rmap(), like the
others, and use that to call mlock_vma_page() at the end of the page
adds, and munlock_vma_page() at the end of page_remove_rmap() (end or
beginning? unimportant, but end was easier for assertions in testing).

No page lock is required (although almost all adds happen to hold it):
delete the "Serialize with page migration" BUG_ON(!PageLocked(page))s.
Certainly page lock did serialize with page migration, but I'm having
difficulty explaining why that was ever important.

Mlock accounting on THPs has been hard to define, differed between anon
and file, involved PageDoubleMap in some places and not others, required
clear_page_mlock() at some points. Keep it simple now: just count the
pmds and ignore the ptes, there is no reason for ptes to undo pmd mlocks.

page_add_new_anon_rmap() callers unchanged: they have long been calling
lru_cache_add_inactive_or_unevictable(), which does its own VM_LOCKED
handling (it also checks for not VM_SPECIAL: I think that's overcautious,
and inconsistent with other checks, in that mmap_region() already prevents
VM_LOCKED on VM_SPECIAL; but haven't quite convinced myself to change it).

Signed-off-by: Hugh Dickins
Acked-by: Vlastimil Babka
Signed-off-by: Matthew Wilcox (Oracle)
---
 kernel/events/uprobes.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index 6357c3580d07..eed2f7437d96 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -173,7 +173,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 		return err;
 	}
 
-	/* For try_to_free_swap() and munlock_vma_page() below */
+	/* For try_to_free_swap() below */
 	lock_page(old_page);
 
 	mmu_notifier_invalidate_range_start(&range);
@@ -201,13 +201,10 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 			set_pte_at_notify(mm, addr, pvmw.pte,
 					  mk_pte(new_page, vma->vm_page_prot));
 
-	page_remove_rmap(old_page, false);
+	page_remove_rmap(old_page, vma, false);
 	if (!page_mapped(old_page))
 		try_to_free_swap(old_page);
 	page_vma_mapped_walk_done(&pvmw);
-
-	if ((vma->vm_flags & VM_LOCKED) && !PageCompound(old_page))
-		munlock_vma_page(old_page);
 	put_page(old_page);
 
 	err = 0;
--
cgit v1.2.3
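
For reference, a minimal sketch of the inline wrappers the message above
describes. This is an approximation, not the verbatim mm/internal.h code
from the patch; in particular, the PageTransCompound() filter is an
assumption drawn from the "count the pmds and ignore the ptes" rule:

	/*
	 * Sketch only: call into mm/mlock.c when the vma is VM_LOCKED,
	 * counting a THP only via its pmd map (compound == true) and
	 * ignoring pte maps of a THP.
	 */
	static inline void mlock_vma_page(struct page *page,
			struct vm_area_struct *vma, bool compound)
	{
		if (unlikely(vma->vm_flags & VM_LOCKED) &&
		    (compound || !PageTransCompound(page)))
			mlock_page(page);
	}

	static inline void munlock_vma_page(struct page *page,
			struct vm_area_struct *vma, bool compound)
	{
		if (unlikely(vma->vm_flags & VM_LOCKED) &&
		    (compound || !PageTransCompound(page)))
			munlock_page(page);
	}
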
From 1b8ddbeeb9b819e62b7190115023ce858a159f5c Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Sat, 12 Feb 2022 15:27:42 -0500
Subject: mm/truncate: Inline invalidate_complete_page() into its one caller

invalidate_inode_page() is the only caller of invalidate_complete_page()
and inlining it reveals that the first check is unnecessary (because we
hold the page locked, and we just retrieved the mapping from the page).

Actually, it does make a difference, in that tail pages no longer fail
at this check, so it's now possible to remove a tail page from a mapping.

Signed-off-by: Matthew Wilcox (Oracle)
Reviewed-by: John Hubbard
Reviewed-by: Christoph Hellwig
---
 kernel/futex/core.c |  2 +-
 mm/truncate.c       | 28 +++++-----------------------
 2 files changed, 6 insertions(+), 24 deletions(-)

(limited to 'kernel')

diff --git a/kernel/futex/core.c b/kernel/futex/core.c
index 51dd822a8060..b22ef1efe751 100644
--- a/kernel/futex/core.c
+++ b/kernel/futex/core.c
@@ -302,7 +302,7 @@ again:
	 * found it, but truncated or holepunched or subjected to
	 * invalidate_complete_page2 before we got the page lock (also
	 * cases which we are happy to fail). And we hold a reference,
-	 * so refcount care in invalidate_complete_page's remove_mapping
+	 * so refcount care in invalidate_inode_page's remove_mapping
	 * prevents drop_caches from setting mapping to NULL beneath us.
	 *
	 * The case we do have to guard against is when memory pressure made
diff --git a/mm/truncate.c b/mm/truncate.c
index 9dbf0b75da5d..e5e2edaa0b76 100644
--- a/mm/truncate.c
+++ b/mm/truncate.c
@@ -193,27 +193,6 @@ static void truncate_cleanup_folio(struct folio *folio)
 	folio_clear_mappedtodisk(folio);
 }
 
-/*
- * This is for invalidate_mapping_pages().  That function can be called at
- * any time, and is not supposed to throw away dirty pages.  But pages can
- * be marked dirty at any time too, so use remove_mapping which safely
- * discards clean, unused pages.
- *
- * Returns non-zero if the page was successfully invalidated.
- */
-static int
-invalidate_complete_page(struct address_space *mapping, struct page *page)
-{
-
-	if (page->mapping != mapping)
-		return 0;
-
-	if (page_has_private(page) && !try_to_release_page(page, 0))
-		return 0;
-
-	return remove_mapping(mapping, page);
-}
-
 int truncate_inode_folio(struct address_space *mapping, struct folio *folio)
 {
 	if (folio->mapping != mapping)
@@ -309,7 +288,10 @@ int invalidate_inode_page(struct page *page)
 		return 0;
 	if (page_mapped(page))
 		return 0;
-	return invalidate_complete_page(mapping, page);
+	if (page_has_private(page) && !try_to_release_page(page, 0))
+		return 0;
+
+	return remove_mapping(mapping, page);
 }
 
 /**
@@ -584,7 +566,7 @@ void invalidate_mapping_pagevec(struct address_space *mapping,
 }
 
 /*
- * This is like invalidate_complete_page(), except it ignores the page's
+ * This is like invalidate_inode_page(), except it ignores the page's
  * refcount.  We do this because invalidate_inode_pages2() needs stronger
  * invalidation guarantees, and cannot afford to leave pages behind because
  * shrink_page_list() has a temp ref on them, or because they're transiently
--
cgit v1.2.3
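
After the fold, invalidate_inode_page() reads roughly as below. The early
checks are reconstructed from the surrounding code rather than shown in
the hunk above, so treat this as a sketch of the resulting function:

	int invalidate_inode_page(struct page *page)
	{
		struct address_space *mapping = page_mapping(page);

		/*
		 * The old page->mapping != mapping check is gone: we hold
		 * the page lock and just retrieved mapping from the page.
		 */
		if (!mapping)
			return 0;
		if (PageDirty(page) || PageWriteback(page))
			return 0;
		if (page_mapped(page))
			return 0;
		if (page_has_private(page) && !try_to_release_page(page, 0))
			return 0;

		return remove_mapping(mapping, page);
	}
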
From eed05e54d275b3cfc5d8c79843c5276a5878e94a Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Thu, 3 Feb 2022 09:06:08 -0500
Subject: mm: Add DEFINE_PAGE_VMA_WALK and DEFINE_FOLIO_VMA_WALK

Instead of declaring a struct page_vma_mapped_walk directly,
use these helpers to allow us to transition to a PFN approach in the
following patches.

Signed-off-by: Matthew Wilcox (Oracle)
---
 include/linux/rmap.h    | 16 ++++++++++++++++
 kernel/events/uprobes.c |  6 +-----
 mm/damon/paddr.c        | 12 ++----------
 mm/ksm.c                |  5 +----
 mm/migrate.c            |  7 +------
 mm/page_idle.c          |  6 +-----
 mm/rmap.c               | 31 +++++--------------------------
 7 files changed, 27 insertions(+), 56 deletions(-)

(limited to 'kernel')

diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index ac29b076082b..0d894a2bfaa1 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -214,6 +214,22 @@ struct page_vma_mapped_walk {
 	unsigned int flags;
 };
 
+#define DEFINE_PAGE_VMA_WALK(name, _page, _vma, _address, _flags)	\
+	struct page_vma_mapped_walk name = {				\
+		.page = _page,						\
+		.vma = _vma,						\
+		.address = _address,					\
+		.flags = _flags,					\
+	}
+
+#define DEFINE_FOLIO_VMA_WALK(name, _folio, _vma, _address, _flags)	\
+	struct page_vma_mapped_walk name = {				\
+		.page = &_folio->page,					\
+		.vma = _vma,						\
+		.address = _address,					\
+		.flags = _flags,					\
+	}
+
 static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
 {
 	/* HugeTLB pte is set to the relevant page table entry without pte_mapped. */
diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
index eed2f7437d96..6418083901d4 100644
--- a/kernel/events/uprobes.c
+++ b/kernel/events/uprobes.c
@@ -155,11 +155,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr,
 			struct page *old_page, struct page *new_page)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	struct page_vma_mapped_walk pvmw = {
-		.page = compound_head(old_page),
-		.vma = vma,
-		.address = addr,
-	};
+	DEFINE_FOLIO_VMA_WALK(pvmw, page_folio(old_page), vma, addr, 0);
 	int err;
 	struct mmu_notifier_range range;
 
diff --git a/mm/damon/paddr.c b/mm/damon/paddr.c
index 5e8244f65a1a..cb45d49c731d 100644
--- a/mm/damon/paddr.c
+++ b/mm/damon/paddr.c
@@ -19,11 +19,7 @@
 static bool __damon_pa_mkold(struct page *page, struct vm_area_struct *vma,
 		unsigned long addr, void *arg)
 {
-	struct page_vma_mapped_walk pvmw = {
-		.page = page,
-		.vma = vma,
-		.address = addr,
-	};
+	DEFINE_PAGE_VMA_WALK(pvmw, page, vma, addr, 0);
 
 	while (page_vma_mapped_walk(&pvmw)) {
 		addr = pvmw.address;
@@ -93,11 +89,7 @@ static bool __damon_pa_young(struct page *page, struct vm_area_struct *vma,
 		unsigned long addr, void *arg)
 {
 	struct damon_pa_access_chk_result *result = arg;
-	struct page_vma_mapped_walk pvmw = {
-		.page = page,
-		.vma = vma,
-		.address = addr,
-	};
+	DEFINE_PAGE_VMA_WALK(pvmw, page, vma, addr, 0);
 
 	result->accessed = false;
 	result->page_sz = PAGE_SIZE;
diff --git a/mm/ksm.c b/mm/ksm.c
index c5a4403b5dc9..ea82fef93a31 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1034,10 +1034,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page,
 			      pte_t *orig_pte)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	struct page_vma_mapped_walk pvmw = {
-		.page = page,
-		.vma = vma,
-	};
+	DEFINE_PAGE_VMA_WALK(pvmw, page, vma, 0, 0);
 	int swapped;
 	int err = -EFAULT;
 	struct mmu_notifier_range range;
diff --git a/mm/migrate.c b/mm/migrate.c
index f4076093c855..71f92e8ed934 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -174,12 +174,7 @@ void putback_movable_pages(struct list_head *l)
 static bool remove_migration_pte(struct page *page, struct vm_area_struct *vma,
 				 unsigned long addr, void *old)
 {
-	struct page_vma_mapped_walk pvmw = {
-		.page = old,
-		.vma = vma,
-		.address = addr,
-		.flags = PVMW_SYNC | PVMW_MIGRATION,
-	};
+	DEFINE_PAGE_VMA_WALK(pvmw, old, vma, addr, PVMW_SYNC | PVMW_MIGRATION);
 	struct page *new;
 	pte_t pte;
 	swp_entry_t entry;
diff --git a/mm/page_idle.c b/mm/page_idle.c
index edead6a8a5f9..3e05bf1ce825 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -48,11 +48,7 @@ static bool page_idle_clear_pte_refs_one(struct page *page,
 					struct vm_area_struct *vma,
 					unsigned long addr, void *arg)
 {
-	struct page_vma_mapped_walk pvmw = {
-		.page = page,
-		.vma = vma,
-		.address = addr,
-	};
+	DEFINE_PAGE_VMA_WALK(pvmw, page, vma, addr, 0);
 	bool referenced = false;
 
 	while (page_vma_mapped_walk(&pvmw)) {
diff --git a/mm/rmap.c b/mm/rmap.c
index 1a13d5d6cfc7..a7f06b76b503 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -802,11 +802,7 @@ static bool page_referenced_one(struct page *page, struct vm_area_struct *vma,
 			unsigned long address, void *arg)
 {
 	struct page_referenced_arg *pra = arg;
-	struct page_vma_mapped_walk pvmw = {
-		.page = page,
-		.vma = vma,
-		.address = address,
-	};
+	DEFINE_PAGE_VMA_WALK(pvmw, page, vma, address, 0);
 	int referenced = 0;
 
 	while (page_vma_mapped_walk(&pvmw)) {
@@ -934,12 +930,7 @@ int page_referenced(struct page *page,
 static bool page_mkclean_one(struct page *page, struct vm_area_struct *vma,
 			    unsigned long address, void *arg)
 {
-	struct page_vma_mapped_walk pvmw = {
-		.page = page,
-		.vma = vma,
-		.address = address,
-		.flags = PVMW_SYNC,
-	};
+	DEFINE_PAGE_VMA_WALK(pvmw, page, vma, address, PVMW_SYNC);
 	struct mmu_notifier_range range;
 	int *cleaned = arg;
 
@@ -1419,11 +1410,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
 		     unsigned long address, void *arg)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	struct page_vma_mapped_walk pvmw = {
-		.page = page,
-		.vma = vma,
-		.address = address,
-	};
+	DEFINE_PAGE_VMA_WALK(pvmw, page, vma, address, 0);
 	pte_t pteval;
 	struct page *subpage;
 	bool ret = true;
@@ -1714,11 +1701,7 @@ static bool try_to_migrate_one(struct page *page, struct vm_area_struct *vma,
 		     unsigned long address, void *arg)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	struct page_vma_mapped_walk pvmw = {
-		.page = page,
-		.vma = vma,
-		.address = address,
-	};
+	DEFINE_PAGE_VMA_WALK(pvmw, page, vma, address, 0);
 	pte_t pteval;
 	struct page *subpage;
 	bool ret = true;
@@ -2001,11 +1984,7 @@ static bool page_make_device_exclusive_one(struct page *page,
 		struct vm_area_struct *vma, unsigned long address, void *priv)
 {
 	struct mm_struct *mm = vma->vm_mm;
-	struct page_vma_mapped_walk pvmw = {
-		.page = page,
-		.vma = vma,
-		.address = address,
-	};
+	DEFINE_PAGE_VMA_WALK(pvmw, page, vma, address, 0);
 	struct make_exclusive_args *args = priv;
 	pte_t pteval;
 	struct page *subpage;
--
cgit v1.2.3
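
To see how the new macros are used, here is a hypothetical rmap-style
walker; the function name and body are illustrative only and are not
part of the patch:

	static bool example_one(struct page *page, struct vm_area_struct *vma,
			unsigned long address, void *arg)
	{
		/* Replaces the old open-coded struct initializer. */
		DEFINE_PAGE_VMA_WALK(pvmw, page, vma, address, 0);

		while (page_vma_mapped_walk(&pvmw)) {
			/*
			 * pvmw.pte (or pvmw.pmd for a pmd-mapped THP) now
			 * points at one mapping of the page, found at
			 * pvmw.address; inspect or modify it here.
			 */
		}
		return true;
	}
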