From ddffe98d166f4a93d996d5aa628fd745311fc1e7 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Wed, 22 Feb 2017 15:45:13 -0800 Subject: mm/memory_hotplug: set magic number to page->freelist instead of page->lru.next To identify that pages of page table are allocated from bootmem allocator, magic number sets to page->lru.next. But page->lru list is initialized in reserve_bootmem_region(). So when calling free_pagetable(), the function cannot find the magic number of pages. And free_pagetable() frees the pages by free_reserved_page() not put_page_bootmem(). But if the pages are allocated from bootmem allocator and used as page table, the pages have private flag. So before freeing the pages, we should clear the private flag by put_page_bootmem(). Before applying the commit 7bfec6f47bb0 ("mm, page_alloc: check multiple page fields with a single branch"), we could find the following visible issue: BUG: Bad page state in process kworker/u1024:1 page:ffffea103cfd8040 count:0 mapcount:0 mappi flags: 0x6fffff80000800(private) page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set bad because of flags: 0x800(private) Call Trace: [...] dump_stack+0x63/0x87 [...] bad_page+0x114/0x130 [...] free_pages_prepare+0x299/0x2d0 [...] free_hot_cold_page+0x31/0x150 [...] __free_pages+0x25/0x30 [...] free_pagetable+0x6f/0xb4 [...] remove_pagetable+0x379/0x7ff [...] vmemmap_free+0x10/0x20 [...] sparse_remove_one_section+0x149/0x180 [...] __remove_pages+0x2e9/0x4f0 [...] arch_remove_memory+0x63/0xc0 [...] remove_memory+0x8c/0xc0 [...] acpi_memory_device_remove+0x79/0xa5 [...] acpi_bus_trim+0x5a/0x8d [...] acpi_bus_trim+0x38/0x8d [...] acpi_device_hotplug+0x1b7/0x418 [...] acpi_hotplug_work_fn+0x1e/0x29 [...] process_one_work+0x152/0x400 [...] worker_thread+0x125/0x4b0 [...] kthread+0xd8/0xf0 [...] ret_from_fork+0x22/0x40 And the issue still silently occurs. Until freeing the pages of page table allocated from bootmem allocator, the page->freelist is never used. So the patch sets magic number to page->freelist instead of page->lru.next. [isimatu.yasuaki@jp.fujitsu.com: fix merge issue] Link: http://lkml.kernel.org/r/722b1cc4-93ac-dd8b-2be2-7a7e313b3b0b@gmail.com Link: http://lkml.kernel.org/r/2c29bd9f-5b67-02d0-18a3-8828e78bbb6f@gmail.com Signed-off-by: Yasuaki Ishimatsu Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Dave Hansen Cc: Vlastimil Babka Cc: Mel Gorman Cc: Xishi Qiu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86/mm') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index af85b686a7b0..97346f987ef2 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -679,7 +679,7 @@ static void __meminit free_pagetable(struct page *page, int order) if (PageReserved(page)) { __ClearPageReserved(page); - magic = (unsigned long)page->lru.next; + magic = (unsigned long)page->freelist; if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) { while (nr_pages--) put_page_bootmem(page++); -- cgit v1.2.3 From ecf1385d72f0491400a8ceca7001196ca369aa8c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 22 Feb 2017 15:46:37 -0800 Subject: mm: drop unused argument of zap_page_range() There's no users of zap_page_range() who wants non-NULL 'details'. Let's drop it. Link: http://lkml.kernel.org/r/20170118122429.43661-3-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Cc: Tetsuo Handa Cc: Peter Zijlstra Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/mm/gmap.c | 2 +- arch/x86/mm/mpx.c | 2 +- drivers/android/binder.c | 2 +- drivers/staging/android/ion/ion.c | 3 +-- include/linux/mm.h | 2 +- mm/madvise.c | 2 +- mm/memory.c | 5 ++--- 7 files changed, 8 insertions(+), 10 deletions(-) (limited to 'arch/x86/mm') diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index ec1f0dedb948..59ac93714fa4 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -687,7 +687,7 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) /* Find vma in the parent mm */ vma = find_vma(gmap->mm, vmaddr); size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); - zap_page_range(vma, vmaddr, size, NULL); + zap_page_range(vma, vmaddr, size); } up_read(&gmap->mm->mmap_sem); } diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index af59f808742f..aad4ac386f98 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -796,7 +796,7 @@ static noinline int zap_bt_entries_mapping(struct mm_struct *mm, return -EINVAL; len = min(vma->vm_end, end) - addr; - zap_page_range(vma, addr, len, NULL); + zap_page_range(vma, addr, len); trace_mpx_unmap_zap(addr, addr+len); vma = vma->vm_next; diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 9451b762fa1c..15b263a420e8 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -657,7 +657,7 @@ free_range: page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE]; if (vma) zap_page_range(vma, (uintptr_t)page_addr + - proc->user_buffer_offset, PAGE_SIZE, NULL); + proc->user_buffer_offset, PAGE_SIZE); err_vm_insert_page_failed: unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); err_map_kernel_failed: diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index 937c2d5d7ec3..969600779e44 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -865,8 +865,7 @@ static void ion_buffer_sync_for_device(struct ion_buffer *buffer, list_for_each_entry(vma_list, &buffer->vmas, list) { struct vm_area_struct *vma = vma_list->vma; - zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, - NULL); + zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start); } mutex_unlock(&buffer->lock); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 062936e8b832..574bc157a27c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1185,7 +1185,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); void zap_page_range(struct vm_area_struct *vma, unsigned long address, - unsigned long size, struct zap_details *); + unsigned long size); void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long start, unsigned long end); diff --git a/mm/madvise.c b/mm/madvise.c index ca75b8a01ba0..7f1490f0d3a6 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -478,7 +478,7 @@ static long madvise_dontneed(struct vm_area_struct *vma, return -EINVAL; madvise_userfault_dontneed(vma, prev, start, end); - zap_page_range(vma, start, end - start, NULL); + zap_page_range(vma, start, end - start); return 0; } diff --git a/mm/memory.c b/mm/memory.c index e9035a0afee2..7663068a33c6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1370,12 +1370,11 @@ void unmap_vmas(struct mmu_gather *tlb, * @vma: vm_area_struct holding the applicable pages * @start: starting address of pages to zap * @size: number of bytes to zap - * @details: details of shared cache invalidation * * Caller must protect the VMA list */ void zap_page_range(struct vm_area_struct *vma, unsigned long start, - unsigned long size, struct zap_details *details) + unsigned long size) { struct mm_struct *mm = vma->vm_mm; struct mmu_gather tlb; @@ -1386,7 +1385,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(mm, start, end); for ( ; vma && vma->vm_start < end; vma = vma->vm_next) - unmap_single_vma(&tlb, vma, start, end, details); + unmap_single_vma(&tlb, vma, start, end, NULL); mmu_notifier_invalidate_range_end(mm, start, end); tlb_finish_mmu(&tlb, start, end); } -- cgit v1.2.3