summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorHugh Dickins <hugh@veritas.com>2009-01-06 14:40:09 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2009-01-06 15:59:07 -0800
commit22b31eec63e5f2e219a3ee15f456897272bc73e8 (patch)
tree906e4975a0e2cdef15ef071b4890e3b28e36cf39
parent3dc147414ccad81dc33edb80774b1fed12a38c08 (diff)
downloadlinux-22b31eec63e5f2e219a3ee15f456897272bc73e8.tar.bz2
badpage: vm_normal_page use print_bad_pte
print_bad_pte() is so far being called only when zap_pte_range() finds negative page_mapcount, or there's a fault on a pte_file where it does not belong. That's weak coverage when we suspect pagetable corruption. Originally, it was called when vm_normal_page() found an invalid pfn: but pfn_valid is expensive on some architectures and configurations, so 2.6.24 put that under CONFIG_DEBUG_VM (which doesn't help in the field), then 2.6.26 replaced it by a VM_BUG_ON (likewise). Reinstate the print_bad_pte() in vm_normal_page(), but use a cheaper test than pfn_valid(): memmap_init_zone() (used in bootup and hotplug) keep a __read_mostly note of the highest_memmap_pfn, vm_normal_page() then check pfn against that. We could call this pfn_plausible() or pfn_sane(), but I doubt we'll need it elsewhere: of course it's not reliable, but gives much stronger pagetable validation on many boxes. Also use print_bad_pte() when the pte_special bit is found outside a VM_PFNMAP or VM_MIXEDMAP area, instead of VM_BUG_ON. Signed-off-by: Hugh Dickins <hugh@veritas.com> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Cc: Christoph Lameter <cl@linux-foundation.org> Cc: Mel Gorman <mel@csn.ul.ie> Cc: Rik van Riel <riel@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/internal.h1
-rw-r--r--mm/memory.c20
-rw-r--r--mm/page_alloc.c4
3 files changed, 15 insertions, 10 deletions
diff --git a/mm/internal.h b/mm/internal.h
index 13333bc2eb68..1981bc9454f3 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -49,6 +49,7 @@ extern void putback_lru_page(struct page *page);
/*
* in mm/page_alloc.c
*/
+extern unsigned long highest_memmap_pfn;
extern void __free_pages_bootmem(struct page *page, unsigned int order);
/*
diff --git a/mm/memory.c b/mm/memory.c
index cda04b19f733..890095f5f36d 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -467,21 +467,18 @@ static inline int is_cow_mapping(unsigned int flags)
struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
pte_t pte)
{
- unsigned long pfn;
+ unsigned long pfn = pte_pfn(pte);
if (HAVE_PTE_SPECIAL) {
- if (likely(!pte_special(pte))) {
- VM_BUG_ON(!pfn_valid(pte_pfn(pte)));
- return pte_page(pte);
- }
- VM_BUG_ON(!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)));
+ if (likely(!pte_special(pte)))
+ goto check_pfn;
+ if (!(vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP)))
+ print_bad_pte(vma, addr, pte, NULL);
return NULL;
}
/* !HAVE_PTE_SPECIAL case follows: */
- pfn = pte_pfn(pte);
-
if (unlikely(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))) {
if (vma->vm_flags & VM_MIXEDMAP) {
if (!pfn_valid(pfn))
@@ -497,11 +494,14 @@ struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
}
}
- VM_BUG_ON(!pfn_valid(pfn));
+check_pfn:
+ if (unlikely(pfn > highest_memmap_pfn)) {
+ print_bad_pte(vma, addr, pte, NULL);
+ return NULL;
+ }
/*
* NOTE! We still have PageReserved() pages in the page tables.
- *
* eg. VDSO mappings can cause them to exist.
*/
out:
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 3acb216e9a78..755c99a0ac71 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -69,6 +69,7 @@ EXPORT_SYMBOL(node_states);
unsigned long totalram_pages __read_mostly;
unsigned long totalreserve_pages __read_mostly;
+unsigned long highest_memmap_pfn __read_mostly;
int percpu_pagelist_fraction;
#ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -2597,6 +2598,9 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
unsigned long pfn;
struct zone *z;
+ if (highest_memmap_pfn < end_pfn - 1)
+ highest_memmap_pfn = end_pfn - 1;
+
z = &NODE_DATA(nid)->node_zones[zone];
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
/*