summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-01-05 17:50:59 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-01-06 13:43:02 -0800
commit574823bfab82d9d8fa47f422778043fbb4b4f50e (patch)
tree62ee7a3d62dc1be896c689480a825a684bb1d71e
parent94bd8a05cd4de344a9a57e52ef7d99550251984f (diff)
downloadlinux-574823bfab82d9d8fa47f422778043fbb4b4f50e.tar.bz2
Change mincore() to count "mapped" pages rather than "cached" pages
The semantics of what "in core" means for the mincore() system call are somewhat unclear, but Linux has always (since 2.3.52, which is when mincore() was initially done) treated it as "page is available in page cache" rather than "page is mapped in the mapping". The problem with that traditional semantic is that it exposes a lot of system cache state that it really probably shouldn't, and that users shouldn't really even care about. So let's try to avoid that information leak by simply changing the semantics to be that mincore() counts actual mapped pages, not pages that might be cheaply mapped if they were faulted (note the "might be" part of the old semantics: being in the cache doesn't actually guarantee that you can access them without IO anyway, since things like network filesystems may have to revalidate the cache before use). In many ways the old semantics were somewhat insane even aside from the information leak issue. From the very beginning (and that beginning is a long time ago: 2.3.52 was released in March 2000, I think), the code had a comment saying Later we can get more picky about what "in core" means precisely. and this is that "later". Admittedly it is much later than is really comfortable. NOTE! This is a real semantic change, and it is for example known to change the output of "fincore", since that program literally does a mmmap without populating it, and then doing "mincore()" on that mapping that doesn't actually have any pages in it. I'm hoping that nobody actually has any workflow that cares, and the info leak is real. We may have to do something different if it turns out that people have valid reasons to want the old semantics, and if we can limit the information leak sanely. Cc: Kevin Easton <kevin@guarana.org> Cc: Jiri Kosina <jikos@kernel.org> Cc: Masatake YAMATO <yamato@redhat.com> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Greg KH <gregkh@linuxfoundation.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Michal Hocko <mhocko@suse.com> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--mm/mincore.c94
1 files changed, 13 insertions, 81 deletions
diff --git a/mm/mincore.c b/mm/mincore.c
index 218099b5ed31..f0f91461a9f4 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -42,72 +42,14 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr,
return 0;
}
-/*
- * Later we can get more picky about what "in core" means precisely.
- * For now, simply check to see if the page is in the page cache,
- * and is up to date; i.e. that no page-in operation would be required
- * at this time if an application were to map and access this page.
- */
-static unsigned char mincore_page(struct address_space *mapping, pgoff_t pgoff)
-{
- unsigned char present = 0;
- struct page *page;
-
- /*
- * When tmpfs swaps out a page from a file, any process mapping that
- * file will not get a swp_entry_t in its pte, but rather it is like
- * any other file mapping (ie. marked !present and faulted in with
- * tmpfs's .fault). So swapped out tmpfs mappings are tested here.
- */
-#ifdef CONFIG_SWAP
- if (shmem_mapping(mapping)) {
- page = find_get_entry(mapping, pgoff);
- /*
- * shmem/tmpfs may return swap: account for swapcache
- * page too.
- */
- if (xa_is_value(page)) {
- swp_entry_t swp = radix_to_swp_entry(page);
- page = find_get_page(swap_address_space(swp),
- swp_offset(swp));
- }
- } else
- page = find_get_page(mapping, pgoff);
-#else
- page = find_get_page(mapping, pgoff);
-#endif
- if (page) {
- present = PageUptodate(page);
- put_page(page);
- }
-
- return present;
-}
-
-static int __mincore_unmapped_range(unsigned long addr, unsigned long end,
- struct vm_area_struct *vma, unsigned char *vec)
-{
- unsigned long nr = (end - addr) >> PAGE_SHIFT;
- int i;
-
- if (vma->vm_file) {
- pgoff_t pgoff;
-
- pgoff = linear_page_index(vma, addr);
- for (i = 0; i < nr; i++, pgoff++)
- vec[i] = mincore_page(vma->vm_file->f_mapping, pgoff);
- } else {
- for (i = 0; i < nr; i++)
- vec[i] = 0;
- }
- return nr;
-}
-
static int mincore_unmapped_range(unsigned long addr, unsigned long end,
struct mm_walk *walk)
{
- walk->private += __mincore_unmapped_range(addr, end,
- walk->vma, walk->private);
+ unsigned char *vec = walk->private;
+ unsigned long nr = (end - addr) >> PAGE_SHIFT;
+
+ memset(vec, 0, nr);
+ walk->private += nr;
return 0;
}
@@ -127,8 +69,9 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
goto out;
}
+ /* We'll consider a THP page under construction to be there */
if (pmd_trans_unstable(pmd)) {
- __mincore_unmapped_range(addr, end, vma, vec);
+ memset(vec, 1, nr);
goto out;
}
@@ -137,28 +80,17 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte_t pte = *ptep;
if (pte_none(pte))
- __mincore_unmapped_range(addr, addr + PAGE_SIZE,
- vma, vec);
+ *vec = 0;
else if (pte_present(pte))
*vec = 1;
else { /* pte is a swap entry */
swp_entry_t entry = pte_to_swp_entry(pte);
- if (non_swap_entry(entry)) {
- /*
- * migration or hwpoison entries are always
- * uptodate
- */
- *vec = 1;
- } else {
-#ifdef CONFIG_SWAP
- *vec = mincore_page(swap_address_space(entry),
- swp_offset(entry));
-#else
- WARN_ON(1);
- *vec = 1;
-#endif
- }
+ /*
+ * migration or hwpoison entries are always
+ * uptodate
+ */
+ *vec = !!non_swap_entry(entry);
}
vec++;
}