summaryrefslogtreecommitdiffstats
path: root/mm/rmap.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2015-09-05 14:27:38 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2015-09-05 14:27:38 -0700
commit6c0f568e84a3cfc775682311d65205462c3f3bc1 (patch)
tree5105a137a9ea2459d55e895d3c096bbd31274724 /mm/rmap.c
parentc82199061009d1561e31e17fca5e47a87cb7ff4c (diff)
parent559ec2f8fd50981821621f52db5e1a8ffcf8d792 (diff)
downloadlinux-6c0f568e84a3cfc775682311d65205462c3f3bc1.tar.bz2
Merge branch 'akpm' (patches from Andrew)
Merge patch-bomb from Andrew Morton: - a few misc things - Andy's "ambient capabilities" - fs/nofity updates - the ocfs2 queue - kernel/watchdog.c updates and feature work. - some of MM. Includes Andrea's userfaultfd feature. [ Hadn't noticed that userfaultfd was 'default y' when applying the patches, so that got fixed in this merge instead. We do _not_ mark new features that nobody uses yet 'default y' - Linus ] * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (118 commits) mm/hugetlb.c: make vma_has_reserves() return bool mm/madvise.c: make madvise_behaviour_valid() return bool mm/memory.c: make tlb_next_batch() return bool mm/dmapool.c: change is_page_busy() return from int to bool mm: remove struct node_active_region mremap: simplify the "overlap" check in mremap_to() mremap: don't do uneccesary checks if new_len == old_len mremap: don't do mm_populate(new_addr) on failure mm: move ->mremap() from file_operations to vm_operations_struct mremap: don't leak new_vma if f_op->mremap() fails mm/hugetlb.c: make vma_shareable() return bool mm: make GUP handle pfn mapping unless FOLL_GET is requested mm: fix status code which move_pages() returns for zero page mm: memcontrol: bring back the VM_BUG_ON() in mem_cgroup_swapout() genalloc: add support of multiple gen_pools per device genalloc: add name arg to gen_pool_get() and devm_gen_pool_create() mm/memblock: WARN_ON when nid differs from overlap region Documentation/features/vm: add feature description and arch support status for batched TLB flush after unmap mm: defer flush of writable TLB entries mm: send one IPI per CPU to TLB flush all entries after unmapping pages ...
Diffstat (limited to 'mm/rmap.c')
-rw-r--r--mm/rmap.c118
1 files changed, 117 insertions, 1 deletions
diff --git a/mm/rmap.c b/mm/rmap.c
index 171b68768df1..0db38e7d0a72 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -62,6 +62,8 @@
#include <asm/tlbflush.h>
+#include <trace/events/tlb.h>
+
#include "internal.h"
static struct kmem_cache *anon_vma_cachep;
@@ -583,6 +585,107 @@ vma_address(struct page *page, struct vm_area_struct *vma)
return address;
}
+#ifdef CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+static void percpu_flush_tlb_batch_pages(void *data)
+{
+ /*
+ * All TLB entries are flushed on the assumption that it is
+ * cheaper to flush all TLBs and let them be refilled than
+ * flushing individual PFNs. Note that we do not track mm's
+ * to flush as that might simply be multiple full TLB flushes
+ * for no gain.
+ */
+ count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
+ flush_tlb_local();
+}
+
+/*
+ * Flush TLB entries for recently unmapped pages from remote CPUs. It is
+ * important if a PTE was dirty when it was unmapped that it's flushed
+ * before any IO is initiated on the page to prevent lost writes. Similarly,
+ * it must be flushed before freeing to prevent data leakage.
+ */
+void try_to_unmap_flush(void)
+{
+ struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
+ int cpu;
+
+ if (!tlb_ubc->flush_required)
+ return;
+
+ cpu = get_cpu();
+
+ trace_tlb_flush(TLB_REMOTE_SHOOTDOWN, -1UL);
+
+ if (cpumask_test_cpu(cpu, &tlb_ubc->cpumask))
+ percpu_flush_tlb_batch_pages(&tlb_ubc->cpumask);
+
+ if (cpumask_any_but(&tlb_ubc->cpumask, cpu) < nr_cpu_ids) {
+ smp_call_function_many(&tlb_ubc->cpumask,
+ percpu_flush_tlb_batch_pages, (void *)tlb_ubc, true);
+ }
+ cpumask_clear(&tlb_ubc->cpumask);
+ tlb_ubc->flush_required = false;
+ tlb_ubc->writable = false;
+ put_cpu();
+}
+
+/* Flush iff there are potentially writable TLB entries that can race with IO */
+void try_to_unmap_flush_dirty(void)
+{
+ struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
+
+ if (tlb_ubc->writable)
+ try_to_unmap_flush();
+}
+
+static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
+ struct page *page, bool writable)
+{
+ struct tlbflush_unmap_batch *tlb_ubc = &current->tlb_ubc;
+
+ cpumask_or(&tlb_ubc->cpumask, &tlb_ubc->cpumask, mm_cpumask(mm));
+ tlb_ubc->flush_required = true;
+
+ /*
+ * If the PTE was dirty then it's best to assume it's writable. The
+ * caller must use try_to_unmap_flush_dirty() or try_to_unmap_flush()
+ * before the page is queued for IO.
+ */
+ if (writable)
+ tlb_ubc->writable = true;
+}
+
+/*
+ * Returns true if the TLB flush should be deferred to the end of a batch of
+ * unmap operations to reduce IPIs.
+ */
+static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
+{
+ bool should_defer = false;
+
+ if (!(flags & TTU_BATCH_FLUSH))
+ return false;
+
+ /* If remote CPUs need to be flushed then defer batch the flush */
+ if (cpumask_any_but(mm_cpumask(mm), get_cpu()) < nr_cpu_ids)
+ should_defer = true;
+ put_cpu();
+
+ return should_defer;
+}
+#else
+static void set_tlb_ubc_flush_pending(struct mm_struct *mm,
+ struct page *page, bool writable)
+{
+}
+
+static bool should_defer_flush(struct mm_struct *mm, enum ttu_flags flags)
+{
+ return false;
+}
+#endif /* CONFIG_ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH */
+
/*
* At what user virtual address is page expected in vma?
* Caller should check the page is actually part of the vma.
@@ -1220,7 +1323,20 @@ static int try_to_unmap_one(struct page *page, struct vm_area_struct *vma,
/* Nuke the page table entry. */
flush_cache_page(vma, address, page_to_pfn(page));
- pteval = ptep_clear_flush(vma, address, pte);
+ if (should_defer_flush(mm, flags)) {
+ /*
+ * We clear the PTE but do not flush so potentially a remote
+ * CPU could still be writing to the page. If the entry was
+ * previously clean then the architecture must guarantee that
+ * a clear->dirty transition on a cached TLB entry is written
+ * through and traps if the PTE is unmapped.
+ */
+ pteval = ptep_get_and_clear(mm, address, pte);
+
+ set_tlb_ubc_flush_pending(mm, page, pte_dirty(pteval));
+ } else {
+ pteval = ptep_clear_flush(vma, address, pte);
+ }
/* Move the dirty bit to the physical page now the pte is gone. */
if (pte_dirty(pteval))