summaryrefslogtreecommitdiffstats
path: root/mm
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-08-13 16:29:35 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-08-13 16:29:35 -0700
commit203b4fc903b644223a27ad3f25f3a0f3a3911d1d (patch)
tree8c210b67a17b74b2a39c500891a20e23c2390cf6 /mm
parent7edcf0d314f69e506ddd9562062b2a79fa965bb9 (diff)
parent765d28f136291f9639e3c031a1070fb76d6625c7 (diff)
downloadlinux-203b4fc903b644223a27ad3f25f3a0f3a3911d1d.tar.bz2
Merge branch 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 mm updates from Thomas Gleixner: - Make lazy TLB mode even lazier to avoid pointless switch_mm() operations, which reduces CPU load by 1-2% for memcache workloads - Small cleanups and improvements all over the place * 'x86-mm-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: x86/mm: Remove redundant check for kmem_cache_create() arm/asm/tlb.h: Fix build error implicit func declaration x86/mm/tlb: Make clear_asid_other() static x86/mm/tlb: Skip atomic operations for 'init_mm' in switch_mm_irqs_off() x86/mm/tlb: Always use lazy TLB mode x86/mm/tlb: Only send page table free TLB flush to lazy TLB CPUs x86/mm/tlb: Make lazy TLB mode lazier x86/mm/tlb: Restructure switch_mm_irqs_off() x86/mm/tlb: Leave lazy TLB mode at page table free time mm: Allocate the mm_cpumask (mm->cpu_bitmap[]) dynamically based on nr_cpu_ids x86/mm: Add TLB purge to free pmd/pte page interfaces ioremap: Update pgtable free interfaces with addr x86/mm: Disable ioremap free page handling on x86-PAE
Diffstat (limited to 'mm')
-rw-r--r--mm/init-mm.c11
-rw-r--r--mm/memory.c22
2 files changed, 25 insertions, 8 deletions
diff --git a/mm/init-mm.c b/mm/init-mm.c
index f0179c9c04c2..a787a319211e 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -15,6 +15,16 @@
#define INIT_MM_CONTEXT(name)
#endif
+/*
+ * For dynamically allocated mm_structs, there is a dynamically sized cpumask
+ * at the end of the structure, the size of which depends on the maximum CPU
+ * number the system can see. That way we allocate only as much memory for
+ * mm_cpumask() as needed for the hundreds, or thousands of processes that
+ * a system typically runs.
+ *
+ * Since there is only one init_mm in the entire system, keep it simple
+ * and size this cpu_bitmask to NR_CPUS.
+ */
struct mm_struct init_mm = {
.mm_rb = RB_ROOT,
.pgd = swapper_pg_dir,
@@ -25,5 +35,6 @@ struct mm_struct init_mm = {
.arg_lock = __SPIN_LOCK_UNLOCKED(init_mm.arg_lock),
.mmlist = LIST_HEAD_INIT(init_mm.mmlist),
.user_ns = &init_user_ns,
+ .cpu_bitmap = { [BITS_TO_LONGS(NR_CPUS)] = 0},
INIT_MM_CONTEXT(init_mm)
};
diff --git a/mm/memory.c b/mm/memory.c
index c5e87a3a82ba..3d0a74ab70f2 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -326,16 +326,20 @@ bool __tlb_remove_page_size(struct mmu_gather *tlb, struct page *page, int page_
#ifdef CONFIG_HAVE_RCU_TABLE_FREE
-/*
- * See the comment near struct mmu_table_batch.
- */
-
static void tlb_remove_table_smp_sync(void *arg)
{
- /* Simply deliver the interrupt */
+ struct mm_struct __maybe_unused *mm = arg;
+ /*
+ * On most architectures this does nothing. Simply delivering the
+ * interrupt is enough to prevent races with software page table
+ * walking like that done in get_user_pages_fast.
+ *
+ * See the comment near struct mmu_table_batch.
+ */
+ tlb_flush_remove_tables_local(mm);
}
-static void tlb_remove_table_one(void *table)
+static void tlb_remove_table_one(void *table, struct mmu_gather *tlb)
{
/*
* This isn't an RCU grace period and hence the page-tables cannot be
@@ -344,7 +348,7 @@ static void tlb_remove_table_one(void *table)
* It is however sufficient for software page-table walkers that rely on
* IRQ disabling. See the comment near struct mmu_table_batch.
*/
- smp_call_function(tlb_remove_table_smp_sync, NULL, 1);
+ smp_call_function(tlb_remove_table_smp_sync, tlb->mm, 1);
__tlb_remove_table(table);
}
@@ -365,6 +369,8 @@ void tlb_table_flush(struct mmu_gather *tlb)
{
struct mmu_table_batch **batch = &tlb->batch;
+ tlb_flush_remove_tables(tlb->mm);
+
if (*batch) {
call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu);
*batch = NULL;
@@ -387,7 +393,7 @@ void tlb_remove_table(struct mmu_gather *tlb, void *table)
if (*batch == NULL) {
*batch = (struct mmu_table_batch *)__get_free_page(GFP_NOWAIT | __GFP_NOWARN);
if (*batch == NULL) {
- tlb_remove_table_one(table);
+ tlb_remove_table_one(table, tlb);
return;
}
(*batch)->nr = 0;