From 4c21e2f2441dc5fbb957b030333f5a3f2d02dea7 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 29 Oct 2005 18:16:40 -0700 Subject: [PATCH] mm: split page table lock Christoph Lameter demonstrated very poor scalability on the SGI 512-way, with a many-threaded application which concurrently initializes different parts of a large anonymous area. This patch corrects that, by using a separate spinlock per page table page, to guard the page table entries in that page, instead of using the mm's single page_table_lock. (But even then, page_table_lock is still used to guard page table allocation, and anon_vma allocation.) In this implementation, the spinlock is tucked inside the struct page of the page table page: with a BUILD_BUG_ON in case it overflows - which it would in the case of 32-bit PA-RISC with spinlock debugging enabled. Splitting the lock is not quite for free: another cacheline access. Ideally, I suppose we would use split ptlock only for multi-threaded processes on multi-cpu machines; but deciding that dynamically would have its own costs. So for now enable it by config, at some number of cpus - since the Kconfig language doesn't support inequalities, let preprocessor compare that with NR_CPUS. But I don't think it's worth being user-configurable: for good testing of both split and unsplit configs, split now at 4 cpus, and perhaps change that to 8 later. There is a benefit even for singly threaded processes: kswapd can be attacking one part of the mm while another part is busy faulting. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/swap_state.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'mm/swap_state.c') diff --git a/mm/swap_state.c b/mm/swap_state.c index 132164f7d0a7..cafc1edcbeba 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -83,7 +83,7 @@ static int __add_to_swap_cache(struct page *page, swp_entry_t entry, page_cache_get(page); SetPageLocked(page); SetPageSwapCache(page); - page->private = entry.val; + set_page_private(page, entry.val); total_swapcache_pages++; pagecache_acct(1); } @@ -126,8 +126,8 @@ void __delete_from_swap_cache(struct page *page) BUG_ON(PageWriteback(page)); BUG_ON(PagePrivate(page)); - radix_tree_delete(&swapper_space.page_tree, page->private); - page->private = 0; + radix_tree_delete(&swapper_space.page_tree, page_private(page)); + set_page_private(page, 0); ClearPageSwapCache(page); total_swapcache_pages--; pagecache_acct(-1); @@ -197,7 +197,7 @@ void delete_from_swap_cache(struct page *page) { swp_entry_t entry; - entry.val = page->private; + entry.val = page_private(page); write_lock_irq(&swapper_space.tree_lock); __delete_from_swap_cache(page); -- cgit v1.2.3 From b8072f099b7829a6ff3eba618e1d079a81f753f8 Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Sat, 29 Oct 2005 18:16:41 -0700 Subject: [PATCH] mm: update comments to pte lock Updated several references to page_table_lock in common code comments. Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/asm-generic/pgtable.h | 2 +- include/linux/mempolicy.h | 3 +-- mm/filemap.c | 6 +++--- mm/rmap.c | 10 +++++----- mm/swap_state.c | 3 +-- 5 files changed, 11 insertions(+), 13 deletions(-) (limited to 'mm/swap_state.c') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index ff28c8b31f58..7dca30a26c53 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -8,7 +8,7 @@ * - update the page tables * - inform the TLB about the new one * - * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock. + * We hold the mm semaphore for reading, and the pte lock. * * Note: the old pte is known to not be writable, so we don't need to * worry about dirty bits etc getting lost. diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 38e60a099399..7af8cb836e78 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -47,8 +47,7 @@ struct vm_area_struct; * Locking policy for interlave: * In process context there is no locking because only the process accesses * its own state. All vma manipulation is somewhat protected by a down_read on - * mmap_sem. For allocating in the interleave policy the page_table_lock - * must be also aquired to protect il_next. + * mmap_sem. * * Freeing policy: * When policy is MPOL_BIND v.zonelist is kmalloc'ed and must be kfree'd. diff --git a/mm/filemap.c b/mm/filemap.c index f560b41c8f61..036599d1177e 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -66,7 +66,7 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, * * ->mmap_sem * ->i_mmap_lock - * ->page_table_lock (various places, mainly in mmap.c) + * ->page_table_lock or pte_lock (various, mainly in memory.c) * ->mapping->tree_lock (arch-dependent flush_dcache_mmap_lock) * * ->mmap_sem @@ -86,9 +86,9 @@ generic_file_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, * ->anon_vma.lock (vma_adjust) * * ->anon_vma.lock - * ->page_table_lock (anon_vma_prepare and various) + * ->page_table_lock or pte_lock (anon_vma_prepare and various) * - * ->page_table_lock + * ->page_table_lock or pte_lock * ->swap_lock (try_to_unmap_one) * ->private_lock (try_to_unmap_one) * ->tree_lock (try_to_unmap_one) diff --git a/mm/rmap.c b/mm/rmap.c index a7427bbf57e4..914d04b98bee 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -32,7 +32,7 @@ * page->flags PG_locked (lock_page) * mapping->i_mmap_lock * anon_vma->lock - * mm->page_table_lock + * mm->page_table_lock or pte_lock * zone->lru_lock (in mark_page_accessed) * swap_lock (in swap_duplicate, swap_info_get) * mmlist_lock (in mmput, drain_mmlist and others) @@ -244,7 +244,7 @@ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) /* * Check that @page is mapped at @address into @mm. * - * On success returns with mapped pte and locked mm->page_table_lock. + * On success returns with pte mapped and locked. */ pte_t *page_check_address(struct page *page, struct mm_struct *mm, unsigned long address, spinlock_t **ptlp) @@ -445,7 +445,7 @@ int page_referenced(struct page *page, int is_locked, int ignore_token) * @vma: the vm area in which the mapping is added * @address: the user virtual address mapped * - * The caller needs to hold the mm->page_table_lock. + * The caller needs to hold the pte lock. */ void page_add_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address) @@ -468,7 +468,7 @@ void page_add_anon_rmap(struct page *page, * page_add_file_rmap - add pte mapping to a file page * @page: the page to add the mapping to * - * The caller needs to hold the mm->page_table_lock. + * The caller needs to hold the pte lock. */ void page_add_file_rmap(struct page *page) { @@ -483,7 +483,7 @@ void page_add_file_rmap(struct page *page) * page_remove_rmap - take down pte mapping from a page * @page: page to remove mapping from * - * Caller needs to hold the mm->page_table_lock. + * The caller needs to hold the pte lock. */ void page_remove_rmap(struct page *page) { diff --git a/mm/swap_state.c b/mm/swap_state.c index cafc1edcbeba..dfd9a46755b8 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -259,8 +259,7 @@ static inline void free_swap_cache(struct page *page) /* * Perform a free_page(), also freeing any swap cache associated with - * this page if it is the last user of the page. Can not do a lock_page, - * as we are holding the page_table_lock spinlock. + * this page if it is the last user of the page. */ void free_page_and_swap_cache(struct page *page) { -- cgit v1.2.3