summaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorMartin Schwidefsky <schwidefsky@de.ibm.com>2008-02-09 18:24:37 +0100
committerMartin Schwidefsky <schwidefsky@de.ibm.com>2008-02-09 18:24:41 +0100
commit6252d702c5311ce916caf75ed82e5c8245171c92 (patch)
tree3490f27b5f888ff2c1ec915d4e7201000f37a771 /arch
parent5a216a20837c5f5fa1ca4b8ae8991ffd96b08e6f (diff)
downloadlinux-6252d702c5311ce916caf75ed82e5c8245171c92.tar.bz2
[S390] dynamic page tables.
Add support for different number of page table levels dependent on the highest address used for a process. This will cause a 31 bit process to use a two level page table instead of the four level page table that is the default after the pud has been introduced. Likewise a normal 64 bit process will use three levels instead of four. Only if a process runs out of the 4 tera bytes which can be addressed with a three level page table the fourth level is dynamically added. Then the process can use up to 8 peta byte. Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/s390/kernel/binfmt_elf32.c11
-rw-r--r--arch/s390/kernel/traps.c3
-rw-r--r--arch/s390/mm/fault.c40
-rw-r--r--arch/s390/mm/init.c5
-rw-r--r--arch/s390/mm/mmap.c65
-rw-r--r--arch/s390/mm/pgtable.c74
6 files changed, 195 insertions, 3 deletions
diff --git a/arch/s390/kernel/binfmt_elf32.c b/arch/s390/kernel/binfmt_elf32.c
index f1e40ca00d8d..3e1c315b736d 100644
--- a/arch/s390/kernel/binfmt_elf32.c
+++ b/arch/s390/kernel/binfmt_elf32.c
@@ -134,6 +134,7 @@ static inline int dump_task_fpu(struct task_struct *tsk, elf_fpregset_t *fpregs)
}
#include <asm/processor.h>
+#include <asm/pgalloc.h>
#include <linux/module.h>
#include <linux/elfcore.h>
#include <linux/binfmts.h>
@@ -183,6 +184,16 @@ struct elf_prpsinfo32
#undef start_thread
#define start_thread start_thread31
+static inline void start_thread31(struct pt_regs *regs, unsigned long new_psw,
+ unsigned long new_stackp)
+{
+ set_fs(USER_DS);
+ regs->psw.mask = psw_user32_bits;
+ regs->psw.addr = new_psw;
+ regs->gprs[15] = new_stackp;
+ crst_table_downgrade(current->mm, 1UL << 31);
+}
+
MODULE_DESCRIPTION("Binary format loader for compatibility with 32bit Linux for S390 binaries,"
" Copyright 2000 IBM Corporation");
MODULE_AUTHOR("Gerhard Tonn <ton@de.ibm.com>");
diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c
index a4d29025ddbd..60f728aeaf12 100644
--- a/arch/s390/kernel/traps.c
+++ b/arch/s390/kernel/traps.c
@@ -60,6 +60,7 @@ int sysctl_userprocess_debug = 0;
extern pgm_check_handler_t do_protection_exception;
extern pgm_check_handler_t do_dat_exception;
extern pgm_check_handler_t do_monitor_call;
+extern pgm_check_handler_t do_asce_exception;
#define stack_pointer ({ void **sp; asm("la %0,0(15)" : "=&d" (sp)); sp; })
@@ -730,7 +731,7 @@ void __init trap_init(void)
pgm_check_table[0x12] = &translation_exception;
pgm_check_table[0x13] = &special_op_exception;
#ifdef CONFIG_64BIT
- pgm_check_table[0x38] = &do_dat_exception;
+ pgm_check_table[0x38] = &do_asce_exception;
pgm_check_table[0x39] = &do_dat_exception;
pgm_check_table[0x3A] = &do_dat_exception;
pgm_check_table[0x3B] = &do_dat_exception;
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index 2456b52ed068..ed13d429a487 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -32,6 +32,7 @@
#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/s390_ext.h>
+#include <asm/mmu_context.h>
#ifndef CONFIG_64BIT
#define __FAIL_ADDR_MASK 0x7ffff000
@@ -444,6 +445,45 @@ void __kprobes do_dat_exception(struct pt_regs *regs, unsigned long error_code)
do_exception(regs, error_code & 0xff, 0);
}
+#ifdef CONFIG_64BIT
+void __kprobes do_asce_exception(struct pt_regs *regs, unsigned long error_code)
+{
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+ unsigned long address;
+ int space;
+
+ mm = current->mm;
+ address = S390_lowcore.trans_exc_code & __FAIL_ADDR_MASK;
+ space = check_space(current);
+
+ if (unlikely(space == 0 || in_atomic() || !mm))
+ goto no_context;
+
+ local_irq_enable();
+
+ down_read(&mm->mmap_sem);
+ vma = find_vma(mm, address);
+ up_read(&mm->mmap_sem);
+
+ if (vma) {
+ update_mm(mm, current);
+ return;
+ }
+
+ /* User mode accesses just cause a SIGSEGV */
+ if (regs->psw.mask & PSW_MASK_PSTATE) {
+ current->thread.prot_addr = address;
+ current->thread.trap_no = error_code;
+ do_sigsegv(regs, error_code, SEGV_MAPERR, address);
+ return;
+ }
+
+no_context:
+ do_no_context(regs, error_code, address);
+}
+#endif
+
#ifdef CONFIG_PFAULT
/*
* 'pfault' pseudo page faults routines.
diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
index 248a71010700..8053245fe259 100644
--- a/arch/s390/mm/init.c
+++ b/arch/s390/mm/init.c
@@ -112,8 +112,9 @@ void __init paging_init(void)
init_mm.pgd = swapper_pg_dir;
S390_lowcore.kernel_asce = __pa(init_mm.pgd) & PAGE_MASK;
#ifdef CONFIG_64BIT
- S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION2 | _ASCE_TABLE_LENGTH;
- pgd_type = _REGION2_ENTRY_EMPTY;
+ /* A three level page table (4TB) is enough for the kernel space. */
+ S390_lowcore.kernel_asce |= _ASCE_TYPE_REGION3 | _ASCE_TABLE_LENGTH;
+ pgd_type = _REGION3_ENTRY_EMPTY;
#else
S390_lowcore.kernel_asce |= _ASCE_TABLE_LENGTH;
pgd_type = _SEGMENT_ENTRY_EMPTY;
diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c
index 356257c171de..5932a824547a 100644
--- a/arch/s390/mm/mmap.c
+++ b/arch/s390/mm/mmap.c
@@ -27,6 +27,7 @@
#include <linux/personality.h>
#include <linux/mm.h>
#include <linux/module.h>
+#include <asm/pgalloc.h>
/*
* Top of mmap area (just below the process stack).
@@ -62,6 +63,8 @@ static inline int mmap_is_legacy(void)
current->signal->rlim[RLIMIT_STACK].rlim_cur == RLIM_INFINITY;
}
+#ifndef CONFIG_64BIT
+
/*
* This function, called very early during the creation of a new
* process VM image, sets up which VM layout function to use:
@@ -84,3 +87,65 @@ void arch_pick_mmap_layout(struct mm_struct *mm)
}
EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
+#else
+
+static unsigned long
+s390_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags)
+{
+ struct mm_struct *mm = current->mm;
+ int rc;
+
+ addr = arch_get_unmapped_area(filp, addr, len, pgoff, flags);
+ if (addr & ~PAGE_MASK)
+ return addr;
+ if (unlikely(mm->context.asce_limit < addr + len)) {
+ rc = crst_table_upgrade(mm, addr + len);
+ if (rc)
+ return (unsigned long) rc;
+ }
+ return addr;
+}
+
+static unsigned long
+s390_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
+ const unsigned long len, const unsigned long pgoff,
+ const unsigned long flags)
+{
+ struct mm_struct *mm = current->mm;
+ unsigned long addr = addr0;
+ int rc;
+
+ addr = arch_get_unmapped_area_topdown(filp, addr, len, pgoff, flags);
+ if (addr & ~PAGE_MASK)
+ return addr;
+ if (unlikely(mm->context.asce_limit < addr + len)) {
+ rc = crst_table_upgrade(mm, addr + len);
+ if (rc)
+ return (unsigned long) rc;
+ }
+ return addr;
+}
+/*
+ * This function, called very early during the creation of a new
+ * process VM image, sets up which VM layout function to use:
+ */
+void arch_pick_mmap_layout(struct mm_struct *mm)
+{
+ /*
+ * Fall back to the standard layout if the personality
+ * bit is set, or if the expected stack growth is unlimited:
+ */
+ if (mmap_is_legacy()) {
+ mm->mmap_base = TASK_UNMAPPED_BASE;
+ mm->get_unmapped_area = s390_get_unmapped_area;
+ mm->unmap_area = arch_unmap_area;
+ } else {
+ mm->mmap_base = mmap_base();
+ mm->get_unmapped_area = s390_get_unmapped_area_topdown;
+ mm->unmap_area = arch_unmap_area_topdown;
+ }
+}
+EXPORT_SYMBOL_GPL(arch_pick_mmap_layout);
+
+#endif
diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c
index 809e77893039..fd072013f88c 100644
--- a/arch/s390/mm/pgtable.c
+++ b/arch/s390/mm/pgtable.c
@@ -23,6 +23,7 @@
#include <asm/pgalloc.h>
#include <asm/tlb.h>
#include <asm/tlbflush.h>
+#include <asm/mmu_context.h>
#ifndef CONFIG_64BIT
#define ALLOC_ORDER 1
@@ -70,6 +71,79 @@ void crst_table_free(struct mm_struct *mm, unsigned long *table)
free_pages((unsigned long) table, ALLOC_ORDER);
}
+#ifdef CONFIG_64BIT
+int crst_table_upgrade(struct mm_struct *mm, unsigned long limit)
+{
+ unsigned long *table, *pgd;
+ unsigned long entry;
+
+ BUG_ON(limit > (1UL << 53));
+repeat:
+ table = crst_table_alloc(mm, mm->context.noexec);
+ if (!table)
+ return -ENOMEM;
+ spin_lock(&mm->page_table_lock);
+ if (mm->context.asce_limit < limit) {
+ pgd = (unsigned long *) mm->pgd;
+ if (mm->context.asce_limit <= (1UL << 31)) {
+ entry = _REGION3_ENTRY_EMPTY;
+ mm->context.asce_limit = 1UL << 42;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_REGION3;
+ } else {
+ entry = _REGION2_ENTRY_EMPTY;
+ mm->context.asce_limit = 1UL << 53;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_REGION2;
+ }
+ crst_table_init(table, entry);
+ pgd_populate(mm, (pgd_t *) table, (pud_t *) pgd);
+ mm->pgd = (pgd_t *) table;
+ table = NULL;
+ }
+ spin_unlock(&mm->page_table_lock);
+ if (table)
+ crst_table_free(mm, table);
+ if (mm->context.asce_limit < limit)
+ goto repeat;
+ update_mm(mm, current);
+ return 0;
+}
+
+void crst_table_downgrade(struct mm_struct *mm, unsigned long limit)
+{
+ pgd_t *pgd;
+
+ if (mm->context.asce_limit <= limit)
+ return;
+ __tlb_flush_mm(mm);
+ while (mm->context.asce_limit > limit) {
+ pgd = mm->pgd;
+ switch (pgd_val(*pgd) & _REGION_ENTRY_TYPE_MASK) {
+ case _REGION_ENTRY_TYPE_R2:
+ mm->context.asce_limit = 1UL << 42;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_REGION3;
+ break;
+ case _REGION_ENTRY_TYPE_R3:
+ mm->context.asce_limit = 1UL << 31;
+ mm->context.asce_bits = _ASCE_TABLE_LENGTH |
+ _ASCE_USER_BITS |
+ _ASCE_TYPE_SEGMENT;
+ break;
+ default:
+ BUG();
+ }
+ mm->pgd = (pgd_t *) (pgd_val(*pgd) & _REGION_ENTRY_ORIGIN);
+ crst_table_free(mm, (unsigned long *) pgd);
+ }
+ update_mm(mm, current);
+}
+#endif
+
/*
* page table entry allocation/free routines.
*/