summaryrefslogtreecommitdiffstats
path: root/arch/s390/kernel/vdso.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/s390/kernel/vdso.c')
-rw-r--r--arch/s390/kernel/vdso.c290
1 files changed, 165 insertions, 125 deletions
diff --git a/arch/s390/kernel/vdso.c b/arch/s390/kernel/vdso.c
index 8bc269c55fd3..8c4e07d533c8 100644
--- a/arch/s390/kernel/vdso.c
+++ b/arch/s390/kernel/vdso.c
@@ -6,186 +6,226 @@
* Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com)
*/
-#include <linux/init.h>
+#include <linux/binfmts.h>
+#include <linux/compat.h>
+#include <linux/elf.h>
#include <linux/errno.h>
-#include <linux/sched.h>
+#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/stddef.h>
-#include <linux/unistd.h>
#include <linux/slab.h>
-#include <linux/user.h>
-#include <linux/elf.h>
-#include <linux/security.h>
-#include <linux/memblock.h>
-#include <linux/compat.h>
-#include <linux/binfmts.h>
+#include <linux/smp.h>
+#include <linux/time_namespace.h>
#include <vdso/datapage.h>
-#include <asm/asm-offsets.h>
-#include <asm/processor.h>
-#include <asm/mmu.h>
-#include <asm/mmu_context.h>
-#include <asm/sections.h>
#include <asm/vdso.h>
-#include <asm/facility.h>
-#include <asm/timex.h>
-extern char vdso64_start, vdso64_end;
-static void *vdso64_kbase = &vdso64_start;
-static unsigned int vdso64_pages;
-static struct page **vdso64_pagelist;
+extern char vdso64_start[], vdso64_end[];
+static unsigned int vdso_pages;
+
+static struct vm_special_mapping vvar_mapping;
+
+static union {
+ struct vdso_data data[CS_BASES];
+ u8 page[PAGE_SIZE];
+} vdso_data_store __page_aligned_data;
+
+struct vdso_data *vdso_data = vdso_data_store.data;
+
+enum vvar_pages {
+ VVAR_DATA_PAGE_OFFSET,
+ VVAR_TIMENS_PAGE_OFFSET,
+ VVAR_NR_PAGES,
+};
-/*
- * Should the kernel map a VDSO page into processes and pass its
- * address down to glibc upon exec()?
- */
unsigned int __read_mostly vdso_enabled = 1;
-static vm_fault_t vdso_fault(const struct vm_special_mapping *sm,
- struct vm_area_struct *vma, struct vm_fault *vmf)
+static int __init vdso_setup(char *str)
+{
+ bool enabled;
+
+ if (!kstrtobool(str, &enabled))
+ vdso_enabled = enabled;
+ return 1;
+}
+__setup("vdso=", vdso_setup);
+
+#ifdef CONFIG_TIME_NS
+struct vdso_data *arch_get_vdso_data(void *vvar_page)
+{
+ return (struct vdso_data *)(vvar_page);
+}
+
+static struct page *find_timens_vvar_page(struct vm_area_struct *vma)
{
- struct page **vdso_pagelist;
- unsigned long vdso_pages;
+ if (likely(vma->vm_mm == current->mm))
+ return current->nsproxy->time_ns->vvar_page;
+ /*
+ * VM_PFNMAP | VM_IO protect .fault() handler from being called
+ * through interfaces like /proc/$pid/mem or
+ * process_vm_{readv,writev}() as long as there's no .access()
+ * in special_mapping_vmops().
+ * For more details check_vma_flags() and __access_remote_vm()
+ */
+ WARN(1, "vvar_page accessed remotely");
+ return NULL;
+}
- vdso_pagelist = vdso64_pagelist;
- vdso_pages = vdso64_pages;
+/*
+ * The VVAR page layout depends on whether a task belongs to the root or
+ * non-root time namespace. Whenever a task changes its namespace, the VVAR
+ * page tables are cleared and then they will be re-faulted with a
+ * corresponding layout.
+ * See also the comment near timens_setup_vdso_data() for details.
+ */
+int vdso_join_timens(struct task_struct *task, struct time_namespace *ns)
+{
+ struct mm_struct *mm = task->mm;
+ struct vm_area_struct *vma;
- if (vmf->pgoff >= vdso_pages)
- return VM_FAULT_SIGBUS;
+ mmap_read_lock(mm);
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ unsigned long size = vma->vm_end - vma->vm_start;
- vmf->page = vdso_pagelist[vmf->pgoff];
- get_page(vmf->page);
+ if (!vma_is_special_mapping(vma, &vvar_mapping))
+ continue;
+ zap_page_range(vma, vma->vm_start, size);
+ break;
+ }
+ mmap_read_unlock(mm);
return 0;
}
+#else
+static inline struct page *find_timens_vvar_page(struct vm_area_struct *vma)
+{
+ return NULL;
+}
+#endif
+
+static vm_fault_t vvar_fault(const struct vm_special_mapping *sm,
+ struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+ struct page *timens_page = find_timens_vvar_page(vma);
+ unsigned long addr, pfn;
+ vm_fault_t err;
+
+ switch (vmf->pgoff) {
+ case VVAR_DATA_PAGE_OFFSET:
+ pfn = virt_to_pfn(vdso_data);
+ if (timens_page) {
+ /*
+ * Fault in VVAR page too, since it will be accessed
+ * to get clock data anyway.
+ */
+ addr = vmf->address + VVAR_TIMENS_PAGE_OFFSET * PAGE_SIZE;
+ err = vmf_insert_pfn(vma, addr, pfn);
+ if (unlikely(err & VM_FAULT_ERROR))
+ return err;
+ pfn = page_to_pfn(timens_page);
+ }
+ break;
+#ifdef CONFIG_TIME_NS
+ case VVAR_TIMENS_PAGE_OFFSET:
+ /*
+ * If a task belongs to a time namespace then a namespace
+ * specific VVAR is mapped with the VVAR_DATA_PAGE_OFFSET and
+ * the real VVAR page is mapped with the VVAR_TIMENS_PAGE_OFFSET
+ * offset.
+ * See also the comment near timens_setup_vdso_data().
+ */
+ if (!timens_page)
+ return VM_FAULT_SIGBUS;
+ pfn = virt_to_pfn(vdso_data);
+ break;
+#endif /* CONFIG_TIME_NS */
+ default:
+ return VM_FAULT_SIGBUS;
+ }
+ return vmf_insert_pfn(vma, vmf->address, pfn);
+}
static int vdso_mremap(const struct vm_special_mapping *sm,
struct vm_area_struct *vma)
{
current->mm->context.vdso_base = vma->vm_start;
-
return 0;
}
-static const struct vm_special_mapping vdso_mapping = {
+static struct vm_special_mapping vvar_mapping = {
+ .name = "[vvar]",
+ .fault = vvar_fault,
+};
+
+static struct vm_special_mapping vdso_mapping = {
.name = "[vdso]",
- .fault = vdso_fault,
.mremap = vdso_mremap,
};
-static int __init vdso_setup(char *str)
-{
- bool enabled;
-
- if (!kstrtobool(str, &enabled))
- vdso_enabled = enabled;
- return 1;
-}
-__setup("vdso=", vdso_setup);
-
-/*
- * The vdso data page
- */
-static union {
- struct vdso_data data;
- u8 page[PAGE_SIZE];
-} vdso_data_store __page_aligned_data;
-struct vdso_data *vdso_data = (struct vdso_data *)&vdso_data_store.data;
-
-void vdso_getcpu_init(void)
+int vdso_getcpu_init(void)
{
set_tod_programmable_field(smp_processor_id());
+ return 0;
}
+early_initcall(vdso_getcpu_init); /* Must be called before SMP init */
-/*
- * This is called from binfmt_elf, we create the special vma for the
- * vDSO and insert it into the mm struct tree
- */
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
+ unsigned long vdso_text_len, vdso_mapping_len;
+ unsigned long vvar_start, vdso_text_start;
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
- unsigned long vdso_pages;
- unsigned long vdso_base;
int rc;
- if (!vdso_enabled)
- return 0;
-
- if (is_compat_task())
- return 0;
-
- vdso_pages = vdso64_pages;
- /*
- * vDSO has a problem and was disabled, just don't "enable" it for
- * the process
- */
- if (vdso_pages == 0)
+ BUILD_BUG_ON(VVAR_NR_PAGES != __VVAR_PAGES);
+ if (!vdso_enabled || is_compat_task())
return 0;
-
- /*
- * pick a base address for the vDSO in process space. We try to put
- * it at vdso_base which is the "natural" base for it, but we might
- * fail and end up putting it elsewhere.
- */
if (mmap_write_lock_killable(mm))
return -EINTR;
- vdso_base = get_unmapped_area(NULL, 0, vdso_pages << PAGE_SHIFT, 0, 0);
- if (IS_ERR_VALUE(vdso_base)) {
- rc = vdso_base;
- goto out_up;
- }
-
- /*
- * our vma flags don't have VM_WRITE so by default, the process
- * isn't allowed to write those pages.
- * gdb can break that with ptrace interface, and thus trigger COW
- * on those pages but it's then your responsibility to never do that
- * on the "data" page of the vDSO or you'll stop getting kernel
- * updates and your nice userland gettimeofday will be totally dead.
- * It's fine to use that for setting breakpoints in the vDSO code
- * pages though.
- */
- vma = _install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
+ vdso_text_len = vdso_pages << PAGE_SHIFT;
+ vdso_mapping_len = vdso_text_len + VVAR_NR_PAGES * PAGE_SIZE;
+ vvar_start = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0);
+ rc = vvar_start;
+ if (IS_ERR_VALUE(vvar_start))
+ goto out;
+ vma = _install_special_mapping(mm, vvar_start, VVAR_NR_PAGES*PAGE_SIZE,
+ VM_READ|VM_MAYREAD|VM_IO|VM_DONTDUMP|
+ VM_PFNMAP,
+ &vvar_mapping);
+ rc = PTR_ERR(vma);
+ if (IS_ERR(vma))
+ goto out;
+ vdso_text_start = vvar_start + VVAR_NR_PAGES * PAGE_SIZE;
+ /* VM_MAYWRITE for COW so gdb can set breakpoints */
+ vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len,
VM_READ|VM_EXEC|
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
&vdso_mapping);
if (IS_ERR(vma)) {
+ do_munmap(mm, vvar_start, PAGE_SIZE, NULL);
rc = PTR_ERR(vma);
- goto out_up;
+ } else {
+ current->mm->context.vdso_base = vdso_text_start;
+ rc = 0;
}
-
- current->mm->context.vdso_base = vdso_base;
- rc = 0;
-
-out_up:
+out:
mmap_write_unlock(mm);
return rc;
}
static int __init vdso_init(void)
{
+ struct page **pages;
int i;
- vdso_getcpu_init();
- /* Calculate the size of the 64 bit vDSO */
- vdso64_pages = ((&vdso64_end - &vdso64_start
- + PAGE_SIZE - 1) >> PAGE_SHIFT) + 1;
-
- /* Make sure pages are in the correct state */
- vdso64_pagelist = kcalloc(vdso64_pages + 1, sizeof(struct page *),
- GFP_KERNEL);
- BUG_ON(vdso64_pagelist == NULL);
- for (i = 0; i < vdso64_pages - 1; i++) {
- struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
- get_page(pg);
- vdso64_pagelist[i] = pg;
+ vdso_pages = (vdso64_end - vdso64_start) >> PAGE_SHIFT;
+ pages = kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL);
+ if (!pages) {
+ vdso_enabled = 0;
+ return -ENOMEM;
}
- vdso64_pagelist[vdso64_pages - 1] = virt_to_page(vdso_data);
- vdso64_pagelist[vdso64_pages] = NULL;
-
- get_page(virt_to_page(vdso_data));
-
+ for (i = 0; i < vdso_pages; i++)
+ pages[i] = virt_to_page(vdso64_start + i * PAGE_SIZE);
+ pages[vdso_pages] = NULL;
+ vdso_mapping.pages = pages;
return 0;
}
-early_initcall(vdso_init);
+arch_initcall(vdso_init);