From 1e9877902dc7e11d2be038371c6fbf2dfcd469d7 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 12 Feb 2016 13:01:54 -0800 Subject: mm/gup: Introduce get_user_pages_remote() For protection keys, we need to understand whether protections should be enforced in software or not. In general, we enforce protections when working on our own task, but not when on others. We call these "current" and "remote" operations. This patch introduces a new get_user_pages() variant: get_user_pages_remote() Which is a replacement for when get_user_pages() is called on non-current tsk/mm. We also introduce a new gup flag: FOLL_REMOTE which can be used for the "__" gup variants to get this new behavior. The uprobes is_trap_at_addr() location holds mmap_sem and calls get_user_pages(current->mm) on an instruction address. This makes it a pretty unique gup caller. Being an instruction access and also really originating from the kernel (vs. the app), I opted to consider this a 'remote' access where protection keys will not be enforced. Without protection keys, this patch should not change any behavior. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrea Arcangeli Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Kirill A. 
Shutemov Cc: Linus Torvalds Cc: Naoya Horiguchi Cc: Peter Zijlstra Cc: Rik van Riel Cc: Srikar Dronamraju Cc: Vlastimil Babka Cc: jack@suse.cz Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20160212210154.3F0E51EA@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- mm/memory.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm/memory.c') diff --git a/mm/memory.c b/mm/memory.c index 38090ca37a08..8bfbad0cca8c 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3685,7 +3685,7 @@ static int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm, void *maddr; struct page *page = NULL; - ret = get_user_pages(tsk, mm, addr, 1, + ret = get_user_pages_remote(tsk, mm, addr, 1, write, 1, &page, &vma); if (ret <= 0) { #ifndef CONFIG_HAVE_IOREMAP_PROT -- cgit v1.2.3 From 33a709b25a760b91184bb335cf7d7c32b8123013 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 12 Feb 2016 13:02:19 -0800 Subject: mm/gup, x86/mm/pkeys: Check VMAs and PTEs for protection keys Today, for normal faults and page table walks, we check the VMA and/or PTE to ensure that it is compatible with the action. For instance, if we get a write fault on a non-writeable VMA, we SIGSEGV. We try to do the same thing for protection keys. Basically, we try to make sure that if a user does this: mprotect(ptr, size, PROT_NONE); *ptr = foo; they see the same effects with protection keys when they do this: mprotect(ptr, size, PROT_READ|PROT_WRITE); set_pkey(ptr, size, 4); wrpkru(0xffffff3f); // access disable pkey 4 *ptr = foo; The state to do that checking is in the VMA, but we also sometimes have to do it on the page tables only, like when doing a get_user_pages_fast() where we have no VMA. We add two functions and expose them to generic code: arch_pte_access_permitted(pte_flags, write) arch_vma_access_permitted(vma, write) These are, of course, backed up in x86 arch code with checks against the PTE or VMA's protection key. 
But, there are also cases where we do not want to respect protection keys. When we ptrace(), for instance, we do not want to apply the tracer's PKRU permissions to the PTEs from the process being traced. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Alexey Kardashevskiy Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Aneesh Kumar K.V Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Boaz Harrosh Cc: Borislav Petkov Cc: Brian Gerst Cc: Dan Williams Cc: Dave Hansen Cc: David Gibson Cc: David Hildenbrand Cc: David Vrabel Cc: Denys Vlasenko Cc: Dominik Dingel Cc: Dominik Vogt Cc: Guan Xuetao Cc: H. Peter Anvin Cc: Heiko Carstens Cc: Hugh Dickins Cc: Jason Low Cc: Jerome Marchand Cc: Juergen Gross Cc: Kirill A. Shutemov Cc: Laurent Dufour Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michael Ellerman Cc: Michal Hocko Cc: Mikulas Patocka Cc: Minchan Kim Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Sasha Levin Cc: Shachar Raindel Cc: Stephen Smalley Cc: Toshi Kani Cc: Vlastimil Babka Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-s390@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/20160212210219.14D5D715@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/mmu_context.h | 11 +++++++ arch/s390/include/asm/mmu_context.h | 11 +++++++ arch/unicore32/include/asm/mmu_context.h | 11 +++++++ arch/x86/include/asm/mmu_context.h | 49 ++++++++++++++++++++++++++++++++ arch/x86/include/asm/pgtable.h | 29 +++++++++++++++++++ arch/x86/mm/fault.c | 21 +++++++++++++- arch/x86/mm/gup.c | 5 ++++ include/asm-generic/mm_hooks.h | 11 +++++++ mm/gup.c | 18 ++++++++++-- mm/memory.c | 4 +++ 10 files changed, 166 insertions(+), 4 deletions(-) (limited to 'mm/memory.c') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 878c27771717..a0f1838c8e78 100644 --- 
a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -148,5 +148,16 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, { } +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write) +{ + /* by default, allow everything */ + return true; +} + +static inline bool arch_pte_access_permitted(pte_t pte, bool write) +{ + /* by default, allow everything */ + return true; +} #endif /* __KERNEL__ */ #endif /* __ASM_POWERPC_MMU_CONTEXT_H */ diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index fb1b93ea3e3f..2627b338382c 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -130,4 +130,15 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, { } +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write) +{ + /* by default, allow everything */ + return true; +} + +static inline bool arch_pte_access_permitted(pte_t pte, bool write) +{ + /* by default, allow everything */ + return true; +} #endif /* __S390_MMU_CONTEXT_H */ diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index 1cb5220afaf9..3133f947ade2 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ b/arch/unicore32/include/asm/mmu_context.h @@ -97,4 +97,15 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, { } +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write) +{ + /* by default, allow everything */ + return true; +} + +static inline bool arch_pte_access_permitted(pte_t pte, bool write) +{ + /* by default, allow everything */ + return true; +} #endif diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 94c4c8b5cb8f..19036cdbed8f 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -286,4 +286,53 @@ static inline int vma_pkey(struct vm_area_struct *vma) return pkey; 
} +static inline bool __pkru_allows_pkey(u16 pkey, bool write) +{ + u32 pkru = read_pkru(); + + if (!__pkru_allows_read(pkru, pkey)) + return false; + if (write && !__pkru_allows_write(pkru, pkey)) + return false; + + return true; +} + +/* + * We only want to enforce protection keys on the current process + * because we effectively have no access to PKRU for other + * processes or any way to tell *which * PKRU in a threaded + * process we could use. + * + * So do not enforce things if the VMA is not from the current + * mm, or if we are in a kernel thread. + */ +static inline bool vma_is_foreign(struct vm_area_struct *vma) +{ + if (!current->mm) + return true; + /* + * Should PKRU be enforced on the access to this VMA? If + * the VMA is from another process, then PKRU has no + * relevance and should not be enforced. + */ + if (current->mm != vma->vm_mm) + return true; + + return false; +} + +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write) +{ + /* allow access if the VMA is not one from this process */ + if (vma_is_foreign(vma)) + return true; + return __pkru_allows_pkey(vma_pkey(vma), write); +} + +static inline bool arch_pte_access_permitted(pte_t pte, bool write) +{ + return __pkru_allows_pkey(pte_flags_pkey(pte_flags(pte)), write); +} + #endif /* _ASM_X86_MMU_CONTEXT_H */ diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index e997dcc6ee2b..3cbfae80abb2 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -919,6 +919,35 @@ static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) } #endif +#define PKRU_AD_BIT 0x1 +#define PKRU_WD_BIT 0x2 + +static inline bool __pkru_allows_read(u32 pkru, u16 pkey) +{ + int pkru_pkey_bits = pkey * 2; + return !(pkru & (PKRU_AD_BIT << pkru_pkey_bits)); +} + +static inline bool __pkru_allows_write(u32 pkru, u16 pkey) +{ + int pkru_pkey_bits = pkey * 2; + /* + * Access-disable disables writes too so we need to check + * both bits here. 
+ */ + return !(pkru & ((PKRU_AD_BIT|PKRU_WD_BIT) << pkru_pkey_bits)); +} + +static inline u16 pte_flags_pkey(unsigned long pte_flags) +{ +#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS + /* ifdef to avoid doing 59-bit shift on 32-bit values */ + return (pte_flags & _PAGE_PKEY_MASK) >> _PAGE_BIT_PKEY_BIT0; +#else + return 0; +#endif +} + #include #endif /* __ASSEMBLY__ */ diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 6e71dcf699ab..319331afae24 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -897,6 +897,16 @@ bad_area(struct pt_regs *regs, unsigned long error_code, unsigned long address) __bad_area(regs, error_code, address, NULL, SEGV_MAPERR); } +static inline bool bad_area_access_from_pkeys(unsigned long error_code, + struct vm_area_struct *vma) +{ + if (!boot_cpu_has(X86_FEATURE_OSPKE)) + return false; + if (error_code & PF_PK) + return true; + return false; +} + static noinline void bad_area_access_error(struct pt_regs *regs, unsigned long error_code, unsigned long address, struct vm_area_struct *vma) @@ -906,7 +916,7 @@ bad_area_access_error(struct pt_regs *regs, unsigned long error_code, * But, doing it this way allows compiler optimizations * if pkeys are compiled out. */ - if (boot_cpu_has(X86_FEATURE_OSPKE) && (error_code & PF_PK)) + if (bad_area_access_from_pkeys(error_code, vma)) __bad_area(regs, error_code, address, vma, SEGV_PKUERR); else __bad_area(regs, error_code, address, vma, SEGV_ACCERR); @@ -1081,6 +1091,15 @@ int show_unhandled_signals = 1; static inline int access_error(unsigned long error_code, struct vm_area_struct *vma) { + /* + * Access or read was blocked by protection keys. We do + * this check before any others because we do not want + * to, for instance, confuse a protection-key-denied + * write with one for which we should do a COW. 
+ */ + if (error_code & PF_PK) + return 1; + if (error_code & PF_WRITE) { /* write, present and write, not present: */ if (unlikely(!(vma->vm_flags & VM_WRITE))) diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 2f0a32945cda..bab259e75984 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -11,6 +11,7 @@ #include #include +#include #include static inline pte_t gup_get_pte(pte_t *ptep) @@ -89,6 +90,10 @@ static inline int pte_allows_gup(unsigned long pteval, int write) if ((pteval & need_pte_bits) != need_pte_bits) return 0; + /* Check memory protection keys permissions. */ + if (!__pkru_allows_pkey(pte_flags_pkey(pteval), write)) + return 0; + return 1; } diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h index 866aa461efa5..c1fc5af3c384 100644 --- a/include/asm-generic/mm_hooks.h +++ b/include/asm-generic/mm_hooks.h @@ -26,4 +26,15 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, { } +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write) +{ + /* by default, allow everything */ + return true; +} + +static inline bool arch_pte_access_permitted(pte_t pte, bool write) +{ + /* by default, allow everything */ + return true; +} #endif /* _ASM_GENERIC_MM_HOOKS_H */ diff --git a/mm/gup.c b/mm/gup.c index b935c2c71ec9..e0f5f3574d16 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -444,6 +445,8 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) if (!(vm_flags & VM_MAYREAD)) return -EFAULT; } + if (!arch_vma_access_permitted(vma, (gup_flags & FOLL_WRITE))) + return -EFAULT; return 0; } @@ -612,13 +615,19 @@ EXPORT_SYMBOL(__get_user_pages); bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags) { - vm_flags_t vm_flags; - - vm_flags = (fault_flags & FAULT_FLAG_WRITE) ? VM_WRITE : VM_READ; + bool write = !!(fault_flags & FAULT_FLAG_WRITE); + vm_flags_t vm_flags = write ? 
VM_WRITE : VM_READ; if (!(vm_flags & vma->vm_flags)) return false; + /* + * The architecture might have a hardware protection + * mechanism other than read/write that can deny access + */ + if (!arch_vma_access_permitted(vma, write)) + return false; + return true; } @@ -1172,6 +1181,9 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, pte_protnone(pte) || (write && !pte_write(pte))) goto pte_unmap; + if (!arch_pte_access_permitted(pte, write)) + goto pte_unmap; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); head = compound_head(page); diff --git a/mm/memory.c b/mm/memory.c index 8bfbad0cca8c..d7e84fe6504d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -65,6 +65,7 @@ #include #include +#include #include #include #include @@ -3378,6 +3379,9 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, pmd_t *pmd; pte_t *pte; + if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE)) + return VM_FAULT_SIGSEGV; + if (unlikely(is_vm_hugetlb_page(vma))) return hugetlb_fault(mm, vma, address, flags); -- cgit v1.2.3 From 1b2ee1266ea647713dbaf44825967c180dfc8d76 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 12 Feb 2016 13:02:21 -0800 Subject: mm/core: Do not enforce PKEY permissions on remote mm access We try to enforce protection keys in software the same way that we do in hardware. (See long example below). But, we only want to do this when accessing our *own* process's memory. If GDB set PKRU[6].AD=1 (disable access to PKEY 6), then tried to PTRACE_POKE a target process which just happened to have some mprotect_pkey(pkey=6) memory, we do *not* want to deny the debugger access to that memory. PKRU is fundamentally a thread-local structure and we do not want to enforce it on access to _another_ thread's data. This gets especially tricky when we have workqueues or other delayed-work mechanisms that might run in a random process's context. 
We can check that we only enforce pkeys when operating on our *own* mm, but delayed work gets performed when a random user context is active. We might end up with a situation where a delayed-work gup fails when running randomly under its "own" task but succeeds when running under another process. We want to avoid that. To avoid that, we use the new GUP flag: FOLL_REMOTE and add a fault flag: FAULT_FLAG_REMOTE. They indicate that we are walking an mm which is not guaranteed to be the same as current->mm and should not be subject to protection key enforcement. Thanks to Jerome Glisse for pointing out this scenario. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Alexey Kardashevskiy Cc: Andrea Arcangeli Cc: Andrew Morton Cc: Andy Lutomirski Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Boaz Harrosh Cc: Borislav Petkov Cc: Brian Gerst Cc: Dan Williams Cc: Dave Chinner Cc: Dave Hansen Cc: David Gibson Cc: Denys Vlasenko Cc: Dominik Dingel Cc: Dominik Vogt Cc: Eric B Munson Cc: Geliang Tang Cc: Guan Xuetao Cc: H. Peter Anvin Cc: Heiko Carstens Cc: Hugh Dickins Cc: Jan Kara Cc: Jason Low Cc: Jerome Marchand Cc: Joerg Roedel Cc: Kirill A. 
Shutemov Cc: Konstantin Khlebnikov Cc: Laurent Dufour Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michael Ellerman Cc: Michal Hocko Cc: Mikulas Patocka Cc: Minchan Kim Cc: Oleg Nesterov Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Sasha Levin Cc: Shachar Raindel Cc: Vlastimil Babka Cc: Xie XiuQi Cc: iommu@lists.linux-foundation.org Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-s390@vger.kernel.org Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/mmu_context.h | 3 ++- arch/s390/include/asm/mmu_context.h | 3 ++- arch/unicore32/include/asm/mmu_context.h | 3 ++- arch/x86/include/asm/mmu_context.h | 5 +++-- drivers/iommu/amd_iommu_v2.c | 1 + include/asm-generic/mm_hooks.h | 3 ++- include/linux/mm.h | 1 + mm/gup.c | 15 ++++++++++----- mm/ksm.c | 10 ++++++++-- mm/memory.c | 3 ++- 10 files changed, 33 insertions(+), 14 deletions(-) (limited to 'mm/memory.c') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index a0f1838c8e78..df9bf3ed025b 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -148,7 +148,8 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, { } -static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write) +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, + bool write, bool foreign) { /* by default, allow everything */ return true; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 2627b338382c..8906600922ce 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -130,7 +130,8 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, { } -static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write) +static inline bool arch_vma_access_permitted(struct vm_area_struct 
*vma, + bool write, bool foreign) { /* by default, allow everything */ return true; diff --git a/arch/unicore32/include/asm/mmu_context.h b/arch/unicore32/include/asm/mmu_context.h index 3133f947ade2..e35632ef23c7 100644 --- a/arch/unicore32/include/asm/mmu_context.h +++ b/arch/unicore32/include/asm/mmu_context.h @@ -97,7 +97,8 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, { } -static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write) +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, + bool write, bool foreign) { /* by default, allow everything */ return true; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 19036cdbed8f..b4d939a17e60 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -322,10 +322,11 @@ static inline bool vma_is_foreign(struct vm_area_struct *vma) return false; } -static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write) +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, + bool write, bool foreign) { /* allow access if the VMA is not one from this process */ - if (vma_is_foreign(vma)) + if (foreign || vma_is_foreign(vma)) return true; return __pkru_allows_pkey(vma_pkey(vma), write); } diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index c865737326e1..56999d2fac07 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -526,6 +526,7 @@ static void do_fault(struct work_struct *work) flags |= FAULT_FLAG_USER; if (fault->flags & PPR_FAULT_WRITE) flags |= FAULT_FLAG_WRITE; + flags |= FAULT_FLAG_REMOTE; down_read(&mm->mmap_sem); vma = find_extend_vma(mm, address); diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h index c1fc5af3c384..d5c9633bd955 100644 --- a/include/asm-generic/mm_hooks.h +++ b/include/asm-generic/mm_hooks.h @@ -26,7 +26,8 @@ static inline void 
arch_bprm_mm_init(struct mm_struct *mm, { } -static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, bool write) +static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, + bool write, bool foreign) { /* by default, allow everything */ return true; diff --git a/include/linux/mm.h b/include/linux/mm.h index 3056369bab1d..2aaa0f0d67ea 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -251,6 +251,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_KILLABLE 0x10 /* The fault task is in SIGKILL killable region */ #define FAULT_FLAG_TRIED 0x20 /* Second try */ #define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */ +#define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */ /* * vm_fault is filled by the the pagefault handler and passed to the vma's diff --git a/mm/gup.c b/mm/gup.c index e0f5f3574d16..d276760163b3 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -365,6 +365,8 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, return -ENOENT; if (*flags & FOLL_WRITE) fault_flags |= FAULT_FLAG_WRITE; + if (*flags & FOLL_REMOTE) + fault_flags |= FAULT_FLAG_REMOTE; if (nonblocking) fault_flags |= FAULT_FLAG_ALLOW_RETRY; if (*flags & FOLL_NOWAIT) @@ -415,11 +417,13 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) { vm_flags_t vm_flags = vma->vm_flags; + int write = (gup_flags & FOLL_WRITE); + int foreign = (gup_flags & FOLL_REMOTE); if (vm_flags & (VM_IO | VM_PFNMAP)) return -EFAULT; - if (gup_flags & FOLL_WRITE) { + if (write) { if (!(vm_flags & VM_WRITE)) { if (!(gup_flags & FOLL_FORCE)) return -EFAULT; @@ -445,7 +449,7 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) if (!(vm_flags & VM_MAYREAD)) return -EFAULT; } - if (!arch_vma_access_permitted(vma, (gup_flags & FOLL_WRITE))) + if (!arch_vma_access_permitted(vma, write, foreign)) return 
-EFAULT; return 0; } @@ -615,7 +619,8 @@ EXPORT_SYMBOL(__get_user_pages); bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags) { - bool write = !!(fault_flags & FAULT_FLAG_WRITE); + bool write = !!(fault_flags & FAULT_FLAG_WRITE); + bool foreign = !!(fault_flags & FAULT_FLAG_REMOTE); vm_flags_t vm_flags = write ? VM_WRITE : VM_READ; if (!(vm_flags & vma->vm_flags)) @@ -623,9 +628,9 @@ bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags) /* * The architecture might have a hardware protection - * mechanism other than read/write that can deny access + * mechanism other than read/write that can deny access. */ - if (!arch_vma_access_permitted(vma, write)) + if (!arch_vma_access_permitted(vma, write, foreign)) return false; return true; diff --git a/mm/ksm.c b/mm/ksm.c index c2013f638d11..b99e828172f6 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -359,6 +359,10 @@ static inline bool ksm_test_exit(struct mm_struct *mm) * in case the application has unmapped and remapped mm,addr meanwhile. * Could a ksm page appear anywhere else? Actually yes, in a VM_PFNMAP * mmap of /dev/mem or /dev/kmem, where we would not want to touch it. + * + * FAULT_FLAG/FOLL_REMOTE are because we do this outside the context + * of the process that owns 'vma'. We also do not want to enforce + * protection keys here anyway. 
*/ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) { @@ -367,12 +371,14 @@ static int break_ksm(struct vm_area_struct *vma, unsigned long addr) do { cond_resched(); - page = follow_page(vma, addr, FOLL_GET | FOLL_MIGRATION); + page = follow_page(vma, addr, + FOLL_GET | FOLL_MIGRATION | FOLL_REMOTE); if (IS_ERR_OR_NULL(page)) break; if (PageKsm(page)) ret = handle_mm_fault(vma->vm_mm, vma, addr, - FAULT_FLAG_WRITE); + FAULT_FLAG_WRITE | + FAULT_FLAG_REMOTE); else ret = VM_FAULT_WRITE; put_page(page); diff --git a/mm/memory.c b/mm/memory.c index d7e84fe6504d..76c44e5dffa2 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3379,7 +3379,8 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, pmd_t *pmd; pte_t *pte; - if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE)) + if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, + flags & FAULT_FLAG_REMOTE)) return VM_FAULT_SIGSEGV; if (unlikely(is_vm_hugetlb_page(vma))) -- cgit v1.2.3 From d61172b4b695b821388cdb6088a41d431bcbb93b Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 12 Feb 2016 13:02:24 -0800 Subject: mm/core, x86/mm/pkeys: Differentiate instruction fetches As discussed earlier, we attempt to enforce protection keys in software. However, the code checks all faults to ensure that they are not violating protection key permissions. It was assumed that all faults are either write faults where we check PKRU[key].WD (write disable) or read faults where we check the AD (access disable) bit. But, there is a third category of faults for protection keys: instruction faults. Instruction faults never run afoul of protection keys because they do not affect instruction fetches. So, plumb the PF_INSTR bit down in to the arch_vma_access_permitted() function where we do the protection key checks. We also add a new FAULT_FLAG_INSTRUCTION. 
This is because handle_mm_fault() is not passed the architecture-specific error_code where we keep PF_INSTR, so we need to encode the instruction fetch information in to the arch-generic fault flags. Signed-off-by: Dave Hansen Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20160212210224.96928009@viggo.jf.intel.com Signed-off-by: Ingo Molnar --- arch/powerpc/include/asm/mmu_context.h | 2 +- arch/s390/include/asm/mmu_context.h | 2 +- arch/x86/include/asm/mmu_context.h | 5 ++++- arch/x86/mm/fault.c | 8 ++++++-- include/asm-generic/mm_hooks.h | 2 +- include/linux/mm.h | 1 + mm/gup.c | 11 +++++++++-- mm/memory.c | 1 + 8 files changed, 24 insertions(+), 8 deletions(-) (limited to 'mm/memory.c') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index df9bf3ed025b..4eaab40e3ade 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -149,7 +149,7 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, } static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, - bool write, bool foreign) + bool write, bool execute, bool foreign) { /* by default, allow everything */ return true; diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 8906600922ce..fa66b6dfa97a 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -131,7 +131,7 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, } static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, - bool write, bool foreign) + bool write, bool execute, bool foreign) { /* by default, allow everything */ return true; diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 
b4d939a17e60..6572b949cbca 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -323,8 +323,11 @@ static inline bool vma_is_foreign(struct vm_area_struct *vma) } static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, - bool write, bool foreign) + bool write, bool execute, bool foreign) { + /* pkeys never affect instruction fetches */ + if (execute) + return true; /* allow access if the VMA is not one from this process */ if (foreign || vma_is_foreign(vma)) return true; diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 68ecdffe284e..d81744e6f39f 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -908,7 +908,8 @@ static inline bool bad_area_access_from_pkeys(unsigned long error_code, if (error_code & PF_PK) return true; /* this checks permission keys on the VMA: */ - if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), foreign)) + if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), + (error_code & PF_INSTR), foreign)) return true; return false; } @@ -1112,7 +1113,8 @@ access_error(unsigned long error_code, struct vm_area_struct *vma) * faults just to hit a PF_PK as soon as we fill in a * page. 
*/ - if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), foreign)) + if (!arch_vma_access_permitted(vma, (error_code & PF_WRITE), + (error_code & PF_INSTR), foreign)) return 1; if (error_code & PF_WRITE) { @@ -1267,6 +1269,8 @@ __do_page_fault(struct pt_regs *regs, unsigned long error_code, if (error_code & PF_WRITE) flags |= FAULT_FLAG_WRITE; + if (error_code & PF_INSTR) + flags |= FAULT_FLAG_INSTRUCTION; /* * When running in the kernel we expect faults to occur only to diff --git a/include/asm-generic/mm_hooks.h b/include/asm-generic/mm_hooks.h index d5c9633bd955..cc5d9a1405df 100644 --- a/include/asm-generic/mm_hooks.h +++ b/include/asm-generic/mm_hooks.h @@ -27,7 +27,7 @@ static inline void arch_bprm_mm_init(struct mm_struct *mm, } static inline bool arch_vma_access_permitted(struct vm_area_struct *vma, - bool write, bool foreign) + bool write, bool execute, bool foreign) { /* by default, allow everything */ return true; diff --git a/include/linux/mm.h b/include/linux/mm.h index 2aaa0f0d67ea..7955c3eb83db 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -252,6 +252,7 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_TRIED 0x20 /* Second try */ #define FAULT_FLAG_USER 0x40 /* The fault originated in userspace */ #define FAULT_FLAG_REMOTE 0x80 /* faulting for non current tsk/mm */ +#define FAULT_FLAG_INSTRUCTION 0x100 /* The fault was during an instruction fetch */ /* * vm_fault is filled by the the pagefault handler and passed to the vma's diff --git a/mm/gup.c b/mm/gup.c index d276760163b3..7f1c4fb77cfa 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -449,7 +449,11 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) if (!(vm_flags & VM_MAYREAD)) return -EFAULT; } - if (!arch_vma_access_permitted(vma, write, foreign)) + /* + * gups are always data accesses, not instruction + * fetches, so execute=false here + */ + if (!arch_vma_access_permitted(vma, write, false, foreign)) return -EFAULT; return 0; } @@ -629,8 
+633,11 @@ bool vma_permits_fault(struct vm_area_struct *vma, unsigned int fault_flags) /* * The architecture might have a hardware protection * mechanism other than read/write that can deny access. + * + * gup always represents data access, not instruction + * fetches, so execute=false here: */ - if (!arch_vma_access_permitted(vma, write, foreign)) + if (!arch_vma_access_permitted(vma, write, false, foreign)) return false; return true; diff --git a/mm/memory.c b/mm/memory.c index 76c44e5dffa2..99e9f928264a 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -3380,6 +3380,7 @@ static int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma, pte_t *pte; if (!arch_vma_access_permitted(vma, flags & FAULT_FLAG_WRITE, + flags & FAULT_FLAG_INSTRUCTION, flags & FAULT_FLAG_REMOTE)) return VM_FAULT_SIGSEGV; -- cgit v1.2.3