From 86af8230ce138e0423f43f6b104f3fa050aced6d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 5 Aug 2022 16:07:02 +0200 Subject: x86/mm: Rename set_memory_present() to set_memory_p() Have it adhere to the naming convention for those helpers. No functional changes. Signed-off-by: Borislav Petkov Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20220805140702.31538-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/mm/pat/set_memory.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 1abd5438f126..6a9043b6b8f6 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1944,7 +1944,7 @@ int set_mce_nospec(unsigned long pfn) return rc; } -static int set_memory_present(unsigned long *addr, int numpages) +static int set_memory_p(unsigned long *addr, int numpages) { return change_page_attr_set(addr, numpages, __pgprot(_PAGE_PRESENT), 0); } @@ -1954,7 +1954,7 @@ int clear_mce_nospec(unsigned long pfn) { unsigned long addr = (unsigned long) pfn_to_kaddr(pfn); - return set_memory_present(&addr, 1); + return set_memory_p(&addr, 1); } EXPORT_SYMBOL_GPL(clear_mce_nospec); #endif /* CONFIG_X86_64 */ -- cgit v1.2.3 From 652c5bf380ad018e15006a7f8349800245ddbbad Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 29 Aug 2022 12:18:03 +0200 Subject: x86/mm: Refuse W^X violations x86 has STRICT_*_RWX, but not even a warning when someone violates it. Add this warning and fully refuse the transition. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Dave Hansen Link: https://lkml.kernel.org/r/YwySW3ROc21hN7g9@hirez.programming.kicks-ass.net --- arch/x86/mm/pat/set_memory.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) (limited to 'arch') diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 6a9043b6b8f6..1a2d6376251c 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -579,6 +579,33 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start, return __pgprot(pgprot_val(prot) & ~forbidden); } +/* + * Validate and enforce strict W^X semantics. + */ +static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long start, + unsigned long pfn, unsigned long npg) +{ + unsigned long end; + + if (!cpu_feature_enabled(X86_FEATURE_NX)) + return new; + + if (!((pgprot_val(old) ^ pgprot_val(new)) & (_PAGE_RW | _PAGE_NX))) + return new; + + if ((pgprot_val(new) & (_PAGE_RW | _PAGE_NX)) != _PAGE_RW) + return new; + + end = start + npg * PAGE_SIZE - 1; + WARN_ONCE(1, "CPA refuse W^X violation: %016llx -> %016llx range: 0x%016lx - 0x%016lx PFN %lx\n", + (unsigned long long)pgprot_val(old), + (unsigned long long)pgprot_val(new), + start, end, pfn); + + /* refuse the transition into WX */ + return old; +} + /* * Lookup the page table entry for a virtual address in a specific pgd. * Return a pointer to the entry and the level of the mapping. @@ -885,6 +912,8 @@ static int __should_split_large_page(pte_t *kpte, unsigned long address, new_prot = static_protections(req_prot, lpaddr, old_pfn, numpages, psize, CPA_DETECT); + new_prot = verify_rwx(old_prot, new_prot, lpaddr, old_pfn, numpages); + /* * If there is a conflict, split the large page. * @@ -1525,6 +1554,7 @@ repeat: if (level == PG_LEVEL_4K) { pte_t new_pte; + pgprot_t old_prot = pte_pgprot(old_pte); pgprot_t new_prot = pte_pgprot(old_pte); unsigned long pfn = pte_pfn(old_pte); @@ -1536,6 +1566,8 @@ repeat: new_prot = static_protections(new_prot, address, pfn, 1, 0, CPA_PROTECT); + new_prot = verify_rwx(old_prot, new_prot, address, pfn, 1); + new_prot = pgprot_clear_protnone_bits(new_prot); /* -- cgit v1.2.3 From 81290934eafd19ad1799526ed63ce9d92df6294c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 2 Sep 2022 10:37:53 +0200 Subject: x86/defconfig: Refresh the defconfigs Just go through a 'make savedefconfig' cycle to pick up fresh Kconfig details, no change in settings, just reordering of some entries. ( This makes followup changes generated via 'make savedefconfig' contain less noise. ) Signed-off-by: Ingo Molnar --- arch/x86/configs/i386_defconfig | 5 +++-- arch/x86/configs/x86_64_defconfig | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index 7207219509f6..a2924891b4cf 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -27,7 +27,6 @@ CONFIG_CGROUP_MISC=y CONFIG_CGROUP_DEBUG=y CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y -# CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_SMP=y CONFIG_HYPERVISOR_GUEST=y @@ -44,6 +43,7 @@ CONFIG_EFI_STUB=y CONFIG_HZ_1000=y CONFIG_KEXEC=y CONFIG_CRASH_DUMP=y +# CONFIG_RETHUNK is not set CONFIG_HIBERNATION=y CONFIG_PM_DEBUG=y CONFIG_PM_TRACE_RTC=y @@ -62,6 +62,7 @@ CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_CGROUP_IOPRIO=y CONFIG_BINFMT_MISC=y +# CONFIG_COMPAT_BRK is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -269,9 +270,9 @@ CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_PRINTK_TIME=y +CONFIG_DEBUG_KERNEL=y CONFIG_FRAME_WARN=1024 CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y CONFIG_DEBUG_STACK_USAGE=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 5ce67b73e218..2a7333ba514a 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -26,7 +26,6 @@ CONFIG_CGROUP_MISC=y CONFIG_CGROUP_DEBUG=y CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y -# CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y CONFIG_SMP=y CONFIG_HYPERVISOR_GUEST=y @@ -62,6 +61,7 @@ CONFIG_BLK_CGROUP_IOLATENCY=y CONFIG_BLK_CGROUP_IOCOST=y CONFIG_BLK_CGROUP_IOPRIO=y CONFIG_BINFMT_MISC=y +# CONFIG_COMPAT_BRK is not set CONFIG_NET=y CONFIG_PACKET=y CONFIG_UNIX=y @@ -267,8 +267,8 @@ CONFIG_SECURITY_SELINUX=y CONFIG_SECURITY_SELINUX_BOOTPARAM=y CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_PRINTK_TIME=y -CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y +CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_STACK_USAGE=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y -- cgit v1.2.3 From c0d2e63d4c618185cdd92faae10bdde33a00c25d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 2 Sep 2022 10:41:42 +0200 Subject: x86/defconfig: Enable CONFIG_DEBUG_WX=y 7 years after it got introduced it's time to make this the default, at least in the x86 defconfigs. Signed-off-by: Ingo Molnar --- arch/x86/configs/i386_defconfig | 1 + arch/x86/configs/x86_64_defconfig | 1 + 2 files changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig index a2924891b4cf..3cf34912abfe 100644 --- a/arch/x86/configs/i386_defconfig +++ b/arch/x86/configs/i386_defconfig @@ -273,6 +273,7 @@ CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y CONFIG_FRAME_WARN=1024 CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_WX=y CONFIG_DEBUG_STACK_USAGE=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 2a7333ba514a..27759236fd60 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -269,6 +269,7 @@ CONFIG_SECURITY_SELINUX_DISABLE=y CONFIG_PRINTK_TIME=y CONFIG_DEBUG_KERNEL=y CONFIG_MAGIC_SYSRQ=y +CONFIG_DEBUG_WX=y CONFIG_DEBUG_STACK_USAGE=y # CONFIG_SCHED_DEBUG is not set CONFIG_SCHEDSTATS=y -- cgit v1.2.3 From a3d3163fbe690cfec354fc20808adf0629adf8da Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 20 Sep 2022 11:54:54 -0700 Subject: x86/mm/32: Fix W^X detection when page tables do not support NX The x86 MM code now actively refuses to create writable+executable mappings, and warns when there is an attempt to create one. The 0day test robot ran across a warning triggered by module unloading on 32-bit kernels. This was only seen on CPUs with NX support, but where a 32-bit kernel was built without PAE support. On those systems, there is no room for the NX bit in the page tables and _PAGE_NX is #defined to 0, breaking some of the W^X detection logic in verify_rwx(). The X86_FEATURE_NX check in there does not do any good here because the CPU itself supports NX. Fix it by checking for _PAGE_NX support directly instead of checking CPU support for NX. Note that since _PAGE_NX is actually defined to be 0 at compile-time this fix should also end up letting the compiler optimize away most of verify_rwx() on non-PAE kernels. Fixes: 652c5bf380ad ("x86/mm: Refuse W^X violations") Reported-by: kernel test robot Signed-off-by: Dave Hansen Signed-off-by: Ingo Molnar Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/fcf89147-440b-e478-40c9-228c9fe56691@intel.com/ --- arch/x86/mm/pat/set_memory.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 1a2d6376251c..20b1e24baa85 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -587,7 +587,8 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star { unsigned long end; - if (!cpu_feature_enabled(X86_FEATURE_NX)) + /* Only enforce when NX is supported: */ + if (!(__supported_pte_mask & _PAGE_NX)) return new; if (!((pgprot_val(old) ^ pgprot_val(new)) & (_PAGE_RW | _PAGE_NX))) -- cgit v1.2.3 From 334b2cea811944df99ae2172bcc0effcdfdbe862 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 28 Sep 2022 09:30:31 +0200 Subject: x86/mm: Add prot_sethuge() helper to abstract out _PAGE_PSE handling We still have some historic cases of direct fiddling of page attributes with (dangerous & fragile) type casting and address shifting. Add the prot_sethuge() helper instead that gets the types right and doesn't have to transform addresses. ( Also add a debug check to make sure this doesn't get applied to _PAGE_BIT_PAT/_PAGE_BIT_PAT_LARGE pages. ) Signed-off-by: Linus Torvalds Signed-off-by: Ingo Molnar Reviewed-by: Dave Hansen --- arch/x86/mm/init_64.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0fe690ebc269..7ea7d4745681 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -90,6 +90,12 @@ DEFINE_ENTRY(pud, pud, init) DEFINE_ENTRY(pmd, pmd, init) DEFINE_ENTRY(pte, pte, init) +static inline pgprot_t prot_sethuge(pgprot_t prot) +{ + WARN_ON_ONCE(pgprot_val(prot) & _PAGE_PAT); + + return __pgprot(pgprot_val(prot) | _PAGE_PSE); +} /* * NOTE: pagetable_init alloc all the fixmap pagetables contiguous on the @@ -557,9 +563,8 @@ phys_pmd_init(pmd_t *pmd_page, unsigned long paddr, unsigned long paddr_end, if (page_size_mask & (1<> PAGE_SHIFT, - __pgprot(pgprot_val(prot) | _PAGE_PSE)), + set_pmd_init(pmd, + pfn_pmd(paddr >> PAGE_SHIFT, prot_sethuge(prot)), init); spin_unlock(&init_mm.page_table_lock); paddr_last = paddr_next; @@ -644,12 +649,8 @@ phys_pud_init(pud_t *pud_page, unsigned long paddr, unsigned long paddr_end, if (page_size_mask & (1<> PAGE_SHIFT, - prot), + set_pud_init(pud, + pfn_pud(paddr >> PAGE_SHIFT, prot_sethuge(prot)), init); spin_unlock(&init_mm.page_table_lock); paddr_last = paddr_next; -- cgit v1.2.3 From 8c4934f4754057e3577bb1536c6ecc0efa2c966e Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Fri, 23 Sep 2022 14:29:45 -0700 Subject: x86/mm: Disable W^X detection and enforcement on 32-bit The 32-bit code is in a weird spot. Some 32-bit builds (non-PAE) do not even have NX support. Even PAE builds that support NX have to contend with things like EFI data and code mixed in the same pages where W+X is unavoidable. The folks still running X86_32=y kernels are unlikely to care much about NX. That combined with the fundamental inability fix _all_ of the W+X things means this code had little value on X86_32=y. Disable the checks. Reported-by: Guenter Roeck Signed-off-by: Dave Hansen Cc: Ard Biesheuvel Cc: Darren Hart Cc: Andy Shevchenko Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: x86@kernel.org Cc: linux-efi@vger.kernel.org Cc: "H. Peter Anvin" Cc: Kees Cook Link: https://lore.kernel.org/all/CAMj1kXHcF_iK_g0OZSkSv56Wmr=eQGQwNstcNjLEfS=mm7a06w@mail.gmail.com/ --- arch/x86/mm/pat/set_memory.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index 20b1e24baa85..efe882c753ca 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -587,6 +587,14 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star { unsigned long end; + /* + * 32-bit has some unfixable W+X issues, like EFI code + * and writeable data being in the same page. Disable + * detection and enforcement there. + */ + if (IS_ENABLED(CONFIG_X86_32)) + return new; + /* Only enforce when NX is supported: */ if (!(__supported_pte_mask & _PAGE_NX)) return new; -- cgit v1.2.3 From c5129ecc12a3101555d8922b1e0aa90f91247ab6 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Mon, 3 Oct 2022 13:23:46 -0700 Subject: x86/mm: Ease W^X enforcement back to just a warning Currently, the "change_page_attr" (CPA) code refuses to create W+X mappings on 64-bit kernels. There have been reports both from 32-bit[1] and from BPF[2] users where this change kept the system from booting. These reports are showing up even after about a month of soak time in -next. To avoid breaking anything, never enforce W^X. Always warn and return the requested permissions even if a problem is detected. 1. https://lore.kernel.org/all/CAMj1kXHcF_iK_g0OZSkSv56Wmr=eQGQwNstcNjLEfS=mm7a06w@mail.gmail.com/ 2. https://lore.kernel.org/bpf/c84cc27c1a5031a003039748c3c099732a718aec.camel@kernel.org/T/#u Signed-off-by: Dave Hansen Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Borislav Petkov Cc: x86@kernel.org Cc: "H. Peter Anvin" Cc: Kees Cook Cc: Peter Zijlstra --- arch/x86/mm/pat/set_memory.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index efe882c753ca..97342c42dda8 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -580,7 +580,7 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long start, } /* - * Validate and enforce strict W^X semantics. + * Validate strict W^X semantics. */ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long start, unsigned long pfn, unsigned long npg) @@ -595,7 +595,7 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star if (IS_ENABLED(CONFIG_X86_32)) return new; - /* Only enforce when NX is supported: */ + /* Only verify when NX is supported: */ if (!(__supported_pte_mask & _PAGE_NX)) return new; @@ -606,13 +606,17 @@ static inline pgprot_t verify_rwx(pgprot_t old, pgprot_t new, unsigned long star return new; end = start + npg * PAGE_SIZE - 1; - WARN_ONCE(1, "CPA refuse W^X violation: %016llx -> %016llx range: 0x%016lx - 0x%016lx PFN %lx\n", + WARN_ONCE(1, "CPA detected W^X violation: %016llx -> %016llx range: 0x%016lx - 0x%016lx PFN %lx\n", (unsigned long long)pgprot_val(old), (unsigned long long)pgprot_val(new), start, end, pfn); - /* refuse the transition into WX */ - return old; + /* + * For now, allow all permission change attempts by returning the + * attempted permissions. This can 'return old' to actively + * refuse the permission change at a later time. + */ + return new; } /* -- cgit v1.2.3