From af80602799681c78f14fbe20b6185a56020dedee Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 25 Oct 2022 21:38:18 +0200
Subject: mm: Move mm_cachep initialization to mm_init()

In order to allow using mm_alloc() much earlier, move initializing
mm_cachep into mm_init().

Signed-off-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r/20221025201057.751153381@infradead.org
---
 kernel/fork.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

(limited to 'kernel')

diff --git a/kernel/fork.c b/kernel/fork.c
index 08969f5aa38d..451ce8063f85 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -3015,10 +3015,27 @@ static void sighand_ctor(void *data)
 	init_waitqueue_head(&sighand->signalfd_wqh);
 }
 
-void __init proc_caches_init(void)
+void __init mm_cache_init(void)
 {
 	unsigned int mm_size;
 
+	/*
+	 * The mm_cpumask is located at the end of mm_struct, and is
+	 * dynamically sized based on the maximum CPU number this system
+	 * can have, taking hotplug into account (nr_cpu_ids).
+	 */
+	mm_size = sizeof(struct mm_struct) + cpumask_size();
+
+	mm_cachep = kmem_cache_create_usercopy("mm_struct",
+			mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
+			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
+			offsetof(struct mm_struct, saved_auxv),
+			sizeof_field(struct mm_struct, saved_auxv),
+			NULL);
+}
+
+void __init proc_caches_init(void)
+{
 	sighand_cachep = kmem_cache_create("sighand_cache",
 			sizeof(struct sighand_struct), 0,
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_TYPESAFE_BY_RCU|
@@ -3036,19 +3053,6 @@ void __init proc_caches_init(void)
 			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
 			NULL);
 
-	/*
-	 * The mm_cpumask is located at the end of mm_struct, and is
-	 * dynamically sized based on the maximum CPU number this system
-	 * can have, taking hotplug into account (nr_cpu_ids).
-	 */
-	mm_size = sizeof(struct mm_struct) + cpumask_size();
-
-	mm_cachep = kmem_cache_create_usercopy("mm_struct",
-			mm_size, ARCH_MIN_MMSTRUCT_ALIGN,
-			SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_ACCOUNT,
-			offsetof(struct mm_struct, saved_auxv),
-			sizeof_field(struct mm_struct, saved_auxv),
-			NULL);
 	vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC|SLAB_ACCOUNT);
 	mmap_init();
 	nsproxy_cache_init();
-- 
cgit v1.2.3
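The init/main.c side of this change, the call that hooks mm_cache_init() into
mm_init(), is not visible above because the cgit view is limited to the
kernel/ directory. A minimal sketch of that wiring, assuming the call is
placed at the tail of mm_init(); the exact position inside the function is an
assumption, not taken from the patch:

	/* init/main.c -- sketch, not the verbatim hunk */
	static void __init mm_init(void)
	{
		/* ... page allocator, slab and vmalloc are brought up here ... */
		mm_cache_init();	/* mm_alloc() is usable from this point on */
	}

Anything that runs after mm_init() can now allocate a real mm, which is
exactly what the next patch relies on.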
From 3f4c8211d982099be693be9aa7d6fc4607dff290 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 25 Oct 2022 21:38:21 +0200
Subject: x86/mm: Use mm_alloc() in poking_init()

Instead of duplicating init_mm, allocate a fresh mm. The advantage is
that mm_alloc() has much simpler dependencies. Additionally it makes
more conceptual sense, init_mm has no (and must not have) user state
to duplicate.

Signed-off-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r/20221025201057.816175235@infradead.org
---
 arch/x86/mm/init.c         | 2 +-
 include/linux/sched/task.h | 1 -
 kernel/fork.c              | 5 -----
 3 files changed, 1 insertion(+), 7 deletions(-)

(limited to 'kernel')

diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 9121bc1b9453..d3987359d441 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -801,7 +801,7 @@ void __init poking_init(void)
 	spinlock_t *ptl;
 	pte_t *ptep;
 
-	poking_mm = copy_init_mm();
+	poking_mm = mm_alloc();
 	BUG_ON(!poking_mm);
 
 	/*
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 8431558641a4..357e0068497c 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -91,7 +91,6 @@ extern void exit_itimers(struct task_struct *);
 extern pid_t kernel_clone(struct kernel_clone_args *kargs);
 struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
 struct task_struct *fork_idle(int);
-struct mm_struct *copy_init_mm(void);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
 extern pid_t user_mode_thread(int (*fn)(void *), void *arg, unsigned long flags);
 extern long kernel_wait4(pid_t, int __user *, int, struct rusage *);
diff --git a/kernel/fork.c b/kernel/fork.c
index 451ce8063f85..6142c588c18a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2592,11 +2592,6 @@ struct task_struct * __init fork_idle(int cpu)
 	return task;
 }
 
-struct mm_struct *copy_init_mm(void)
-{
-	return dup_mm(NULL, &init_mm);
-}
-
 /*
  * This is like kernel_clone(), but shaved down and tailored to just
  * creating io_uring workers. It returns a created task, or an error pointer.
-- 
cgit v1.2.3
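mm_alloc() needs little more than mm_cachep, which the previous patch makes
available from mm_init(), while copy_init_mm() pulled in dup_mm() and, with
it, a good chunk of the fork path. As a sketch, the resulting pattern in
poking_init() looks like this (condensed from the hunk above; the surrounding
PTE setup is elided):

	struct mm_struct *poking_mm;

	void __init poking_init(void)
	{
		poking_mm = mm_alloc();	/* fresh, empty mm: nothing copied from init_mm */
		BUG_ON(!poking_mm);

		/* ... install the poking PTE into poking_mm ... */
	}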
From d48567c9a0d1e605639f8a8705a61bbb55fb4e84 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Wed, 26 Oct 2022 12:13:03 +0200
Subject: mm: Introduce set_memory_rox()

Because endlessly repeating:

	set_memory_ro()
	set_memory_x()

is getting tedious.

Suggested-by: Linus Torvalds
Signed-off-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r/Y1jek64pXOsougmz@hirez.programming.kicks-ass.net
---
 arch/arm/mach-omap1/sram-init.c | 8 +++-----
 arch/arm/mach-omap2/sram.c      | 8 +++-----
 arch/powerpc/kernel/kprobes.c   | 9 ++++-----
 arch/x86/kernel/ftrace.c        | 5 ++---
 arch/x86/kernel/kprobes/core.c  | 9 ++-------
 drivers/misc/sram-exec.c        | 7 ++-----
 include/linux/filter.h          | 3 +--
 include/linux/set_memory.h      | 8 ++++++++
 kernel/bpf/bpf_struct_ops.c     | 3 +--
 kernel/bpf/core.c               | 6 ++----
 kernel/bpf/trampoline.c         | 3 +--
 net/bpf/bpf_dummy_struct_ops.c  | 3 +--
 12 files changed, 30 insertions(+), 42 deletions(-)

(limited to 'kernel')

diff --git a/arch/arm/mach-omap1/sram-init.c b/arch/arm/mach-omap1/sram-init.c
index 27c42e2a21cc..dabf0c4defeb 100644
--- a/arch/arm/mach-omap1/sram-init.c
+++ b/arch/arm/mach-omap1/sram-init.c
@@ -10,11 +10,11 @@
 #include
 #include
 #include
+#include <linux/set_memory.h>
 
 #include
 #include
 #include
-#include <asm/set_memory.h>
 
 #include
 
@@ -74,8 +74,7 @@ void *omap_sram_push(void *funcp, unsigned long size)
 
 	dst = fncpy(sram, funcp, size);
 
-	set_memory_ro(base, pages);
-	set_memory_x(base, pages);
+	set_memory_rox(base, pages);
 
 	return dst;
 }
@@ -126,8 +125,7 @@ static void __init omap_detect_and_map_sram(void)
 	base = (unsigned long)omap_sram_base;
 	pages = PAGE_ALIGN(omap_sram_size) / PAGE_SIZE;
 
-	set_memory_ro(base, pages);
-	set_memory_x(base, pages);
+	set_memory_rox(base, pages);
 }
 
 static void (*_omap_sram_reprogram_clock)(u32 dpllctl, u32 ckctl);
diff --git a/arch/arm/mach-omap2/sram.c b/arch/arm/mach-omap2/sram.c
index 39cf270da718..815d390109d2 100644
--- a/arch/arm/mach-omap2/sram.c
+++ b/arch/arm/mach-omap2/sram.c
@@ -14,11 +14,11 @@
 #include
 #include
 #include
+#include <linux/set_memory.h>
 
 #include
 #include
 #include
-#include <asm/set_memory.h>
 
 #include
 
@@ -96,8 +96,7 @@ void *omap_sram_push(void *funcp, unsigned long size)
 
 	dst = fncpy(sram, funcp, size);
 
-	set_memory_ro(base, pages);
-	set_memory_x(base, pages);
+	set_memory_rox(base, pages);
 
 	return dst;
 }
@@ -217,8 +216,7 @@ static void __init omap2_map_sram(void)
 	base = (unsigned long)omap_sram_base;
 	pages = PAGE_ALIGN(omap_sram_size) / PAGE_SIZE;
 
-	set_memory_ro(base, pages);
-	set_memory_x(base, pages);
+	set_memory_rox(base, pages);
 }
 
 static void (*_omap2_sram_ddr_init)(u32 *slow_dll_ctrl, u32 fast_dll_ctrl,
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index bd7b1a035459..7a89de302609 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -20,12 +20,12 @@
 #include
 #include
 #include
+#include <linux/set_memory.h>
 #include
 #include
 #include
 #include
 #include
-#include <asm/set_memory.h>
 #include
 
 DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
@@ -134,10 +134,9 @@ void *alloc_insn_page(void)
 	if (!page)
 		return NULL;
 
-	if (strict_module_rwx_enabled()) {
-		set_memory_ro((unsigned long)page, 1);
-		set_memory_x((unsigned long)page, 1);
-	}
+	if (strict_module_rwx_enabled())
+		set_memory_rox((unsigned long)page, 1);
+
 	return page;
 }
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 43628b8480fa..03579460d0ec 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -24,10 +24,10 @@
 #include
 #include
 #include
+#include <linux/set_memory.h>
 
 #include
 
-#include <asm/set_memory.h>
 #include
 #include
 #include
@@ -415,8 +415,7 @@ create_trampoline(struct ftrace_ops *ops, unsigned int *tramp_size)
 
 	set_vm_flush_reset_perms(trampoline);
 
-	set_memory_ro((unsigned long)trampoline, npages);
-	set_memory_x((unsigned long)trampoline, npages);
+	set_memory_rox((unsigned long)trampoline, npages);
 	return (unsigned long)trampoline;
 fail:
 	tramp_free(trampoline);
diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c
index eb8bc82846b9..e7b7ca64acdf 100644
--- a/arch/x86/kernel/kprobes/core.c
+++ b/arch/x86/kernel/kprobes/core.c
@@ -43,6 +43,7 @@
 #include
 #include
 #include
+#include <linux/set_memory.h>
 
 #include
 #include
@@ -51,7 +52,6 @@
 #include
 #include
 #include
-#include <asm/set_memory.h>
 #include
 #include
 
 #include "common.h"
@@ -415,17 +415,12 @@ void *alloc_insn_page(void)
 		return NULL;
 
 	set_vm_flush_reset_perms(page);
-	/*
-	 * First make the page read-only, and only then make it executable to
-	 * prevent it from being W+X in between.
-	 */
-	set_memory_ro((unsigned long)page, 1);
 
 	/*
 	 * TODO: Once additional kernel code protection mechanisms are set, ensure
 	 * that the page was not maliciously altered and it is still zeroed.
 	 */
-	set_memory_x((unsigned long)page, 1);
+	set_memory_rox((unsigned long)page, 1);
 
 	return page;
 }
diff --git a/drivers/misc/sram-exec.c b/drivers/misc/sram-exec.c
index a948e95d4375..b71dbbd73738 100644
--- a/drivers/misc/sram-exec.c
+++ b/drivers/misc/sram-exec.c
@@ -10,9 +10,9 @@
 #include
 #include
 #include
+#include <linux/set_memory.h>
 
 #include
-#include <asm/set_memory.h>
 
 #include "sram.h"
 
@@ -106,10 +106,7 @@ void *sram_exec_copy(struct gen_pool *pool, void *dst, void *src,
 
 	dst_cpy = fncpy(dst, src, size);
 
-	ret = set_memory_ro((unsigned long)base, pages);
-	if (ret)
-		goto error_out;
-	ret = set_memory_x((unsigned long)base, pages);
+	ret = set_memory_rox((unsigned long)base, pages);
 	if (ret)
 		goto error_out;
 
diff --git a/include/linux/filter.h b/include/linux/filter.h
index efc42a6e3aed..f0b17aff4e66 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -860,8 +860,7 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp)
 static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr)
 {
 	set_vm_flush_reset_perms(hdr);
-	set_memory_ro((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
-	set_memory_x((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
+	set_memory_rox((unsigned long)hdr, hdr->size >> PAGE_SHIFT);
 }
 
 int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index 369769ce7399..023ebc67a36c 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -14,6 +14,14 @@ static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
 #endif
 
+static inline int set_memory_rox(unsigned long addr, int numpages)
+{
+	int ret = set_memory_ro(addr, numpages);
+	if (ret)
+		return ret;
+	return set_memory_x(addr, numpages);
+}
+
 #ifndef CONFIG_ARCH_HAS_SET_DIRECT_MAP
 static inline int set_direct_map_invalid_noflush(struct page *page)
 {
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 84b2d9dba79a..ece9870cab68 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -494,8 +494,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 	refcount_set(&kvalue->refcnt, 1);
 	bpf_map_inc(map);
 
-	set_memory_ro((long)st_map->image, 1);
-	set_memory_x((long)st_map->image, 1);
+	set_memory_rox((long)st_map->image, 1);
 	err = st_ops->reg(kdata);
 	if (likely(!err)) {
 		/* Pair with smp_load_acquire() during lookup_elem().
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 25a54e04560e..b0525ea25272 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -864,8 +864,7 @@ static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_ins
 	list_add_tail(&pack->list, &pack_list);
 
 	set_vm_flush_reset_perms(pack->ptr);
-	set_memory_ro((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
-	set_memory_x((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
+	set_memory_rox((unsigned long)pack->ptr, BPF_PROG_PACK_SIZE / PAGE_SIZE);
 	return pack;
 }
 
@@ -883,8 +882,7 @@ void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns)
 		if (ptr) {
 			bpf_fill_ill_insns(ptr, size);
 			set_vm_flush_reset_perms(ptr);
-			set_memory_ro((unsigned long)ptr, size / PAGE_SIZE);
-			set_memory_x((unsigned long)ptr, size / PAGE_SIZE);
+			set_memory_rox((unsigned long)ptr, size / PAGE_SIZE);
 		}
 		goto out;
 	}
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index bf0906e1e2b9..a848922ea6f7 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -468,8 +468,7 @@ again:
 	if (err < 0)
 		goto out;
 
-	set_memory_ro((long)im->image, 1);
-	set_memory_x((long)im->image, 1);
+	set_memory_rox((long)im->image, 1);
 
 	WARN_ON(tr->cur_image && tr->selector == 0);
 	WARN_ON(!tr->cur_image && tr->selector);
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index e78dadfc5829..9ff32324251b 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -124,8 +124,7 @@ int bpf_struct_ops_test_run(struct bpf_prog *prog, const union bpf_attr *kattr,
 	if (err < 0)
 		goto out;
 
-	set_memory_ro((long)image, 1);
-	set_memory_x((long)image, 1);
+	set_memory_rox((long)image, 1);
 	prog_ret = dummy_ops_call_op(image, args);
 
 	err = dummy_ops_copy_args(args);
-- 
cgit v1.2.3

From 1180e732c985ed3c8866d2fd9e02b619848404a0 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Thu, 26 Nov 2020 17:21:30 +0100
Subject: mm/gup: Fix the lockless PMD access

On architectures where the PTE/PMD is larger than the native word size
(i386-PAE for example), READ_ONCE() can do the wrong thing. Use
pmdp_get_lockless() just like we use ptep_get_lockless().

Signed-off-by: Peter Zijlstra (Intel)
Link: https://lkml.kernel.org/r/20221022114424.906110403%40infradead.org
---
 kernel/events/core.c | 2 +-
 mm/gup.c             | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'kernel')

diff --git a/kernel/events/core.c b/kernel/events/core.c
index 4ec3717003d5..b0d38424f3f7 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7306,7 +7306,7 @@ static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr)
 		return pud_leaf_size(pud);
 
 	pmdp = pmd_offset_lockless(pudp, pud, addr);
-	pmd = READ_ONCE(*pmdp);
+	pmd = pmdp_get_lockless(pmdp);
 	if (!pmd_present(pmd))
 		return 0;
 
diff --git a/mm/gup.c b/mm/gup.c
index fe195d47de74..ff8b22327e16 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -2808,7 +2808,7 @@ static int gup_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, unsigned lo
 
 	pmdp = pmd_offset_lockless(pudp, pud, addr);
 	do {
-		pmd_t pmd = READ_ONCE(*pmdp);
+		pmd_t pmd = pmdp_get_lockless(pmdp);
 		next = pmd_addr_end(addr, end);
 
 		if (!pmd_present(pmd))
-- 
cgit v1.2.3
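A usage note on the set_memory_rox() patch above: the helper preserves the
ordering that matters, read-only first and executable second, so a page is
never writable and executable at the same time, and each caller shrinks to a
single call plus one error check. A sketch of a caller publishing a freshly
written instruction page; the helper name and the single-page size are
illustrative, not taken from any file in the series:

	/* hypothetical helper: flip a filled code page to RO+X before use */
	static void *publish_insn_page(void *page)
	{
		set_vm_flush_reset_perms(page);	/* permissions reset on vfree() */
		if (set_memory_rox((unsigned long)page, 1))
			return NULL;
		return page;
	}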
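On the final patch: with i386-PAE a pmd_t is 64 bits wide while the native
word is 32 bits, so READ_ONCE(*pmdp) compiles into two independent 32-bit
loads and can observe a torn value, for instance the low half of an old PMD
combined with the high half of a new one while a concurrent THP collapse or
munmap() rewrites the entry. pmdp_get_lockless() hides the
architecture-specific cure. Roughly, the split-word variant behaves like the
sketch below; the field names are illustrative and this is a simplification
of the kernel's low/high retry scheme, not the verbatim implementation. On
64-bit architectures the helper decays to a single atomic read:

	/* sketch: tear-proof 64-bit PMD read on a 32-bit machine */
	static inline pmd_t pmdp_get_lockless(pmd_t *pmdp)
	{
		pmd_t pmd;

		do {
			pmd.pmd_low = READ_ONCE(pmdp->pmd_low);
			smp_rmb();	/* read low before high */
			pmd.pmd_high = READ_ONCE(pmdp->pmd_high);
			smp_rmb();	/* and high before the re-check */
		} while (unlikely(pmd.pmd_low != READ_ONCE(pmdp->pmd_low)));

		return pmd;
	}

The retry is sound because writers hold the page-table lock and update the
low word, which carries the present bit, in a way a lockless reader can
detect.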