From e846d13958066828a9483d862cc8370a72fadbb6 Mon Sep 17 00:00:00 2001 From: Zhou Chengming Date: Thu, 2 Nov 2017 09:18:21 +0800 Subject: kprobes, x86/alternatives: Use text_mutex to protect smp_alt_modules We use alternatives_text_reserved() to check if the address is in the fixed pieces of alternative reserved, but the problem is that we don't hold the smp_alt mutex when call this function. So the list traversal may encounter a deleted list_head if another path is doing alternatives_smp_module_del(). One solution is that we can hold smp_alt mutex before call this function, but the difficult point is that the callers of this functions, arch_prepare_kprobe() and arch_prepare_optimized_kprobe(), are called inside the text_mutex. So we must hold smp_alt mutex before we go into these arch dependent code. But we can't now, the smp_alt mutex is the arch dependent part, only x86 has it. Maybe we can export another arch dependent callback to solve this. But there is a simpler way to handle this problem. We can reuse the text_mutex to protect smp_alt_modules instead of using another mutex. And all the arch dependent checks of kprobes are inside the text_mutex, so it's safe now. Signed-off-by: Zhou Chengming Reviewed-by: Masami Hiramatsu Acked-by: Steven Rostedt (VMware) Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@suse.de Fixes: 2cfa197 "ftrace/alternatives: Introducing *_text_reserved functions" Link: http://lkml.kernel.org/r/1509585501-79466-1-git-send-email-zhouchengming1@huawei.com Signed-off-by: Ingo Molnar --- kernel/extable.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'kernel') diff --git a/kernel/extable.c b/kernel/extable.c index 9aa1cc41ecf7..a17fdb63dc3e 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -31,6 +31,8 @@ * mutex protecting text section modification (dynamic code patching). * some users need to sleep (allocating memory...) while they hold this lock. * + * Note: Also protects SMP-alternatives modification on x86. + * * NOT exported to modules - patching kernel text is a really delicate matter. */ DEFINE_MUTEX(text_mutex); -- cgit v1.2.3 From 4ac2aed837cbdbb21c12a28c04718e34c1dc225f Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 20 Oct 2017 09:30:50 -0500 Subject: resource: Consolidate resource walking code The walk_iomem_res_desc(), walk_system_ram_res() and walk_system_ram_range() functions each have much of the same code. Create a new function that consolidates the common code from these functions in one place to reduce the amount of duplicated code. Signed-off-by: Tom Lendacky Signed-off-by: Brijesh Singh Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Tested-by: Borislav Petkov Cc: kvm@vger.kernel.org Cc: Borislav Petkov Link: https://lkml.kernel.org/r/20171020143059.3291-9-brijesh.singh@amd.com --- kernel/resource.c | 52 +++++++++++++++++++++++++--------------------------- 1 file changed, 25 insertions(+), 27 deletions(-) (limited to 'kernel') diff --git a/kernel/resource.c b/kernel/resource.c index 9b5f04404152..7323c1b636cd 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -400,6 +400,26 @@ static int find_next_iomem_res(struct resource *res, unsigned long desc, return 0; } +static int __walk_iomem_res_desc(struct resource *res, unsigned long desc, + bool first_level_children_only, + void *arg, int (*func)(u64, u64, void *)) +{ + u64 orig_end = res->end; + int ret = -1; + + while ((res->start < res->end) && + !find_next_iomem_res(res, desc, first_level_children_only)) { + ret = (*func)(res->start, res->end, arg); + if (ret) + break; + + res->start = res->end + 1; + res->end = orig_end; + } + + return ret; +} + /* * Walks through iomem resources and calls func() with matching resource * ranges. This walks through whole tree and not just first level children. @@ -418,26 +438,12 @@ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end, void *arg, int (*func)(u64, u64, void *)) { struct resource res; - u64 orig_end; - int ret = -1; res.start = start; res.end = end; res.flags = flags; - orig_end = res.end; - - while ((res.start < res.end) && - (!find_next_iomem_res(&res, desc, false))) { - - ret = (*func)(res.start, res.end, arg); - if (ret) - break; - - res.start = res.end + 1; - res.end = orig_end; - } - return ret; + return __walk_iomem_res_desc(&res, desc, false, arg, func); } /* @@ -451,22 +457,13 @@ int walk_system_ram_res(u64 start, u64 end, void *arg, int (*func)(u64, u64, void *)) { struct resource res; - u64 orig_end; - int ret = -1; res.start = start; res.end = end; res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - orig_end = res.end; - while ((res.start < res.end) && - (!find_next_iomem_res(&res, IORES_DESC_NONE, true))) { - ret = (*func)(res.start, res.end, arg); - if (ret) - break; - res.start = res.end + 1; - res.end = orig_end; - } - return ret; + + return __walk_iomem_res_desc(&res, IORES_DESC_NONE, true, + arg, func); } #if !defined(CONFIG_ARCH_HAS_WALK_MEMORY) @@ -508,6 +505,7 @@ static int __is_ram(unsigned long pfn, unsigned long nr_pages, void *arg) { return 1; } + /* * This generic page_is_ram() returns true if specified address is * registered as System RAM in iomem_resource list. -- cgit v1.2.3 From 1d2e733b13b450e5854f4a8f8efcd77fa7362d62 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 20 Oct 2017 09:30:51 -0500 Subject: resource: Provide resource struct in resource walk callback In preperation for a new function that will need additional resource information during the resource walk, update the resource walk callback to pass the resource structure. Since the current callback start and end arguments are pulled from the resource structure, the callback functions can obtain them from the resource structure directly. Signed-off-by: Tom Lendacky Signed-off-by: Brijesh Singh Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Reviewed-by: Borislav Petkov Tested-by: Borislav Petkov Cc: kvm@vger.kernel.org Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: linuxppc-dev@lists.ozlabs.org Link: https://lkml.kernel.org/r/20171020143059.3291-10-brijesh.singh@amd.com --- arch/powerpc/kernel/machine_kexec_file_64.c | 12 +++++++++--- arch/x86/kernel/crash.c | 18 +++++++++--------- arch/x86/kernel/pmem.c | 2 +- include/linux/ioport.h | 4 ++-- include/linux/kexec.h | 2 +- kernel/kexec_file.c | 5 +++-- kernel/resource.c | 9 +++++---- 7 files changed, 30 insertions(+), 22 deletions(-) (limited to 'kernel') diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c index 992c0d258e5d..e4395f937d63 100644 --- a/arch/powerpc/kernel/machine_kexec_file_64.c +++ b/arch/powerpc/kernel/machine_kexec_file_64.c @@ -91,11 +91,13 @@ int arch_kimage_file_post_load_cleanup(struct kimage *image) * and that value will be returned. If all free regions are visited without * func returning non-zero, then zero will be returned. */ -int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *)) +int arch_kexec_walk_mem(struct kexec_buf *kbuf, + int (*func)(struct resource *, void *)) { int ret = 0; u64 i; phys_addr_t mstart, mend; + struct resource res = { }; if (kbuf->top_down) { for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0, @@ -105,7 +107,9 @@ int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *)) * range while in kexec, end points to the last byte * in the range. */ - ret = func(mstart, mend - 1, kbuf); + res.start = mstart; + res.end = mend - 1; + ret = func(&res, kbuf); if (ret) break; } @@ -117,7 +121,9 @@ int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *)) * range while in kexec, end points to the last byte * in the range. */ - ret = func(mstart, mend - 1, kbuf); + res.start = mstart; + res.end = mend - 1; + ret = func(&res, kbuf); if (ret) break; } diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c index 44404e2307bb..815008c9ca18 100644 --- a/arch/x86/kernel/crash.c +++ b/arch/x86/kernel/crash.c @@ -209,7 +209,7 @@ void native_machine_crash_shutdown(struct pt_regs *regs) } #ifdef CONFIG_KEXEC_FILE -static int get_nr_ram_ranges_callback(u64 start, u64 end, void *arg) +static int get_nr_ram_ranges_callback(struct resource *res, void *arg) { unsigned int *nr_ranges = arg; @@ -342,7 +342,7 @@ static int elf_header_exclude_ranges(struct crash_elf_data *ced, return ret; } -static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg) +static int prepare_elf64_ram_headers_callback(struct resource *res, void *arg) { struct crash_elf_data *ced = arg; Elf64_Ehdr *ehdr; @@ -355,7 +355,7 @@ static int prepare_elf64_ram_headers_callback(u64 start, u64 end, void *arg) ehdr = ced->ehdr; /* Exclude unwanted mem ranges */ - ret = elf_header_exclude_ranges(ced, start, end); + ret = elf_header_exclude_ranges(ced, res->start, res->end); if (ret) return ret; @@ -518,14 +518,14 @@ static int add_e820_entry(struct boot_params *params, struct e820_entry *entry) return 0; } -static int memmap_entry_callback(u64 start, u64 end, void *arg) +static int memmap_entry_callback(struct resource *res, void *arg) { struct crash_memmap_data *cmd = arg; struct boot_params *params = cmd->params; struct e820_entry ei; - ei.addr = start; - ei.size = end - start + 1; + ei.addr = res->start; + ei.size = res->end - res->start + 1; ei.type = cmd->type; add_e820_entry(params, &ei); @@ -619,12 +619,12 @@ out: return ret; } -static int determine_backup_region(u64 start, u64 end, void *arg) +static int determine_backup_region(struct resource *res, void *arg) { struct kimage *image = arg; - image->arch.backup_src_start = start; - image->arch.backup_src_sz = end - start + 1; + image->arch.backup_src_start = res->start; + image->arch.backup_src_sz = res->end - res->start + 1; /* Expecting only one range for backup region */ return 1; diff --git a/arch/x86/kernel/pmem.c b/arch/x86/kernel/pmem.c index 3fe690067802..6b07faaa1579 100644 --- a/arch/x86/kernel/pmem.c +++ b/arch/x86/kernel/pmem.c @@ -7,7 +7,7 @@ #include #include -static int found(u64 start, u64 end, void *data) +static int found(struct resource *res, void *data) { return 1; } diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 83c8d6530f0f..c0070d7c4b99 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -272,10 +272,10 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)); extern int walk_system_ram_res(u64 start, u64 end, void *arg, - int (*func)(u64, u64, void *)); + int (*func)(struct resource *, void *)); extern int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end, - void *arg, int (*func)(u64, u64, void *)); + void *arg, int (*func)(struct resource *, void *)); /* True if any part of r1 overlaps r2 */ static inline bool resource_overlaps(struct resource *r1, struct resource *r2) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 1c08c925cefb..f16f6ceb3875 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -160,7 +160,7 @@ struct kexec_buf { }; int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf, - int (*func)(u64, u64, void *)); + int (*func)(struct resource *, void *)); extern int kexec_add_buffer(struct kexec_buf *kbuf); int kexec_locate_mem_hole(struct kexec_buf *kbuf); #endif /* CONFIG_KEXEC_FILE */ diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 9f48f4412297..e5bcd94c1efb 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -406,9 +406,10 @@ static int locate_mem_hole_bottom_up(unsigned long start, unsigned long end, return 1; } -static int locate_mem_hole_callback(u64 start, u64 end, void *arg) +static int locate_mem_hole_callback(struct resource *res, void *arg) { struct kexec_buf *kbuf = (struct kexec_buf *)arg; + u64 start = res->start, end = res->end; unsigned long sz = end - start + 1; /* Returning 0 will take to next memory range */ @@ -437,7 +438,7 @@ static int locate_mem_hole_callback(u64 start, u64 end, void *arg) * func returning non-zero, then zero will be returned. */ int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf, - int (*func)(u64, u64, void *)) + int (*func)(struct resource *, void *)) { if (kbuf->image->type == KEXEC_TYPE_CRASH) return walk_iomem_res_desc(crashk_res.desc, diff --git a/kernel/resource.c b/kernel/resource.c index 7323c1b636cd..8430042fa77b 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -402,14 +402,15 @@ static int find_next_iomem_res(struct resource *res, unsigned long desc, static int __walk_iomem_res_desc(struct resource *res, unsigned long desc, bool first_level_children_only, - void *arg, int (*func)(u64, u64, void *)) + void *arg, + int (*func)(struct resource *, void *)) { u64 orig_end = res->end; int ret = -1; while ((res->start < res->end) && !find_next_iomem_res(res, desc, first_level_children_only)) { - ret = (*func)(res->start, res->end, arg); + ret = (*func)(res, arg); if (ret) break; @@ -435,7 +436,7 @@ static int __walk_iomem_res_desc(struct resource *res, unsigned long desc, * and set it in 'desc' of a target resource entry. */ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, - u64 end, void *arg, int (*func)(u64, u64, void *)) + u64 end, void *arg, int (*func)(struct resource *, void *)) { struct resource res; @@ -454,7 +455,7 @@ int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, * ranges. */ int walk_system_ram_res(u64 start, u64 end, void *arg, - int (*func)(u64, u64, void *)) + int (*func)(struct resource *, void *)) { struct resource res; -- cgit v1.2.3 From 0e4c12b45aa88e74fdda117896d2b61c4e510cb9 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 20 Oct 2017 09:30:52 -0500 Subject: x86/mm, resource: Use PAGE_KERNEL protection for ioremap of memory pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order for memory pages to be properly mapped when SEV is active, it's necessary to use the PAGE_KERNEL protection attribute as the base protection. This ensures that memory mapping of, e.g. ACPI tables, receives the proper mapping attributes. Signed-off-by: Tom Lendacky Signed-off-by: Brijesh Singh Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Tested-by: Borislav Petkov Cc: Laura Abbott Cc: Kees Cook Cc: kvm@vger.kernel.org Cc: Jérôme Glisse Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Andrew Morton Cc: Dan Williams Cc: "Kirill A. Shutemov" Link: https://lkml.kernel.org/r/20171020143059.3291-11-brijesh.singh@amd.com --- arch/x86/mm/ioremap.c | 79 ++++++++++++++++++++++++++++++++++++++++++-------- include/linux/ioport.h | 3 ++ kernel/resource.c | 19 ++++++++++++ 3 files changed, 89 insertions(+), 12 deletions(-) (limited to 'kernel') diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index 52cc0f4ed494..6e4573b1da34 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -27,6 +27,11 @@ #include "physaddr.h" +struct ioremap_mem_flags { + bool system_ram; + bool desc_other; +}; + /* * Fix up the linear direct mapping of the kernel to avoid cache attribute * conflicts. @@ -56,17 +61,59 @@ int ioremap_change_attr(unsigned long vaddr, unsigned long size, return err; } -static int __ioremap_check_ram(unsigned long start_pfn, unsigned long nr_pages, - void *arg) +static bool __ioremap_check_ram(struct resource *res) { + unsigned long start_pfn, stop_pfn; unsigned long i; - for (i = 0; i < nr_pages; ++i) - if (pfn_valid(start_pfn + i) && - !PageReserved(pfn_to_page(start_pfn + i))) - return 1; + if ((res->flags & IORESOURCE_SYSTEM_RAM) != IORESOURCE_SYSTEM_RAM) + return false; - return 0; + start_pfn = (res->start + PAGE_SIZE - 1) >> PAGE_SHIFT; + stop_pfn = (res->end + 1) >> PAGE_SHIFT; + if (stop_pfn > start_pfn) { + for (i = 0; i < (stop_pfn - start_pfn); ++i) + if (pfn_valid(start_pfn + i) && + !PageReserved(pfn_to_page(start_pfn + i))) + return true; + } + + return false; +} + +static int __ioremap_check_desc_other(struct resource *res) +{ + return (res->desc != IORES_DESC_NONE); +} + +static int __ioremap_res_check(struct resource *res, void *arg) +{ + struct ioremap_mem_flags *flags = arg; + + if (!flags->system_ram) + flags->system_ram = __ioremap_check_ram(res); + + if (!flags->desc_other) + flags->desc_other = __ioremap_check_desc_other(res); + + return flags->system_ram && flags->desc_other; +} + +/* + * To avoid multiple resource walks, this function walks resources marked as + * IORESOURCE_MEM and IORESOURCE_BUSY and looking for system RAM and/or a + * resource described not as IORES_DESC_NONE (e.g. IORES_DESC_ACPI_TABLES). + */ +static void __ioremap_check_mem(resource_size_t addr, unsigned long size, + struct ioremap_mem_flags *flags) +{ + u64 start, end; + + start = (u64)addr; + end = start + size - 1; + memset(flags, 0, sizeof(*flags)); + + walk_mem_res(start, end, flags, __ioremap_res_check); } /* @@ -87,9 +134,10 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, unsigned long size, enum page_cache_mode pcm, void *caller) { unsigned long offset, vaddr; - resource_size_t pfn, last_pfn, last_addr; + resource_size_t last_addr; const resource_size_t unaligned_phys_addr = phys_addr; const unsigned long unaligned_size = size; + struct ioremap_mem_flags mem_flags; struct vm_struct *area; enum page_cache_mode new_pcm; pgprot_t prot; @@ -108,13 +156,12 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, return NULL; } + __ioremap_check_mem(phys_addr, size, &mem_flags); + /* * Don't allow anybody to remap normal RAM that we're using.. */ - pfn = phys_addr >> PAGE_SHIFT; - last_pfn = last_addr >> PAGE_SHIFT; - if (walk_system_ram_range(pfn, last_pfn - pfn + 1, NULL, - __ioremap_check_ram) == 1) { + if (mem_flags.system_ram) { WARN_ONCE(1, "ioremap on RAM at %pa - %pa\n", &phys_addr, &last_addr); return NULL; @@ -146,7 +193,15 @@ static void __iomem *__ioremap_caller(resource_size_t phys_addr, pcm = new_pcm; } + /* + * If the page being mapped is in memory and SEV is active then + * make sure the memory encryption attribute is enabled in the + * resulting mapping. + */ prot = PAGE_KERNEL_IO; + if (sev_active() && mem_flags.desc_other) + prot = pgprot_encrypted(prot); + switch (pcm) { case _PAGE_CACHE_MODE_UC: default: diff --git a/include/linux/ioport.h b/include/linux/ioport.h index c0070d7c4b99..93b4183cf53d 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -271,6 +271,9 @@ extern int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)); extern int +walk_mem_res(u64 start, u64 end, void *arg, + int (*func)(struct resource *, void *)); +extern int walk_system_ram_res(u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)); extern int diff --git a/kernel/resource.c b/kernel/resource.c index 8430042fa77b..54ba6de3757c 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -397,6 +397,8 @@ static int find_next_iomem_res(struct resource *res, unsigned long desc, res->start = p->start; if (res->end > p->end) res->end = p->end; + res->flags = p->flags; + res->desc = p->desc; return 0; } @@ -467,6 +469,23 @@ int walk_system_ram_res(u64 start, u64 end, void *arg, arg, func); } +/* + * This function calls the @func callback against all memory ranges, which + * are ranges marked as IORESOURCE_MEM and IORESOUCE_BUSY. + */ +int walk_mem_res(u64 start, u64 end, void *arg, + int (*func)(struct resource *, void *)) +{ + struct resource res; + + res.start = start; + res.end = end; + res.flags = IORESOURCE_MEM | IORESOURCE_BUSY; + + return __walk_iomem_res_desc(&res, IORES_DESC_NONE, true, + arg, func); +} + #if !defined(CONFIG_ARCH_HAS_WALK_MEMORY) /* -- cgit v1.2.3