diff options
Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/Makefile | 4 | ||||
-rw-r--r-- | arch/x86/mm/cpu_entry_area.c | 26 | ||||
-rw-r--r-- | arch/x86/mm/extable.c | 49 | ||||
-rw-r--r-- | arch/x86/mm/fault.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/init.c | 8 | ||||
-rw-r--r-- | arch/x86/mm/init_64.c | 16 | ||||
-rw-r--r-- | arch/x86/mm/ioremap.c | 19 | ||||
-rw-r--r-- | arch/x86/mm/kasan_init_64.c | 61 | ||||
-rw-r--r-- | arch/x86/mm/kmmio.c | 7 | ||||
-rw-r--r-- | arch/x86/mm/maccess.c | 43 | ||||
-rw-r--r-- | arch/x86/mm/mem_encrypt.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/mem_encrypt_boot.S | 8 | ||||
-rw-r--r-- | arch/x86/mm/mmio-mod.c | 6 | ||||
-rw-r--r-- | arch/x86/mm/numa.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/numa_emulation.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/pageattr.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/pat.c | 8 | ||||
-rw-r--r-- | arch/x86/mm/pat_internal.h | 20 | ||||
-rw-r--r-- | arch/x86/mm/pat_interval.c | 185 | ||||
-rw-r--r-- | arch/x86/mm/pat_rbtree.c | 268 | ||||
-rw-r--r-- | arch/x86/mm/pgtable.c | 4 | ||||
-rw-r--r-- | arch/x86/mm/pti.c | 2 | ||||
-rw-r--r-- | arch/x86/mm/testmmiotrace.c | 6 |
23 files changed, 380 insertions, 374 deletions
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index 84373dc9b341..3b89c201ac26 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -13,7 +13,7 @@ CFLAGS_REMOVE_mem_encrypt_identity.o = -pg endif obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ - pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o + pat.o pgtable.o physaddr.o setup_nx.o tlb.o cpu_entry_area.o maccess.o # Make sure __phys_addr has no stackprotector nostackp := $(call cc-option, -fno-stack-protector) @@ -23,7 +23,7 @@ CFLAGS_mem_encrypt_identity.o := $(nostackp) CFLAGS_fault.o := -I $(srctree)/$(src)/../include/asm/trace -obj-$(CONFIG_X86_PAT) += pat_rbtree.o +obj-$(CONFIG_X86_PAT) += pat_interval.o obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c index 752ad11d6868..56f9189bbadb 100644 --- a/arch/x86/mm/cpu_entry_area.c +++ b/arch/x86/mm/cpu_entry_area.c @@ -17,6 +17,10 @@ static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks); DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks); #endif +#if defined(CONFIG_X86_32) && defined(CONFIG_DOUBLEFAULT) +DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack); +#endif + struct cpu_entry_area *get_cpu_entry_area(int cpu) { unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE; @@ -108,7 +112,15 @@ static void __init percpu_setup_exception_stacks(unsigned int cpu) cea_map_stack(MCE); } #else -static inline void percpu_setup_exception_stacks(unsigned int cpu) {} +static inline void percpu_setup_exception_stacks(unsigned int cpu) +{ +#ifdef CONFIG_DOUBLEFAULT + struct cpu_entry_area *cea = get_cpu_entry_area(cpu); + + cea_map_percpu_pages(&cea->doublefault_stack, + &per_cpu(doublefault_stack, cpu), 1, PAGE_KERNEL); +#endif +} #endif /* Setup the fixmap mappings only once per-processor */ @@ -161,6 +173,14 @@ static void __init setup_cpu_entry_area(unsigned int cpu) BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^ offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK); BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0); + /* + * VMX changes the host TR limit to 0x67 after a VM exit. This is + * okay, since 0x67 covers the size of struct x86_hw_tss. Make sure + * that this is correct. + */ + BUILD_BUG_ON(offsetof(struct tss_struct, x86_tss) != 0); + BUILD_BUG_ON(sizeof(struct x86_hw_tss) != 0x68); + cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu), sizeof(struct tss_struct) / PAGE_SIZE, tss_prot); @@ -178,7 +198,9 @@ static __init void setup_cpu_entry_area_ptes(void) #ifdef CONFIG_X86_32 unsigned long start, end; - BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE); + /* The +1 is for the readonly IDT: */ + BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE); + BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE); BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK); start = CPU_ENTRY_AREA_BASE; diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 4d75bc656f97..30bb0bd3b1b8 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -45,55 +45,6 @@ __visible bool ex_handler_fault(const struct exception_table_entry *fixup, EXPORT_SYMBOL_GPL(ex_handler_fault); /* - * Handler for UD0 exception following a failed test against the - * result of a refcount inc/dec/add/sub. - */ -__visible bool ex_handler_refcount(const struct exception_table_entry *fixup, - struct pt_regs *regs, int trapnr, - unsigned long error_code, - unsigned long fault_addr) -{ - /* First unconditionally saturate the refcount. */ - *(int *)regs->cx = INT_MIN / 2; - - /* - * Strictly speaking, this reports the fixup destination, not - * the fault location, and not the actually overflowing - * instruction, which is the instruction before the "js", but - * since that instruction could be a variety of lengths, just - * report the location after the overflow, which should be close - * enough for finding the overflow, as it's at least back in - * the function, having returned from .text.unlikely. - */ - regs->ip = ex_fixup_addr(fixup); - - /* - * This function has been called because either a negative refcount - * value was seen by any of the refcount functions, or a zero - * refcount value was seen by refcount_dec(). - * - * If we crossed from INT_MAX to INT_MIN, OF (Overflow Flag: result - * wrapped around) will be set. Additionally, seeing the refcount - * reach 0 will set ZF (Zero Flag: result was zero). In each of - * these cases we want a report, since it's a boundary condition. - * The SF case is not reported since it indicates post-boundary - * manipulations below zero or above INT_MAX. And if none of the - * flags are set, something has gone very wrong, so report it. - */ - if (regs->flags & (X86_EFLAGS_OF | X86_EFLAGS_ZF)) { - bool zero = regs->flags & X86_EFLAGS_ZF; - - refcount_error_report(regs, zero ? "hit zero" : "overflow"); - } else if ((regs->flags & X86_EFLAGS_SF) == 0) { - /* Report if none of OF, ZF, nor SF are set. */ - refcount_error_report(regs, "unexpected saturation"); - } - - return true; -} -EXPORT_SYMBOL(ex_handler_refcount); - -/* * Handler for when we fail to restore a task's FPU state. We should never get * here because the FPU state of a task using the FPU (task->thread.fpu.state) * should always be valid. However, past bugs have allowed userspace to set diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 9ceacd1156db..304d31d8cbbc 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -197,7 +197,7 @@ void vmalloc_sync_all(void) return; for (address = VMALLOC_START & PMD_MASK; - address >= TASK_SIZE_MAX && address < FIXADDR_TOP; + address >= TASK_SIZE_MAX && address < VMALLOC_END; address += PMD_SIZE) { struct page *page; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index fd10d91a6115..e7bb483557c9 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -829,14 +829,13 @@ void free_init_pages(const char *what, unsigned long begin, unsigned long end) * used for the kernel image only. free_init_pages() will do the * right thing for either kind of address. */ -void free_kernel_image_pages(void *begin, void *end) +void free_kernel_image_pages(const char *what, void *begin, void *end) { unsigned long begin_ul = (unsigned long)begin; unsigned long end_ul = (unsigned long)end; unsigned long len_pages = (end_ul - begin_ul) >> PAGE_SHIFT; - - free_init_pages("unused kernel image", begin_ul, end_ul); + free_init_pages(what, begin_ul, end_ul); /* * PTI maps some of the kernel into userspace. For performance, @@ -865,7 +864,8 @@ void __ref free_initmem(void) mem_encrypt_free_decrypted_mem(); - free_kernel_image_pages(&__init_begin, &__init_end); + free_kernel_image_pages("unused kernel image (initmem)", + &__init_begin, &__init_end); } #ifdef CONFIG_BLK_DEV_INITRD diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index a6b5c653727b..dcb9bc961b39 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1263,7 +1263,7 @@ int kernel_set_to_readonly; void set_kernel_text_rw(void) { unsigned long start = PFN_ALIGN(_text); - unsigned long end = PFN_ALIGN(__stop___ex_table); + unsigned long end = PFN_ALIGN(_etext); if (!kernel_set_to_readonly) return; @@ -1282,7 +1282,7 @@ void set_kernel_text_rw(void) void set_kernel_text_ro(void) { unsigned long start = PFN_ALIGN(_text); - unsigned long end = PFN_ALIGN(__stop___ex_table); + unsigned long end = PFN_ALIGN(_etext); if (!kernel_set_to_readonly) return; @@ -1300,9 +1300,9 @@ void mark_rodata_ro(void) { unsigned long start = PFN_ALIGN(_text); unsigned long rodata_start = PFN_ALIGN(__start_rodata); - unsigned long end = (unsigned long) &__end_rodata_hpage_align; - unsigned long text_end = PFN_ALIGN(&__stop___ex_table); - unsigned long rodata_end = PFN_ALIGN(&__end_rodata); + unsigned long end = (unsigned long)__end_rodata_hpage_align; + unsigned long text_end = PFN_ALIGN(_etext); + unsigned long rodata_end = PFN_ALIGN(__end_rodata); unsigned long all_end; printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", @@ -1334,8 +1334,10 @@ void mark_rodata_ro(void) set_memory_ro(start, (end-start) >> PAGE_SHIFT); #endif - free_kernel_image_pages((void *)text_end, (void *)rodata_start); - free_kernel_image_pages((void *)rodata_end, (void *)_sdata); + free_kernel_image_pages("unused kernel image (text/rodata gap)", + (void *)text_end, (void *)rodata_start); + free_kernel_image_pages("unused kernel image (rodata/data gap)", + (void *)rodata_end, (void *)_sdata); debug_checkwx(); } diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index a39dcdb5ae34..b3a2936377b5 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -280,11 +280,11 @@ err_free_memtype: } /** - * ioremap_nocache - map bus memory into CPU space + * ioremap - map bus memory into CPU space * @phys_addr: bus address of the memory * @size: size of the resource to map * - * ioremap_nocache performs a platform specific sequence of operations to + * ioremap performs a platform specific sequence of operations to * make bus memory CPU accessible via the readb/readw/readl/writeb/ * writew/writel functions and the other mmio helpers. The returned * address is not guaranteed to be usable directly as a virtual @@ -300,7 +300,7 @@ err_free_memtype: * * Must be freed with iounmap. */ -void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) +void __iomem *ioremap(resource_size_t phys_addr, unsigned long size) { /* * Ideally, this should be: @@ -315,7 +315,7 @@ void __iomem *ioremap_nocache(resource_size_t phys_addr, unsigned long size) return __ioremap_caller(phys_addr, size, pcm, __builtin_return_address(0), false); } -EXPORT_SYMBOL(ioremap_nocache); +EXPORT_SYMBOL(ioremap); /** * ioremap_uc - map bus memory into CPU space as strongly uncachable @@ -626,6 +626,17 @@ static bool memremap_is_setup_data(resource_size_t phys_addr, paddr_next = data->next; len = data->len; + if ((phys_addr > paddr) && (phys_addr < (paddr + len))) { + memunmap(data); + return true; + } + + if (data->type == SETUP_INDIRECT && + ((struct setup_indirect *)data->data)->type != SETUP_INDIRECT) { + paddr = ((struct setup_indirect *)data->data)->addr; + len = ((struct setup_indirect *)data->data)->len; + } + memunmap(data); if ((phys_addr > paddr) && (phys_addr < (paddr + len))) diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 296da58f3013..cf5bc37c90ac 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -245,6 +245,49 @@ static void __init kasan_map_early_shadow(pgd_t *pgd) } while (pgd++, addr = next, addr != end); } +static void __init kasan_shallow_populate_p4ds(pgd_t *pgd, + unsigned long addr, + unsigned long end) +{ + p4d_t *p4d; + unsigned long next; + void *p; + + p4d = p4d_offset(pgd, addr); + do { + next = p4d_addr_end(addr, end); + + if (p4d_none(*p4d)) { + p = early_alloc(PAGE_SIZE, NUMA_NO_NODE, true); + p4d_populate(&init_mm, p4d, p); + } + } while (p4d++, addr = next, addr != end); +} + +static void __init kasan_shallow_populate_pgds(void *start, void *end) +{ + unsigned long addr, next; + pgd_t *pgd; + void *p; + + addr = (unsigned long)start; + pgd = pgd_offset_k(addr); + do { + next = pgd_addr_end(addr, (unsigned long)end); + + if (pgd_none(*pgd)) { + p = early_alloc(PAGE_SIZE, NUMA_NO_NODE, true); + pgd_populate(&init_mm, pgd, p); + } + + /* + * we need to populate p4ds to be synced when running in + * four level mode - see sync_global_pgds_l4() + */ + kasan_shallow_populate_p4ds(pgd, addr, next); + } while (pgd++, addr = next, addr != (unsigned long)end); +} + #ifdef CONFIG_KASAN_INLINE static int kasan_die_handler(struct notifier_block *self, unsigned long val, @@ -354,6 +397,24 @@ void __init kasan_init(void) kasan_populate_early_shadow( kasan_mem_to_shadow((void *)PAGE_OFFSET + MAXMEM), + kasan_mem_to_shadow((void *)VMALLOC_START)); + + /* + * If we're in full vmalloc mode, don't back vmalloc space with early + * shadow pages. Instead, prepopulate pgds/p4ds so they are synced to + * the global table and we can populate the lower levels on demand. + */ + if (IS_ENABLED(CONFIG_KASAN_VMALLOC)) + kasan_shallow_populate_pgds( + kasan_mem_to_shadow((void *)VMALLOC_START), + kasan_mem_to_shadow((void *)VMALLOC_END)); + else + kasan_populate_early_shadow( + kasan_mem_to_shadow((void *)VMALLOC_START), + kasan_mem_to_shadow((void *)VMALLOC_END)); + + kasan_populate_early_shadow( + kasan_mem_to_shadow((void *)VMALLOC_END + 1), shadow_cpu_entry_begin); kasan_populate_shadow((unsigned long)shadow_cpu_entry_begin, diff --git a/arch/x86/mm/kmmio.c b/arch/x86/mm/kmmio.c index 79eb55ce69a9..49d7814b59a9 100644 --- a/arch/x86/mm/kmmio.c +++ b/arch/x86/mm/kmmio.c @@ -193,8 +193,8 @@ static int arm_kmmio_fault_page(struct kmmio_fault_page *f) int ret; WARN_ONCE(f->armed, KERN_ERR pr_fmt("kmmio page already armed.\n")); if (f->armed) { - pr_warning("double-arm: addr 0x%08lx, ref %d, old %d\n", - f->addr, f->count, !!f->old_presence); + pr_warn("double-arm: addr 0x%08lx, ref %d, old %d\n", + f->addr, f->count, !!f->old_presence); } ret = clear_page_presence(f, true); WARN_ONCE(ret < 0, KERN_ERR pr_fmt("arming at 0x%08lx failed.\n"), @@ -341,8 +341,7 @@ static int post_kmmio_handler(unsigned long condition, struct pt_regs *regs) * something external causing them (f.e. using a debugger while * mmio tracing enabled), or erroneous behaviour */ - pr_warning("unexpected debug trap on CPU %d.\n", - smp_processor_id()); + pr_warn("unexpected debug trap on CPU %d.\n", smp_processor_id()); goto out; } diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c new file mode 100644 index 000000000000..f5b85bdc0535 --- /dev/null +++ b/arch/x86/mm/maccess.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/uaccess.h> +#include <linux/kernel.h> + +#ifdef CONFIG_X86_64 +static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits) +{ + return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits); +} + +static __always_inline bool invalid_probe_range(u64 vaddr) +{ + /* + * Range covering the highest possible canonical userspace address + * as well as non-canonical address range. For the canonical range + * we also need to include the userspace guard page. + */ + return vaddr < TASK_SIZE_MAX + PAGE_SIZE || + canonical_address(vaddr, boot_cpu_data.x86_virt_bits) != vaddr; +} +#else +static __always_inline bool invalid_probe_range(u64 vaddr) +{ + return vaddr < TASK_SIZE_MAX; +} +#endif + +long probe_kernel_read_strict(void *dst, const void *src, size_t size) +{ + if (unlikely(invalid_probe_range((unsigned long)src))) + return -EFAULT; + + return __probe_kernel_read(dst, src, size); +} + +long strncpy_from_unsafe_strict(char *dst, const void *unsafe_addr, long count) +{ + if (unlikely(invalid_probe_range((unsigned long)unsafe_addr))) + return -EFAULT; + + return __strncpy_from_unsafe(dst, unsafe_addr, count); +} diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c index 9268c12458c8..a03614bd3e1a 100644 --- a/arch/x86/mm/mem_encrypt.c +++ b/arch/x86/mm/mem_encrypt.c @@ -367,7 +367,7 @@ bool force_dma_unencrypted(struct device *dev) if (sme_active()) { u64 dma_enc_mask = DMA_BIT_MASK(__ffs64(sme_me_mask)); u64 dma_dev_mask = min_not_zero(dev->coherent_dma_mask, - dev->bus_dma_mask); + dev->bus_dma_limit); if (dma_dev_mask <= dma_enc_mask) return true; diff --git a/arch/x86/mm/mem_encrypt_boot.S b/arch/x86/mm/mem_encrypt_boot.S index 6d71481a1e70..106ead05bbe3 100644 --- a/arch/x86/mm/mem_encrypt_boot.S +++ b/arch/x86/mm/mem_encrypt_boot.S @@ -16,7 +16,7 @@ .text .code64 -ENTRY(sme_encrypt_execute) +SYM_FUNC_START(sme_encrypt_execute) /* * Entry parameters: @@ -66,9 +66,9 @@ ENTRY(sme_encrypt_execute) pop %rbp ret -ENDPROC(sme_encrypt_execute) +SYM_FUNC_END(sme_encrypt_execute) -ENTRY(__enc_copy) +SYM_FUNC_START(__enc_copy) /* * Routine used to encrypt memory in place. * This routine must be run outside of the kernel proper since @@ -153,4 +153,4 @@ ENTRY(__enc_copy) ret .L__enc_copy_end: -ENDPROC(__enc_copy) +SYM_FUNC_END(__enc_copy) diff --git a/arch/x86/mm/mmio-mod.c b/arch/x86/mm/mmio-mod.c index b8ef8557d4b3..673de6063345 100644 --- a/arch/x86/mm/mmio-mod.c +++ b/arch/x86/mm/mmio-mod.c @@ -394,7 +394,7 @@ static void enter_uniprocessor(void) } out: if (num_online_cpus() > 1) - pr_warning("multiple CPUs still online, may miss events.\n"); + pr_warn("multiple CPUs still online, may miss events.\n"); } static void leave_uniprocessor(void) @@ -418,8 +418,8 @@ static void leave_uniprocessor(void) static void enter_uniprocessor(void) { if (num_online_cpus() > 1) - pr_warning("multiple CPUs are online, may miss events. " - "Suggest booting with maxcpus=1 kernel argument.\n"); + pr_warn("multiple CPUs are online, may miss events. " + "Suggest booting with maxcpus=1 kernel argument.\n"); } static void leave_uniprocessor(void) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 4123100e0eaf..99f7a68738f0 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -699,7 +699,7 @@ static int __init dummy_numa_init(void) * x86_numa_init - Initialize NUMA * * Try each configured NUMA initialization method until one succeeds. The - * last fallback is dummy single node config encomapssing whole memory and + * last fallback is dummy single node config encompassing whole memory and * never fails. */ void __init x86_numa_init(void) diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c index abffa0be80da..7f1d2034df1e 100644 --- a/arch/x86/mm/numa_emulation.c +++ b/arch/x86/mm/numa_emulation.c @@ -438,7 +438,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) goto no_emu; if (numa_cleanup_meminfo(&ei) < 0) { - pr_warning("NUMA: Warning: constructed meminfo invalid, disabling emulation\n"); + pr_warn("NUMA: Warning: constructed meminfo invalid, disabling emulation\n"); goto no_emu; } @@ -449,7 +449,7 @@ void __init numa_emulation(struct numa_meminfo *numa_meminfo, int numa_dist_cnt) phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), phys_size, PAGE_SIZE); if (!phys) { - pr_warning("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); + pr_warn("NUMA: Warning: can't allocate copy of distance table, disabling emulation\n"); goto no_emu; } memblock_reserve(phys, phys_size); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 0d09cc5aad61..1b99ad05b117 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -1784,7 +1784,7 @@ static inline int cpa_clear_pages_array(struct page **pages, int numpages, int _set_memory_uc(unsigned long addr, int numpages) { /* - * for now UC MINUS. see comments in ioremap_nocache() + * for now UC MINUS. see comments in ioremap() * If you really need strong UC use ioremap_uc(), but note * that you cannot override IO areas with set_memory_*() as * these helpers cannot work with IO memory. @@ -1799,7 +1799,7 @@ int set_memory_uc(unsigned long addr, int numpages) int ret; /* - * for now UC MINUS. see comments in ioremap_nocache() + * for now UC MINUS. see comments in ioremap() */ ret = reserve_memtype(__pa(addr), __pa(addr) + numpages * PAGE_SIZE, _PAGE_CACHE_MODE_UC_MINUS, NULL); diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index d9fbd4f69920..2d758e19ef22 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -603,7 +603,7 @@ int reserve_memtype(u64 start, u64 end, enum page_cache_mode req_type, spin_lock(&memtype_lock); - err = rbt_memtype_check_insert(new, new_type); + err = memtype_check_insert(new, new_type); if (err) { pr_info("x86/PAT: reserve_memtype failed [mem %#010Lx-%#010Lx], track %s, req %s\n", start, end - 1, @@ -650,7 +650,7 @@ int free_memtype(u64 start, u64 end) } spin_lock(&memtype_lock); - entry = rbt_memtype_erase(start, end); + entry = memtype_erase(start, end); spin_unlock(&memtype_lock); if (IS_ERR(entry)) { @@ -693,7 +693,7 @@ static enum page_cache_mode lookup_memtype(u64 paddr) spin_lock(&memtype_lock); - entry = rbt_memtype_lookup(paddr); + entry = memtype_lookup(paddr); if (entry != NULL) rettype = entry->type; else @@ -1109,7 +1109,7 @@ static struct memtype *memtype_get_idx(loff_t pos) return NULL; spin_lock(&memtype_lock); - ret = rbt_memtype_copy_nth_element(print_entry, pos); + ret = memtype_copy_nth_element(print_entry, pos); spin_unlock(&memtype_lock); if (!ret) { diff --git a/arch/x86/mm/pat_internal.h b/arch/x86/mm/pat_internal.h index eeb5caeb089b..79a06684349e 100644 --- a/arch/x86/mm/pat_internal.h +++ b/arch/x86/mm/pat_internal.h @@ -29,20 +29,20 @@ static inline char *cattr_name(enum page_cache_mode pcm) } #ifdef CONFIG_X86_PAT -extern int rbt_memtype_check_insert(struct memtype *new, - enum page_cache_mode *new_type); -extern struct memtype *rbt_memtype_erase(u64 start, u64 end); -extern struct memtype *rbt_memtype_lookup(u64 addr); -extern int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos); +extern int memtype_check_insert(struct memtype *new, + enum page_cache_mode *new_type); +extern struct memtype *memtype_erase(u64 start, u64 end); +extern struct memtype *memtype_lookup(u64 addr); +extern int memtype_copy_nth_element(struct memtype *out, loff_t pos); #else -static inline int rbt_memtype_check_insert(struct memtype *new, - enum page_cache_mode *new_type) +static inline int memtype_check_insert(struct memtype *new, + enum page_cache_mode *new_type) { return 0; } -static inline struct memtype *rbt_memtype_erase(u64 start, u64 end) +static inline struct memtype *memtype_erase(u64 start, u64 end) { return NULL; } -static inline struct memtype *rbt_memtype_lookup(u64 addr) +static inline struct memtype *memtype_lookup(u64 addr) { return NULL; } -static inline int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) +static inline int memtype_copy_nth_element(struct memtype *out, loff_t pos) { return 0; } #endif diff --git a/arch/x86/mm/pat_interval.c b/arch/x86/mm/pat_interval.c new file mode 100644 index 000000000000..6855362eaf21 --- /dev/null +++ b/arch/x86/mm/pat_interval.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Handle caching attributes in page tables (PAT) + * + * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> + * Suresh B Siddha <suresh.b.siddha@intel.com> + * + * Interval tree used to store the PAT memory type reservations. + */ + +#include <linux/seq_file.h> +#include <linux/debugfs.h> +#include <linux/kernel.h> +#include <linux/interval_tree_generic.h> +#include <linux/sched.h> +#include <linux/gfp.h> + +#include <asm/pgtable.h> +#include <asm/pat.h> + +#include "pat_internal.h" + +/* + * The memtype tree keeps track of memory type for specific + * physical memory areas. Without proper tracking, conflicting memory + * types in different mappings can cause CPU cache corruption. + * + * The tree is an interval tree (augmented rbtree) with tree ordered + * on starting address. Tree can contain multiple entries for + * different regions which overlap. All the aliases have the same + * cache attributes of course. + * + * memtype_lock protects the rbtree. + */ +static inline u64 memtype_interval_start(struct memtype *memtype) +{ + return memtype->start; +} + +static inline u64 memtype_interval_end(struct memtype *memtype) +{ + return memtype->end - 1; +} +INTERVAL_TREE_DEFINE(struct memtype, rb, u64, subtree_max_end, + memtype_interval_start, memtype_interval_end, + static, memtype_interval) + +static struct rb_root_cached memtype_rbroot = RB_ROOT_CACHED; + +enum { + MEMTYPE_EXACT_MATCH = 0, + MEMTYPE_END_MATCH = 1 +}; + +static struct memtype *memtype_match(u64 start, u64 end, int match_type) +{ + struct memtype *match; + + match = memtype_interval_iter_first(&memtype_rbroot, start, end-1); + while (match != NULL && match->start < end) { + if ((match_type == MEMTYPE_EXACT_MATCH) && + (match->start == start) && (match->end == end)) + return match; + + if ((match_type == MEMTYPE_END_MATCH) && + (match->start < start) && (match->end == end)) + return match; + + match = memtype_interval_iter_next(match, start, end-1); + } + + return NULL; /* Returns NULL if there is no match */ +} + +static int memtype_check_conflict(u64 start, u64 end, + enum page_cache_mode reqtype, + enum page_cache_mode *newtype) +{ + struct memtype *match; + enum page_cache_mode found_type = reqtype; + + match = memtype_interval_iter_first(&memtype_rbroot, start, end-1); + if (match == NULL) + goto success; + + if (match->type != found_type && newtype == NULL) + goto failure; + + dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); + found_type = match->type; + + match = memtype_interval_iter_next(match, start, end-1); + while (match) { + if (match->type != found_type) + goto failure; + + match = memtype_interval_iter_next(match, start, end-1); + } +success: + if (newtype) + *newtype = found_type; + + return 0; + +failure: + pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n", + current->comm, current->pid, start, end, + cattr_name(found_type), cattr_name(match->type)); + return -EBUSY; +} + +int memtype_check_insert(struct memtype *new, + enum page_cache_mode *ret_type) +{ + int err = 0; + + err = memtype_check_conflict(new->start, new->end, new->type, ret_type); + if (err) + return err; + + if (ret_type) + new->type = *ret_type; + + memtype_interval_insert(new, &memtype_rbroot); + return 0; +} + +struct memtype *memtype_erase(u64 start, u64 end) +{ + struct memtype *data; + + /* + * Since the memtype_rbroot tree allows overlapping ranges, + * memtype_erase() checks with EXACT_MATCH first, i.e. free + * a whole node for the munmap case. If no such entry is found, + * it then checks with END_MATCH, i.e. shrink the size of a node + * from the end for the mremap case. + */ + data = memtype_match(start, end, MEMTYPE_EXACT_MATCH); + if (!data) { + data = memtype_match(start, end, MEMTYPE_END_MATCH); + if (!data) + return ERR_PTR(-EINVAL); + } + + if (data->start == start) { + /* munmap: erase this node */ + memtype_interval_remove(data, &memtype_rbroot); + } else { + /* mremap: update the end value of this node */ + memtype_interval_remove(data, &memtype_rbroot); + data->end = start; + memtype_interval_insert(data, &memtype_rbroot); + return NULL; + } + + return data; +} + +struct memtype *memtype_lookup(u64 addr) +{ + return memtype_interval_iter_first(&memtype_rbroot, addr, + addr + PAGE_SIZE-1); +} + +#if defined(CONFIG_DEBUG_FS) +int memtype_copy_nth_element(struct memtype *out, loff_t pos) +{ + struct memtype *match; + int i = 1; + + match = memtype_interval_iter_first(&memtype_rbroot, 0, ULONG_MAX); + while (match && pos != i) { + match = memtype_interval_iter_next(match, 0, ULONG_MAX); + i++; + } + + if (match) { /* pos == i */ + *out = *match; + return 0; + } else { + return 1; + } +} +#endif diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c deleted file mode 100644 index 65ebe4b88f7c..000000000000 --- a/arch/x86/mm/pat_rbtree.c +++ /dev/null @@ -1,268 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Handle caching attributes in page tables (PAT) - * - * Authors: Venkatesh Pallipadi <venkatesh.pallipadi@intel.com> - * Suresh B Siddha <suresh.b.siddha@intel.com> - * - * Interval tree (augmented rbtree) used to store the PAT memory type - * reservations. - */ - -#include <linux/seq_file.h> -#include <linux/debugfs.h> -#include <linux/kernel.h> -#include <linux/rbtree_augmented.h> -#include <linux/sched.h> -#include <linux/gfp.h> - -#include <asm/pgtable.h> -#include <asm/pat.h> - -#include "pat_internal.h" - -/* - * The memtype tree keeps track of memory type for specific - * physical memory areas. Without proper tracking, conflicting memory - * types in different mappings can cause CPU cache corruption. - * - * The tree is an interval tree (augmented rbtree) with tree ordered - * on starting address. Tree can contain multiple entries for - * different regions which overlap. All the aliases have the same - * cache attributes of course. - * - * memtype_lock protects the rbtree. - */ - -static struct rb_root memtype_rbroot = RB_ROOT; - -static int is_node_overlap(struct memtype *node, u64 start, u64 end) -{ - if (node->start >= end || node->end <= start) - return 0; - - return 1; -} - -static u64 get_subtree_max_end(struct rb_node *node) -{ - u64 ret = 0; - if (node) { - struct memtype *data = rb_entry(node, struct memtype, rb); - ret = data->subtree_max_end; - } - return ret; -} - -#define NODE_END(node) ((node)->end) - -RB_DECLARE_CALLBACKS_MAX(static, memtype_rb_augment_cb, - struct memtype, rb, u64, subtree_max_end, NODE_END) - -/* Find the first (lowest start addr) overlapping range from rb tree */ -static struct memtype *memtype_rb_lowest_match(struct rb_root *root, - u64 start, u64 end) -{ - struct rb_node *node = root->rb_node; - struct memtype *last_lower = NULL; - - while (node) { - struct memtype *data = rb_entry(node, struct memtype, rb); - - if (get_subtree_max_end(node->rb_left) > start) { - /* Lowest overlap if any must be on left side */ - node = node->rb_left; - } else if (is_node_overlap(data, start, end)) { - last_lower = data; - break; - } else if (start >= data->start) { - /* Lowest overlap if any must be on right side */ - node = node->rb_right; - } else { - break; - } - } - return last_lower; /* Returns NULL if there is no overlap */ -} - -enum { - MEMTYPE_EXACT_MATCH = 0, - MEMTYPE_END_MATCH = 1 -}; - -static struct memtype *memtype_rb_match(struct rb_root *root, - u64 start, u64 end, int match_type) -{ - struct memtype *match; - - match = memtype_rb_lowest_match(root, start, end); - while (match != NULL && match->start < end) { - struct rb_node *node; - - if ((match_type == MEMTYPE_EXACT_MATCH) && - (match->start == start) && (match->end == end)) - return match; - - if ((match_type == MEMTYPE_END_MATCH) && - (match->start < start) && (match->end == end)) - return match; - - node = rb_next(&match->rb); - if (node) - match = rb_entry(node, struct memtype, rb); - else - match = NULL; - } - - return NULL; /* Returns NULL if there is no match */ -} - -static int memtype_rb_check_conflict(struct rb_root *root, - u64 start, u64 end, - enum page_cache_mode reqtype, - enum page_cache_mode *newtype) -{ - struct rb_node *node; - struct memtype *match; - enum page_cache_mode found_type = reqtype; - - match = memtype_rb_lowest_match(&memtype_rbroot, start, end); - if (match == NULL) - goto success; - - if (match->type != found_type && newtype == NULL) - goto failure; - - dprintk("Overlap at 0x%Lx-0x%Lx\n", match->start, match->end); - found_type = match->type; - - node = rb_next(&match->rb); - while (node) { - match = rb_entry(node, struct memtype, rb); - - if (match->start >= end) /* Checked all possible matches */ - goto success; - - if (is_node_overlap(match, start, end) && - match->type != found_type) { - goto failure; - } - - node = rb_next(&match->rb); - } -success: - if (newtype) - *newtype = found_type; - - return 0; - -failure: - pr_info("x86/PAT: %s:%d conflicting memory types %Lx-%Lx %s<->%s\n", - current->comm, current->pid, start, end, - cattr_name(found_type), cattr_name(match->type)); - return -EBUSY; -} - -static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) -{ - struct rb_node **node = &(root->rb_node); - struct rb_node *parent = NULL; - - while (*node) { - struct memtype *data = rb_entry(*node, struct memtype, rb); - - parent = *node; - if (data->subtree_max_end < newdata->end) - data->subtree_max_end = newdata->end; - if (newdata->start <= data->start) - node = &((*node)->rb_left); - else if (newdata->start > data->start) - node = &((*node)->rb_right); - } - - newdata->subtree_max_end = newdata->end; - rb_link_node(&newdata->rb, parent, node); - rb_insert_augmented(&newdata->rb, root, &memtype_rb_augment_cb); -} - -int rbt_memtype_check_insert(struct memtype *new, - enum page_cache_mode *ret_type) -{ - int err = 0; - - err = memtype_rb_check_conflict(&memtype_rbroot, new->start, new->end, - new->type, ret_type); - - if (!err) { - if (ret_type) - new->type = *ret_type; - - new->subtree_max_end = new->end; - memtype_rb_insert(&memtype_rbroot, new); - } - return err; -} - -struct memtype *rbt_memtype_erase(u64 start, u64 end) -{ - struct memtype *data; - - /* - * Since the memtype_rbroot tree allows overlapping ranges, - * rbt_memtype_erase() checks with EXACT_MATCH first, i.e. free - * a whole node for the munmap case. If no such entry is found, - * it then checks with END_MATCH, i.e. shrink the size of a node - * from the end for the mremap case. - */ - data = memtype_rb_match(&memtype_rbroot, start, end, - MEMTYPE_EXACT_MATCH); - if (!data) { - data = memtype_rb_match(&memtype_rbroot, start, end, - MEMTYPE_END_MATCH); - if (!data) - return ERR_PTR(-EINVAL); - } - - if (data->start == start) { - /* munmap: erase this node */ - rb_erase_augmented(&data->rb, &memtype_rbroot, - &memtype_rb_augment_cb); - } else { - /* mremap: update the end value of this node */ - rb_erase_augmented(&data->rb, &memtype_rbroot, - &memtype_rb_augment_cb); - data->end = start; - data->subtree_max_end = data->end; - memtype_rb_insert(&memtype_rbroot, data); - return NULL; - } - - return data; -} - -struct memtype *rbt_memtype_lookup(u64 addr) -{ - return memtype_rb_lowest_match(&memtype_rbroot, addr, addr + PAGE_SIZE); -} - -#if defined(CONFIG_DEBUG_FS) -int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) -{ - struct rb_node *node; - int i = 1; - - node = rb_first(&memtype_rbroot); - while (node && pos != i) { - node = rb_next(node); - i++; - } - - if (node) { /* pos == i */ - struct memtype *this = rb_entry(node, struct memtype, rb); - *out = *this; - return 0; - } else { - return 1; - } -} -#endif diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 3e4b9035bb9a..7bd2c3a52297 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -643,8 +643,8 @@ void __native_set_fixmap(enum fixed_addresses idx, pte_t pte) fixmaps_set++; } -void native_set_fixmap(enum fixed_addresses idx, phys_addr_t phys, - pgprot_t flags) +void native_set_fixmap(unsigned /* enum fixed_addresses */ idx, + phys_addr_t phys, pgprot_t flags) { /* Sanitize 'prot' against any unsupported bits: */ pgprot_val(flags) &= __default_kernel_pte_mask; diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c index 7f2140414440..44a9f068eee0 100644 --- a/arch/x86/mm/pti.c +++ b/arch/x86/mm/pti.c @@ -574,7 +574,7 @@ static void pti_clone_kernel_text(void) */ unsigned long start = PFN_ALIGN(_text); unsigned long end_clone = (unsigned long)__end_rodata_aligned; - unsigned long end_global = PFN_ALIGN((unsigned long)__stop___ex_table); + unsigned long end_global = PFN_ALIGN((unsigned long)_etext); if (!pti_kernel_image_global_ok()) return; diff --git a/arch/x86/mm/testmmiotrace.c b/arch/x86/mm/testmmiotrace.c index a8bd952e136d..92153d054d6c 100644 --- a/arch/x86/mm/testmmiotrace.c +++ b/arch/x86/mm/testmmiotrace.c @@ -127,9 +127,9 @@ static int __init init(void) return -ENXIO; } - pr_warning("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, " - "and writing 16 kB of rubbish in there.\n", - size >> 10, mmio_address); + pr_warn("WARNING: mapping %lu kB @ 0x%08lx in PCI address space, " + "and writing 16 kB of rubbish in there.\n", + size >> 10, mmio_address); do_test(size); do_test_bulk_ioremapping(); pr_info("All done.\n"); |