From f6e5aedf470bd4522732595a85688052e3cbc03f Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 25 Feb 2021 14:54:17 +0800 Subject: riscv: Add support for memtest The riscv [rv32_]defconfig enabled CONFIG_MEMTEST, but memtest feature is not supported in RISCV. Add early_memtest() to support for memtest. Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 067583ab1bd7..7f5036fbee8c 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -128,8 +128,9 @@ void __init setup_bootmem(void) if (max_mapped_addr == (dram_end - 1)) memblock_set_current_limit(max_mapped_addr - 4096); - max_pfn = PFN_DOWN(dram_end); - max_low_pfn = max_pfn; + min_low_pfn = PFN_UP(memblock_start_of_DRAM()); + max_low_pfn = max_pfn = PFN_DOWN(dram_end); + dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn)); set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET); @@ -593,6 +594,7 @@ void __init paging_init(void) void __init misc_mem_init(void) { + early_memtest(min_low_pfn << PAGE_SHIFT, max_low_pfn << PAGE_SHIFT); arch_numa_init(); sparse_init(); zone_sizes_init(); -- cgit v1.2.3 From 9530141455c968938a913d602a236c2a7b0322e1 Mon Sep 17 00:00:00 2001 From: Kefeng Wang Date: Thu, 25 Feb 2021 15:03:03 +0800 Subject: riscv: Add ARCH_HAS_FORTIFY_SOURCE FORTIFY_SOURCE could detect various overflows at compile and run time. ARCH_HAS_FORTIFY_SOURCE means that the architecture can be built and run with CONFIG_FORTIFY_SOURCE. Select it in RISCV. See more about this feature from commit 6974f0c4555e ("include/linux/string.h: add the option of fortified string.h functions"). Signed-off-by: Kefeng Wang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/string.h | 5 +++++ 2 files changed, 6 insertions(+) (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 85d626b8ce5e..6978b0739718 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -20,6 +20,7 @@ config RISCV select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DEBUG_WX + select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV diff --git a/arch/riscv/include/asm/string.h b/arch/riscv/include/asm/string.h index 5477e7ecb6e1..909049366555 100644 --- a/arch/riscv/include/asm/string.h +++ b/arch/riscv/include/asm/string.h @@ -23,5 +23,10 @@ extern asmlinkage void *__memmove(void *, const void *, size_t); #define memcpy(dst, src, len) __memcpy(dst, src, len) #define memset(s, c, n) __memset(s, c, n) #define memmove(dst, src, len) __memmove(dst, src, len) + +#ifndef __NO_FORTIFY +#define __NO_FORTIFY /* FORTIFY_SOURCE uses __builtin_memcpy, etc. */ +#endif + #endif #endif /* _ASM_RISCV_STRING_H */ -- cgit v1.2.3 From 2f100585d04506004b8027ec9bbaee26940a769f Mon Sep 17 00:00:00 2001 From: Guo Ren Date: Sun, 7 Mar 2021 10:24:46 +0800 Subject: riscv: Enable generic clockevent broadcast When percpu-timers are stopped by deep power saving mode, we need system timer help to broadcast IPI_TIMER. This is first introduced by broken x86 hardware, where the local apic timer stops in C3 state. But many other architectures(powerpc, mips, arm, hexagon, openrisc, sh) have supported the infrastructure to deal with Power Management issues. Signed-off-by: Guo Ren Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 ++ arch/riscv/kernel/smp.c | 16 ++++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 6978b0739718..8ea60a0a19ae 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -29,6 +29,7 @@ config RISCV select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if MMU + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT if MMU @@ -40,6 +41,7 @@ config RISCV select EDAC_SUPPORT select GENERIC_ARCH_TOPOLOGY if SMP select GENERIC_ATOMIC64 if !64BIT + select GENERIC_CLOCKEVENTS_BROADCAST if SMP select GENERIC_EARLY_IOREMAP select GENERIC_GETTIMEOFDAY if HAVE_GENERIC_VDSO select GENERIC_IOREMAP diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index ea028d9e0d24..8325d33411d8 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include @@ -27,6 +28,7 @@ enum ipi_message_type { IPI_CALL_FUNC, IPI_CPU_STOP, IPI_IRQ_WORK, + IPI_TIMER, IPI_MAX }; @@ -176,6 +178,12 @@ void handle_IPI(struct pt_regs *regs) irq_work_run(); } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST + if (ops & (1 << IPI_TIMER)) { + stats[IPI_TIMER]++; + tick_receive_broadcast(); + } +#endif BUG_ON((ops >> IPI_MAX) != 0); /* Order data access and bit testing. */ @@ -192,6 +200,7 @@ static const char * const ipi_names[] = { [IPI_CALL_FUNC] = "Function call interrupts", [IPI_CPU_STOP] = "CPU stop interrupts", [IPI_IRQ_WORK] = "IRQ work interrupts", + [IPI_TIMER] = "Timer broadcast interrupts", }; void show_ipi_stats(struct seq_file *p, int prec) @@ -217,6 +226,13 @@ void arch_send_call_function_single_ipi(int cpu) send_ipi_single(cpu, IPI_CALL_FUNC); } +#ifdef CONFIG_GENERIC_CLOCKEVENTS_BROADCAST +void tick_broadcast(const struct cpumask *mask) +{ + send_ipi_mask(mask, IPI_TIMER); +} +#endif + void smp_send_stop(void) { unsigned long timeout; -- cgit v1.2.3 From f35bb4b8d10a8ce356f0fff634fa885926f8d439 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Mon, 15 Mar 2021 16:34:59 +0530 Subject: RISC-V: Don't print SBI version for all detected extensions The sbi_init() already prints SBI version before detecting various SBI extensions so we don't need to print SBI version for all detected SBI extensions. Signed-off-by: Anup Patel Reviewed-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sbi.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index f4a7db3d309e..c0dcebdd30ec 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -577,19 +577,19 @@ void __init sbi_init(void) sbi_get_firmware_id(), sbi_get_firmware_version()); if (sbi_probe_extension(SBI_EXT_TIME) > 0) { __sbi_set_timer = __sbi_set_timer_v02; - pr_info("SBI v0.2 TIME extension detected\n"); + pr_info("SBI TIME extension detected\n"); } else { __sbi_set_timer = __sbi_set_timer_v01; } if (sbi_probe_extension(SBI_EXT_IPI) > 0) { __sbi_send_ipi = __sbi_send_ipi_v02; - pr_info("SBI v0.2 IPI extension detected\n"); + pr_info("SBI IPI extension detected\n"); } else { __sbi_send_ipi = __sbi_send_ipi_v01; } if (sbi_probe_extension(SBI_EXT_RFENCE) > 0) { __sbi_rfence = __sbi_rfence_v02; - pr_info("SBI v0.2 RFENCE extension detected\n"); + pr_info("SBI RFENCE extension detected\n"); } else { __sbi_rfence = __sbi_rfence_v01; } -- cgit v1.2.3 From 2da073c19641bb6820c3591d3d865120263f14e8 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sat, 13 Mar 2021 03:45:05 -0500 Subject: riscv: Cleanup KASAN_VMALLOC support When KASAN vmalloc region is populated, there is no userspace process and the page table in use is swapper_pg_dir, so there is no need to read SATP. Then we can use the same scheme used by kasan_populate_p*d functions to go through the page table, which harmonizes the code. In addition, make use of set_pgd that goes through all unused page table levels, contrary to p*d_populate functions, which makes this function work whatever the number of page table levels. Signed-off-by: Alexandre Ghiti Reviewed-by: Palmer Dabbelt Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/kasan_init.c | 59 ++++++++++++++-------------------------------- 1 file changed, 18 insertions(+), 41 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 3fc18f469efb..2c39f0386673 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -11,18 +11,6 @@ #include #include -static __init void *early_alloc(size_t size, int node) -{ - void *ptr = memblock_alloc_try_nid(size, size, - __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node); - - if (!ptr) - panic("%pS: Failed to allocate %zu bytes align=%zx nid=%d from=%llx\n", - __func__, size, size, node, (u64)__pa(MAX_DMA_ADDRESS)); - - return ptr; -} - extern pgd_t early_pg_dir[PTRS_PER_PGD]; asmlinkage void __init kasan_early_init(void) { @@ -155,38 +143,27 @@ static void __init kasan_populate(void *start, void *end) memset(start, KASAN_SHADOW_INIT, end - start); } -void __init kasan_shallow_populate(void *start, void *end) +static void __init kasan_shallow_populate_pgd(unsigned long vaddr, unsigned long end) { - unsigned long vaddr = (unsigned long)start & PAGE_MASK; - unsigned long vend = PAGE_ALIGN((unsigned long)end); - unsigned long pfn; - int index; + unsigned long next; void *p; - pud_t *pud_dir, *pud_k; - pgd_t *pgd_dir, *pgd_k; - p4d_t *p4d_dir, *p4d_k; - - while (vaddr < vend) { - index = pgd_index(vaddr); - pfn = csr_read(CSR_SATP) & SATP_PPN; - pgd_dir = (pgd_t *)pfn_to_virt(pfn) + index; - pgd_k = init_mm.pgd + index; - pgd_dir = pgd_offset_k(vaddr); - set_pgd(pgd_dir, *pgd_k); - - p4d_dir = p4d_offset(pgd_dir, vaddr); - p4d_k = p4d_offset(pgd_k, vaddr); - - vaddr = (vaddr + PUD_SIZE) & PUD_MASK; - pud_dir = pud_offset(p4d_dir, vaddr); - pud_k = pud_offset(p4d_k, vaddr); - - if (pud_present(*pud_dir)) { - p = early_alloc(PAGE_SIZE, NUMA_NO_NODE); - pud_populate(&init_mm, pud_dir, p); + pgd_t *pgd_k = pgd_offset_k(vaddr); + + do { + next = pgd_addr_end(vaddr, end); + if (pgd_page_vaddr(*pgd_k) == (unsigned long)lm_alias(kasan_early_shadow_pmd)) { + p = memblock_alloc(PAGE_SIZE, PAGE_SIZE); + set_pgd(pgd_k, pfn_pgd(PFN_DOWN(__pa(p)), PAGE_TABLE)); } - vaddr += PAGE_SIZE; - } + } while (pgd_k++, vaddr = next, vaddr != end); +} + +static void __init kasan_shallow_populate(void *start, void *end) +{ + unsigned long vaddr = (unsigned long)start & PAGE_MASK; + unsigned long vend = PAGE_ALIGN((unsigned long)end); + + kasan_shallow_populate_pgd(vaddr, vend); } void __init kasan_init(void) -- cgit v1.2.3 From 183787c6fcc2c793ec96e946a4fdd8cd0e6d7aa0 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Mon, 22 Mar 2021 22:26:02 +0800 Subject: riscv: Add 3 SBI wrapper functions to get cpu manufacturer information Add 3 wrapper functions to get vendor id, architecture id and implement id from M-mode Signed-off-by: Vincent Chen Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/sbi.h | 3 +++ arch/riscv/kernel/sbi.c | 15 +++++++++++++++ 2 files changed, 18 insertions(+) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/sbi.h b/arch/riscv/include/asm/sbi.h index 99895d9c3bdd..dd2329962ceb 100644 --- a/arch/riscv/include/asm/sbi.h +++ b/arch/riscv/include/asm/sbi.h @@ -97,6 +97,9 @@ struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, void sbi_console_putchar(int ch); int sbi_console_getchar(void); +long sbi_get_mvendorid(void); +long sbi_get_marchid(void); +long sbi_get_mimpid(void); void sbi_set_timer(uint64_t stime_value); void sbi_shutdown(void); void sbi_clear_ipi(void); diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index c0dcebdd30ec..fa0dd881fc5e 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -547,6 +547,21 @@ static inline long sbi_get_firmware_version(void) return __sbi_base_ecall(SBI_EXT_BASE_GET_IMP_VERSION); } +long sbi_get_mvendorid(void) +{ + return __sbi_base_ecall(SBI_EXT_BASE_GET_MVENDORID); +} + +long sbi_get_marchid(void) +{ + return __sbi_base_ecall(SBI_EXT_BASE_GET_MARCHID); +} + +long sbi_get_mimpid(void) +{ + return __sbi_base_ecall(SBI_EXT_BASE_GET_MIMPID); +} + static void sbi_send_cpumask_ipi(const struct cpumask *target) { struct cpumask hartid_mask; -- cgit v1.2.3 From 6f4eea90465ad0cd5f3d041b9b2c728426f2b8d4 Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Mon, 22 Mar 2021 22:26:03 +0800 Subject: riscv: Introduce alternative mechanism to apply errata solution Introduce the "alternative" mechanism from ARM64 and x86 to apply the CPU vendors' errata solution at runtime. The main purpose of this patch is to provide a framework. Therefore, the implementation is quite basic for now so that some scenarios could not use this schemei, such as patching code to a module, relocating the patching code and heterogeneous CPU topology. Users could use the macro ALTERNATIVE to apply an errata to the existing code flow. In the macro ALTERNATIVE, users need to specify the manufacturer information(vendorid, archid, and impid) for this errata. Therefore, kernel will know this errata is suitable for which CPU core. During the booting procedure, kernel will select the errata required by the CPU core and then patch it. It means that the kernel only applies the errata to the specified CPU core. In this case, the vendor's errata does not affect each other at runtime. The above patching procedure only occurs during the booting phase, so we only take the overhead of the "alternative" mechanism once. This "alternative" mechanism is enabled by default to ensure that all required errata will be applied. However, users can disable this feature by the Kconfig "CONFIG_RISCV_ERRATA_ALTERNATIVE". Signed-off-by: Vincent Chen Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + arch/riscv/Kconfig.erratas | 12 +++ arch/riscv/Makefile | 1 + arch/riscv/errata/Makefile | 1 + arch/riscv/errata/alternative.c | 69 ++++++++++++++ arch/riscv/include/asm/alternative-macros.h | 142 ++++++++++++++++++++++++++++ arch/riscv/include/asm/alternative.h | 36 +++++++ arch/riscv/include/asm/asm.h | 1 + arch/riscv/include/asm/csr.h | 3 + arch/riscv/include/asm/errata_list.h | 12 +++ arch/riscv/include/asm/sections.h | 1 + arch/riscv/include/asm/vendorid_list.h | 10 ++ arch/riscv/kernel/smpboot.c | 4 + arch/riscv/kernel/vmlinux.lds.S | 7 ++ 14 files changed, 300 insertions(+) create mode 100644 arch/riscv/Kconfig.erratas create mode 100644 arch/riscv/errata/Makefile create mode 100644 arch/riscv/errata/alternative.c create mode 100644 arch/riscv/include/asm/alternative-macros.h create mode 100644 arch/riscv/include/asm/alternative.h create mode 100644 arch/riscv/include/asm/errata_list.h create mode 100644 arch/riscv/include/asm/vendorid_list.h (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 8ea60a0a19ae..82c469b340c2 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -207,6 +207,7 @@ config LOCKDEP_SUPPORT def_bool y source "arch/riscv/Kconfig.socs" +source "arch/riscv/Kconfig.erratas" menu "Platform type" diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas new file mode 100644 index 000000000000..4d0bafc536df --- /dev/null +++ b/arch/riscv/Kconfig.erratas @@ -0,0 +1,12 @@ +menu "CPU errata selection" + +config RISCV_ERRATA_ALTERNATIVE + bool "RISC-V alternative scheme" + default y + help + This Kconfig allows the kernel to automatically patch the + errata required by the execution platform at run time. The + code patching is performed once in the boot stages. It means + that the overhead from this mechanism is just taken once. + +endmenu diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 1368d943f1f3..1f5c03082976 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -87,6 +87,7 @@ KBUILD_IMAGE := $(boot)/Image.gz head-y := arch/riscv/kernel/head.o core-y += arch/riscv/ +core-$(CONFIG_RISCV_ERRATA_ALTERNATIVE) += arch/riscv/errata/ libs-y += arch/riscv/lib/ libs-$(CONFIG_EFI_STUB) += $(objtree)/drivers/firmware/efi/libstub/lib.a diff --git a/arch/riscv/errata/Makefile b/arch/riscv/errata/Makefile new file mode 100644 index 000000000000..43e6d5424367 --- /dev/null +++ b/arch/riscv/errata/Makefile @@ -0,0 +1 @@ +obj-y += alternative.o diff --git a/arch/riscv/errata/alternative.c b/arch/riscv/errata/alternative.c new file mode 100644 index 000000000000..8efa60ad69b7 --- /dev/null +++ b/arch/riscv/errata/alternative.c @@ -0,0 +1,69 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * alternative runtime patching + * inspired by the ARM64 and x86 version + * + * Copyright (C) 2021 Sifive. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +static struct cpu_manufacturer_info_t { + unsigned long vendor_id; + unsigned long arch_id; + unsigned long imp_id; +} cpu_mfr_info; + +static void (*vendor_patch_func)(struct alt_entry *begin, struct alt_entry *end, + unsigned long archid, unsigned long impid); + +static inline void __init riscv_fill_cpu_mfr_info(void) +{ +#ifdef CONFIG_RISCV_M_MODE + cpu_mfr_info.vendor_id = csr_read(CSR_MVENDORID); + cpu_mfr_info.arch_id = csr_read(CSR_MARCHID); + cpu_mfr_info.imp_id = csr_read(CSR_MIMPID); +#else + cpu_mfr_info.vendor_id = sbi_get_mvendorid(); + cpu_mfr_info.arch_id = sbi_get_marchid(); + cpu_mfr_info.imp_id = sbi_get_mimpid(); +#endif +} + +static void __init init_alternative(void) +{ + riscv_fill_cpu_mfr_info(); + + switch (cpu_mfr_info.vendor_id) { + default: + vendor_patch_func = NULL; + } +} + +/* + * This is called very early in the boot process (directly after we run + * a feature detect on the boot CPU). No need to worry about other CPUs + * here. + */ +void __init apply_boot_alternatives(void) +{ + /* If called on non-boot cpu things could go wrong */ + WARN_ON(smp_processor_id() != 0); + + init_alternative(); + + if (!vendor_patch_func) + return; + + vendor_patch_func((struct alt_entry *)__alt_start, + (struct alt_entry *)__alt_end, + cpu_mfr_info.arch_id, cpu_mfr_info.imp_id); +} + diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h new file mode 100644 index 000000000000..88c08705f64a --- /dev/null +++ b/arch/riscv/include/asm/alternative-macros.h @@ -0,0 +1,142 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __ASM_ALTERNATIVE_MACROS_H +#define __ASM_ALTERNATIVE_MACROS_H + +#ifdef CONFIG_RISCV_ERRATA_ALTERNATIVE + +#ifdef __ASSEMBLY__ + +.macro ALT_ENTRY oldptr newptr vendor_id errata_id new_len + RISCV_PTR \oldptr + RISCV_PTR \newptr + REG_ASM \vendor_id + REG_ASM \new_len + .word \errata_id +.endm + +.macro ALT_NEW_CONTENT vendor_id, errata_id, enable = 1, new_c : vararg + .if \enable + .pushsection .alternative, "a" + ALT_ENTRY 886b, 888f, \vendor_id, \errata_id, 889f - 888f + .popsection + .subsection 1 +888 : + \new_c +889 : + .previous + .org . - (889b - 888b) + (887b - 886b) + .org . - (887b - 886b) + (889b - 888b) + .endif +.endm + +.macro __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, enable +886 : + \old_c +887 : + ALT_NEW_CONTENT \vendor_id, \errata_id, \enable, \new_c +.endm + +#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ + __ALTERNATIVE_CFG old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k) + +#else /* !__ASSEMBLY__ */ + +#include +#include + +#define ALT_ENTRY(oldptr, newptr, vendor_id, errata_id, newlen) \ + RISCV_PTR " " oldptr "\n" \ + RISCV_PTR " " newptr "\n" \ + REG_ASM " " vendor_id "\n" \ + REG_ASM " " newlen "\n" \ + ".word " errata_id "\n" + +#define ALT_NEW_CONSTENT(vendor_id, errata_id, enable, new_c) \ + ".if " __stringify(enable) " == 1\n" \ + ".pushsection .alternative, \"a\"\n" \ + ALT_ENTRY("886b", "888f", __stringify(vendor_id), __stringify(errata_id), "889f - 888f") \ + ".popsection\n" \ + ".subsection 1\n" \ + "888 :\n" \ + new_c "\n" \ + "889 :\n" \ + ".previous\n" \ + ".org . - (887b - 886b) + (889b - 888b)\n" \ + ".org . - (889b - 888b) + (887b - 886b)\n" \ + ".endif\n" + +#define __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, enable) \ + "886 :\n" \ + old_c "\n" \ + "887 :\n" \ + ALT_NEW_CONSTENT(vendor_id, errata_id, enable, new_c) + +#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ + __ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, IS_ENABLED(CONFIG_k)) + +#endif /* __ASSEMBLY__ */ + +#else /* !CONFIG_RISCV_ERRATA_ALTERNATIVE*/ +#ifdef __ASSEMBLY__ + +.macro __ALTERNATIVE_CFG old_c + \old_c +.endm + +#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ + __ALTERNATIVE_CFG old_c + +#else /* !__ASSEMBLY__ */ + +#define __ALTERNATIVE_CFG(old_c) \ + old_c "\n" + +#define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, errata_id, CONFIG_k) \ + __ALTERNATIVE_CFG(old_c) + +#endif /* __ASSEMBLY__ */ +#endif /* CONFIG_RISCV_ERRATA_ALTERNATIVE */ +/* + * Usage: + * ALTERNATIVE(old_content, new_content, vendor_id, errata_id, CONFIG_k) + * in the assembly code. Otherwise, + * asm(ALTERNATIVE(old_content, new_content, vendor_id, errata_id, CONFIG_k)); + * + * old_content: The old content which is probably replaced with new content. + * new_content: The new content. + * vendor_id: The CPU vendor ID. + * errata_id: The errata ID. + * CONFIG_k: The Kconfig of this errata. When Kconfig is disabled, the old + * content will alwyas be executed. + */ +#define ALTERNATIVE(old_content, new_content, vendor_id, errata_id, CONFIG_k) \ + _ALTERNATIVE_CFG(old_content, new_content, vendor_id, errata_id, CONFIG_k) + +/* + * A vendor wants to replace an old_content, but another vendor has used + * ALTERNATIVE() to patch its customized content at the same location. In + * this case, this vendor can create a new macro ALTERNATIVE_2() based + * on the following sample code and then replace ALTERNATIVE() with + * ALTERNATIVE_2() to append its customized content. + * + * .macro __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, enable_1, \ + * new_c_2, vendor_id_2, errata_id_2, enable_2 + * 886 : + * \old_c + * 887 : + * ALT_NEW_CONTENT \vendor_id_1, \errata_id_1, \enable_1, \new_c_1 + * ALT_NEW_CONTENT \vendor_id_2, \errata_id_2, \enable_2, \new_c_2 + * .endm + * + * #define _ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ + * new_c_2, vendor_id_2, errata_id_2, CONFIG_k_2) \ + * __ALTERNATIVE_CFG_2 old_c, new_c_1, vendor_id_1, errata_id_1, IS_ENABLED(CONFIG_k_1), \ + * new_c_2, vendor_id_2, errata_id_2, IS_ENABLED(CONFIG_k_2) \ + * + * #define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ + * new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2) \ + * _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, errata_id_1, CONFIG_k_1, \ + * new_content_2, vendor_id_2, errata_id_2, CONFIG_k_2) + * + */ +#endif diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h new file mode 100644 index 000000000000..430bc4fea133 --- /dev/null +++ b/arch/riscv/include/asm/alternative.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Sifive. + */ + +#ifndef __ASM_ALTERNATIVE_H +#define __ASM_ALTERNATIVE_H + +#define ERRATA_STRING_LENGTH_MAX 32 + +#include + +#ifndef __ASSEMBLY__ + +#include +#include +#include +#include + +void __init apply_boot_alternatives(void); + +struct alt_entry { + void *old_ptr; /* address of original instruciton or data */ + void *alt_ptr; /* address of replacement instruction or data */ + unsigned long vendor_id; /* cpu vendor id */ + unsigned long alt_len; /* The replacement size */ + unsigned int errata_id; /* The errata id */ +} __packed; + +struct errata_checkfunc_id { + unsigned long vendor_id; + bool (*func)(struct alt_entry *alt); +}; + +#endif +#endif diff --git a/arch/riscv/include/asm/asm.h b/arch/riscv/include/asm/asm.h index 9c992a88d858..618d7c5af1a2 100644 --- a/arch/riscv/include/asm/asm.h +++ b/arch/riscv/include/asm/asm.h @@ -23,6 +23,7 @@ #define REG_L __REG_SEL(ld, lw) #define REG_S __REG_SEL(sd, sw) #define REG_SC __REG_SEL(sc.d, sc.w) +#define REG_ASM __REG_SEL(.dword, .word) #define SZREG __REG_SEL(8, 4) #define LGREG __REG_SEL(3, 2) diff --git a/arch/riscv/include/asm/csr.h b/arch/riscv/include/asm/csr.h index caadfc1d7487..87ac65696871 100644 --- a/arch/riscv/include/asm/csr.h +++ b/arch/riscv/include/asm/csr.h @@ -115,6 +115,9 @@ #define CSR_MIP 0x344 #define CSR_PMPCFG0 0x3a0 #define CSR_PMPADDR0 0x3b0 +#define CSR_MVENDORID 0xf11 +#define CSR_MARCHID 0xf12 +#define CSR_MIMPID 0xf13 #define CSR_MHARTID 0xf14 #ifdef CONFIG_RISCV_M_MODE diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h new file mode 100644 index 000000000000..1b56131431c9 --- /dev/null +++ b/arch/riscv/include/asm/errata_list.h @@ -0,0 +1,12 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 Sifive. + */ +#ifndef ASM_ERRATA_LIST_H +#define ASM_ERRATA_LIST_H + +#ifdef CONFIG_ERRATA_SIFIVE +#define ERRATA_SIFIVE_NUMBER 0 +#endif + +#endif diff --git a/arch/riscv/include/asm/sections.h b/arch/riscv/include/asm/sections.h index 1595c5b60cfd..8a303fb1ee3b 100644 --- a/arch/riscv/include/asm/sections.h +++ b/arch/riscv/include/asm/sections.h @@ -11,5 +11,6 @@ extern char _start[]; extern char _start_kernel[]; extern char __init_data_begin[], __init_data_end[]; extern char __init_text_begin[], __init_text_end[]; +extern char __alt_start[], __alt_end[]; #endif /* __ASM_SECTIONS_H */ diff --git a/arch/riscv/include/asm/vendorid_list.h b/arch/riscv/include/asm/vendorid_list.h new file mode 100644 index 000000000000..9d934215b3c8 --- /dev/null +++ b/arch/riscv/include/asm/vendorid_list.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 SiFive + */ +#ifndef ASM_VENDOR_LIST_H +#define ASM_VENDOR_LIST_H + +#define SIFIVE_VENDOR_ID 0x489 + +#endif diff --git a/arch/riscv/kernel/smpboot.c b/arch/riscv/kernel/smpboot.c index 5e276c25646f..9a408e2942ac 100644 --- a/arch/riscv/kernel/smpboot.c +++ b/arch/riscv/kernel/smpboot.c @@ -32,6 +32,7 @@ #include #include #include +#include #include "head.h" @@ -40,6 +41,9 @@ static DECLARE_COMPLETION(cpu_running); void __init smp_prepare_boot_cpu(void) { init_cpu_topology(); +#ifdef CONFIG_RISCV_ERRATA_ALTERNATIVE + apply_boot_alternatives(); +#endif } void __init smp_prepare_cpus(unsigned int max_cpus) diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index de03cb22d0e9..7e61bc1dc36e 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -90,6 +90,13 @@ SECTIONS } __init_data_end = .; + + . = ALIGN(8); + .alternative : { + __alt_start = .; + *(.alternative) + __alt_end = .; + } __init_end = .; /* Start of data section */ -- cgit v1.2.3 From 1a0e5dbd3723e1194cc549def69fe7b557d4c72b Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Mon, 22 Mar 2021 22:26:04 +0800 Subject: riscv: sifive: Add SiFive alternative ports Add required ports of the Alternative scheme for SiFive. Signed-off-by: Vincent Chen Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig.erratas | 10 ++++++ arch/riscv/Kconfig.socs | 1 + arch/riscv/errata/Makefile | 1 + arch/riscv/errata/alternative.c | 5 +++ arch/riscv/errata/sifive/Makefile | 1 + arch/riscv/errata/sifive/errata.c | 68 ++++++++++++++++++++++++++++++++++++ arch/riscv/include/asm/alternative.h | 3 ++ 7 files changed, 89 insertions(+) create mode 100644 arch/riscv/errata/sifive/Makefile create mode 100644 arch/riscv/errata/sifive/errata.c (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas index 4d0bafc536df..302e7467f302 100644 --- a/arch/riscv/Kconfig.erratas +++ b/arch/riscv/Kconfig.erratas @@ -9,4 +9,14 @@ config RISCV_ERRATA_ALTERNATIVE code patching is performed once in the boot stages. It means that the overhead from this mechanism is just taken once. +config ERRATA_SIFIVE + bool "SiFive errata" + depends on RISCV_ERRATA_ALTERNATIVE + help + All SiFive errata Kconfig depend on this Kconfig. Disabling + this Kconfig will disable all SiFive errata. Please say "Y" + here if your platform uses SiFive CPU cores. + + Otherwise, please say "N" here to avoid unnecessary overhead. + endmenu diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 7efcece8896c..b9eda857fc87 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -7,6 +7,7 @@ config SOC_SIFIVE select CLK_SIFIVE select CLK_SIFIVE_PRCI select SIFIVE_PLIC + select ERRATA_SIFIVE help This enables support for SiFive SoC platform hardware. diff --git a/arch/riscv/errata/Makefile b/arch/riscv/errata/Makefile index 43e6d5424367..b8f8740a3e44 100644 --- a/arch/riscv/errata/Makefile +++ b/arch/riscv/errata/Makefile @@ -1 +1,2 @@ obj-y += alternative.o +obj-$(CONFIG_ERRATA_SIFIVE) += sifive/ diff --git a/arch/riscv/errata/alternative.c b/arch/riscv/errata/alternative.c index 8efa60ad69b7..3b15885db70b 100644 --- a/arch/riscv/errata/alternative.c +++ b/arch/riscv/errata/alternative.c @@ -42,6 +42,11 @@ static void __init init_alternative(void) riscv_fill_cpu_mfr_info(); switch (cpu_mfr_info.vendor_id) { +#ifdef CONFIG_ERRATA_SIFIVE + case SIFIVE_VENDOR_ID: + vendor_patch_func = sifive_errata_patch_func; + break; +#endif default: vendor_patch_func = NULL; } diff --git a/arch/riscv/errata/sifive/Makefile b/arch/riscv/errata/sifive/Makefile new file mode 100644 index 000000000000..2d644e19caef --- /dev/null +++ b/arch/riscv/errata/sifive/Makefile @@ -0,0 +1 @@ +obj-y += errata.o diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c new file mode 100644 index 000000000000..826cd391fc55 --- /dev/null +++ b/arch/riscv/errata/sifive/errata.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2021 Sifive. + */ + +#include +#include +#include +#include +#include +#include +#include + +struct errata_info_t { + char name[ERRATA_STRING_LENGTH_MAX]; + bool (*check_func)(unsigned long arch_id, unsigned long impid); +}; + +static u32 __init sifive_errata_probe(unsigned long archid, unsigned long impid) +{ + int idx; + u32 cpu_req_errata = 0; + + for (idx = 0; idx < ERRATA_SIFIVE_NUMBER; idx++) + if (errata_list[idx].check_func(archid, impid)) + cpu_req_errata |= (1U << idx); + + return cpu_req_errata; +} + +static void __init warn_miss_errata(u32 miss_errata) +{ + int i; + + pr_warn("----------------------------------------------------------------\n"); + pr_warn("WARNING: Missing the following errata may cause potential issues\n"); + for (i = 0; i < ERRATA_SIFIVE_NUMBER; i++) + if (miss_errata & 0x1 << i) + pr_warn("\tSiFive Errata[%d]:%s\n", i, errata_list[i].name); + pr_warn("Please enable the corresponding Kconfig to apply them\n"); + pr_warn("----------------------------------------------------------------\n"); +} + +void __init sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, + unsigned long archid, unsigned long impid) +{ + struct alt_entry *alt; + u32 cpu_req_errata = sifive_errata_probe(archid, impid); + u32 cpu_apply_errata = 0; + u32 tmp; + + for (alt = begin; alt < end; alt++) { + if (alt->vendor_id != SIFIVE_VENDOR_ID) + continue; + if (alt->errata_id >= ERRATA_SIFIVE_NUMBER) { + WARN(1, "This errata id:%d is not in kernel errata list", alt->errata_id); + continue; + } + + tmp = (1U << alt->errata_id); + if (cpu_req_errata & tmp) { + patch_text_nosync(alt->old_ptr, alt->alt_ptr, alt->alt_len); + cpu_apply_errata |= tmp; + } + } + if (cpu_apply_errata != cpu_req_errata) + warn_miss_errata(cpu_req_errata - cpu_apply_errata); +} diff --git a/arch/riscv/include/asm/alternative.h b/arch/riscv/include/asm/alternative.h index 430bc4fea133..e625d3cafbed 100644 --- a/arch/riscv/include/asm/alternative.h +++ b/arch/riscv/include/asm/alternative.h @@ -32,5 +32,8 @@ struct errata_checkfunc_id { bool (*func)(struct alt_entry *alt); }; +void sifive_errata_patch_func(struct alt_entry *begin, struct alt_entry *end, + unsigned long archid, unsigned long impid); + #endif #endif -- cgit v1.2.3 From 800149a77c2cb8746a94457939b1ba1e37d2c14e Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Mon, 22 Mar 2021 22:26:05 +0800 Subject: riscv: sifive: Apply errata "cip-453" patch Add sign extension to the $badaddr before addressing the instruction page fault and instruction access fault to workaround the issue "cip-453". To avoid affecting the existing code sequence, this patch will creates two trampolines to add sign extension to the $badaddr. By the "alternative" mechanism, these two trampolines will replace the original exception handler of instruction page fault and instruction access fault in the excp_vect_table. In this case, only the specific SiFive CPU core jumps to the do_page_fault and do_trap_insn_fault through these two trampolines. Other CPUs are not affected. Signed-off-by: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig.erratas | 11 +++++++++ arch/riscv/errata/sifive/Makefile | 1 + arch/riscv/errata/sifive/errata.c | 20 ++++++++++++++++ arch/riscv/errata/sifive/errata_cip_453.S | 38 +++++++++++++++++++++++++++++++ arch/riscv/include/asm/errata_list.h | 21 ++++++++++++++++- arch/riscv/kernel/entry.S | 6 +++-- 6 files changed, 94 insertions(+), 3 deletions(-) create mode 100644 arch/riscv/errata/sifive/errata_cip_453.S (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas index 302e7467f302..b4146dca50fc 100644 --- a/arch/riscv/Kconfig.erratas +++ b/arch/riscv/Kconfig.erratas @@ -19,4 +19,15 @@ config ERRATA_SIFIVE Otherwise, please say "N" here to avoid unnecessary overhead. +config ERRATA_SIFIVE_CIP_453 + bool "Apply SiFive errata CIP-453" + depends on ERRATA_SIFIVE + default y + help + This will apply the SiFive CIP-453 errata to add sign extension + to the $badaddr when exception type is instruction page fault + and instruction access fault. + + If you don't know what to do here, say "Y". + endmenu diff --git a/arch/riscv/errata/sifive/Makefile b/arch/riscv/errata/sifive/Makefile index 2d644e19caef..bdd5fc843b8e 100644 --- a/arch/riscv/errata/sifive/Makefile +++ b/arch/riscv/errata/sifive/Makefile @@ -1 +1,2 @@ +obj-y += errata_cip_453.o obj-y += errata.o diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c index 826cd391fc55..e27391823f0f 100644 --- a/arch/riscv/errata/sifive/errata.c +++ b/arch/riscv/errata/sifive/errata.c @@ -16,6 +16,26 @@ struct errata_info_t { bool (*check_func)(unsigned long arch_id, unsigned long impid); }; +static bool errata_cip_453_check_func(unsigned long arch_id, unsigned long impid) +{ + /* + * Affected cores: + * Architecture ID: 0x8000000000000007 + * Implement ID: 0x20181004 <= impid <= 0x20191105 + */ + if (arch_id != 0x8000000000000007 || + (impid < 0x20181004 || impid > 0x20191105)) + return false; + return true; +} + +static struct errata_info_t errata_list[ERRATA_SIFIVE_NUMBER] = { + { + .name = "cip-453", + .check_func = errata_cip_453_check_func + }, +}; + static u32 __init sifive_errata_probe(unsigned long archid, unsigned long impid) { int idx; diff --git a/arch/riscv/errata/sifive/errata_cip_453.S b/arch/riscv/errata/sifive/errata_cip_453.S new file mode 100644 index 000000000000..f1b9623fe1de --- /dev/null +++ b/arch/riscv/errata/sifive/errata_cip_453.S @@ -0,0 +1,38 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2021 SiFive + */ + +#include +#include +#include +#include + +.macro ADD_SIGN_EXT pt_reg badaddr tmp_reg + REG_L \badaddr, PT_BADADDR(\pt_reg) + li \tmp_reg,1 + slli \tmp_reg,\tmp_reg,0x26 + and \tmp_reg,\tmp_reg,\badaddr + beqz \tmp_reg, 1f + li \tmp_reg,-1 + slli \tmp_reg,\tmp_reg,0x27 + or \badaddr,\tmp_reg,\badaddr + REG_S \badaddr, PT_BADADDR(\pt_reg) +1: +.endm + +ENTRY(sifive_cip_453_page_fault_trp) + ADD_SIGN_EXT a0, t0, t1 +#ifdef CONFIG_MMU + la t0, do_page_fault +#else + la t0, do_trap_unknown +#endif + jr t0 +END(sifive_cip_453_page_fault_trp) + +ENTRY(sifive_cip_453_insn_fault_trp) + ADD_SIGN_EXT a0, t0, t1 + la t0, do_trap_insn_fault + jr t0 +END(sifive_cip_453_insn_fault_trp) diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 1b56131431c9..6148d34d4245 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -5,8 +5,27 @@ #ifndef ASM_ERRATA_LIST_H #define ASM_ERRATA_LIST_H +#include +#include + #ifdef CONFIG_ERRATA_SIFIVE -#define ERRATA_SIFIVE_NUMBER 0 +#define ERRATA_SIFIVE_CIP_453 0 +#define ERRATA_SIFIVE_NUMBER 1 #endif +#ifdef __ASSEMBLY__ + +#define ALT_INSN_FAULT(x) \ +ALTERNATIVE(__stringify(RISCV_PTR do_trap_insn_fault), \ + __stringify(RISCV_PTR sifive_cip_453_insn_fault_trp), \ + SIFIVE_VENDOR_ID, ERRATA_SIFIVE_CIP_453, \ + CONFIG_ERRATA_SIFIVE_CIP_453) + +#define ALT_PAGE_FAULT(x) \ +ALTERNATIVE(__stringify(RISCV_PTR do_page_fault), \ + __stringify(RISCV_PTR sifive_cip_453_page_fault_trp), \ + SIFIVE_VENDOR_ID, ERRATA_SIFIVE_CIP_453, \ + CONFIG_ERRATA_SIFIVE_CIP_453) +#endif /* __ASSEMBLY__ */ + #endif diff --git a/arch/riscv/kernel/entry.S b/arch/riscv/kernel/entry.S index 744f3209c48d..60d0a2f1cd88 100644 --- a/arch/riscv/kernel/entry.S +++ b/arch/riscv/kernel/entry.S @@ -12,6 +12,7 @@ #include #include #include +#include #if !IS_ENABLED(CONFIG_PREEMPTION) .set resume_kernel, restore_all @@ -450,7 +451,7 @@ ENDPROC(__switch_to) /* Exception vector table */ ENTRY(excp_vect_table) RISCV_PTR do_trap_insn_misaligned - RISCV_PTR do_trap_insn_fault + ALT_INSN_FAULT(RISCV_PTR do_trap_insn_fault) RISCV_PTR do_trap_insn_illegal RISCV_PTR do_trap_break RISCV_PTR do_trap_load_misaligned @@ -461,7 +462,8 @@ ENTRY(excp_vect_table) RISCV_PTR do_trap_ecall_s RISCV_PTR do_trap_unknown RISCV_PTR do_trap_ecall_m - RISCV_PTR do_page_fault /* instruction page fault */ + /* instruciton page fault */ + ALT_PAGE_FAULT(RISCV_PTR do_page_fault) RISCV_PTR do_page_fault /* load page fault */ RISCV_PTR do_trap_unknown RISCV_PTR do_page_fault /* store page fault */ -- cgit v1.2.3 From bff3ff525460b492dca1d1665e821d2b5816ebdb Mon Sep 17 00:00:00 2001 From: Vincent Chen Date: Mon, 22 Mar 2021 22:26:06 +0800 Subject: riscv: sifive: Apply errata "cip-1200" patch For certain SiFive CPUs, "sfence.vma addr" cannot exactly flush addr from TLB in the particular cases. The details could be found here: https://sifive.cdn.prismic.io/sifive/167a1a56-03f4-4615-a79e-b2a86153148f_FU740_errata_20210205.pdf In order to ensure the functionality, this patch uses the Alternative scheme to replace all "sfence.vma addr" with "sfence.vma" at runtime. Signed-off-by: Vincent Chen Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig.erratas | 11 +++++++++++ arch/riscv/errata/sifive/errata.c | 18 ++++++++++++++++++ arch/riscv/include/asm/errata_list.h | 10 +++++++++- arch/riscv/include/asm/tlbflush.h | 3 ++- 4 files changed, 40 insertions(+), 2 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig.erratas b/arch/riscv/Kconfig.erratas index b4146dca50fc..d5d03ae8d685 100644 --- a/arch/riscv/Kconfig.erratas +++ b/arch/riscv/Kconfig.erratas @@ -30,4 +30,15 @@ config ERRATA_SIFIVE_CIP_453 If you don't know what to do here, say "Y". +config ERRATA_SIFIVE_CIP_1200 + bool "Apply SiFive errata CIP-1200" + depends on ERRATA_SIFIVE + default y + help + This will apply the SiFive CIP-1200 errata to repalce all + "sfence.vma addr" with "sfence.vma" to ensure that the addr + has been flushed from TLB. + + If you don't know what to do here, say "Y". + endmenu diff --git a/arch/riscv/errata/sifive/errata.c b/arch/riscv/errata/sifive/errata.c index e27391823f0f..f5e5ae70e829 100644 --- a/arch/riscv/errata/sifive/errata.c +++ b/arch/riscv/errata/sifive/errata.c @@ -29,11 +29,29 @@ static bool errata_cip_453_check_func(unsigned long arch_id, unsigned long impi return true; } +static bool errata_cip_1200_check_func(unsigned long arch_id, unsigned long impid) +{ + /* + * Affected cores: + * Architecture ID: 0x8000000000000007 or 0x1 + * Implement ID: mimpid[23:0] <= 0x200630 and mimpid != 0x01200626 + */ + if (arch_id != 0x8000000000000007 && arch_id != 0x1) + return false; + if ((impid & 0xffffff) > 0x200630 || impid == 0x1200626) + return false; + return true; +} + static struct errata_info_t errata_list[ERRATA_SIFIVE_NUMBER] = { { .name = "cip-453", .check_func = errata_cip_453_check_func }, + { + .name = "cip-1200", + .check_func = errata_cip_1200_check_func + }, }; static u32 __init sifive_errata_probe(unsigned long archid, unsigned long impid) diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 6148d34d4245..5f1046e82d9f 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -10,7 +10,8 @@ #ifdef CONFIG_ERRATA_SIFIVE #define ERRATA_SIFIVE_CIP_453 0 -#define ERRATA_SIFIVE_NUMBER 1 +#define ERRATA_SIFIVE_CIP_1200 1 +#define ERRATA_SIFIVE_NUMBER 2 #endif #ifdef __ASSEMBLY__ @@ -26,6 +27,13 @@ ALTERNATIVE(__stringify(RISCV_PTR do_page_fault), \ __stringify(RISCV_PTR sifive_cip_453_page_fault_trp), \ SIFIVE_VENDOR_ID, ERRATA_SIFIVE_CIP_453, \ CONFIG_ERRATA_SIFIVE_CIP_453) +#else /* !__ASSEMBLY__ */ + +#define ALT_FLUSH_TLB_PAGE(x) \ +asm(ALTERNATIVE("sfence.vma %0", "sfence.vma", SIFIVE_VENDOR_ID, \ + ERRATA_SIFIVE_CIP_1200, CONFIG_ERRATA_SIFIVE_CIP_1200) \ + : : "r" (addr) : "memory") + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/riscv/include/asm/tlbflush.h b/arch/riscv/include/asm/tlbflush.h index 394cfbccdcd9..c84218ad7afc 100644 --- a/arch/riscv/include/asm/tlbflush.h +++ b/arch/riscv/include/asm/tlbflush.h @@ -9,6 +9,7 @@ #include #include +#include #ifdef CONFIG_MMU static inline void local_flush_tlb_all(void) @@ -19,7 +20,7 @@ static inline void local_flush_tlb_all(void) /* Flush one page from local TLB */ static inline void local_flush_tlb_page(unsigned long addr) { - __asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory"); + ALT_FLUSH_TLB_PAGE(__asm__ __volatile__ ("sfence.vma %0" : : "r" (addr) : "memory")); } #else /* CONFIG_MMU */ #define local_flush_tlb_all() do { } while (0) -- cgit v1.2.3 From 7f3d349065d0c643f7f7013fbf9bc9f2c90b675f Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 25 Mar 2021 14:51:56 -0700 Subject: riscv: Use $(LD) instead of $(CC) to link vDSO Currently, the VDSO is being linked through $(CC). This does not match how the rest of the kernel links objects, which is through the $(LD) variable. When linking with clang, there are a couple of warnings about flags that will not be used during the link: clang-12: warning: argument unused during compilation: '-no-pie' [-Wunused-command-line-argument] clang-12: warning: argument unused during compilation: '-pg' [-Wunused-command-line-argument] '-no-pie' was added in commit 85602bea297f ("RISC-V: build vdso-dummy.o with -no-pie") to override '-pie' getting added to the ld command from distribution versions of GCC that enable PIE by default. It is technically no longer needed after commit c2c81bb2f691 ("RISC-V: Fix the VDSO symbol generaton for binutils-2.35+"), which removed vdso-dummy.o in favor of generating vdso-syms.S from vdso.so with $(NM) but this also resolves the issue in case it ever comes back due to having full control over the $(LD) command. '-pg' is for function tracing, it is not used during linking as clang states. These flags could be removed/filtered to fix the warnings but it is easier to just match the rest of the kernel and use $(LD) directly for linking. See commits fe00e50b2db8 ("ARM: 8858/1: vdso: use $(LD) instead of $(CC) to link VDSO") 691efbedc60d ("arm64: vdso: use $(LD) instead of $(CC) to link VDSO") 2ff906994b6c ("MIPS: VDSO: Use $(LD) instead of $(CC) to link VDSO") 2b2a25845d53 ("s390/vdso: Use $(LD) instead of $(CC) to link vDSO") for more information. The flags are converted to linker flags and '--eh-frame-hdr' is added to match what is added by GCC implicitly, which can be seen by adding '-v' to GCC's invocation. Additionally, since this area is being modified, use the $(OBJCOPY) variable instead of an open coded $(CROSS_COMPILE)objcopy so that the user's choice of objcopy binary is respected. Link: https://github.com/ClangBuiltLinux/linux/issues/803 Link: https://github.com/ClangBuiltLinux/linux/issues/970 Signed-off-by: Nathan Chancellor Reviewed-by: Fangrui Song Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/Makefile | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index 71a315e73cbe..ca2b40dfd24b 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -41,11 +41,10 @@ KASAN_SANITIZE := n $(obj)/vdso.o: $(obj)/vdso.so # link rule for the .so file, .lds has to be first -SYSCFLAGS_vdso.so.dbg = $(c_flags) $(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold) -SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \ - -Wl,--build-id=sha1 -Wl,--hash-style=both +LDFLAGS_vdso.so.dbg = -shared -s -soname=linux-vdso.so.1 \ + --build-id=sha1 --hash-style=both --eh-frame-hdr # We also create a special relocatable object that should mirror the symbol # table and layout of the linked DSO. With ld --just-symbols we can then @@ -60,13 +59,10 @@ $(obj)/%.so: $(obj)/%.so.dbg FORCE # actual build commands # The DSO images are built using a special linker script -# Add -lgcc so rv32 gets static muldi3 and lshrdi3 definitions. # Make sure only to export the intended __vdso_xxx symbol offsets. quiet_cmd_vdsold = VDSOLD $@ - cmd_vdsold = $(CC) $(KBUILD_CFLAGS) $(call cc-option, -no-pie) -nostdlib -nostartfiles $(SYSCFLAGS_$(@F)) \ - -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp && \ - $(CROSS_COMPILE)objcopy \ - $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ + cmd_vdsold = $(LD) $(ld_flags) -T $(filter-out FORCE,$^) -o $@.tmp && \ + $(OBJCOPY) $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \ rm $@.tmp # Extracts symbol offsets from the VDSO, converting them into an assembly file -- cgit v1.2.3 From 7ce04771503074a7de7f539cc43f5e1b385cb99b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 25 Mar 2021 15:38:06 -0700 Subject: riscv: Workaround mcount name prior to clang-13 Prior to clang 13.0.0, the RISC-V name for the mcount symbol was "mcount", which differs from the GCC version of "_mcount", which results in the following errors: riscv64-linux-gnu-ld: init/main.o: in function `__traceiter_initcall_level': main.c:(.text+0xe): undefined reference to `mcount' riscv64-linux-gnu-ld: init/main.o: in function `__traceiter_initcall_start': main.c:(.text+0x4e): undefined reference to `mcount' riscv64-linux-gnu-ld: init/main.o: in function `__traceiter_initcall_finish': main.c:(.text+0x92): undefined reference to `mcount' riscv64-linux-gnu-ld: init/main.o: in function `.LBB32_28': main.c:(.text+0x30c): undefined reference to `mcount' riscv64-linux-gnu-ld: init/main.o: in function `free_initmem': main.c:(.text+0x54c): undefined reference to `mcount' This has been corrected in https://reviews.llvm.org/D98881 but the minimum supported clang version is 10.0.1. To avoid build errors and to gain a working function tracer, adjust the name of the mcount symbol for older versions of clang in mount.S and recordmcount.pl. Link: https://github.com/ClangBuiltLinux/linux/issues/1331 Signed-off-by: Nathan Chancellor Reviewed-by: Nick Desaulniers Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/ftrace.h | 14 ++++++++++++-- arch/riscv/kernel/mcount.S | 10 +++++----- scripts/recordmcount.pl | 2 +- 3 files changed, 18 insertions(+), 8 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/ftrace.h b/arch/riscv/include/asm/ftrace.h index 845002cc2e57..04dad3380041 100644 --- a/arch/riscv/include/asm/ftrace.h +++ b/arch/riscv/include/asm/ftrace.h @@ -13,9 +13,19 @@ #endif #define HAVE_FUNCTION_GRAPH_RET_ADDR_PTR +/* + * Clang prior to 13 had "mcount" instead of "_mcount": + * https://reviews.llvm.org/D98881 + */ +#if defined(CONFIG_CC_IS_GCC) || CONFIG_CLANG_VERSION >= 130000 +#define MCOUNT_NAME _mcount +#else +#define MCOUNT_NAME mcount +#endif + #define ARCH_SUPPORTS_FTRACE_OPS 1 #ifndef __ASSEMBLY__ -void _mcount(void); +void MCOUNT_NAME(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; @@ -36,7 +46,7 @@ struct dyn_arch_ftrace { * both auipc and jalr at the same time. */ -#define MCOUNT_ADDR ((unsigned long)_mcount) +#define MCOUNT_ADDR ((unsigned long)MCOUNT_NAME) #define JALR_SIGN_MASK (0x00000800) #define JALR_OFFSET_MASK (0x00000fff) #define AUIPC_OFFSET_MASK (0xfffff000) diff --git a/arch/riscv/kernel/mcount.S b/arch/riscv/kernel/mcount.S index 8a5593ff9ff3..6d462681c9c0 100644 --- a/arch/riscv/kernel/mcount.S +++ b/arch/riscv/kernel/mcount.S @@ -47,8 +47,8 @@ ENTRY(ftrace_stub) #ifdef CONFIG_DYNAMIC_FTRACE - .global _mcount - .set _mcount, ftrace_stub + .global MCOUNT_NAME + .set MCOUNT_NAME, ftrace_stub #endif ret ENDPROC(ftrace_stub) @@ -78,7 +78,7 @@ ENDPROC(return_to_handler) #endif #ifndef CONFIG_DYNAMIC_FTRACE -ENTRY(_mcount) +ENTRY(MCOUNT_NAME) la t4, ftrace_stub #ifdef CONFIG_FUNCTION_GRAPH_TRACER la t0, ftrace_graph_return @@ -124,6 +124,6 @@ do_trace: jalr t5 RESTORE_ABI_STATE ret -ENDPROC(_mcount) +ENDPROC(MCOUNT_NAME) #endif -EXPORT_SYMBOL(_mcount) +EXPORT_SYMBOL(MCOUNT_NAME) diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index a36df04cfa09..7b83a1aaec98 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -392,7 +392,7 @@ if ($arch eq "x86_64") { $mcount_regex = "^\\s*([0-9a-fA-F]+):.*\\s_mcount\$"; } elsif ($arch eq "riscv") { $function_regex = "^([0-9a-fA-F]+)\\s+<([^.0-9][0-9a-zA-Z_\\.]+)>:"; - $mcount_regex = "^\\s*([0-9a-fA-F]+):\\sR_RISCV_CALL(_PLT)?\\s_mcount\$"; + $mcount_regex = "^\\s*([0-9a-fA-F]+):\\sR_RISCV_CALL(_PLT)?\\s_?mcount\$"; $type = ".quad"; $alignment = 2; } elsif ($arch eq "nds32") { -- cgit v1.2.3 From adebc8817b5c975d598ac379bbdf67a7a5186ade Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 25 Mar 2021 15:38:07 -0700 Subject: riscv: Select HAVE_DYNAMIC_FTRACE when -fpatchable-function-entry is available clang prior to 13.0.0 does not support -fpatchable-function-entry for RISC-V. clang: error: unsupported option '-fpatchable-function-entry=8' for target 'riscv64-unknown-linux-gnu' To avoid this error, only select HAVE_DYNAMIC_FTRACE when this option is not available. Fixes: afc76b8b8011 ("riscv: Using PATCHABLE_FUNCTION_ENTRY instead of MCOUNT") Link: https://github.com/ClangBuiltLinux/linux/issues/1268 Reported-by: kernel test robot Signed-off-by: Nathan Chancellor Reviewed-by: Fangrui Song Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 82c469b340c2..a840004d2829 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -231,7 +231,7 @@ config ARCH_RV64I bool "RV64I" select 64BIT select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && GCC_VERSION >= 50000 - select HAVE_DYNAMIC_FTRACE if MMU + select HAVE_DYNAMIC_FTRACE if MMU && $(cc-option,-fpatchable-function-entry=8) select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER -- cgit v1.2.3 From 2bfc6cd81bd17e4306e24ee47b9554c967bcb499 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sun, 11 Apr 2021 12:41:44 -0400 Subject: riscv: Move kernel mapping outside of linear mapping This is a preparatory patch for relocatable kernel and sv48 support. The kernel used to be linked at PAGE_OFFSET address therefore we could use the linear mapping for the kernel mapping. But the relocated kernel base address will be different from PAGE_OFFSET and since in the linear mapping, two different virtual addresses cannot point to the same physical address, the kernel mapping needs to lie outside the linear mapping so that we don't have to copy it at the same physical offset. The kernel mapping is moved to the last 2GB of the address space, BPF is now always after the kernel and modules use the 2GB memory range right before the kernel, so BPF and modules regions do not overlap. KASLR implementation will simply have to move the kernel in the last 2GB range and just take care of leaving enough space for BPF. In addition, by moving the kernel to the end of the address space, both sv39 and sv48 kernels will be exactly the same without needing to be relocated at runtime. Suggested-by: Arnd Bergmann Signed-off-by: Alexandre Ghiti [Palmer: Squash the STRICT_RWX fix, and a !MMU fix] Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/loader.lds.S | 3 +- arch/riscv/include/asm/page.h | 26 ++++++++- arch/riscv/include/asm/pgtable.h | 39 ++++++++++--- arch/riscv/include/asm/set_memory.h | 1 + arch/riscv/kernel/head.S | 3 +- arch/riscv/kernel/module.c | 6 +- arch/riscv/kernel/setup.c | 7 ++- arch/riscv/kernel/vmlinux.lds.S | 3 +- arch/riscv/mm/fault.c | 13 +++++ arch/riscv/mm/init.c | 106 ++++++++++++++++++++++++++++++------ arch/riscv/mm/kasan_init.c | 9 +++ arch/riscv/mm/physaddr.c | 2 +- 12 files changed, 182 insertions(+), 36 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/boot/loader.lds.S b/arch/riscv/boot/loader.lds.S index 47a5003c2e28..62d94696a19c 100644 --- a/arch/riscv/boot/loader.lds.S +++ b/arch/riscv/boot/loader.lds.S @@ -1,13 +1,14 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#include OUTPUT_ARCH(riscv) ENTRY(_start) SECTIONS { - . = PAGE_OFFSET; + . = KERNEL_LINK_ADDR; .payload : { *(.payload) diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index adc9d26f3d75..f64b61296c0c 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -90,15 +90,37 @@ typedef struct page *pgtable_t; #ifdef CONFIG_MMU extern unsigned long va_pa_offset; +#ifdef CONFIG_64BIT +extern unsigned long va_kernel_pa_offset; +#endif extern unsigned long pfn_base; #define ARCH_PFN_OFFSET (pfn_base) #else #define va_pa_offset 0 +#ifdef CONFIG_64BIT +#define va_kernel_pa_offset 0 +#endif #define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) #endif /* CONFIG_MMU */ -#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset)) -#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset) +#ifdef CONFIG_64BIT +extern unsigned long kernel_virt_addr; + +#define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + va_pa_offset)) +#define kernel_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + va_kernel_pa_offset)) +#define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x) + +#define linear_mapping_va_to_pa(x) ((unsigned long)(x) - va_pa_offset) +#define kernel_mapping_va_to_pa(x) ((unsigned long)(x) - va_kernel_pa_offset) +#define __va_to_pa_nodebug(x) ({ \ + unsigned long _x = x; \ + (_x < kernel_virt_addr) ? \ + linear_mapping_va_to_pa(_x) : kernel_mapping_va_to_pa(_x); \ + }) +#else +#define __pa_to_va_nodebug(x) ((void *)((unsigned long) (x) + va_pa_offset)) +#define __va_to_pa_nodebug(x) ((unsigned long)(x) - va_pa_offset) +#endif #ifdef CONFIG_DEBUG_VIRTUAL extern phys_addr_t __virt_to_phys(unsigned long x); diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index ebf817c1bdf4..5afda75cc2c3 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -11,23 +11,38 @@ #include -#ifndef __ASSEMBLY__ +#ifndef CONFIG_MMU +#define KERNEL_LINK_ADDR PAGE_OFFSET +#else -/* Page Upper Directory not used in RISC-V */ -#include -#include -#include -#include +#define ADDRESS_SPACE_END (UL(-1)) -#ifdef CONFIG_MMU +#ifdef CONFIG_64BIT +/* Leave 2GB for kernel and BPF at the end of the address space */ +#define KERNEL_LINK_ADDR (ADDRESS_SPACE_END - SZ_2G + 1) +#else +#define KERNEL_LINK_ADDR PAGE_OFFSET +#endif #define VMALLOC_SIZE (KERN_VIRT_SIZE >> 1) #define VMALLOC_END (PAGE_OFFSET - 1) #define VMALLOC_START (PAGE_OFFSET - VMALLOC_SIZE) #define BPF_JIT_REGION_SIZE (SZ_128M) +#ifdef CONFIG_64BIT +/* KASLR should leave at least 128MB for BPF after the kernel */ +#define BPF_JIT_REGION_START PFN_ALIGN((unsigned long)&_end) +#define BPF_JIT_REGION_END (BPF_JIT_REGION_START + BPF_JIT_REGION_SIZE) +#else #define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE) #define BPF_JIT_REGION_END (VMALLOC_END) +#endif + +/* Modules always live before the kernel */ +#ifdef CONFIG_64BIT +#define MODULES_VADDR (PFN_ALIGN((unsigned long)&_end) - SZ_2G) +#define MODULES_END (PFN_ALIGN((unsigned long)&_start)) +#endif /* * Roughly size the vmemmap space to be large enough to fit enough @@ -57,9 +72,16 @@ #define FIXADDR_SIZE PGDIR_SIZE #endif #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) - #endif +#ifndef __ASSEMBLY__ + +/* Page Upper Directory not used in RISC-V */ +#include +#include +#include +#include + #ifdef CONFIG_64BIT #include #else @@ -484,6 +506,7 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #define kern_addr_valid(addr) (1) /* FIXME */ +extern char _start[]; extern void *dtb_early_va; extern uintptr_t dtb_early_pa; void setup_bootmem(void); diff --git a/arch/riscv/include/asm/set_memory.h b/arch/riscv/include/asm/set_memory.h index 6887b3d9f371..a9c56776fa0e 100644 --- a/arch/riscv/include/asm/set_memory.h +++ b/arch/riscv/include/asm/set_memory.h @@ -17,6 +17,7 @@ int set_memory_x(unsigned long addr, int numpages); int set_memory_nx(unsigned long addr, int numpages); int set_memory_rw_nx(unsigned long addr, int numpages); void protect_kernel_text_data(void); +void protect_kernel_linear_mapping_text_rodata(void); #else static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; } static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; } diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index f5a9bad86e58..6cb05f22e52a 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -69,7 +69,8 @@ pe_head_start: #ifdef CONFIG_MMU relocate: /* Relocate return address */ - li a1, PAGE_OFFSET + la a1, kernel_virt_addr + REG_L a1, 0(a1) la a2, _start sub a1, a1, a2 add ra, ra, a1 diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index 104fba889cf7..ce153771e5e9 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -408,12 +408,10 @@ int apply_relocate_add(Elf_Shdr *sechdrs, const char *strtab, } #if defined(CONFIG_MMU) && defined(CONFIG_64BIT) -#define VMALLOC_MODULE_START \ - max(PFN_ALIGN((unsigned long)&_end - SZ_2G), VMALLOC_START) void *module_alloc(unsigned long size) { - return __vmalloc_node_range(size, 1, VMALLOC_MODULE_START, - VMALLOC_END, GFP_KERNEL, + return __vmalloc_node_range(size, 1, MODULES_VADDR, + MODULES_END, GFP_KERNEL, PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, __builtin_return_address(0)); } diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index e85bacff1b50..d208abc0b473 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -263,8 +263,13 @@ void __init setup_arch(char **cmdline_p) sbi_init(); - if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) + if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { protect_kernel_text_data(); +#if defined(CONFIG_64BIT) && defined(CONFIG_MMU) + protect_kernel_linear_mapping_text_rodata(); +#endif + } + #ifdef CONFIG_SWIOTLB swiotlb_init(1); #endif diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 7e61bc1dc36e..56677137c85b 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -4,7 +4,8 @@ * Copyright (C) 2017 SiFive */ -#define LOAD_OFFSET PAGE_OFFSET +#include +#define LOAD_OFFSET KERNEL_LINK_ADDR #include #include #include diff --git a/arch/riscv/mm/fault.c b/arch/riscv/mm/fault.c index 8f17519208c7..1b14d523a95c 100644 --- a/arch/riscv/mm/fault.c +++ b/arch/riscv/mm/fault.c @@ -231,6 +231,19 @@ asmlinkage void do_page_fault(struct pt_regs *regs) return; } +#ifdef CONFIG_64BIT + /* + * Modules in 64bit kernels lie in their own virtual region which is not + * in the vmalloc region, but dealing with page faults in this region + * or the vmalloc region amounts to doing the same thing: checking that + * the mapping exists in init_mm.pgd and updating user page table, so + * just use vmalloc_fault. + */ + if (unlikely(addr >= MODULES_VADDR && addr < MODULES_END)) { + vmalloc_fault(regs, code, addr); + return; + } +#endif /* Enable interrupts if they were enabled in the parent context. */ if (likely(regs->status & SR_PIE)) local_irq_enable(); diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 7f5036fbee8c..dc9b988e0778 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -25,6 +25,9 @@ #include "../kernel/head.h" +unsigned long kernel_virt_addr = KERNEL_LINK_ADDR; +EXPORT_SYMBOL(kernel_virt_addr); + unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; EXPORT_SYMBOL(empty_zero_page); @@ -88,6 +91,10 @@ static void print_vm_layout(void) (unsigned long)VMALLOC_END); print_mlm("lowmem", (unsigned long)PAGE_OFFSET, (unsigned long)high_memory); +#ifdef CONFIG_64BIT + print_mlm("kernel", (unsigned long)KERNEL_LINK_ADDR, + (unsigned long)ADDRESS_SPACE_END); +#endif } #else static void print_vm_layout(void) { } @@ -116,8 +123,13 @@ void __init setup_bootmem(void) /* The maximal physical memory size is -PAGE_OFFSET. */ memblock_enforce_memory_limit(-PAGE_OFFSET); - /* Reserve from the start of the kernel to the end of the kernel */ - memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start); + /* + * Reserve from the start of the kernel to the end of the kernel + * and make sure we align the reservation on PMD_SIZE since we will + * map the kernel in the linear mapping as read-only: we do not want + * any allocation to happen between _end and the next pmd aligned page. + */ + memblock_reserve(vmlinux_start, (vmlinux_end - vmlinux_start + PMD_SIZE - 1) & PMD_MASK); /* * memblock allocator is not aware of the fact that last 4K bytes of @@ -152,8 +164,14 @@ void __init setup_bootmem(void) #ifdef CONFIG_MMU static struct pt_alloc_ops pt_ops; +/* Offset between linear mapping virtual address and kernel load address */ unsigned long va_pa_offset; EXPORT_SYMBOL(va_pa_offset); +#ifdef CONFIG_64BIT +/* Offset between kernel mapping virtual address and kernel load address */ +unsigned long va_kernel_pa_offset; +EXPORT_SYMBOL(va_kernel_pa_offset); +#endif unsigned long pfn_base; EXPORT_SYMBOL(pfn_base); @@ -257,7 +275,7 @@ static pmd_t *get_pmd_virt_late(phys_addr_t pa) static phys_addr_t __init alloc_pmd_early(uintptr_t va) { - BUG_ON((va - PAGE_OFFSET) >> PGDIR_SHIFT); + BUG_ON((va - kernel_virt_addr) >> PGDIR_SHIFT); return (uintptr_t)early_pmd; } @@ -372,17 +390,34 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) #error "setup_vm() is called from head.S before relocate so it should not use absolute addressing." #endif +uintptr_t load_pa, load_sz; + +static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size) +{ + uintptr_t va, end_va; + + end_va = kernel_virt_addr + load_sz; + for (va = kernel_virt_addr; va < end_va; va += map_size) + create_pgd_mapping(pgdir, va, + load_pa + (va - kernel_virt_addr), + map_size, PAGE_KERNEL_EXEC); +} + asmlinkage void __init setup_vm(uintptr_t dtb_pa) { - uintptr_t va, pa, end_va; - uintptr_t load_pa = (uintptr_t)(&_start); - uintptr_t load_sz = (uintptr_t)(&_end) - load_pa; + uintptr_t pa; uintptr_t map_size; #ifndef __PAGETABLE_PMD_FOLDED pmd_t fix_bmap_spmd, fix_bmap_epmd; #endif + load_pa = (uintptr_t)(&_start); + load_sz = (uintptr_t)(&_end) - load_pa; va_pa_offset = PAGE_OFFSET - load_pa; +#ifdef CONFIG_64BIT + va_kernel_pa_offset = kernel_virt_addr - load_pa; +#endif + pfn_base = PFN_DOWN(load_pa); /* @@ -410,26 +445,22 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) create_pmd_mapping(fixmap_pmd, FIXADDR_START, (uintptr_t)fixmap_pte, PMD_SIZE, PAGE_TABLE); /* Setup trampoline PGD and PMD */ - create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET, + create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr, (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE); - create_pmd_mapping(trampoline_pmd, PAGE_OFFSET, + create_pmd_mapping(trampoline_pmd, kernel_virt_addr, load_pa, PMD_SIZE, PAGE_KERNEL_EXEC); #else /* Setup trampoline PGD */ - create_pgd_mapping(trampoline_pg_dir, PAGE_OFFSET, + create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr, load_pa, PGDIR_SIZE, PAGE_KERNEL_EXEC); #endif /* - * Setup early PGD covering entire kernel which will allows + * Setup early PGD covering entire kernel which will allow * us to reach paging_init(). We map all memory banks later * in setup_vm_final() below. */ - end_va = PAGE_OFFSET + load_sz; - for (va = PAGE_OFFSET; va < end_va; va += map_size) - create_pgd_mapping(early_pg_dir, va, - load_pa + (va - PAGE_OFFSET), - map_size, PAGE_KERNEL_EXEC); + create_kernel_page_table(early_pg_dir, map_size); #ifndef __PAGETABLE_PMD_FOLDED /* Setup early PMD for DTB */ @@ -444,7 +475,16 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL); dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1)); #else /* CONFIG_BUILTIN_DTB */ +#ifdef CONFIG_64BIT + /* + * __va can't be used since it would return a linear mapping address + * whereas dtb_early_va will be used before setup_vm_final installs + * the linear mapping. + */ + dtb_early_va = kernel_mapping_pa_to_va(dtb_pa); +#else dtb_early_va = __va(dtb_pa); +#endif /* CONFIG_64BIT */ #endif /* CONFIG_BUILTIN_DTB */ #else #ifndef CONFIG_BUILTIN_DTB @@ -456,7 +496,11 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL); dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1)); #else /* CONFIG_BUILTIN_DTB */ +#ifdef CONFIG_64BIT + dtb_early_va = kernel_mapping_pa_to_va(dtb_pa); +#else dtb_early_va = __va(dtb_pa); +#endif /* CONFIG_64BIT */ #endif /* CONFIG_BUILTIN_DTB */ #endif dtb_early_pa = dtb_pa; @@ -492,6 +536,22 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) #endif } +#ifdef CONFIG_64BIT +void protect_kernel_linear_mapping_text_rodata(void) +{ + unsigned long text_start = (unsigned long)lm_alias(_start); + unsigned long init_text_start = (unsigned long)lm_alias(__init_text_begin); + unsigned long rodata_start = (unsigned long)lm_alias(__start_rodata); + unsigned long data_start = (unsigned long)lm_alias(_data); + + set_memory_ro(text_start, (init_text_start - text_start) >> PAGE_SHIFT); + set_memory_nx(text_start, (init_text_start - text_start) >> PAGE_SHIFT); + + set_memory_ro(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT); + set_memory_nx(rodata_start, (data_start - rodata_start) >> PAGE_SHIFT); +} +#endif + static void __init setup_vm_final(void) { uintptr_t va, map_size; @@ -513,7 +573,7 @@ static void __init setup_vm_final(void) __pa_symbol(fixmap_pgd_next), PGDIR_SIZE, PAGE_TABLE); - /* Map all memory banks */ + /* Map all memory banks in the linear mapping */ for_each_mem_range(i, &start, &end) { if (start >= end) break; @@ -525,10 +585,22 @@ static void __init setup_vm_final(void) for (pa = start; pa < end; pa += map_size) { va = (uintptr_t)__va(pa); create_pgd_mapping(swapper_pg_dir, va, pa, - map_size, PAGE_KERNEL_EXEC); + map_size, +#ifdef CONFIG_64BIT + PAGE_KERNEL +#else + PAGE_KERNEL_EXEC +#endif + ); + } } +#ifdef CONFIG_64BIT + /* Map the kernel */ + create_kernel_page_table(swapper_pg_dir, PMD_SIZE); +#endif + /* Clear fixmap PTE and PMD mappings */ clear_fixmap(FIX_PTE); clear_fixmap(FIX_PMD); diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 2c39f0386673..28f4d52cf17e 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -171,6 +171,10 @@ void __init kasan_init(void) phys_addr_t _start, _end; u64 i; + /* + * Populate all kernel virtual address space with kasan_early_shadow_page + * except for the linear mapping and the modules/kernel/BPF mapping. + */ kasan_populate_early_shadow((void *)KASAN_SHADOW_START, (void *)kasan_mem_to_shadow((void *) VMEMMAP_END)); @@ -183,6 +187,7 @@ void __init kasan_init(void) (void *)kasan_mem_to_shadow((void *)VMALLOC_START), (void *)kasan_mem_to_shadow((void *)VMALLOC_END)); + /* Populate the linear mapping */ for_each_mem_range(i, &_start, &_end) { void *start = (void *)__va(_start); void *end = (void *)__va(_end); @@ -193,6 +198,10 @@ void __init kasan_init(void) kasan_populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end)); }; + /* Populate kernel, BPF, modules mapping */ + kasan_populate(kasan_mem_to_shadow((const void *)MODULES_VADDR), + kasan_mem_to_shadow((const void *)BPF_JIT_REGION_END)); + for (i = 0; i < PTRS_PER_PTE; i++) set_pte(&kasan_early_shadow_pte[i], mk_pte(virt_to_page(kasan_early_shadow_page), diff --git a/arch/riscv/mm/physaddr.c b/arch/riscv/mm/physaddr.c index e8e4dcd39fed..35703d5ef5fd 100644 --- a/arch/riscv/mm/physaddr.c +++ b/arch/riscv/mm/physaddr.c @@ -23,7 +23,7 @@ EXPORT_SYMBOL(__virt_to_phys); phys_addr_t __phys_addr_symbol(unsigned long x) { - unsigned long kernel_start = (unsigned long)PAGE_OFFSET; + unsigned long kernel_start = (unsigned long)kernel_virt_addr; unsigned long kernel_end = (unsigned long)_end; /* -- cgit v1.2.3 From 0df68ce4c26a48115a9e8d45e24f18d964a10050 Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sun, 11 Apr 2021 12:41:46 -0400 Subject: riscv: Prepare ptdump for vm layout dynamic addresses This is a preparatory patch for sv48 support that will introduce dynamic PAGE_OFFSET. Dynamic PAGE_OFFSET implies that all zones (vmalloc, vmemmap, fixaddr...) whose addresses depend on PAGE_OFFSET become dynamic and can't be used to statically initialize the array used by ptdump to identify the different zones of the vm layout. Signed-off-by: Alexandre Ghiti Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/ptdump.c | 73 +++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 61 insertions(+), 12 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index ace74dec7492..0aba4421115c 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -58,29 +58,56 @@ struct ptd_mm_info { unsigned long end; }; +enum address_markers_idx { +#ifdef CONFIG_KASAN + KASAN_SHADOW_START_NR, + KASAN_SHADOW_END_NR, +#endif + FIXMAP_START_NR, + FIXMAP_END_NR, + PCI_IO_START_NR, + PCI_IO_END_NR, +#ifdef CONFIG_SPARSEMEM_VMEMMAP + VMEMMAP_START_NR, + VMEMMAP_END_NR, +#endif + VMALLOC_START_NR, + VMALLOC_END_NR, + PAGE_OFFSET_NR, +#ifdef CONFIG_64BIT + MODULES_MAPPING_NR, +#endif + KERNEL_MAPPING_NR, + END_OF_SPACE_NR +}; + static struct addr_marker address_markers[] = { #ifdef CONFIG_KASAN - {KASAN_SHADOW_START, "Kasan shadow start"}, - {KASAN_SHADOW_END, "Kasan shadow end"}, + {0, "Kasan shadow start"}, + {0, "Kasan shadow end"}, #endif - {FIXADDR_START, "Fixmap start"}, - {FIXADDR_TOP, "Fixmap end"}, - {PCI_IO_START, "PCI I/O start"}, - {PCI_IO_END, "PCI I/O end"}, + {0, "Fixmap start"}, + {0, "Fixmap end"}, + {0, "PCI I/O start"}, + {0, "PCI I/O end"}, #ifdef CONFIG_SPARSEMEM_VMEMMAP - {VMEMMAP_START, "vmemmap start"}, - {VMEMMAP_END, "vmemmap end"}, + {0, "vmemmap start"}, + {0, "vmemmap end"}, +#endif + {0, "vmalloc() area"}, + {0, "vmalloc() end"}, + {0, "Linear mapping"}, +#ifdef CONFIG_64BIT + {0, "Modules mapping"}, #endif - {VMALLOC_START, "vmalloc() area"}, - {VMALLOC_END, "vmalloc() end"}, - {PAGE_OFFSET, "Linear mapping"}, + {0, "Kernel mapping (kernel, BPF)"}, {-1, NULL}, }; static struct ptd_mm_info kernel_ptd_info = { .mm = &init_mm, .markers = address_markers, - .base_addr = KERN_VIRT_START, + .base_addr = 0, .end = ULONG_MAX, }; @@ -335,6 +362,28 @@ static int ptdump_init(void) { unsigned int i, j; +#ifdef CONFIG_KASAN + address_markers[KASAN_SHADOW_START_NR].start_address = KASAN_SHADOW_START; + address_markers[KASAN_SHADOW_END_NR].start_address = KASAN_SHADOW_END; +#endif + address_markers[FIXMAP_START_NR].start_address = FIXADDR_START; + address_markers[FIXMAP_END_NR].start_address = FIXADDR_TOP; + address_markers[PCI_IO_START_NR].start_address = PCI_IO_START; + address_markers[PCI_IO_END_NR].start_address = PCI_IO_END; +#ifdef CONFIG_SPARSEMEM_VMEMMAP + address_markers[VMEMMAP_START_NR].start_address = VMEMMAP_START; + address_markers[VMEMMAP_END_NR].start_address = VMEMMAP_END; +#endif + address_markers[VMALLOC_START_NR].start_address = VMALLOC_START; + address_markers[VMALLOC_END_NR].start_address = VMALLOC_END; + address_markers[PAGE_OFFSET_NR].start_address = PAGE_OFFSET; +#ifdef CONFIG_64BIT + address_markers[MODULES_MAPPING_NR].start_address = MODULES_VADDR; +#endif + address_markers[KERNEL_MAPPING_NR].start_address = kernel_virt_addr; + + kernel_ptd_info.base_addr = KERN_VIRT_START; + for (i = 0; i < ARRAY_SIZE(pg_level); i++) for (j = 0; j < ARRAY_SIZE(pte_bits); j++) pg_level[i].mask |= pte_bits[j].mask; -- cgit v1.2.3 From 1987501b1130c6b4b7e1cef4b9c1dc9a8adae025 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:22:21 +0800 Subject: riscv: add __init section marker to some functions They are not needed after booting, so mark them as __init to move them to the __init section. Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/traps.c | 2 +- arch/riscv/mm/init.c | 6 +++--- arch/riscv/mm/kasan_init.c | 6 +++--- arch/riscv/mm/ptdump.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 3ed2c23601a0..6272da7b19d2 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -195,6 +195,6 @@ int is_valid_bugaddr(unsigned long pc) #endif /* CONFIG_GENERIC_BUG */ /* stvec & scratch is already set from head.S */ -void trap_init(void) +void __init trap_init(void) { } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index dc9b988e0778..3de7ec030342 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -60,7 +60,7 @@ static void __init zone_sizes_init(void) free_area_init(max_zone_pfns); } -static void setup_zero_page(void) +static void __init setup_zero_page(void) { memset((void *)empty_zero_page, 0, PAGE_SIZE); } @@ -78,7 +78,7 @@ static inline void print_mlm(char *name, unsigned long b, unsigned long t) (((t) - (b)) >> 20)); } -static void print_vm_layout(void) +static void __init print_vm_layout(void) { pr_notice("Virtual kernel memory layout:\n"); print_mlk("fixmap", (unsigned long)FIXADDR_START, @@ -630,7 +630,7 @@ static inline void setup_vm_final(void) #endif /* CONFIG_MMU */ #ifdef CONFIG_STRICT_KERNEL_RWX -void protect_kernel_text_data(void) +void __init protect_kernel_text_data(void) { unsigned long text_start = (unsigned long)_start; unsigned long init_text_start = (unsigned long)__init_text_begin; diff --git a/arch/riscv/mm/kasan_init.c b/arch/riscv/mm/kasan_init.c index 28f4d52cf17e..4fa412a314f9 100644 --- a/arch/riscv/mm/kasan_init.c +++ b/arch/riscv/mm/kasan_init.c @@ -48,7 +48,7 @@ asmlinkage void __init kasan_early_init(void) local_flush_tlb_all(); } -static void kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end) +static void __init kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end) { phys_addr_t phys_addr; pte_t *ptep, *base_pte; @@ -70,7 +70,7 @@ static void kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long en set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE)); } -static void kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned long end) +static void __init kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned long end) { phys_addr_t phys_addr; pmd_t *pmdp, *base_pmd; @@ -105,7 +105,7 @@ static void kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned long en set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE)); } -static void kasan_populate_pgd(unsigned long vaddr, unsigned long end) +static void __init kasan_populate_pgd(unsigned long vaddr, unsigned long end) { phys_addr_t phys_addr; pgd_t *pgdp = pgd_offset_k(vaddr); diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 0aba4421115c..92179598d8f8 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -358,7 +358,7 @@ static int ptdump_show(struct seq_file *m, void *v) DEFINE_SHOW_ATTRIBUTE(ptdump); -static int ptdump_init(void) +static int __init ptdump_init(void) { unsigned int i, j; -- cgit v1.2.3 From de31ea4a1181a8bb4d32ab74f3434f2bc2b79122 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:22:51 +0800 Subject: riscv: Mark some global variables __ro_after_init All of these are never modified after init, so they can be __ro_after_init. Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/sbi.c | 8 ++++---- arch/riscv/kernel/smp.c | 4 ++-- arch/riscv/kernel/time.c | 2 +- arch/riscv/kernel/vdso.c | 4 ++-- arch/riscv/mm/init.c | 6 +++--- 5 files changed, 12 insertions(+), 12 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index fa0dd881fc5e..a404d648ef79 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -11,14 +11,14 @@ #include /* default SBI version is 0.1 */ -unsigned long sbi_spec_version = SBI_SPEC_VERSION_DEFAULT; +unsigned long sbi_spec_version __ro_after_init = SBI_SPEC_VERSION_DEFAULT; EXPORT_SYMBOL(sbi_spec_version); -static void (*__sbi_set_timer)(uint64_t stime); -static int (*__sbi_send_ipi)(const unsigned long *hart_mask); +static void (*__sbi_set_timer)(uint64_t stime) __ro_after_init; +static int (*__sbi_send_ipi)(const unsigned long *hart_mask) __ro_after_init; static int (*__sbi_rfence)(int fid, const unsigned long *hart_mask, unsigned long start, unsigned long size, - unsigned long arg4, unsigned long arg5); + unsigned long arg4, unsigned long arg5) __ro_after_init; struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0, unsigned long arg1, unsigned long arg2, diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 8325d33411d8..476e2e4bc5c5 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -32,7 +32,7 @@ enum ipi_message_type { IPI_MAX }; -unsigned long __cpuid_to_hartid_map[NR_CPUS] = { +unsigned long __cpuid_to_hartid_map[NR_CPUS] __ro_after_init = { [0 ... NR_CPUS-1] = INVALID_HARTID }; @@ -87,7 +87,7 @@ static void ipi_stop(void) wait_for_interrupt(); } -static struct riscv_ipi_ops *ipi_ops; +static struct riscv_ipi_ops *ipi_ops __ro_after_init; void riscv_set_ipi_ops(struct riscv_ipi_ops *ops) { diff --git a/arch/riscv/kernel/time.c b/arch/riscv/kernel/time.c index 8a5cf99c0776..302ceafe0f81 100644 --- a/arch/riscv/kernel/time.c +++ b/arch/riscv/kernel/time.c @@ -10,7 +10,7 @@ #include #include -unsigned long riscv_timebase; +unsigned long riscv_timebase __ro_after_init; EXPORT_SYMBOL_GPL(riscv_timebase); void __init time_init(void) diff --git a/arch/riscv/kernel/vdso.c b/arch/riscv/kernel/vdso.c index 3f1d35e7c98a..25a3b8849599 100644 --- a/arch/riscv/kernel/vdso.c +++ b/arch/riscv/kernel/vdso.c @@ -20,8 +20,8 @@ extern char vdso_start[], vdso_end[]; -static unsigned int vdso_pages; -static struct page **vdso_pagelist; +static unsigned int vdso_pages __ro_after_init; +static struct page **vdso_pagelist __ro_after_init; /* * The vDSO data page. diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 3de7ec030342..9914ca0d789b 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -162,17 +162,17 @@ void __init setup_bootmem(void) } #ifdef CONFIG_MMU -static struct pt_alloc_ops pt_ops; +static struct pt_alloc_ops pt_ops __ro_after_init; /* Offset between linear mapping virtual address and kernel load address */ -unsigned long va_pa_offset; +unsigned long va_pa_offset __ro_after_init; EXPORT_SYMBOL(va_pa_offset); #ifdef CONFIG_64BIT /* Offset between kernel mapping virtual address and kernel load address */ unsigned long va_kernel_pa_offset; EXPORT_SYMBOL(va_kernel_pa_offset); #endif -unsigned long pfn_base; +unsigned long pfn_base __ro_after_init; EXPORT_SYMBOL(pfn_base); pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss; -- cgit v1.2.3 From e6a302248cec96c3af4cbfcedc44b0de8a26ebe0 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:23:24 +0800 Subject: riscv: Constify sys_call_table Constify the sys_call_table so that it will be placed in the .rodata section. This will cause attempts to modify the table to fail when strict page permissions are in place. Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/syscall.h | 2 +- arch/riscv/kernel/syscall_table.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h index 49350c8bd7b0..b933b1583c9f 100644 --- a/arch/riscv/include/asm/syscall.h +++ b/arch/riscv/include/asm/syscall.h @@ -15,7 +15,7 @@ #include /* The array of function pointers for syscalls. */ -extern void *sys_call_table[]; +extern void * const sys_call_table[]; /* * Only the low 32 bits of orig_r0 are meaningful, so we return int. diff --git a/arch/riscv/kernel/syscall_table.c b/arch/riscv/kernel/syscall_table.c index f1ead9df96ca..a63c667c27b3 100644 --- a/arch/riscv/kernel/syscall_table.c +++ b/arch/riscv/kernel/syscall_table.c @@ -13,7 +13,7 @@ #undef __SYSCALL #define __SYSCALL(nr, call) [nr] = (call), -void *sys_call_table[__NR_syscalls] = { +void * const sys_call_table[__NR_syscalls] = { [0 ... __NR_syscalls - 1] = sys_ni_syscall, #include }; -- cgit v1.2.3 From 300f62c37d4601e5b7967c6399917dc6880070bc Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:23:54 +0800 Subject: riscv: Constify sbi_ipi_ops Constify the sbi_ipi_ops so that it will be placed in the .rodata section. This will cause attempts to modify it to fail when strict page permissions are in place. Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/smp.h | 4 ++-- arch/riscv/kernel/sbi.c | 2 +- arch/riscv/kernel/smp.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/smp.h b/arch/riscv/include/asm/smp.h index df1f7c4cd433..a7d2811f3536 100644 --- a/arch/riscv/include/asm/smp.h +++ b/arch/riscv/include/asm/smp.h @@ -46,7 +46,7 @@ int riscv_hartid_to_cpuid(int hartid); void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out); /* Set custom IPI operations */ -void riscv_set_ipi_ops(struct riscv_ipi_ops *ops); +void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops); /* Clear IPI for current CPU */ void riscv_clear_ipi(void); @@ -92,7 +92,7 @@ static inline void riscv_cpuid_to_hartid_mask(const struct cpumask *in, cpumask_set_cpu(boot_cpu_hartid, out); } -static inline void riscv_set_ipi_ops(struct riscv_ipi_ops *ops) +static inline void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops) { } diff --git a/arch/riscv/kernel/sbi.c b/arch/riscv/kernel/sbi.c index a404d648ef79..38f94926f2ee 100644 --- a/arch/riscv/kernel/sbi.c +++ b/arch/riscv/kernel/sbi.c @@ -571,7 +571,7 @@ static void sbi_send_cpumask_ipi(const struct cpumask *target) sbi_send_ipi(cpumask_bits(&hartid_mask)); } -static struct riscv_ipi_ops sbi_ipi_ops = { +static const struct riscv_ipi_ops sbi_ipi_ops = { .ipi_inject = sbi_send_cpumask_ipi }; diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 476e2e4bc5c5..366cb87c0e2e 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -87,9 +87,9 @@ static void ipi_stop(void) wait_for_interrupt(); } -static struct riscv_ipi_ops *ipi_ops __ro_after_init; +static const struct riscv_ipi_ops *ipi_ops __ro_after_init; -void riscv_set_ipi_ops(struct riscv_ipi_ops *ops) +void riscv_set_ipi_ops(const struct riscv_ipi_ops *ops) { ipi_ops = ops; } -- cgit v1.2.3 From cdd1b2bd358ffda2638fe18ff47191e84e18525f Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:24:21 +0800 Subject: riscv: kprobes: Implement alloc_insn_page() Allocate PAGE_KERNEL_READ_EXEC(read only, executable) page for kprobes insn page. This is to prepare for STRICT_MODULE_RWX. Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/probes/kprobes.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index a2ec18662fee..0b451ff76d55 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -84,6 +84,14 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return 0; } +void *alloc_insn_page(void) +{ + return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END, + GFP_KERNEL, PAGE_KERNEL_READ_EXEC, + VM_FLUSH_RESET_PERMS, NUMA_NO_NODE, + __builtin_return_address(0)); +} + /* install breakpoint in text */ void __kprobes arch_arm_kprobe(struct kprobe *p) { -- cgit v1.2.3 From 1d27d854425faec98f352cf88ec3e2a8844429a4 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:24:54 +0800 Subject: riscv: bpf: Move bpf_jit_alloc_exec() and bpf_jit_free_exec() to core We will drop the executable permissions of the code pages from the mapping at allocation time soon. Move bpf_jit_alloc_exec() and bpf_jit_free_exec() to bpf_jit_core.c so that they can be shared by both RV64I and RV32I. Signed-off-by: Jisheng Zhang Acked-by: Luke Nelson Signed-off-by: Palmer Dabbelt --- arch/riscv/net/bpf_jit_comp64.c | 13 ------------- arch/riscv/net/bpf_jit_core.c | 13 +++++++++++++ 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c index b44ff52f84a6..87e3bf5b9086 100644 --- a/arch/riscv/net/bpf_jit_comp64.c +++ b/arch/riscv/net/bpf_jit_comp64.c @@ -1148,16 +1148,3 @@ void bpf_jit_build_epilogue(struct rv_jit_context *ctx) { __build_epilogue(false, ctx); } - -void *bpf_jit_alloc_exec(unsigned long size) -{ - return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, - BPF_JIT_REGION_END, GFP_KERNEL, - PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, - __builtin_return_address(0)); -} - -void bpf_jit_free_exec(void *addr) -{ - return vfree(addr); -} diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index 3630d447352c..d8da819290b7 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -164,3 +164,16 @@ out: tmp : orig_prog); return prog; } + +void *bpf_jit_alloc_exec(unsigned long size) +{ + return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, + BPF_JIT_REGION_END, GFP_KERNEL, + PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + __builtin_return_address(0)); +} + +void bpf_jit_free_exec(void *addr) +{ + return vfree(addr); +} -- cgit v1.2.3 From fc8504765ec5e812135b8ccafca7101069a0c6d8 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:25:21 +0800 Subject: riscv: bpf: Avoid breaking W^X We allocate Non-executable pages, then call bpf_jit_binary_lock_ro() to enable executable permission after mapping them read-only. This is to prepare for STRICT_MODULE_RWX in following patch. Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/net/bpf_jit_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/net/bpf_jit_core.c b/arch/riscv/net/bpf_jit_core.c index d8da819290b7..fed86f42dfbe 100644 --- a/arch/riscv/net/bpf_jit_core.c +++ b/arch/riscv/net/bpf_jit_core.c @@ -152,6 +152,7 @@ skip_init_ctx: bpf_flush_icache(jit_data->header, ctx->insns + ctx->ninsns); if (!prog->is_func || extra_pass) { + bpf_jit_binary_lock_ro(jit_data->header); out_offset: kfree(ctx->offset); kfree(jit_data); @@ -169,7 +170,7 @@ void *bpf_jit_alloc_exec(unsigned long size) { return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START, BPF_JIT_REGION_END, GFP_KERNEL, - PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } -- cgit v1.2.3 From 5387054b986e2d0d994b519020d81b8aa64789c5 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:25:51 +0800 Subject: riscv: module: Create module allocations without exec permissions The core code manages the executable permissions of code regions of modules explicitly, it is not necessary to create the module vmalloc regions with RWX permissions. Create them with RW- permissions instead. Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/module.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/module.c b/arch/riscv/kernel/module.c index ce153771e5e9..68a9e3d1fe16 100644 --- a/arch/riscv/kernel/module.c +++ b/arch/riscv/kernel/module.c @@ -412,7 +412,7 @@ void *module_alloc(unsigned long size) { return __vmalloc_node_range(size, 1, MODULES_VADDR, MODULES_END, GFP_KERNEL, - PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE, + PAGE_KERNEL, 0, NUMA_NO_NODE, __builtin_return_address(0)); } #endif -- cgit v1.2.3 From a9451b8e19716cf8bf420a1d0e58199558ecaeb5 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Tue, 30 Mar 2021 02:26:17 +0800 Subject: riscv: Set ARCH_HAS_STRICT_MODULE_RWX if MMU Now we can set ARCH_HAS_STRICT_MODULE_RWX for MMU riscv platforms, this is good from security perspective. Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a840004d2829..847ab3c32fc0 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -29,6 +29,7 @@ config RISCV select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if MMU + select ARCH_HAS_STRICT_MODULE_RWX if MMU select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT -- cgit v1.2.3 From b1ebaa0e1318494a7637099a26add50509e37964 Mon Sep 17 00:00:00 2001 From: Liao Chang Date: Tue, 30 Mar 2021 16:18:48 +0800 Subject: riscv/kprobe: fix kernel panic when invoking sys_read traced by kprobe The execution of sys_read end up hitting a BUG_ON() in __find_get_block after installing kprobe at sys_read, the BUG message like the following: [ 65.708663] ------------[ cut here ]------------ [ 65.709987] kernel BUG at fs/buffer.c:1251! [ 65.711283] Kernel BUG [#1] [ 65.712032] Modules linked in: [ 65.712925] CPU: 0 PID: 51 Comm: sh Not tainted 5.12.0-rc4 #1 [ 65.714407] Hardware name: riscv-virtio,qemu (DT) [ 65.715696] epc : __find_get_block+0x218/0x2c8 [ 65.716835] ra : __getblk_gfp+0x1c/0x4a [ 65.717831] epc : ffffffe00019f11e ra : ffffffe00019f56a sp : ffffffe002437930 [ 65.719553] gp : ffffffe000f06030 tp : ffffffe0015abc00 t0 : ffffffe00191e038 [ 65.721290] t1 : ffffffe00191e038 t2 : 000000000000000a s0 : ffffffe002437960 [ 65.723051] s1 : ffffffe00160ad00 a0 : ffffffe00160ad00 a1 : 000000000000012a [ 65.724772] a2 : 0000000000000400 a3 : 0000000000000008 a4 : 0000000000000040 [ 65.726545] a5 : 0000000000000000 a6 : ffffffe00191e000 a7 : 0000000000000000 [ 65.728308] s2 : 000000000000012a s3 : 0000000000000400 s4 : 0000000000000008 [ 65.730049] s5 : 000000000000006c s6 : ffffffe00240f800 s7 : ffffffe000f080a8 [ 65.731802] s8 : 0000000000000001 s9 : 000000000000012a s10: 0000000000000008 [ 65.733516] s11: 0000000000000008 t3 : 00000000000003ff t4 : 000000000000000f [ 65.734434] t5 : 00000000000003ff t6 : 0000000000040000 [ 65.734613] status: 0000000000000100 badaddr: 0000000000000000 cause: 0000000000000003 [ 65.734901] Call Trace: [ 65.735076] [] __find_get_block+0x218/0x2c8 [ 65.735417] [] __ext4_get_inode_loc+0xb2/0x2f6 [ 65.735618] [] ext4_get_inode_loc+0x3a/0x8a [ 65.735802] [] ext4_reserve_inode_write+0x2e/0x8c [ 65.735999] [] __ext4_mark_inode_dirty+0x4c/0x18e [ 65.736208] [] ext4_dirty_inode+0x46/0x66 [ 65.736387] [] __mark_inode_dirty+0x12c/0x3da [ 65.736576] [] touch_atime+0x146/0x150 [ 65.736748] [] filemap_read+0x234/0x246 [ 65.736920] [] generic_file_read_iter+0xc0/0x114 [ 65.737114] [] ext4_file_read_iter+0x42/0xea [ 65.737310] [] new_sync_read+0xe2/0x15a [ 65.737483] [] vfs_read+0xca/0xf2 [ 65.737641] [] ksys_read+0x5e/0xc8 [ 65.737816] [] sys_read+0xe/0x16 [ 65.737973] [] ret_from_syscall+0x0/0x2 [ 65.738858] ---[ end trace fe93f985456c935d ]--- A simple reproducer looks like: echo 'p:myprobe sys_read fd=%a0 buf=%a1 count=%a2' > /sys/kernel/debug/tracing/kprobe_events echo 1 > /sys/kernel/debug/tracing/events/kprobes/myprobe/enable cat /sys/kernel/debug/tracing/trace Here's what happens to hit that BUG_ON(): 1) After installing kprobe at entry of sys_read, the first instruction is replaced by 'ebreak' instruction on riscv64 platform. 2) Once kernel reach the 'ebreak' instruction at the entry of sys_read, it trap into the riscv breakpoint handler, where it do something to setup for coming single-step of origin instruction, including backup the 'sstatus' in pt_regs, followed by disable interrupt during single stepping via clear 'SIE' bit of 'sstatus' in pt_regs. 3) Then kernel restore to the instruction slot contains two instructions, one is original instruction at entry of sys_read, the other is 'ebreak'. Here it trigger a 'Instruction page fault' exception (value at 'scause' is '0xc'), if PF is not filled into PageTabe for that slot yet. 4) Again kernel trap into page fault exception handler, where it choose different policy according to the state of running kprobe. Because afte 2) the state is KPROBE_HIT_SS, so kernel reset the current kprobe and 'pc' points back to the probe address. 5) Because 'epc' point back to 'ebreak' instrution at sys_read probe, kernel trap into breakpoint handler again, and repeat the operations at 2), however 'sstatus' without 'SIE' is keep at 4), it cause the real 'sstatus' saved at 2) is overwritten by the one withou 'SIE'. 6) When kernel cross the probe the 'sstatus' CSR restore with value without 'SIE', and reach __find_get_block where it requires the interrupt must be enabled. Fix this is very trivial, just restore the value of 'sstatus' in pt_regs with backup one at 2) when the instruction being single stepped cause a page fault. Fixes: c22b0bcb1dd02 ("riscv: Add kprobes supported") Signed-off-by: Liao Chang Cc: stable@vger.kernel.org Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/probes/kprobes.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c index 0b451ff76d55..2610febc5012 100644 --- a/arch/riscv/kernel/probes/kprobes.c +++ b/arch/riscv/kernel/probes/kprobes.c @@ -269,8 +269,10 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr) if (kcb->kprobe_status == KPROBE_REENTER) restore_previous_kprobe(kcb); - else + else { + kprobes_restore_local_irqflag(kcb, regs); reset_current_kprobe(); + } break; case KPROBE_HIT_ACTIVE: -- cgit v1.2.3 From e75e6bf47a4723ce16f65c7387c20a8c18a1c13b Mon Sep 17 00:00:00 2001 From: zhouchuangao Date: Tue, 30 Mar 2021 06:56:26 -0700 Subject: riscv/mm: Use BUG_ON instead of if condition followed by BUG. BUG_ON() uses unlikely in if(), which can be optimized at compile time. Signed-off-by: zhouchuangao Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 9914ca0d789b..a2f4c148f02e 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -232,8 +232,8 @@ static phys_addr_t alloc_pte_late(uintptr_t va) unsigned long vaddr; vaddr = __get_free_page(GFP_KERNEL); - if (!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr))) - BUG(); + BUG_ON(!vaddr || !pgtable_pte_page_ctor(virt_to_page(vaddr))); + return __pa(vaddr); } -- cgit v1.2.3 From 772d7891e8b3b0baae7bb88a294d61fd07ba6d15 Mon Sep 17 00:00:00 2001 From: Jisheng Zhang Date: Fri, 2 Apr 2021 21:29:08 +0800 Subject: riscv: vdso: fix and clean-up Makefile Running "make" on an already compiled kernel tree will rebuild the kernel even without any modifications: CALL linux/scripts/checksyscalls.sh CALL linux/scripts/atomic/check-atomics.sh CHK include/generated/compile.h SO2S arch/riscv/kernel/vdso/vdso-syms.S AS arch/riscv/kernel/vdso/vdso-syms.o AR arch/riscv/kernel/vdso/built-in.a AR arch/riscv/kernel/built-in.a AR arch/riscv/built-in.a GEN .version CHK include/generated/compile.h UPD include/generated/compile.h CC init/version.o AR init/built-in.a LD vmlinux.o The reason is "Any target that utilizes if_changed must be listed in $(targets), otherwise the command line check will fail, and the target will always be built" as explained by Documentation/kbuild/makefiles.rst Fix this build bug by adding vdso-syms.S to $(targets) At the same time, there are two trivial clean up modifications: - the vdso-dummy.o is not needed any more after so remove it. - vdso.lds is a generated file, so it should be prefixed with $(obj)/ instead of $(src)/ Fixes: c2c81bb2f691 ("RISC-V: Fix the VDSO symbol generaton for binutils-2.35+") Cc: stable@vger.kernel.org Signed-off-by: Jisheng Zhang Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/vdso/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/vdso/Makefile b/arch/riscv/kernel/vdso/Makefile index ca2b40dfd24b..24d936c147cd 100644 --- a/arch/riscv/kernel/vdso/Makefile +++ b/arch/riscv/kernel/vdso/Makefile @@ -23,7 +23,7 @@ ifneq ($(c-gettimeofday-y),) endif # Build rules -targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-dummy.o +targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-syms.S obj-vdso := $(addprefix $(obj)/, $(obj-vdso)) obj-y += vdso.o vdso-syms.o @@ -41,7 +41,7 @@ KASAN_SANITIZE := n $(obj)/vdso.o: $(obj)/vdso.so # link rule for the .so file, .lds has to be first -$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE +$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(obj-vdso) FORCE $(call if_changed,vdsold) LDFLAGS_vdso.so.dbg = -shared -s -soname=linux-vdso.so.1 \ --build-id=sha1 --hash-style=both --eh-frame-hdr -- cgit v1.2.3 From fba8a8674f68a0628abae470dfcfbcb4a0d7a79e Mon Sep 17 00:00:00 2001 From: Nick Kossifidis Date: Mon, 19 Apr 2021 03:55:36 +0300 Subject: RISC-V: Add kexec support This patch adds support for kexec on RISC-V. On SMP systems it depends on HOTPLUG_CPU in order to be able to bring up all harts after kexec. It also needs a recent OpenSBI version that supports the HSM extension. I tested it on riscv64 QEMU on both an smp and a non-smp system. Signed-off-by: Nick Kossifidis Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 15 +++ arch/riscv/include/asm/kexec.h | 49 ++++++++++ arch/riscv/kernel/Makefile | 5 + arch/riscv/kernel/kexec_relocate.S | 157 +++++++++++++++++++++++++++++++ arch/riscv/kernel/machine_kexec.c | 186 +++++++++++++++++++++++++++++++++++++ 5 files changed, 412 insertions(+) create mode 100644 arch/riscv/include/asm/kexec.h create mode 100644 arch/riscv/kernel/kexec_relocate.S create mode 100644 arch/riscv/kernel/machine_kexec.c (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 847ab3c32fc0..e38bd044610f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -391,6 +391,21 @@ config RISCV_SBI_V01 help This config allows kernel to use SBI v0.1 APIs. This will be deprecated in future once legacy M-mode software are no longer in use. + +config KEXEC + bool "Kexec system call" + select KEXEC_CORE + select HOTPLUG_CPU if SMP + depends on MMU + help + kexec is a system call that implements the ability to shutdown your + current kernel, and to start another kernel. It is like a reboot + but it is independent of the system firmware. And like a reboot + you can start any kernel with it, not just Linux. + + The name comes from the similarity to the exec system call. + + endmenu menu "Boot options" diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h new file mode 100644 index 000000000000..86e6e4922d1f --- /dev/null +++ b/arch/riscv/include/asm/kexec.h @@ -0,0 +1,49 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 FORTH-ICS/CARV + * Nick Kossifidis + */ + +#ifndef _RISCV_KEXEC_H +#define _RISCV_KEXEC_H + +#include /* For PAGE_SIZE */ + +/* Maximum physical address we can use pages from */ +#define KEXEC_SOURCE_MEMORY_LIMIT (-1UL) + +/* Maximum address we can reach in physical address mode */ +#define KEXEC_DESTINATION_MEMORY_LIMIT (-1UL) + +/* Maximum address we can use for the control code buffer */ +#define KEXEC_CONTROL_MEMORY_LIMIT (-1UL) + +/* Reserve a page for the control code buffer */ +#define KEXEC_CONTROL_PAGE_SIZE PAGE_SIZE + +#define KEXEC_ARCH KEXEC_ARCH_RISCV + +static inline void +crash_setup_regs(struct pt_regs *newregs, + struct pt_regs *oldregs) +{ + /* Dummy implementation for now */ +} + + +#define ARCH_HAS_KIMAGE_ARCH + +struct kimage_arch { + unsigned long fdt_addr; +}; + +const extern unsigned char riscv_kexec_relocate[]; +const extern unsigned int riscv_kexec_relocate_size; + +typedef void (*riscv_kexec_do_relocate)(unsigned long first_ind_entry, + unsigned long jump_addr, + unsigned long fdt_addr, + unsigned long hartid, + unsigned long va_pa_off); + +#endif diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 3dc0abde988a..96650aaed5eb 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -9,6 +9,10 @@ CFLAGS_REMOVE_patch.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_sbi.o = $(CC_FLAGS_FTRACE) endif +ifdef CONFIG_KEXEC +AFLAGS_kexec_relocate.o := -mcmodel=medany -mno-relax +endif + extra-y += head.o extra-y += vmlinux.lds @@ -54,6 +58,7 @@ obj-$(CONFIG_SMP) += cpu_ops_sbi.o endif obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o obj-$(CONFIG_KGDB) += kgdb.o +obj-$(CONFIG_KEXEC) += kexec_relocate.o machine_kexec.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S new file mode 100644 index 000000000000..84101d0a23ef --- /dev/null +++ b/arch/riscv/kernel/kexec_relocate.S @@ -0,0 +1,157 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 FORTH-ICS/CARV + * Nick Kossifidis + */ + +#include /* For RISCV_* and REG_* macros */ +#include /* For CSR_* macros */ +#include /* For PAGE_SIZE */ +#include /* For SYM_* macros */ + +.section ".rodata" +SYM_CODE_START(riscv_kexec_relocate) + + /* + * s0: Pointer to the current entry + * s1: (const) Phys address to jump to after relocation + * s2: (const) Phys address of the FDT image + * s3: (const) The hartid of the current hart + * s4: Pointer to the destination address for the relocation + * s5: (const) Number of words per page + * s6: (const) 1, used for subtraction + * s7: (const) va_pa_offset, used when switching MMU off + * s8: (const) Physical address of the main loop + * s9: (debug) indirection page counter + * s10: (debug) entry counter + * s11: (debug) copied words counter + */ + mv s0, a0 + mv s1, a1 + mv s2, a2 + mv s3, a3 + mv s4, zero + li s5, (PAGE_SIZE / RISCV_SZPTR) + li s6, 1 + mv s7, a4 + mv s8, zero + mv s9, zero + mv s10, zero + mv s11, zero + + /* Disable / cleanup interrupts */ + csrw CSR_SIE, zero + csrw CSR_SIP, zero + + /* + * When we switch SATP.MODE to "Bare" we'll only + * play with physical addresses. However the first time + * we try to jump somewhere, the offset on the jump + * will be relative to pc which will still be on VA. To + * deal with this we set stvec to the physical address at + * the start of the loop below so that we jump there in + * any case. + */ + la s8, 1f + sub s8, s8, s7 + csrw CSR_STVEC, s8 + + /* Process entries in a loop */ +.align 2 +1: + addi s10, s10, 1 + REG_L t0, 0(s0) /* t0 = *image->entry */ + addi s0, s0, RISCV_SZPTR /* image->entry++ */ + + /* IND_DESTINATION entry ? -> save destination address */ + andi t1, t0, 0x1 + beqz t1, 2f + andi s4, t0, ~0x1 + j 1b + +2: + /* IND_INDIRECTION entry ? -> update next entry ptr (PA) */ + andi t1, t0, 0x2 + beqz t1, 2f + andi s0, t0, ~0x2 + addi s9, s9, 1 + csrw CSR_SATP, zero + jalr zero, s8, 0 + +2: + /* IND_DONE entry ? -> jump to done label */ + andi t1, t0, 0x4 + beqz t1, 2f + j 4f + +2: + /* + * IND_SOURCE entry ? -> copy page word by word to the + * destination address we got from IND_DESTINATION + */ + andi t1, t0, 0x8 + beqz t1, 1b /* Unknown entry type, ignore it */ + andi t0, t0, ~0x8 + mv t3, s5 /* i = num words per page */ +3: /* copy loop */ + REG_L t1, (t0) /* t1 = *src_ptr */ + REG_S t1, (s4) /* *dst_ptr = *src_ptr */ + addi t0, t0, RISCV_SZPTR /* stc_ptr++ */ + addi s4, s4, RISCV_SZPTR /* dst_ptr++ */ + sub t3, t3, s6 /* i-- */ + addi s11, s11, 1 /* c++ */ + beqz t3, 1b /* copy done ? */ + j 3b + +4: + /* Pass the arguments to the next kernel / Cleanup*/ + mv a0, s3 + mv a1, s2 + mv a2, s1 + + /* Cleanup */ + mv a3, zero + mv a4, zero + mv a5, zero + mv a6, zero + mv a7, zero + + mv s0, zero + mv s1, zero + mv s2, zero + mv s3, zero + mv s4, zero + mv s5, zero + mv s6, zero + mv s7, zero + mv s8, zero + mv s9, zero + mv s10, zero + mv s11, zero + + mv t0, zero + mv t1, zero + mv t2, zero + mv t3, zero + mv t4, zero + mv t5, zero + mv t6, zero + csrw CSR_SEPC, zero + csrw CSR_SCAUSE, zero + csrw CSR_SSCRATCH, zero + + /* + * Make sure the relocated code is visible + * and jump to the new kernel + */ + fence.i + + jalr zero, a2, 0 + +SYM_CODE_END(riscv_kexec_relocate) +riscv_kexec_relocate_end: + + .section ".rodata" +SYM_DATA(riscv_kexec_relocate_size, + .long riscv_kexec_relocate_end - riscv_kexec_relocate) + diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c new file mode 100644 index 000000000000..e165c7874740 --- /dev/null +++ b/arch/riscv/kernel/machine_kexec.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 FORTH-ICS/CARV + * Nick Kossifidis + */ + +#include +#include /* For riscv_kexec_* symbol defines */ +#include /* For smp_send_stop () */ +#include /* For local_flush_icache_all() */ +#include /* For smp_wmb() */ +#include /* For PAGE_MASK */ +#include /* For fdt_check_header() */ +#include /* For set_memory_x() */ +#include /* For unreachable() */ +#include /* For cpu_down() */ + +/** + * kexec_image_info - Print received image details + */ +static void +kexec_image_info(const struct kimage *image) +{ + unsigned long i; + + pr_debug("Kexec image info:\n"); + pr_debug("\ttype: %d\n", image->type); + pr_debug("\tstart: %lx\n", image->start); + pr_debug("\thead: %lx\n", image->head); + pr_debug("\tnr_segments: %lu\n", image->nr_segments); + + for (i = 0; i < image->nr_segments; i++) { + pr_debug("\t segment[%lu]: %016lx - %016lx", i, + image->segment[i].mem, + image->segment[i].mem + image->segment[i].memsz); + pr_debug("\t\t0x%lx bytes, %lu pages\n", + (unsigned long) image->segment[i].memsz, + (unsigned long) image->segment[i].memsz / PAGE_SIZE); + } +} + +/** + * machine_kexec_prepare - Initialize kexec + * + * This function is called from do_kexec_load, when the user has + * provided us with an image to be loaded. Its goal is to validate + * the image and prepare the control code buffer as needed. + * Note that kimage_alloc_init has already been called and the + * control buffer has already been allocated. + */ +int +machine_kexec_prepare(struct kimage *image) +{ + struct kimage_arch *internal = &image->arch; + struct fdt_header fdt = {0}; + void *control_code_buffer = NULL; + unsigned int control_code_buffer_sz = 0; + int i = 0; + + kexec_image_info(image); + + if (image->type == KEXEC_TYPE_CRASH) { + pr_warn("Loading a crash kernel is unsupported for now.\n"); + return -EINVAL; + } + + /* Find the Flattened Device Tree and save its physical address */ + for (i = 0; i < image->nr_segments; i++) { + if (image->segment[i].memsz <= sizeof(fdt)) + continue; + + if (copy_from_user(&fdt, image->segment[i].buf, sizeof(fdt))) + continue; + + if (fdt_check_header(&fdt)) + continue; + + internal->fdt_addr = (unsigned long) image->segment[i].mem; + break; + } + + if (!internal->fdt_addr) { + pr_err("Device tree not included in the provided image\n"); + return -EINVAL; + } + + /* Copy the assembler code for relocation to the control page */ + control_code_buffer = page_address(image->control_code_page); + control_code_buffer_sz = page_size(image->control_code_page); + if (unlikely(riscv_kexec_relocate_size > control_code_buffer_sz)) { + pr_err("Relocation code doesn't fit within a control page\n"); + return -EINVAL; + } + memcpy(control_code_buffer, riscv_kexec_relocate, + riscv_kexec_relocate_size); + + /* Mark the control page executable */ + set_memory_x((unsigned long) control_code_buffer, 1); + + return 0; +} + + +/** + * machine_kexec_cleanup - Cleanup any leftovers from + * machine_kexec_prepare + * + * This function is called by kimage_free to handle any arch-specific + * allocations done on machine_kexec_prepare. Since we didn't do any + * allocations there, this is just an empty function. Note that the + * control buffer is freed by kimage_free. + */ +void +machine_kexec_cleanup(struct kimage *image) +{ +} + + +/* + * machine_shutdown - Prepare for a kexec reboot + * + * This function is called by kernel_kexec just before machine_kexec + * below. Its goal is to prepare the rest of the system (the other + * harts and possibly devices etc) for a kexec reboot. + */ +void machine_shutdown(void) +{ + /* + * No more interrupts on this hart + * until we are back up. + */ + local_irq_disable(); + +#if defined(CONFIG_HOTPLUG_CPU) + smp_shutdown_nonboot_cpus(smp_processor_id()); +#endif +} + +/** + * machine_crash_shutdown - Prepare to kexec after a kernel crash + * + * This function is called by crash_kexec just before machine_kexec + * below and its goal is similar to machine_shutdown, but in case of + * a kernel crash. Since we don't handle such cases yet, this function + * is empty. + */ +void +machine_crash_shutdown(struct pt_regs *regs) +{ +} + +/** + * machine_kexec - Jump to the loaded kimage + * + * This function is called by kernel_kexec which is called by the + * reboot system call when the reboot cmd is LINUX_REBOOT_CMD_KEXEC, + * or by crash_kernel which is called by the kernel's arch-specific + * trap handler in case of a kernel panic. It's the final stage of + * the kexec process where the pre-loaded kimage is ready to be + * executed. We assume at this point that all other harts are + * suspended and this hart will be the new boot hart. + */ +void __noreturn +machine_kexec(struct kimage *image) +{ + struct kimage_arch *internal = &image->arch; + unsigned long jump_addr = (unsigned long) image->start; + unsigned long first_ind_entry = (unsigned long) &image->head; + unsigned long this_hart_id = raw_smp_processor_id(); + unsigned long fdt_addr = internal->fdt_addr; + void *control_code_buffer = page_address(image->control_code_page); + riscv_kexec_do_relocate do_relocate = control_code_buffer; + + pr_notice("Will call new kernel at %08lx from hart id %lx\n", + jump_addr, this_hart_id); + pr_notice("FDT image at %08lx\n", fdt_addr); + + /* Make sure the relocation code is visible to the hart */ + local_flush_icache_all(); + + /* Jump to the relocation code */ + pr_notice("Bye...\n"); + do_relocate(first_ind_entry, jump_addr, fdt_addr, + this_hart_id, va_pa_offset); + unreachable(); +} -- cgit v1.2.3 From ffe0e526126884cf036a6f724220f1f9b4094fd2 Mon Sep 17 00:00:00 2001 From: Nick Kossifidis Date: Mon, 19 Apr 2021 03:55:37 +0300 Subject: RISC-V: Improve init_resources() The kernel region is always present and we know where it is, no need to look for it inside the loop, just ignore it like the rest of the reserved regions within system's memory. Additionally, we don't need to call memblock_free inside the loop, as if called it'll split the region of pre-allocated resources in two parts, messing things up, just re-use the previous pre-allocated resource and free any unused resources after both loops finish. Signed-off-by: Nick Kossifidis [Palmer: commit text] Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/setup.c | 90 ++++++++++++++++++++++++----------------------- 1 file changed, 46 insertions(+), 44 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index d208abc0b473..9193ba3a006b 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -60,6 +60,7 @@ static DEFINE_PER_CPU(struct cpu, cpu_devices); * also add "System RAM" regions for compatibility with other * archs, and the rest of the known regions for completeness. */ +static struct resource kimage_res = { .name = "Kernel image", }; static struct resource code_res = { .name = "Kernel code", }; static struct resource data_res = { .name = "Kernel data", }; static struct resource rodata_res = { .name = "Kernel rodata", }; @@ -80,45 +81,54 @@ static int __init add_resource(struct resource *parent, return 1; } -static int __init add_kernel_resources(struct resource *res) +static int __init add_kernel_resources(void) { int ret = 0; /* * The memory region of the kernel image is continuous and - * was reserved on setup_bootmem, find it here and register - * it as a resource, then register the various segments of - * the image as child nodes + * was reserved on setup_bootmem, register it here as a + * resource, with the various segments of the image as + * child nodes. */ - if (!(res->start <= code_res.start && res->end >= data_res.end)) - return 0; - res->name = "Kernel image"; - res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + code_res.start = __pa_symbol(_text); + code_res.end = __pa_symbol(_etext) - 1; + code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - /* - * We removed a part of this region on setup_bootmem so - * we need to expand the resource for the bss to fit in. - */ - res->end = bss_res.end; + rodata_res.start = __pa_symbol(__start_rodata); + rodata_res.end = __pa_symbol(__end_rodata) - 1; + rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - ret = add_resource(&iomem_resource, res); + data_res.start = __pa_symbol(_data); + data_res.end = __pa_symbol(_edata) - 1; + data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + bss_res.start = __pa_symbol(__bss_start); + bss_res.end = __pa_symbol(__bss_stop) - 1; + bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + kimage_res.start = code_res.start; + kimage_res.end = bss_res.end; + kimage_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + + ret = add_resource(&iomem_resource, &kimage_res); if (ret < 0) return ret; - ret = add_resource(res, &code_res); + ret = add_resource(&kimage_res, &code_res); if (ret < 0) return ret; - ret = add_resource(res, &rodata_res); + ret = add_resource(&kimage_res, &rodata_res); if (ret < 0) return ret; - ret = add_resource(res, &data_res); + ret = add_resource(&kimage_res, &data_res); if (ret < 0) return ret; - ret = add_resource(res, &bss_res); + ret = add_resource(&kimage_res, &bss_res); return ret; } @@ -129,53 +139,42 @@ static void __init init_resources(void) struct resource *res = NULL; struct resource *mem_res = NULL; size_t mem_res_sz = 0; - int ret = 0, i = 0; - - code_res.start = __pa_symbol(_text); - code_res.end = __pa_symbol(_etext) - 1; - code_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - - rodata_res.start = __pa_symbol(__start_rodata); - rodata_res.end = __pa_symbol(__end_rodata) - 1; - rodata_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; - - data_res.start = __pa_symbol(_data); - data_res.end = __pa_symbol(_edata) - 1; - data_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + int num_resources = 0, res_idx = 0; + int ret = 0; - bss_res.start = __pa_symbol(__bss_start); - bss_res.end = __pa_symbol(__bss_stop) - 1; - bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + /* + 1 as memblock_alloc() might increase memblock.reserved.cnt */ + num_resources = memblock.memory.cnt + memblock.reserved.cnt + 1; + res_idx = num_resources - 1; - mem_res_sz = (memblock.memory.cnt + memblock.reserved.cnt) * sizeof(*mem_res); + mem_res_sz = num_resources * sizeof(*mem_res); mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES); if (!mem_res) panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz); + /* * Start by adding the reserved regions, if they overlap * with /memory regions, insert_resource later on will take * care of it. */ + ret = add_kernel_resources(); + if (ret < 0) + goto error; + for_each_reserved_mem_region(region) { - res = &mem_res[i++]; + res = &mem_res[res_idx--]; res->name = "Reserved"; res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; res->start = __pfn_to_phys(memblock_region_reserved_base_pfn(region)); res->end = __pfn_to_phys(memblock_region_reserved_end_pfn(region)) - 1; - ret = add_kernel_resources(res); - if (ret < 0) - goto error; - else if (ret) - continue; - /* * Ignore any other reserved regions within * system memory. */ if (memblock_is_memory(res->start)) { - memblock_free((phys_addr_t) res, sizeof(struct resource)); + /* Re-use this pre-allocated resource */ + res_idx++; continue; } @@ -186,7 +185,7 @@ static void __init init_resources(void) /* Add /memory regions to the resource tree */ for_each_mem_region(region) { - res = &mem_res[i++]; + res = &mem_res[res_idx--]; if (unlikely(memblock_is_nomap(region))) { res->name = "Reserved"; @@ -204,6 +203,9 @@ static void __init init_resources(void) goto error; } + /* Clean-up any unused pre-allocated resources */ + mem_res_sz = (num_resources - res_idx + 1) * sizeof(*mem_res); + memblock_free((phys_addr_t) mem_res, mem_res_sz); return; error: -- cgit v1.2.3 From e53d28180d4d0fd12b6d2bde49cb87aa775b6ba8 Mon Sep 17 00:00:00 2001 From: Nick Kossifidis Date: Mon, 19 Apr 2021 03:55:38 +0300 Subject: RISC-V: Add kdump support This patch adds support for kdump, the kernel will reserve a region for the crash kernel and jump there on panic. In order for userspace tools (kexec-tools) to prepare the crash kernel kexec image, we also need to expose some information on /proc/iomem for the memory regions used by the kernel and for the region reserved for crash kernel. Note that on userspace the device tree is used to determine the system's memory layout so the "System RAM" on /proc/iomem is ignored. I tested this on riscv64 qemu and works as expected, you may test it by triggering a crash through /proc/sysrq_trigger: echo c > /proc/sysrq_trigger Signed-off-by: Nick Kossifidis Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/elf.h | 6 ++++ arch/riscv/include/asm/kexec.h | 19 ++++++---- arch/riscv/kernel/Makefile | 2 +- arch/riscv/kernel/crash_save_regs.S | 56 +++++++++++++++++++++++++++++ arch/riscv/kernel/kexec_relocate.S | 68 ++++++++++++++++++++++++++++++++++- arch/riscv/kernel/machine_kexec.c | 43 ++++++++++++---------- arch/riscv/kernel/setup.c | 11 +++++- arch/riscv/mm/init.c | 71 +++++++++++++++++++++++++++++++++++++ 8 files changed, 249 insertions(+), 27 deletions(-) create mode 100644 arch/riscv/kernel/crash_save_regs.S (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/elf.h b/arch/riscv/include/asm/elf.h index 5c725e1df58b..f4b490cd0e5d 100644 --- a/arch/riscv/include/asm/elf.h +++ b/arch/riscv/include/asm/elf.h @@ -81,4 +81,10 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp); #endif /* CONFIG_MMU */ +#define ELF_CORE_COPY_REGS(dest, regs) \ +do { \ + *(struct user_regs_struct *)&(dest) = \ + *(struct user_regs_struct *)regs; \ +} while (0); + #endif /* _ASM_RISCV_ELF_H */ diff --git a/arch/riscv/include/asm/kexec.h b/arch/riscv/include/asm/kexec.h index 86e6e4922d1f..1e954101906a 100644 --- a/arch/riscv/include/asm/kexec.h +++ b/arch/riscv/include/asm/kexec.h @@ -23,11 +23,16 @@ #define KEXEC_ARCH KEXEC_ARCH_RISCV +extern void riscv_crash_save_regs(struct pt_regs *newregs); + static inline void crash_setup_regs(struct pt_regs *newregs, struct pt_regs *oldregs) { - /* Dummy implementation for now */ + if (oldregs) + memcpy(newregs, oldregs, sizeof(struct pt_regs)); + else + riscv_crash_save_regs(newregs); } @@ -40,10 +45,12 @@ struct kimage_arch { const extern unsigned char riscv_kexec_relocate[]; const extern unsigned int riscv_kexec_relocate_size; -typedef void (*riscv_kexec_do_relocate)(unsigned long first_ind_entry, - unsigned long jump_addr, - unsigned long fdt_addr, - unsigned long hartid, - unsigned long va_pa_off); +typedef void (*riscv_kexec_method)(unsigned long first_ind_entry, + unsigned long jump_addr, + unsigned long fdt_addr, + unsigned long hartid, + unsigned long va_pa_off); + +extern riscv_kexec_method riscv_kexec_norelocate; #endif diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 96650aaed5eb..3ee07bf0cea7 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -58,7 +58,7 @@ obj-$(CONFIG_SMP) += cpu_ops_sbi.o endif obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o obj-$(CONFIG_KGDB) += kgdb.o -obj-$(CONFIG_KEXEC) += kexec_relocate.o machine_kexec.o +obj-$(CONFIG_KEXEC) += kexec_relocate.o crash_save_regs.o machine_kexec.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o diff --git a/arch/riscv/kernel/crash_save_regs.S b/arch/riscv/kernel/crash_save_regs.S new file mode 100644 index 000000000000..7832fb763aba --- /dev/null +++ b/arch/riscv/kernel/crash_save_regs.S @@ -0,0 +1,56 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2020 FORTH-ICS/CARV + * Nick Kossifidis + */ + +#include /* For RISCV_* and REG_* macros */ +#include /* For CSR_* macros */ +#include /* For offsets on pt_regs */ +#include /* For SYM_* macros */ + +.section ".text" +SYM_CODE_START(riscv_crash_save_regs) + REG_S ra, PT_RA(a0) /* x1 */ + REG_S sp, PT_SP(a0) /* x2 */ + REG_S gp, PT_GP(a0) /* x3 */ + REG_S tp, PT_TP(a0) /* x4 */ + REG_S t0, PT_T0(a0) /* x5 */ + REG_S t1, PT_T1(a0) /* x6 */ + REG_S t2, PT_T2(a0) /* x7 */ + REG_S s0, PT_S0(a0) /* x8/fp */ + REG_S s1, PT_S1(a0) /* x9 */ + REG_S a0, PT_A0(a0) /* x10 */ + REG_S a1, PT_A1(a0) /* x11 */ + REG_S a2, PT_A2(a0) /* x12 */ + REG_S a3, PT_A3(a0) /* x13 */ + REG_S a4, PT_A4(a0) /* x14 */ + REG_S a5, PT_A5(a0) /* x15 */ + REG_S a6, PT_A6(a0) /* x16 */ + REG_S a7, PT_A7(a0) /* x17 */ + REG_S s2, PT_S2(a0) /* x18 */ + REG_S s3, PT_S3(a0) /* x19 */ + REG_S s4, PT_S4(a0) /* x20 */ + REG_S s5, PT_S5(a0) /* x21 */ + REG_S s6, PT_S6(a0) /* x22 */ + REG_S s7, PT_S7(a0) /* x23 */ + REG_S s8, PT_S8(a0) /* x24 */ + REG_S s9, PT_S9(a0) /* x25 */ + REG_S s10, PT_S10(a0) /* x26 */ + REG_S s11, PT_S11(a0) /* x27 */ + REG_S t3, PT_T3(a0) /* x28 */ + REG_S t4, PT_T4(a0) /* x29 */ + REG_S t5, PT_T5(a0) /* x30 */ + REG_S t6, PT_T6(a0) /* x31 */ + + csrr t1, CSR_STATUS + csrr t2, CSR_EPC + csrr t3, CSR_TVAL + csrr t4, CSR_CAUSE + + REG_S t1, PT_STATUS(a0) + REG_S t2, PT_EPC(a0) + REG_S t3, PT_BADADDR(a0) + REG_S t4, PT_CAUSE(a0) + ret +SYM_CODE_END(riscv_crash_save_regs) diff --git a/arch/riscv/kernel/kexec_relocate.S b/arch/riscv/kernel/kexec_relocate.S index 84101d0a23ef..88c3beabe9b4 100644 --- a/arch/riscv/kernel/kexec_relocate.S +++ b/arch/riscv/kernel/kexec_relocate.S @@ -151,7 +151,73 @@ SYM_CODE_START(riscv_kexec_relocate) SYM_CODE_END(riscv_kexec_relocate) riscv_kexec_relocate_end: - .section ".rodata" + +/* Used for jumping to crashkernel */ +.section ".text" +SYM_CODE_START(riscv_kexec_norelocate) + /* + * s0: (const) Phys address to jump to + * s1: (const) Phys address of the FDT image + * s2: (const) The hartid of the current hart + * s3: (const) va_pa_offset, used when switching MMU off + */ + mv s0, a1 + mv s1, a2 + mv s2, a3 + mv s3, a4 + + /* Disable / cleanup interrupts */ + csrw CSR_SIE, zero + csrw CSR_SIP, zero + + /* Switch to physical addressing */ + la s4, 1f + sub s4, s4, s3 + csrw CSR_STVEC, s4 + csrw CSR_SATP, zero + +.align 2 +1: + /* Pass the arguments to the next kernel / Cleanup*/ + mv a0, s2 + mv a1, s1 + mv a2, s0 + + /* Cleanup */ + mv a3, zero + mv a4, zero + mv a5, zero + mv a6, zero + mv a7, zero + + mv s0, zero + mv s1, zero + mv s2, zero + mv s3, zero + mv s4, zero + mv s5, zero + mv s6, zero + mv s7, zero + mv s8, zero + mv s9, zero + mv s10, zero + mv s11, zero + + mv t0, zero + mv t1, zero + mv t2, zero + mv t3, zero + mv t4, zero + mv t5, zero + mv t6, zero + csrw CSR_SEPC, zero + csrw CSR_SCAUSE, zero + csrw CSR_SSCRATCH, zero + + jalr zero, a2, 0 +SYM_CODE_END(riscv_kexec_norelocate) + +.section ".rodata" SYM_DATA(riscv_kexec_relocate_size, .long riscv_kexec_relocate_end - riscv_kexec_relocate) diff --git a/arch/riscv/kernel/machine_kexec.c b/arch/riscv/kernel/machine_kexec.c index e165c7874740..cc048143fba5 100644 --- a/arch/riscv/kernel/machine_kexec.c +++ b/arch/riscv/kernel/machine_kexec.c @@ -59,11 +59,6 @@ machine_kexec_prepare(struct kimage *image) kexec_image_info(image); - if (image->type == KEXEC_TYPE_CRASH) { - pr_warn("Loading a crash kernel is unsupported for now.\n"); - return -EINVAL; - } - /* Find the Flattened Device Tree and save its physical address */ for (i = 0; i < image->nr_segments; i++) { if (image->segment[i].memsz <= sizeof(fdt)) @@ -85,17 +80,21 @@ machine_kexec_prepare(struct kimage *image) } /* Copy the assembler code for relocation to the control page */ - control_code_buffer = page_address(image->control_code_page); - control_code_buffer_sz = page_size(image->control_code_page); - if (unlikely(riscv_kexec_relocate_size > control_code_buffer_sz)) { - pr_err("Relocation code doesn't fit within a control page\n"); - return -EINVAL; - } - memcpy(control_code_buffer, riscv_kexec_relocate, - riscv_kexec_relocate_size); + if (image->type != KEXEC_TYPE_CRASH) { + control_code_buffer = page_address(image->control_code_page); + control_code_buffer_sz = page_size(image->control_code_page); - /* Mark the control page executable */ - set_memory_x((unsigned long) control_code_buffer, 1); + if (unlikely(riscv_kexec_relocate_size > control_code_buffer_sz)) { + pr_err("Relocation code doesn't fit within a control page\n"); + return -EINVAL; + } + + memcpy(control_code_buffer, riscv_kexec_relocate, + riscv_kexec_relocate_size); + + /* Mark the control page executable */ + set_memory_x((unsigned long) control_code_buffer, 1); + } return 0; } @@ -147,6 +146,9 @@ void machine_shutdown(void) void machine_crash_shutdown(struct pt_regs *regs) { + crash_save_cpu(regs, smp_processor_id()); + machine_shutdown(); + pr_info("Starting crashdump kernel...\n"); } /** @@ -169,7 +171,12 @@ machine_kexec(struct kimage *image) unsigned long this_hart_id = raw_smp_processor_id(); unsigned long fdt_addr = internal->fdt_addr; void *control_code_buffer = page_address(image->control_code_page); - riscv_kexec_do_relocate do_relocate = control_code_buffer; + riscv_kexec_method kexec_method = NULL; + + if (image->type != KEXEC_TYPE_CRASH) + kexec_method = control_code_buffer; + else + kexec_method = (riscv_kexec_method) &riscv_kexec_norelocate; pr_notice("Will call new kernel at %08lx from hart id %lx\n", jump_addr, this_hart_id); @@ -180,7 +187,7 @@ machine_kexec(struct kimage *image) /* Jump to the relocation code */ pr_notice("Bye...\n"); - do_relocate(first_ind_entry, jump_addr, fdt_addr, - this_hart_id, va_pa_offset); + kexec_method(first_ind_entry, jump_addr, fdt_addr, + this_hart_id, va_pa_offset); unreachable(); } diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 9193ba3a006b..0aa9f8340115 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -160,6 +161,14 @@ static void __init init_resources(void) if (ret < 0) goto error; +#ifdef CONFIG_KEXEC_CORE + if (crashk_res.start != crashk_res.end) { + ret = add_resource(&iomem_resource, &crashk_res); + if (ret < 0) + goto error; + } +#endif + for_each_reserved_mem_region(region) { res = &mem_res[res_idx--]; @@ -252,7 +261,6 @@ void __init setup_arch(char **cmdline_p) efi_init(); setup_bootmem(); paging_init(); - init_resources(); #if IS_ENABLED(CONFIG_BUILTIN_DTB) unflatten_and_copy_device_tree(); #else @@ -263,6 +271,7 @@ void __init setup_arch(char **cmdline_p) #endif misc_mem_init(); + init_resources(); sbi_init(); if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index a2f4c148f02e..63c94d33e18a 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -2,6 +2,8 @@ /* * Copyright (C) 2012 Regents of the University of California * Copyright (C) 2019 Western Digital Corporation or its affiliates. + * Copyright (C) 2020 FORTH-ICS/CARV + * Nick Kossifidis */ #include @@ -14,6 +16,7 @@ #include #include #include +#include #include #include @@ -658,6 +661,71 @@ void mark_rodata_ro(void) } #endif +#ifdef CONFIG_KEXEC_CORE +/* + * reserve_crashkernel() - reserves memory for crash kernel + * + * This function reserves memory area given in "crashkernel=" kernel command + * line parameter. The memory reserved is used by dump capture kernel when + * primary kernel is crashing. + */ +static void __init reserve_crashkernel(void) +{ + unsigned long long crash_base = 0; + unsigned long long crash_size = 0; + unsigned long search_start = memblock_start_of_DRAM(); + unsigned long search_end = memblock_end_of_DRAM(); + + int ret = 0; + + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), + &crash_size, &crash_base); + if (ret || !crash_size) + return; + + crash_size = PAGE_ALIGN(crash_size); + + if (crash_base == 0) { + /* + * Current riscv boot protocol requires 2MB alignment for + * RV64 and 4MB alignment for RV32 (hugepage size) + */ + crash_base = memblock_find_in_range(search_start, search_end, + crash_size, PMD_SIZE); + + if (crash_base == 0) { + pr_warn("crashkernel: couldn't allocate %lldKB\n", + crash_size >> 10); + return; + } + } else { + /* User specifies base address explicitly. */ + if (!memblock_is_region_memory(crash_base, crash_size)) { + pr_warn("crashkernel: requested region is not memory\n"); + return; + } + + if (memblock_is_region_reserved(crash_base, crash_size)) { + pr_warn("crashkernel: requested region is reserved\n"); + return; + } + + + if (!IS_ALIGNED(crash_base, PMD_SIZE)) { + pr_warn("crashkernel: requested region is misaligned\n"); + return; + } + } + memblock_reserve(crash_base, crash_size); + + pr_info("crashkernel: reserved 0x%016llx - 0x%016llx (%lld MB)\n", + crash_base, crash_base + crash_size, crash_size >> 20); + + crashk_res.start = crash_base; + crashk_res.end = crash_base + crash_size - 1; +} +#endif /* CONFIG_KEXEC_CORE */ + void __init paging_init(void) { setup_vm_final(); @@ -670,6 +738,9 @@ void __init misc_mem_init(void) arch_numa_init(); sparse_init(); zone_sizes_init(); +#ifdef CONFIG_KEXEC_CORE + reserve_crashkernel(); +#endif memblock_dump_all(); } -- cgit v1.2.3 From 5640975003d0234da08559677e22ec25b9cb3267 Mon Sep 17 00:00:00 2001 From: Nick Kossifidis Date: Mon, 19 Apr 2021 03:55:39 +0300 Subject: RISC-V: Add crash kernel support This patch allows Linux to act as a crash kernel for use with kdump. Userspace will let the crash kernel know about the memory region it can use through linux,usable-memory property on the /memory node (overriding its reg property), and about the memory region where the elf core header of the previous kernel is saved, through a reserved-memory node with a compatible string of "linux,elfcorehdr". This approach is the least invasive and re-uses functionality already present. I tested this on riscv64 qemu and it works as expected, you may test it by retrieving the dmesg of the previous kernel through /proc/vmcore, using the vmcore-dmesg utility from kexec-tools. Signed-off-by: Nick Kossifidis Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 10 +++++++++ arch/riscv/kernel/Makefile | 1 + arch/riscv/kernel/crash_dump.c | 46 ++++++++++++++++++++++++++++++++++++++++++ arch/riscv/kernel/setup.c | 12 +++++++++++ arch/riscv/mm/init.c | 33 ++++++++++++++++++++++++++++++ 5 files changed, 102 insertions(+) create mode 100644 arch/riscv/kernel/crash_dump.c (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index e38bd044610f..61514e0d268e 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -405,6 +405,16 @@ config KEXEC The name comes from the similarity to the exec system call. +config CRASH_DUMP + bool "Build kdump crash kernel" + help + Generate crash dump after being started by kexec. This should + be normally only set in special crash dump kernels which are + loaded in the main kernel with kexec-tools into a specially + reserved region and then later executed after a crash by + kdump/kexec. + + For more details see Documentation/admin-guide/kdump/kdump.rst endmenu diff --git a/arch/riscv/kernel/Makefile b/arch/riscv/kernel/Makefile index 3ee07bf0cea7..56d5cd2a5982 100644 --- a/arch/riscv/kernel/Makefile +++ b/arch/riscv/kernel/Makefile @@ -59,6 +59,7 @@ endif obj-$(CONFIG_HOTPLUG_CPU) += cpu-hotplug.o obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KEXEC) += kexec_relocate.o crash_save_regs.o machine_kexec.o +obj-$(CONFIG_CRASH_DUMP) += crash_dump.o obj-$(CONFIG_JUMP_LABEL) += jump_label.o diff --git a/arch/riscv/kernel/crash_dump.c b/arch/riscv/kernel/crash_dump.c new file mode 100644 index 000000000000..86cc0ada5752 --- /dev/null +++ b/arch/riscv/kernel/crash_dump.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * This code comes from arch/arm64/kernel/crash_dump.c + * Created by: AKASHI Takahiro + * Copyright (C) 2017 Linaro Limited + */ + +#include +#include + +/** + * copy_oldmem_page() - copy one page from old kernel memory + * @pfn: page frame number to be copied + * @buf: buffer where the copied page is placed + * @csize: number of bytes to copy + * @offset: offset in bytes into the page + * @userbuf: if set, @buf is in a user address space + * + * This function copies one page from old kernel memory into buffer pointed by + * @buf. If @buf is in userspace, set @userbuf to %1. Returns number of bytes + * copied or negative error in case of failure. + */ +ssize_t copy_oldmem_page(unsigned long pfn, char *buf, + size_t csize, unsigned long offset, + int userbuf) +{ + void *vaddr; + + if (!csize) + return 0; + + vaddr = memremap(__pfn_to_phys(pfn), PAGE_SIZE, MEMREMAP_WB); + if (!vaddr) + return -ENOMEM; + + if (userbuf) { + if (copy_to_user((char __user *)buf, vaddr + offset, csize)) { + memunmap(vaddr); + return -EFAULT; + } + } else + memcpy(buf, vaddr + offset, csize); + + memunmap(vaddr); + return csize; +} diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 0aa9f8340115..932ef73cf622 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -66,6 +66,9 @@ static struct resource code_res = { .name = "Kernel code", }; static struct resource data_res = { .name = "Kernel data", }; static struct resource rodata_res = { .name = "Kernel rodata", }; static struct resource bss_res = { .name = "Kernel bss", }; +#ifdef CONFIG_CRASH_DUMP +static struct resource elfcorehdr_res = { .name = "ELF Core hdr", }; +#endif static int __init add_resource(struct resource *parent, struct resource *res) @@ -169,6 +172,15 @@ static void __init init_resources(void) } #endif +#ifdef CONFIG_CRASH_DUMP + if (elfcorehdr_size > 0) { + elfcorehdr_res.start = elfcorehdr_addr; + elfcorehdr_res.end = elfcorehdr_addr + elfcorehdr_size - 1; + elfcorehdr_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; + add_resource(&iomem_resource, &elfcorehdr_res); + } +#endif + for_each_reserved_mem_region(region) { res = &mem_res[res_idx--]; diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 63c94d33e18a..3cb4c1b6cee1 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -678,6 +679,18 @@ static void __init reserve_crashkernel(void) int ret = 0; + /* + * Don't reserve a region for a crash kernel on a crash kernel + * since it doesn't make much sense and we have limited memory + * resources. + */ +#ifdef CONFIG_CRASH_DUMP + if (is_kdump_kernel()) { + pr_info("crashkernel: ignoring reservation request\n"); + return; + } +#endif + ret = parse_crashkernel(boot_command_line, memblock_phys_mem_size(), &crash_size, &crash_base); if (ret || !crash_size) @@ -726,6 +739,26 @@ static void __init reserve_crashkernel(void) } #endif /* CONFIG_KEXEC_CORE */ +#ifdef CONFIG_CRASH_DUMP +/* + * We keep track of the ELF core header of the crashed + * kernel with a reserved-memory region with compatible + * string "linux,elfcorehdr". Here we register a callback + * to populate elfcorehdr_addr/size when this region is + * present. Note that this region will be marked as + * reserved once we call early_init_fdt_scan_reserved_mem() + * later on. + */ +static int elfcore_hdr_setup(struct reserved_mem *rmem) +{ + elfcorehdr_addr = rmem->base; + elfcorehdr_size = rmem->size; + return 0; +} + +RESERVEDMEM_OF_DECLARE(elfcorehdr, "linux,elfcorehdr", elfcore_hdr_setup); +#endif + void __init paging_init(void) { setup_vm_final(); -- cgit v1.2.3 From 44c922572952d89a1ed15764f2b373ba62692865 Mon Sep 17 00:00:00 2001 From: Vitaly Wool Date: Tue, 13 Apr 2021 02:35:14 -0400 Subject: RISC-V: enable XIP Introduce XIP (eXecute In Place) support for RISC-V platforms. It allows code to be executed directly from non-volatile storage directly addressable by the CPU, such as QSPI NOR flash which can be found on many RISC-V platforms. This makes way for significant optimization of RAM footprint. The XIP kernel is not compressed since it has to run directly from flash, so it will occupy more space on the non-volatile storage. The physical flash address used to link the kernel object files and for storing it has to be known at compile time and is represented by a Kconfig option. XIP on RISC-V will for the time being only work on MMU-enabled kernels. Signed-off-by: Vitaly Wool [Alex: Rebase on top of "Move kernel mapping outside the linear mapping" ] Signed-off-by: Alexandre Ghiti [Palmer: disable XIP for allyesconfig] Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig | 60 ++++++++++++++-- arch/riscv/Makefile | 8 ++- arch/riscv/boot/Makefile | 13 ++++ arch/riscv/include/asm/page.h | 21 ++++++ arch/riscv/include/asm/pgtable.h | 25 ++++++- arch/riscv/kernel/head.S | 46 ++++++++++++- arch/riscv/kernel/head.h | 3 + arch/riscv/kernel/setup.c | 11 ++- arch/riscv/kernel/vmlinux-xip.lds.S | 133 ++++++++++++++++++++++++++++++++++++ arch/riscv/kernel/vmlinux.lds.S | 6 ++ arch/riscv/mm/init.c | 117 ++++++++++++++++++++++++++++--- 11 files changed, 424 insertions(+), 19 deletions(-) create mode 100644 arch/riscv/kernel/vmlinux-xip.lds.S (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 61514e0d268e..6ddf5a2eb84d 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -28,8 +28,8 @@ config RISCV select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_SET_DIRECT_MAP select ARCH_HAS_SET_MEMORY - select ARCH_HAS_STRICT_KERNEL_RWX if MMU - select ARCH_HAS_STRICT_MODULE_RWX if MMU + select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL + select ARCH_HAS_STRICT_MODULE_RWX if MMU && !XIP_KERNEL select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT @@ -468,7 +468,7 @@ config EFI_STUB config EFI bool "UEFI runtime support" - depends on OF + depends on OF && !XIP_KERNEL select LIBFDT select UCS2_STRING select EFI_PARAMS_FROM_FDT @@ -492,11 +492,63 @@ config STACKPROTECTOR_PER_TASK def_bool y depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS +config PHYS_RAM_BASE_FIXED + bool "Explicitly specified physical RAM address" + default n + +config PHYS_RAM_BASE + hex "Platform Physical RAM address" + depends on PHYS_RAM_BASE_FIXED + default "0x80000000" + help + This is the physical address of RAM in the system. It has to be + explicitly specified to run early relocations of read-write data + from flash to RAM. + +config XIP_KERNEL + bool "Kernel Execute-In-Place from ROM" + depends on MMU && SPARSEMEM + # This prevents XIP from being enabled by all{yes,mod}config, which + # fail to build since XIP doesn't support large kernels. + depends on !COMPILE_TEST + select PHYS_RAM_BASE_FIXED + help + Execute-In-Place allows the kernel to run from non-volatile storage + directly addressable by the CPU, such as NOR flash. This saves RAM + space since the text section of the kernel is not loaded from flash + to RAM. Read-write sections, such as the data section and stack, + are still copied to RAM. The XIP kernel is not compressed since + it has to run directly from flash, so it will take more space to + store it. The flash address used to link the kernel object files, + and for storing it, is configuration dependent. Therefore, if you + say Y here, you must know the proper physical address where to + store the kernel image depending on your own flash memory usage. + + Also note that the make target becomes "make xipImage" rather than + "make zImage" or "make Image". The final kernel binary to put in + ROM memory will be arch/riscv/boot/xipImage. + + SPARSEMEM is required because the kernel text and rodata that are + flash resident are not backed by memmap, then any attempt to get + a struct page on those regions will trigger a fault. + + If unsure, say N. + +config XIP_PHYS_ADDR + hex "XIP Kernel Physical Location" + depends on XIP_KERNEL + default "0x21000000" + help + This is the physical address in your flash memory the kernel will + be linked for and stored to. This address is dependent on your + own flash usage. + endmenu config BUILTIN_DTB - def_bool n + bool depends on OF + default y if XIP_KERNEL menu "Power management options" diff --git a/arch/riscv/Makefile b/arch/riscv/Makefile index 1f5c03082976..3eb9590a0775 100644 --- a/arch/riscv/Makefile +++ b/arch/riscv/Makefile @@ -82,7 +82,11 @@ CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS) # Default target when executing plain make boot := arch/riscv/boot +ifeq ($(CONFIG_XIP_KERNEL),y) +KBUILD_IMAGE := $(boot)/xipImage +else KBUILD_IMAGE := $(boot)/Image.gz +endif head-y := arch/riscv/kernel/head.o @@ -96,12 +100,14 @@ PHONY += vdso_install vdso_install: $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@ +ifneq ($(CONFIG_XIP_KERNEL),y) ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy) KBUILD_IMAGE := $(boot)/loader.bin else KBUILD_IMAGE := $(boot)/Image.gz endif -BOOT_TARGETS := Image Image.gz loader loader.bin +endif +BOOT_TARGETS := Image Image.gz loader loader.bin xipImage all: $(notdir $(KBUILD_IMAGE)) diff --git a/arch/riscv/boot/Makefile b/arch/riscv/boot/Makefile index 03404c84f971..6bf299f70c27 100644 --- a/arch/riscv/boot/Makefile +++ b/arch/riscv/boot/Makefile @@ -17,8 +17,21 @@ KCOV_INSTRUMENT := n OBJCOPYFLAGS_Image :=-O binary -R .note -R .note.gnu.build-id -R .comment -S +OBJCOPYFLAGS_xipImage :=-O binary -R .note -R .note.gnu.build-id -R .comment -S targets := Image Image.* loader loader.o loader.lds loader.bin +targets := Image Image.* loader loader.o loader.lds loader.bin xipImage + +ifeq ($(CONFIG_XIP_KERNEL),y) + +quiet_cmd_mkxip = $(quiet_cmd_objcopy) +cmd_mkxip = $(cmd_objcopy) + +$(obj)/xipImage: vmlinux FORCE + $(call if_changed,mkxip) + @$(kecho) ' Physical Address of xipImage: $(CONFIG_XIP_PHYS_ADDR)' + +endif $(obj)/Image: vmlinux FORCE $(call if_changed,objcopy) diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index f64b61296c0c..e280ba60cb34 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -93,6 +93,9 @@ extern unsigned long va_pa_offset; #ifdef CONFIG_64BIT extern unsigned long va_kernel_pa_offset; #endif +#ifdef CONFIG_XIP_KERNEL +extern unsigned long va_kernel_xip_pa_offset; +#endif extern unsigned long pfn_base; #define ARCH_PFN_OFFSET (pfn_base) #else @@ -107,11 +110,29 @@ extern unsigned long pfn_base; extern unsigned long kernel_virt_addr; #define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + va_pa_offset)) +#ifdef CONFIG_XIP_KERNEL +#define kernel_mapping_pa_to_va(y) ({ \ + unsigned long _y = y; \ + (_y >= CONFIG_PHYS_RAM_BASE) ? \ + (void *)((unsigned long)(_y) + va_kernel_pa_offset + XIP_OFFSET) : \ + (void *)((unsigned long)(_y) + va_kernel_xip_pa_offset); \ + }) +#else #define kernel_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + va_kernel_pa_offset)) +#endif #define __pa_to_va_nodebug(x) linear_mapping_pa_to_va(x) #define linear_mapping_va_to_pa(x) ((unsigned long)(x) - va_pa_offset) +#ifdef CONFIG_XIP_KERNEL +#define kernel_mapping_va_to_pa(y) ({ \ + unsigned long _y = y; \ + (_y < kernel_virt_addr + XIP_OFFSET) ? \ + ((unsigned long)(_y) - va_kernel_xip_pa_offset) : \ + ((unsigned long)(_y) - va_kernel_pa_offset - XIP_OFFSET); \ + }) +#else #define kernel_mapping_va_to_pa(x) ((unsigned long)(x) - va_kernel_pa_offset) +#endif #define __va_to_pa_nodebug(x) ({ \ unsigned long _x = x; \ (_x < kernel_virt_addr) ? \ diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 5afda75cc2c3..2f1384e14e31 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -72,6 +72,19 @@ #define FIXADDR_SIZE PGDIR_SIZE #endif #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) + +#ifdef CONFIG_XIP_KERNEL +#define XIP_OFFSET SZ_8M +#define XIP_FIXUP(addr) ({ \ + uintptr_t __a = (uintptr_t)(addr); \ + (__a >= CONFIG_XIP_PHYS_ADDR && __a < CONFIG_XIP_PHYS_ADDR + SZ_16M) ? \ + __a - CONFIG_XIP_PHYS_ADDR + CONFIG_PHYS_RAM_BASE - XIP_OFFSET :\ + __a; \ + }) +#else +#define XIP_FIXUP(addr) (addr) +#endif /* CONFIG_XIP_KERNEL */ + #endif #ifndef __ASSEMBLY__ @@ -507,8 +520,16 @@ static inline int ptep_clear_flush_young(struct vm_area_struct *vma, #define kern_addr_valid(addr) (1) /* FIXME */ extern char _start[]; -extern void *dtb_early_va; -extern uintptr_t dtb_early_pa; +extern void *_dtb_early_va; +extern uintptr_t _dtb_early_pa; +#if defined(CONFIG_XIP_KERNEL) && defined(CONFIG_MMU) +#define dtb_early_va (*(void **)XIP_FIXUP(&_dtb_early_va)) +#define dtb_early_pa (*(uintptr_t *)XIP_FIXUP(&_dtb_early_pa)) +#else +#define dtb_early_va _dtb_early_va +#define dtb_early_pa _dtb_early_pa +#endif /* CONFIG_XIP_KERNEL */ + void setup_bootmem(void); void paging_init(void); void misc_mem_init(void); diff --git a/arch/riscv/kernel/head.S b/arch/riscv/kernel/head.S index 6cb05f22e52a..89cc58ab52b4 100644 --- a/arch/riscv/kernel/head.S +++ b/arch/riscv/kernel/head.S @@ -9,11 +9,23 @@ #include #include #include +#include #include #include #include #include "efi-header.S" +#ifdef CONFIG_XIP_KERNEL +.macro XIP_FIXUP_OFFSET reg + REG_L t0, _xip_fixup + add \reg, \reg, t0 +.endm +_xip_fixup: .dword CONFIG_PHYS_RAM_BASE - CONFIG_XIP_PHYS_ADDR - XIP_OFFSET +#else +.macro XIP_FIXUP_OFFSET reg +.endm +#endif /* CONFIG_XIP_KERNEL */ + __HEAD ENTRY(_start) /* @@ -70,6 +82,7 @@ pe_head_start: relocate: /* Relocate return address */ la a1, kernel_virt_addr + XIP_FIXUP_OFFSET a1 REG_L a1, 0(a1) la a2, _start sub a1, a1, a2 @@ -92,6 +105,7 @@ relocate: * to ensure the new translations are in use. */ la a0, trampoline_pg_dir + XIP_FIXUP_OFFSET a0 srl a0, a0, PAGE_SHIFT or a0, a0, a1 sfence.vma @@ -145,7 +159,9 @@ secondary_start_sbi: slli a3, a0, LGREG la a4, __cpu_up_stack_pointer + XIP_FIXUP_OFFSET a4 la a5, __cpu_up_task_pointer + XIP_FIXUP_OFFSET a5 add a4, a3, a4 add a5, a3, a5 REG_L sp, (a4) @@ -157,6 +173,7 @@ secondary_start_common: #ifdef CONFIG_MMU /* Enable virtual memory and relocate to virtual address */ la a0, swapper_pg_dir + XIP_FIXUP_OFFSET a0 call relocate #endif call setup_trap_vector @@ -237,12 +254,33 @@ pmp_done: .Lgood_cores: #endif +#ifndef CONFIG_XIP_KERNEL /* Pick one hart to run the main boot sequence */ la a3, hart_lottery li a2, 1 amoadd.w a3, a2, (a3) bnez a3, .Lsecondary_start +#else + /* hart_lottery in flash contains a magic number */ + la a3, hart_lottery + mv a2, a3 + XIP_FIXUP_OFFSET a2 + lw t1, (a3) + amoswap.w t0, t1, (a2) + /* first time here if hart_lottery in RAM is not set */ + beq t0, t1, .Lsecondary_start + + la sp, _end + THREAD_SIZE + XIP_FIXUP_OFFSET sp + mv s0, a0 + call __copy_data + + /* Restore a0 copy */ + mv a0, s0 +#endif + +#ifndef CONFIG_XIP_KERNEL /* Clear BSS for flat non-ELF images */ la a3, __bss_start la a4, __bss_stop @@ -252,15 +290,18 @@ clear_bss: add a3, a3, RISCV_SZPTR blt a3, a4, clear_bss clear_bss_done: - +#endif /* Save hart ID and DTB physical address */ mv s0, a0 mv s1, a1 + la a2, boot_cpu_hartid + XIP_FIXUP_OFFSET a2 REG_S a0, (a2) /* Initialize page tables and relocate to virtual addresses */ la sp, init_thread_union + THREAD_SIZE + XIP_FIXUP_OFFSET sp #ifdef CONFIG_BUILTIN_DTB la a0, __dtb_start #else @@ -269,6 +310,7 @@ clear_bss_done: call setup_vm #ifdef CONFIG_MMU la a0, early_pg_dir + XIP_FIXUP_OFFSET a0 call relocate #endif /* CONFIG_MMU */ @@ -293,7 +335,9 @@ clear_bss_done: slli a3, a0, LGREG la a1, __cpu_up_stack_pointer + XIP_FIXUP_OFFSET a1 la a2, __cpu_up_task_pointer + XIP_FIXUP_OFFSET a2 add a1, a3, a1 add a2, a3, a2 diff --git a/arch/riscv/kernel/head.h b/arch/riscv/kernel/head.h index b48dda3d04f6..aabbc3ac3e48 100644 --- a/arch/riscv/kernel/head.h +++ b/arch/riscv/kernel/head.h @@ -12,6 +12,9 @@ extern atomic_t hart_lottery; asmlinkage void do_page_fault(struct pt_regs *regs); asmlinkage void __init setup_vm(uintptr_t dtb_pa); +#ifdef CONFIG_XIP_KERNEL +asmlinkage void __init __copy_data(void); +#endif extern void *__cpu_up_stack_pointer[]; extern void *__cpu_up_task_pointer[]; diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index 932ef73cf622..7b31779101f6 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include @@ -51,7 +52,11 @@ struct screen_info screen_info __section(".data") = { * This is used before the kernel initializes the BSS so it can't be in the * BSS. */ -atomic_t hart_lottery __section(".sdata"); +atomic_t hart_lottery __section(".sdata") +#ifdef CONFIG_XIP_KERNEL += ATOMIC_INIT(0xC001BEEF) +#endif +; unsigned long boot_cpu_hartid; static DEFINE_PER_CPU(struct cpu, cpu_devices); @@ -276,7 +281,7 @@ void __init setup_arch(char **cmdline_p) #if IS_ENABLED(CONFIG_BUILTIN_DTB) unflatten_and_copy_device_tree(); #else - if (early_init_dt_verify(__va(dtb_early_pa))) + if (early_init_dt_verify(__va(XIP_FIXUP(dtb_early_pa)))) unflatten_device_tree(); else pr_err("No DTB found in kernel mappings\n"); @@ -288,7 +293,7 @@ void __init setup_arch(char **cmdline_p) if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)) { protect_kernel_text_data(); -#if defined(CONFIG_64BIT) && defined(CONFIG_MMU) +#if defined(CONFIG_64BIT) && defined(CONFIG_MMU) && !defined(CONFIG_XIP_KERNEL) protect_kernel_linear_mapping_text_rodata(); #endif } diff --git a/arch/riscv/kernel/vmlinux-xip.lds.S b/arch/riscv/kernel/vmlinux-xip.lds.S new file mode 100644 index 000000000000..4b29b9917f99 --- /dev/null +++ b/arch/riscv/kernel/vmlinux-xip.lds.S @@ -0,0 +1,133 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2012 Regents of the University of California + * Copyright (C) 2017 SiFive + * Copyright (C) 2020 Vitaly Wool, Konsulko AB + */ + +#include +#define LOAD_OFFSET KERNEL_LINK_ADDR +/* No __ro_after_init data in the .rodata section - which will always be ro */ +#define RO_AFTER_INIT_DATA + +#include +#include +#include +#include +#include + +OUTPUT_ARCH(riscv) +ENTRY(_start) + +jiffies = jiffies_64; + +SECTIONS +{ + /* Beginning of code and text segment */ + . = LOAD_OFFSET; + _xiprom = .; + _start = .; + HEAD_TEXT_SECTION + INIT_TEXT_SECTION(PAGE_SIZE) + /* we have to discard exit text and such at runtime, not link time */ + .exit.text : + { + EXIT_TEXT + } + + .text : { + _text = .; + _stext = .; + TEXT_TEXT + SCHED_TEXT + CPUIDLE_TEXT + LOCK_TEXT + KPROBES_TEXT + ENTRY_TEXT + IRQENTRY_TEXT + SOFTIRQENTRY_TEXT + *(.fixup) + _etext = .; + } + RO_DATA(L1_CACHE_BYTES) + .srodata : { + *(.srodata*) + } + .init.rodata : { + INIT_SETUP(16) + INIT_CALLS + CON_INITCALL + INIT_RAM_FS + } + _exiprom = .; /* End of XIP ROM area */ + + +/* + * From this point, stuff is considered writable and will be copied to RAM + */ + __data_loc = ALIGN(16); /* location in file */ + . = LOAD_OFFSET + XIP_OFFSET; /* location in memory */ + + _sdata = .; /* Start of data section */ + _data = .; + RW_DATA(L1_CACHE_BYTES, PAGE_SIZE, THREAD_SIZE) + _edata = .; + __start_ro_after_init = .; + .data.ro_after_init : AT(ADDR(.data.ro_after_init) - LOAD_OFFSET) { + *(.data..ro_after_init) + } + __end_ro_after_init = .; + + . = ALIGN(PAGE_SIZE); + __init_begin = .; + .init.data : { + INIT_DATA + } + .exit.data : { + EXIT_DATA + } + . = ALIGN(8); + __soc_early_init_table : { + __soc_early_init_table_start = .; + KEEP(*(__soc_early_init_table)) + __soc_early_init_table_end = .; + } + __soc_builtin_dtb_table : { + __soc_builtin_dtb_table_start = .; + KEEP(*(__soc_builtin_dtb_table)) + __soc_builtin_dtb_table_end = .; + } + PERCPU_SECTION(L1_CACHE_BYTES) + + . = ALIGN(PAGE_SIZE); + __init_end = .; + + .sdata : { + __global_pointer$ = . + 0x800; + *(.sdata*) + *(.sbss*) + } + + BSS_SECTION(PAGE_SIZE, PAGE_SIZE, 0) + EXCEPTION_TABLE(0x10) + + .rel.dyn : AT(ADDR(.rel.dyn) - LOAD_OFFSET) { + *(.rel.dyn*) + } + + /* + * End of copied data. We need a dummy section to get its LMA. + * Also located before final ALIGN() as trailing padding is not stored + * in the resulting binary file and useless to copy. + */ + .data.endmark : AT(ADDR(.data.endmark) - LOAD_OFFSET) { } + _edata_loc = LOADADDR(.data.endmark); + + . = ALIGN(PAGE_SIZE); + _end = .; + + STABS_DEBUG + DWARF_DEBUG + + DISCARDS +} diff --git a/arch/riscv/kernel/vmlinux.lds.S b/arch/riscv/kernel/vmlinux.lds.S index 56677137c85b..891742ff75a7 100644 --- a/arch/riscv/kernel/vmlinux.lds.S +++ b/arch/riscv/kernel/vmlinux.lds.S @@ -4,8 +4,13 @@ * Copyright (C) 2017 SiFive */ +#ifdef CONFIG_XIP_KERNEL +#include "vmlinux-xip.lds.S" +#else + #include #define LOAD_OFFSET KERNEL_LINK_ADDR + #include #include #include @@ -140,3 +145,4 @@ SECTIONS DISCARDS } +#endif /* CONFIG_XIP_KERNEL */ diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 3cb4c1b6cee1..788eb222deac 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -31,6 +31,9 @@ unsigned long kernel_virt_addr = KERNEL_LINK_ADDR; EXPORT_SYMBOL(kernel_virt_addr); +#ifdef CONFIG_XIP_KERNEL +#define kernel_virt_addr (*((unsigned long *)XIP_FIXUP(&kernel_virt_addr))) +#endif unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)] __page_aligned_bss; @@ -38,8 +41,8 @@ EXPORT_SYMBOL(empty_zero_page); extern char _start[]; #define DTB_EARLY_BASE_VA PGDIR_SIZE -void *dtb_early_va __initdata; -uintptr_t dtb_early_pa __initdata; +void *_dtb_early_va __initdata; +uintptr_t _dtb_early_pa __initdata; struct pt_alloc_ops { pte_t *(*get_pte_virt)(phys_addr_t pa); @@ -124,6 +127,10 @@ void __init setup_bootmem(void) phys_addr_t dram_end = memblock_end_of_DRAM(); phys_addr_t max_mapped_addr = __pa(~(ulong)0); +#ifdef CONFIG_XIP_KERNEL + vmlinux_start = __pa_symbol(&_sdata); +#endif + /* The maximal physical memory size is -PAGE_OFFSET. */ memblock_enforce_memory_limit(-PAGE_OFFSET); @@ -165,17 +172,41 @@ void __init setup_bootmem(void) memblock_allow_resize(); } +#ifdef CONFIG_XIP_KERNEL + +extern char _xiprom[], _exiprom[]; +extern char _sdata[], _edata[]; + +#endif /* CONFIG_XIP_KERNEL */ + #ifdef CONFIG_MMU -static struct pt_alloc_ops pt_ops __ro_after_init; +static struct pt_alloc_ops _pt_ops __ro_after_init; + +#ifdef CONFIG_XIP_KERNEL +#define pt_ops (*(struct pt_alloc_ops *)XIP_FIXUP(&_pt_ops)) +#else +#define pt_ops _pt_ops +#endif /* Offset between linear mapping virtual address and kernel load address */ unsigned long va_pa_offset __ro_after_init; EXPORT_SYMBOL(va_pa_offset); -#ifdef CONFIG_64BIT +#ifdef CONFIG_XIP_KERNEL +#define va_pa_offset (*((unsigned long *)XIP_FIXUP(&va_pa_offset))) +#endif /* Offset between kernel mapping virtual address and kernel load address */ +#ifdef CONFIG_64BIT unsigned long va_kernel_pa_offset; EXPORT_SYMBOL(va_kernel_pa_offset); #endif +#ifdef CONFIG_XIP_KERNEL +#define va_kernel_pa_offset (*((unsigned long *)XIP_FIXUP(&va_kernel_pa_offset))) +#endif +unsigned long va_kernel_xip_pa_offset; +EXPORT_SYMBOL(va_kernel_xip_pa_offset); +#ifdef CONFIG_XIP_KERNEL +#define va_kernel_xip_pa_offset (*((unsigned long *)XIP_FIXUP(&va_kernel_xip_pa_offset))) +#endif unsigned long pfn_base __ro_after_init; EXPORT_SYMBOL(pfn_base); @@ -185,6 +216,12 @@ pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss; pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE); +#ifdef CONFIG_XIP_KERNEL +#define trampoline_pg_dir ((pgd_t *)XIP_FIXUP(trampoline_pg_dir)) +#define fixmap_pte ((pte_t *)XIP_FIXUP(fixmap_pte)) +#define early_pg_dir ((pgd_t *)XIP_FIXUP(early_pg_dir)) +#endif /* CONFIG_XIP_KERNEL */ + void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot) { unsigned long addr = __fix_to_virt(idx); @@ -260,6 +297,12 @@ pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss; pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE); +#ifdef CONFIG_XIP_KERNEL +#define trampoline_pmd ((pmd_t *)XIP_FIXUP(trampoline_pmd)) +#define fixmap_pmd ((pmd_t *)XIP_FIXUP(fixmap_pmd)) +#define early_pmd ((pmd_t *)XIP_FIXUP(early_pmd)) +#endif /* CONFIG_XIP_KERNEL */ + static pmd_t *__init get_pmd_virt_early(phys_addr_t pa) { /* Before MMU is enabled */ @@ -376,6 +419,19 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) return PMD_SIZE; } +#ifdef CONFIG_XIP_KERNEL +/* called from head.S with MMU off */ +asmlinkage void __init __copy_data(void) +{ + void *from = (void *)(&_sdata); + void *end = (void *)(&_end); + void *to = (void *)CONFIG_PHYS_RAM_BASE; + size_t sz = (size_t)(end - from + 1); + + memcpy(to, from, sz); +} +#endif + /* * setup_vm() is called from head.S with MMU-off. * @@ -395,7 +451,35 @@ static uintptr_t __init best_map_size(phys_addr_t base, phys_addr_t size) #endif uintptr_t load_pa, load_sz; +#ifdef CONFIG_XIP_KERNEL +#define load_pa (*((uintptr_t *)XIP_FIXUP(&load_pa))) +#define load_sz (*((uintptr_t *)XIP_FIXUP(&load_sz))) +#endif +#ifdef CONFIG_XIP_KERNEL +uintptr_t xiprom, xiprom_sz; +#define xiprom_sz (*((uintptr_t *)XIP_FIXUP(&xiprom_sz))) +#define xiprom (*((uintptr_t *)XIP_FIXUP(&xiprom))) + +static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size) +{ + uintptr_t va, end_va; + + /* Map the flash resident part */ + end_va = kernel_virt_addr + xiprom_sz; + for (va = kernel_virt_addr; va < end_va; va += map_size) + create_pgd_mapping(pgdir, va, + xiprom + (va - kernel_virt_addr), + map_size, PAGE_KERNEL_EXEC); + + /* Map the data in RAM */ + end_va = kernel_virt_addr + XIP_OFFSET + load_sz; + for (va = kernel_virt_addr + XIP_OFFSET; va < end_va; va += map_size) + create_pgd_mapping(pgdir, va, + load_pa + (va - (kernel_virt_addr + XIP_OFFSET)), + map_size, PAGE_KERNEL); +} +#else static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size) { uintptr_t va, end_va; @@ -406,16 +490,28 @@ static void __init create_kernel_page_table(pgd_t *pgdir, uintptr_t map_size) load_pa + (va - kernel_virt_addr), map_size, PAGE_KERNEL_EXEC); } +#endif asmlinkage void __init setup_vm(uintptr_t dtb_pa) { - uintptr_t pa; + uintptr_t __maybe_unused pa; uintptr_t map_size; #ifndef __PAGETABLE_PMD_FOLDED pmd_t fix_bmap_spmd, fix_bmap_epmd; #endif + +#ifdef CONFIG_XIP_KERNEL + xiprom = (uintptr_t)CONFIG_XIP_PHYS_ADDR; + xiprom_sz = (uintptr_t)(&_exiprom) - (uintptr_t)(&_xiprom); + + load_pa = (uintptr_t)CONFIG_PHYS_RAM_BASE; + load_sz = (uintptr_t)(&_end) - (uintptr_t)(&_sdata); + + va_kernel_xip_pa_offset = kernel_virt_addr - xiprom; +#else load_pa = (uintptr_t)(&_start); load_sz = (uintptr_t)(&_end) - load_pa; +#endif va_pa_offset = PAGE_OFFSET - load_pa; #ifdef CONFIG_64BIT @@ -451,8 +547,13 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) /* Setup trampoline PGD and PMD */ create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr, (uintptr_t)trampoline_pmd, PGDIR_SIZE, PAGE_TABLE); +#ifdef CONFIG_XIP_KERNEL + create_pmd_mapping(trampoline_pmd, kernel_virt_addr, + xiprom, PMD_SIZE, PAGE_KERNEL_EXEC); +#else create_pmd_mapping(trampoline_pmd, kernel_virt_addr, load_pa, PMD_SIZE, PAGE_KERNEL_EXEC); +#endif #else /* Setup trampoline PGD */ create_pgd_mapping(trampoline_pg_dir, kernel_virt_addr, @@ -485,7 +586,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) * whereas dtb_early_va will be used before setup_vm_final installs * the linear mapping. */ - dtb_early_va = kernel_mapping_pa_to_va(dtb_pa); + dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa)); #else dtb_early_va = __va(dtb_pa); #endif /* CONFIG_64BIT */ @@ -501,7 +602,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1)); #else /* CONFIG_BUILTIN_DTB */ #ifdef CONFIG_64BIT - dtb_early_va = kernel_mapping_pa_to_va(dtb_pa); + dtb_early_va = kernel_mapping_pa_to_va(XIP_FIXUP(dtb_pa)); #else dtb_early_va = __va(dtb_pa); #endif /* CONFIG_64BIT */ @@ -540,7 +641,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa) #endif } -#ifdef CONFIG_64BIT +#if defined(CONFIG_64BIT) && !defined(CONFIG_XIP_KERNEL) void protect_kernel_linear_mapping_text_rodata(void) { unsigned long text_start = (unsigned long)lm_alias(_start); -- cgit v1.2.3 From 99b3e3d41a034d9b3993800287d023ea063da293 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Wed, 3 Mar 2021 12:02:49 -0800 Subject: RISC-V: Add Microchip PolarFire SoC kconfig option Add Microchip PolarFire kconfig option which selects SoC specific and common drivers that is required for this SoC. Signed-off-by: Atish Patra Reviewed-by: Bin Meng Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/Kconfig.socs | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/riscv') diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index b9eda857fc87..00c2b205654c 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -1,5 +1,12 @@ menu "SoC selection" +config SOC_MICROCHIP_POLARFIRE + bool "Microchip PolarFire SoCs" + select MCHP_CLK_MPFS + select SIFIVE_PLIC + help + This enables support for Microchip PolarFire SoC platforms. + config SOC_SIFIVE bool "SiFive SoCs" select SERIAL_SIFIVE if TTY -- cgit v1.2.3 From 0fa6107eca4186adc6adda3b54c8b942477066c1 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Wed, 3 Mar 2021 12:02:51 -0800 Subject: RISC-V: Initial DTS for Microchip ICICLE board Add initial DTS for Microchip ICICLE board having only essential devices (clocks, sdhci, ethernet, serial, etc). The device tree is based on the U-Boot patch. https://patchwork.ozlabs.org/project/uboot/patch/20201110103414.10142-6-padmarao.begari@microchip.com/ Signed-off-by: Atish Patra Signed-off-by: Palmer Dabbelt --- arch/riscv/boot/dts/Makefile | 1 + arch/riscv/boot/dts/microchip/Makefile | 2 + .../dts/microchip/microchip-mpfs-icicle-kit.dts | 72 +++++ arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi | 329 +++++++++++++++++++++ 4 files changed, 404 insertions(+) create mode 100644 arch/riscv/boot/dts/microchip/Makefile create mode 100644 arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts create mode 100644 arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi (limited to 'arch/riscv') diff --git a/arch/riscv/boot/dts/Makefile b/arch/riscv/boot/dts/Makefile index 7ffd502e3e7b..fe996b88319e 100644 --- a/arch/riscv/boot/dts/Makefile +++ b/arch/riscv/boot/dts/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 subdir-y += sifive subdir-$(CONFIG_SOC_CANAAN_K210_DTB_BUILTIN) += canaan +subdir-y += microchip obj-$(CONFIG_BUILTIN_DTB) := $(addsuffix /, $(subdir-y)) diff --git a/arch/riscv/boot/dts/microchip/Makefile b/arch/riscv/boot/dts/microchip/Makefile new file mode 100644 index 000000000000..622b12771fd3 --- /dev/null +++ b/arch/riscv/boot/dts/microchip/Makefile @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0 +dtb-$(CONFIG_SOC_MICROCHIP_POLARFIRE) += microchip-mpfs-icicle-kit.dtb diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts new file mode 100644 index 000000000000..ec79944065c9 --- /dev/null +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs-icicle-kit.dts @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2020 Microchip Technology Inc */ + +/dts-v1/; + +#include "microchip-mpfs.dtsi" + +/* Clock frequency (in Hz) of the rtcclk */ +#define RTCCLK_FREQ 1000000 + +/ { + #address-cells = <2>; + #size-cells = <2>; + model = "Microchip PolarFire-SoC Icicle Kit"; + compatible = "microchip,mpfs-icicle-kit"; + + chosen { + stdout-path = &serial0; + }; + + cpus { + timebase-frequency = ; + }; + + memory@80000000 { + device_type = "memory"; + reg = <0x0 0x80000000 0x0 0x40000000>; + clocks = <&clkcfg 26>; + }; + + soc { + }; +}; + +&serial0 { + status = "okay"; +}; + +&serial1 { + status = "okay"; +}; + +&serial2 { + status = "okay"; +}; + +&serial3 { + status = "okay"; +}; + +&sdcard { + status = "okay"; +}; + +&emac0 { + phy-mode = "sgmii"; + phy-handle = <&phy0>; + phy0: ethernet-phy@8 { + reg = <8>; + ti,fifo-depth = <0x01>; + }; +}; + +&emac1 { + status = "okay"; + phy-mode = "sgmii"; + phy-handle = <&phy1>; + phy1: ethernet-phy@9 { + reg = <9>; + ti,fifo-depth = <0x01>; + }; +}; diff --git a/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi new file mode 100644 index 000000000000..b9819570a7d1 --- /dev/null +++ b/arch/riscv/boot/dts/microchip/microchip-mpfs.dtsi @@ -0,0 +1,329 @@ +// SPDX-License-Identifier: (GPL-2.0 OR MIT) +/* Copyright (c) 2020 Microchip Technology Inc */ + +/dts-v1/; + +/ { + #address-cells = <2>; + #size-cells = <2>; + model = "Microchip MPFS Icicle Kit"; + compatible = "microchip,mpfs-icicle-kit"; + + chosen { + }; + + cpus { + #address-cells = <1>; + #size-cells = <0>; + + cpu@0 { + clock-frequency = <0>; + compatible = "sifive,e51", "sifive,rocket0", "riscv"; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <128>; + i-cache-size = <16384>; + reg = <0>; + riscv,isa = "rv64imac"; + status = "disabled"; + + cpu0_intc: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + + cpu@1 { + clock-frequency = <0>; + compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <32>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <32>; + mmu-type = "riscv,sv39"; + reg = <1>; + riscv,isa = "rv64imafdc"; + tlb-split; + status = "okay"; + + cpu1_intc: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + + cpu@2 { + clock-frequency = <0>; + compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <32>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <32>; + mmu-type = "riscv,sv39"; + reg = <2>; + riscv,isa = "rv64imafdc"; + tlb-split; + status = "okay"; + + cpu2_intc: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + + cpu@3 { + clock-frequency = <0>; + compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <32>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <32>; + mmu-type = "riscv,sv39"; + reg = <3>; + riscv,isa = "rv64imafdc"; + tlb-split; + status = "okay"; + + cpu3_intc: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + + cpu@4 { + clock-frequency = <0>; + compatible = "sifive,u54-mc", "sifive,rocket0", "riscv"; + d-cache-block-size = <64>; + d-cache-sets = <64>; + d-cache-size = <32768>; + d-tlb-sets = <1>; + d-tlb-size = <32>; + device_type = "cpu"; + i-cache-block-size = <64>; + i-cache-sets = <64>; + i-cache-size = <32768>; + i-tlb-sets = <1>; + i-tlb-size = <32>; + mmu-type = "riscv,sv39"; + reg = <4>; + riscv,isa = "rv64imafdc"; + tlb-split; + status = "okay"; + cpu4_intc: interrupt-controller { + #interrupt-cells = <1>; + compatible = "riscv,cpu-intc"; + interrupt-controller; + }; + }; + }; + + soc { + #address-cells = <2>; + #size-cells = <2>; + compatible = "simple-bus"; + ranges; + + cache-controller@2010000 { + compatible = "sifive,fu540-c000-ccache", "cache"; + cache-block-size = <64>; + cache-level = <2>; + cache-sets = <1024>; + cache-size = <2097152>; + cache-unified; + interrupt-parent = <&plic>; + interrupts = <1 2 3>; + reg = <0x0 0x2010000 0x0 0x1000>; + }; + + clint@2000000 { + compatible = "sifive,clint0"; + reg = <0x0 0x2000000 0x0 0xC000>; + interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7 + &cpu1_intc 3 &cpu1_intc 7 + &cpu2_intc 3 &cpu2_intc 7 + &cpu3_intc 3 &cpu3_intc 7 + &cpu4_intc 3 &cpu4_intc 7>; + }; + + plic: interrupt-controller@c000000 { + #interrupt-cells = <1>; + compatible = "sifive,plic-1.0.0"; + reg = <0x0 0xc000000 0x0 0x4000000>; + riscv,ndev = <186>; + interrupt-controller; + interrupts-extended = <&cpu0_intc 11 + &cpu1_intc 11 &cpu1_intc 9 + &cpu2_intc 11 &cpu2_intc 9 + &cpu3_intc 11 &cpu3_intc 9 + &cpu4_intc 11 &cpu4_intc 9>; + }; + + dma@3000000 { + compatible = "sifive,fu540-c000-pdma"; + reg = <0x0 0x3000000 0x0 0x8000>; + interrupt-parent = <&plic>; + interrupts = <23 24 25 26 27 28 29 30>; + #dma-cells = <1>; + }; + + refclk: refclk { + compatible = "fixed-clock"; + #clock-cells = <0>; + clock-frequency = <600000000>; + clock-output-names = "msspllclk"; + }; + + clkcfg: clkcfg@20002000 { + compatible = "microchip,mpfs-clkcfg"; + reg = <0x0 0x20002000 0x0 0x1000>; + reg-names = "mss_sysreg"; + clocks = <&refclk>; + #clock-cells = <1>; + clock-output-names = "cpu", "axi", "ahb", "envm", /* 0-3 */ + "mac0", "mac1", "mmc", "timer", /* 4-7 */ + "mmuart0", "mmuart1", "mmuart2", "mmuart3", /* 8-11 */ + "mmuart4", "spi0", "spi1", "i2c0", /* 12-15 */ + "i2c1", "can0", "can1", "usb", /* 16-19 */ + "rsvd", "rtc", "qspi", "gpio0", /* 20-23 */ + "gpio1", "gpio2", "ddrc", "fic0", /* 24-27 */ + "fic1", "fic2", "fic3", "athena", "cfm"; /* 28-32 */ + }; + + serial0: serial@20000000 { + compatible = "ns16550a"; + reg = <0x0 0x20000000 0x0 0x400>; + reg-io-width = <4>; + reg-shift = <2>; + interrupt-parent = <&plic>; + interrupts = <90>; + current-speed = <115200>; + clocks = <&clkcfg 8>; + status = "disabled"; + }; + + serial1: serial@20100000 { + compatible = "ns16550a"; + reg = <0x0 0x20100000 0x0 0x400>; + reg-io-width = <4>; + reg-shift = <2>; + interrupt-parent = <&plic>; + interrupts = <91>; + current-speed = <115200>; + clocks = <&clkcfg 9>; + status = "disabled"; + }; + + serial2: serial@20102000 { + compatible = "ns16550a"; + reg = <0x0 0x20102000 0x0 0x400>; + reg-io-width = <4>; + reg-shift = <2>; + interrupt-parent = <&plic>; + interrupts = <92>; + current-speed = <115200>; + clocks = <&clkcfg 10>; + status = "disabled"; + }; + + serial3: serial@20104000 { + compatible = "ns16550a"; + reg = <0x0 0x20104000 0x0 0x400>; + reg-io-width = <4>; + reg-shift = <2>; + interrupt-parent = <&plic>; + interrupts = <93>; + current-speed = <115200>; + clocks = <&clkcfg 11>; + status = "disabled"; + }; + + emmc: mmc@20008000 { + compatible = "cdns,sd4hc"; + reg = <0x0 0x20008000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupts = <88 89>; + pinctrl-names = "default"; + clocks = <&clkcfg 6>; + bus-width = <4>; + cap-mmc-highspeed; + mmc-ddr-3_3v; + max-frequency = <200000000>; + non-removable; + no-sd; + no-sdio; + voltage-ranges = <3300 3300>; + status = "disabled"; + }; + + sdcard: sdhc@20008000 { + compatible = "cdns,sd4hc"; + reg = <0x0 0x20008000 0x0 0x1000>; + interrupt-parent = <&plic>; + interrupts = <88>; + pinctrl-names = "default"; + clocks = <&clkcfg 6>; + bus-width = <4>; + disable-wp; + cap-sd-highspeed; + card-detect-delay = <200>; + sd-uhs-sdr12; + sd-uhs-sdr25; + sd-uhs-sdr50; + sd-uhs-sdr104; + max-frequency = <200000000>; + status = "disabled"; + }; + + emac0: ethernet@20110000 { + compatible = "cdns,macb"; + reg = <0x0 0x20110000 0x0 0x2000>; + interrupt-parent = <&plic>; + interrupts = <64 65 66 67>; + local-mac-address = [00 00 00 00 00 00]; + clocks = <&clkcfg 4>, <&clkcfg 2>; + clock-names = "pclk", "hclk"; + status = "disabled"; + #address-cells = <1>; + #size-cells = <0>; + }; + + emac1: ethernet@20112000 { + compatible = "cdns,macb"; + reg = <0x0 0x20112000 0x0 0x2000>; + interrupt-parent = <&plic>; + interrupts = <70 71 72 73>; + mac-address = [00 00 00 00 00 00]; + clocks = <&clkcfg 5>, <&clkcfg 2>; + status = "disabled"; + clock-names = "pclk", "hclk"; + #address-cells = <1>; + #size-cells = <0>; + }; + + }; +}; -- cgit v1.2.3 From 2951162094e61f574b0ddf886c783ace65049450 Mon Sep 17 00:00:00 2001 From: Atish Patra Date: Wed, 3 Mar 2021 12:02:52 -0800 Subject: RISC-V: Enable Microchip PolarFire ICICLE SoC Enable Microchip PolarFire ICICLE soc config in defconfig. It allows the default upstream kernel to boot on PolarFire ICICLE board. Signed-off-by: Atish Patra Reviewed-by: Anup Patel Reviewed-by: Bin Meng Signed-off-by: Palmer Dabbelt --- arch/riscv/configs/defconfig | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/riscv') diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 6c0625aa96c7..1f2be234b11c 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -16,6 +16,7 @@ CONFIG_EXPERT=y CONFIG_BPF_SYSCALL=y CONFIG_SOC_SIFIVE=y CONFIG_SOC_VIRT=y +CONFIG_SOC_MICROCHIP_POLARFIRE=y CONFIG_SMP=y CONFIG_HOTPLUG_CPU=y CONFIG_JUMP_LABEL=y @@ -82,6 +83,9 @@ CONFIG_USB_OHCI_HCD=y CONFIG_USB_OHCI_HCD_PLATFORM=y CONFIG_USB_STORAGE=y CONFIG_USB_UAS=y +CONFIG_MMC_SDHCI=y +CONFIG_MMC_SDHCI_PLTFM=y +CONFIG_MMC_SDHCI_CADENCE=y CONFIG_MMC=y CONFIG_MMC_SPI=y CONFIG_RTC_CLASS=y -- cgit v1.2.3 From 533b4f3a789d49574e7ae0f6ececed153f651f97 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Thu, 15 Apr 2021 14:25:22 +0530 Subject: RISC-V: Fix error code returned by riscv_hartid_to_cpuid() We should return a negative error code upon failure in riscv_hartid_to_cpuid() instead of NR_CPUS. This is also aligned with all uses of riscv_hartid_to_cpuid() which expect negative error code upon failure. Fixes: 6825c7a80f18 ("RISC-V: Add logical CPU indexing for RISC-V") Fixes: f99fb607fb2b ("RISC-V: Use Linux logical CPU number instead of hartid") Signed-off-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/kernel/smp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/kernel/smp.c b/arch/riscv/kernel/smp.c index 366cb87c0e2e..921d9d7df400 100644 --- a/arch/riscv/kernel/smp.c +++ b/arch/riscv/kernel/smp.c @@ -56,7 +56,7 @@ int riscv_hartid_to_cpuid(int hartid) return i; pr_err("Couldn't find cpu id for hartid [%d]\n", hartid); - return i; + return -ENOENT; } void riscv_cpuid_to_hartid_mask(const struct cpumask *in, struct cpumask *out) -- cgit v1.2.3 From 883fcb8ecaaffbc46d5ed20f336da61e422021aa Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Wed, 28 Apr 2021 06:02:17 -0400 Subject: riscv: Fix 32b kernel build with CONFIG_DEBUG_VIRTUAL=y Declare kernel_virt_addr for 32b kernel since it is used in __phys_addr_symbol defined when CONFIG_DEBUG_VIRTUAL is set. Fixes: 2bfc6cd81bd17 ("riscv: Move kernel mapping outside of linear mapping") Signed-off-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/page.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/page.h b/arch/riscv/include/asm/page.h index e280ba60cb34..6a7761c86ec2 100644 --- a/arch/riscv/include/asm/page.h +++ b/arch/riscv/include/asm/page.h @@ -106,9 +106,9 @@ extern unsigned long pfn_base; #define ARCH_PFN_OFFSET (PAGE_OFFSET >> PAGE_SHIFT) #endif /* CONFIG_MMU */ -#ifdef CONFIG_64BIT extern unsigned long kernel_virt_addr; +#ifdef CONFIG_64BIT #define linear_mapping_pa_to_va(x) ((void *)((unsigned long)(x) + va_pa_offset)) #ifdef CONFIG_XIP_KERNEL #define kernel_mapping_pa_to_va(y) ({ \ -- cgit v1.2.3 From 28252e08649f3aa06cb6b5420e29df7a9d5fe67d Mon Sep 17 00:00:00 2001 From: Alexandre Ghiti Date: Sun, 18 Apr 2021 07:28:56 -0400 Subject: riscv: Remove 32b kernel mapping from page table dump The 32b kernel mapping lies in the linear mapping, there is no point in printing its address in page table dump, so remove this leftover that comes from moving the kernel mapping outside the linear mapping for 64b kernel. Fixes: e9efb21fe352 ("riscv: Prepare ptdump for vm layout dynamic addresses") Signed-off-by: Alexandre Ghiti Reviewed-by: Anup Patel Signed-off-by: Palmer Dabbelt --- arch/riscv/mm/ptdump.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 92179598d8f8..0536ac84b730 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -76,8 +76,8 @@ enum address_markers_idx { PAGE_OFFSET_NR, #ifdef CONFIG_64BIT MODULES_MAPPING_NR, -#endif KERNEL_MAPPING_NR, +#endif END_OF_SPACE_NR }; @@ -99,8 +99,8 @@ static struct addr_marker address_markers[] = { {0, "Linear mapping"}, #ifdef CONFIG_64BIT {0, "Modules mapping"}, -#endif {0, "Kernel mapping (kernel, BPF)"}, +#endif {-1, NULL}, }; @@ -379,8 +379,8 @@ static int __init ptdump_init(void) address_markers[PAGE_OFFSET_NR].start_address = PAGE_OFFSET; #ifdef CONFIG_64BIT address_markers[MODULES_MAPPING_NR].start_address = MODULES_VADDR; -#endif address_markers[KERNEL_MAPPING_NR].start_address = kernel_virt_addr; +#endif kernel_ptd_info.base_addr = KERN_VIRT_START; -- cgit v1.2.3 From f54c7b5898d31eda3d6608da13b55c0466ba49fe Mon Sep 17 00:00:00 2001 From: Palmer Dabbelt Date: Wed, 28 Apr 2021 14:45:12 -0700 Subject: RISC-V: Always define XIP_FIXUP XIP depends on MMU, but XIP_FIXUP is used throughout the kernel in order to avoid excessive ifdefs. This just makes sure to always define XIP_FIXUP, which will fix MMU=n builds. XIP_OFFSET is used by assembly but XIP_FIXUP is C-only, so they're split. Fixes: 44c922572952 ("RISC-V: enable XIP") Reported-by: Guenter Roeck Signed-off-by: Palmer Dabbelt Tested-by: Alexandre Ghiti Signed-off-by: Palmer Dabbelt --- arch/riscv/include/asm/pgtable.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'arch/riscv') diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 2f1384e14e31..9469f464e71a 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -73,18 +73,10 @@ #endif #define FIXADDR_START (FIXADDR_TOP - FIXADDR_SIZE) +#endif + #ifdef CONFIG_XIP_KERNEL #define XIP_OFFSET SZ_8M -#define XIP_FIXUP(addr) ({ \ - uintptr_t __a = (uintptr_t)(addr); \ - (__a >= CONFIG_XIP_PHYS_ADDR && __a < CONFIG_XIP_PHYS_ADDR + SZ_16M) ? \ - __a - CONFIG_XIP_PHYS_ADDR + CONFIG_PHYS_RAM_BASE - XIP_OFFSET :\ - __a; \ - }) -#else -#define XIP_FIXUP(addr) (addr) -#endif /* CONFIG_XIP_KERNEL */ - #endif #ifndef __ASSEMBLY__ @@ -101,6 +93,17 @@ #include #endif /* CONFIG_64BIT */ +#ifdef CONFIG_XIP_KERNEL +#define XIP_FIXUP(addr) ({ \ + uintptr_t __a = (uintptr_t)(addr); \ + (__a >= CONFIG_XIP_PHYS_ADDR && __a < CONFIG_XIP_PHYS_ADDR + SZ_16M) ? \ + __a - CONFIG_XIP_PHYS_ADDR + CONFIG_PHYS_RAM_BASE - XIP_OFFSET :\ + __a; \ + }) +#else +#define XIP_FIXUP(addr) (addr) +#endif /* CONFIG_XIP_KERNEL */ + #ifdef CONFIG_MMU /* Number of entries in the page global directory */ #define PTRS_PER_PGD (PAGE_SIZE / sizeof(pgd_t)) -- cgit v1.2.3