diff options
Diffstat (limited to 'arch')
640 files changed, 14191 insertions, 7847 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 8e0d665c8d53..b695a3e3e922 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -278,9 +278,6 @@ config HAVE_CLK The <linux/clk.h> calls support software clock gating and thus are a key power management tool on many systems. -config HAVE_DMA_API_DEBUG - bool - config HAVE_HW_BREAKPOINT bool depends on PERF_EVENTS @@ -464,6 +461,10 @@ config GCC_PLUGIN_LATENT_ENTROPY config GCC_PLUGIN_STRUCTLEAK bool "Force initialization of variables containing userspace addresses" depends on GCC_PLUGINS + # Currently STRUCTLEAK inserts initialization out of live scope of + # variables from KASAN point of view. This leads to KASAN false + # positive reports. Prohibit this combination for now. + depends on !KASAN_EXTRA help This plugin zero-initializes any structures containing a __user attribute. This can prevent some classes of information @@ -870,6 +871,21 @@ config OLD_SIGACTION config COMPAT_OLD_SIGACTION bool +config 64BIT_TIME + def_bool ARCH_HAS_64BIT_TIME + help + This should be selected by all architectures that need to support + new system calls with a 64-bit time_t. This is relevant on all 32-bit + architectures, and 64-bit architectures as part of compat syscall + handling. + +config COMPAT_32BIT_TIME + def_bool (!64BIT && 64BIT_TIME) || COMPAT + help + This enables 32 bit time_t support in addition to 64 bit time_t support. + This is relevant on all 32-bit architectures, and 64-bit architectures + as part of compat syscall handling. + config ARCH_NO_COHERENT_DMA_MMAP bool diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index b2022885ced8..0c4805a572c8 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -10,6 +10,8 @@ config ALPHA select HAVE_OPROFILE select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS + select NEED_DMA_MAP_STATE + select NEED_SG_DMA_LENGTH select VIRT_TO_BUS select GENERIC_IRQ_PROBE select AUTO_IRQ_AFFINITY if SMP @@ -64,15 +66,6 @@ config ZONE_DMA bool default y -config ARCH_DMA_ADDR_T_64BIT - def_bool y - -config NEED_DMA_MAP_STATE - def_bool y - -config NEED_SG_DMA_LENGTH - def_bool y - config GENERIC_ISA_DMA bool default y @@ -211,6 +204,7 @@ config ALPHA_EIGER config ALPHA_JENSEN bool "Jensen" depends on BROKEN + select DMA_DIRECT_OPS help DEC PC 150 AXP (aka Jensen): This is a very old Digital system - one of the first-generation Alpha systems. A number of these systems @@ -345,9 +339,6 @@ config PCI_DOMAINS config PCI_SYSCALL def_bool PCI -config IOMMU_HELPER - def_bool PCI - config ALPHA_NONAME bool depends on ALPHA_BOOK1 || ALPHA_NONAME_CH @@ -585,7 +576,7 @@ config ARCH_DISCONTIGMEM_ENABLE Say Y to support efficient handling of discontiguous physical memory, for architectures which are either NUMA (Non-Uniform Memory Access) or have huge holes in the physical address space for other reasons. - See <file:Documentation/vm/numa> for more. + See <file:Documentation/vm/numa.rst> for more. source "mm/Kconfig" diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index 9b68790013e2..0580cb8c84b2 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 +generic-y += compat.h generic-y += exec.h generic-y += export.h generic-y += fb.h diff --git a/arch/alpha/include/asm/dma-mapping.h b/arch/alpha/include/asm/dma-mapping.h index b78f61f20796..8beeafd4f68e 100644 --- a/arch/alpha/include/asm/dma-mapping.h +++ b/arch/alpha/include/asm/dma-mapping.h @@ -2,11 +2,15 @@ #ifndef _ALPHA_DMA_MAPPING_H #define _ALPHA_DMA_MAPPING_H -extern const struct dma_map_ops *dma_ops; +extern const struct dma_map_ops alpha_pci_ops; static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - return dma_ops; +#ifdef CONFIG_ALPHA_JENSEN + return &dma_direct_ops; +#else + return &alpha_pci_ops; +#endif } #endif /* _ALPHA_DMA_MAPPING_H */ diff --git a/arch/alpha/include/asm/pci.h b/arch/alpha/include/asm/pci.h index b9ec55351924..cf6bc1e64d66 100644 --- a/arch/alpha/include/asm/pci.h +++ b/arch/alpha/include/asm/pci.h @@ -56,11 +56,6 @@ struct pci_controller { /* IOMMU controls. */ -/* The PCI address space does not equal the physical memory address space. - The networking and block device layers use this boolean for bounce buffer - decisions. */ -#define PCI_DMA_BUS_IS_PHYS 0 - /* TODO: integrate with include/asm-generic/pci.h ? */ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) { diff --git a/arch/alpha/include/uapi/asm/Kbuild b/arch/alpha/include/uapi/asm/Kbuild index 9afaba5e5503..1a5b75310cf4 100644 --- a/arch/alpha/include/uapi/asm/Kbuild +++ b/arch/alpha/include/uapi/asm/Kbuild @@ -2,4 +2,8 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += bpf_perf_event.h +generic-y += ipcbuf.h +generic-y += msgbuf.h generic-y += poll.h +generic-y += sembuf.h +generic-y += shmbuf.h diff --git a/arch/alpha/include/uapi/asm/ipcbuf.h b/arch/alpha/include/uapi/asm/ipcbuf.h deleted file mode 100644 index 90d6445a14df..000000000000 --- a/arch/alpha/include/uapi/asm/ipcbuf.h +++ /dev/null @@ -1,2 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#include <asm-generic/ipcbuf.h> diff --git a/arch/alpha/include/uapi/asm/msgbuf.h b/arch/alpha/include/uapi/asm/msgbuf.h deleted file mode 100644 index 8c5d4d8c1b16..000000000000 --- a/arch/alpha/include/uapi/asm/msgbuf.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ALPHA_MSGBUF_H -#define _ALPHA_MSGBUF_H - -/* - * The msqid64_ds structure for alpha architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * Pad space is left for: - * - 2 miscellaneous 64-bit values - */ - -struct msqid64_ds { - struct ipc64_perm msg_perm; - __kernel_time_t msg_stime; /* last msgsnd time */ - __kernel_time_t msg_rtime; /* last msgrcv time */ - __kernel_time_t msg_ctime; /* last change time */ - unsigned long msg_cbytes; /* current number of bytes on queue */ - unsigned long msg_qnum; /* number of messages in queue */ - unsigned long msg_qbytes; /* max number of bytes on queue */ - __kernel_pid_t msg_lspid; /* pid of last msgsnd */ - __kernel_pid_t msg_lrpid; /* last receive pid */ - unsigned long __unused1; - unsigned long __unused2; -}; - -#endif /* _ALPHA_MSGBUF_H */ diff --git a/arch/alpha/include/uapi/asm/sembuf.h b/arch/alpha/include/uapi/asm/sembuf.h deleted file mode 100644 index f28ffa668b2f..000000000000 --- a/arch/alpha/include/uapi/asm/sembuf.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ALPHA_SEMBUF_H -#define _ALPHA_SEMBUF_H - -/* - * The semid64_ds structure for alpha architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * Pad space is left for: - * - 2 miscellaneous 64-bit values - */ - -struct semid64_ds { - struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ - __kernel_time_t sem_otime; /* last semop time */ - __kernel_time_t sem_ctime; /* last change time */ - unsigned long sem_nsems; /* no. of semaphores in array */ - unsigned long __unused1; - unsigned long __unused2; -}; - -#endif /* _ALPHA_SEMBUF_H */ diff --git a/arch/alpha/include/uapi/asm/shmbuf.h b/arch/alpha/include/uapi/asm/shmbuf.h deleted file mode 100644 index 7e041ca2eb40..000000000000 --- a/arch/alpha/include/uapi/asm/shmbuf.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ALPHA_SHMBUF_H -#define _ALPHA_SHMBUF_H - -/* - * The shmid64_ds structure for alpha architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * Pad space is left for: - * - 2 miscellaneous 64-bit values - */ - -struct shmid64_ds { - struct ipc64_perm shm_perm; /* operation perms */ - size_t shm_segsz; /* size of segment (bytes) */ - __kernel_time_t shm_atime; /* last attach time */ - __kernel_time_t shm_dtime; /* last detach time */ - __kernel_time_t shm_ctime; /* last change time */ - __kernel_pid_t shm_cpid; /* pid of creator */ - __kernel_pid_t shm_lpid; /* pid of last operator */ - unsigned long shm_nattch; /* no. of current attaches */ - unsigned long __unused1; - unsigned long __unused2; -}; - -struct shminfo64 { - unsigned long shmmax; - unsigned long shmmin; - unsigned long shmmni; - unsigned long shmseg; - unsigned long shmall; - unsigned long __unused1; - unsigned long __unused2; - unsigned long __unused3; - unsigned long __unused4; -}; - -#endif /* _ALPHA_SHMBUF_H */ diff --git a/arch/alpha/include/uapi/asm/siginfo.h b/arch/alpha/include/uapi/asm/siginfo.h index 0cf3b527b274..db3f0138536f 100644 --- a/arch/alpha/include/uapi/asm/siginfo.h +++ b/arch/alpha/include/uapi/asm/siginfo.h @@ -7,18 +7,4 @@ #include <asm-generic/siginfo.h> -/* - * SIGFPE si_codes - */ -#ifdef __KERNEL__ -#define FPE_FIXME 0 /* Broken dup of SI_USER */ -#endif /* __KERNEL__ */ - -/* - * SIGTRAP si_codes - */ -#ifdef __KERNEL__ -#define TRAP_FIXME 0 /* Broken dup of SI_USER */ -#endif /* __KERNEL__ */ - #endif diff --git a/arch/alpha/kernel/io.c b/arch/alpha/kernel/io.c index 3e3d49c254c5..c025a3e5e357 100644 --- a/arch/alpha/kernel/io.c +++ b/arch/alpha/kernel/io.c @@ -37,20 +37,20 @@ unsigned int ioread32(void __iomem *addr) void iowrite8(u8 b, void __iomem *addr) { - IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr); mb(); + IO_CONCAT(__IO_PREFIX,iowrite8)(b, addr); } void iowrite16(u16 b, void __iomem *addr) { - IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr); mb(); + IO_CONCAT(__IO_PREFIX,iowrite16)(b, addr); } void iowrite32(u32 b, void __iomem *addr) { - IO_CONCAT(__IO_PREFIX,iowrite32)(b, addr); mb(); + IO_CONCAT(__IO_PREFIX,iowrite32)(b, addr); } EXPORT_SYMBOL(ioread8); @@ -176,26 +176,26 @@ u64 readq(const volatile void __iomem *addr) void writeb(u8 b, volatile void __iomem *addr) { - __raw_writeb(b, addr); mb(); + __raw_writeb(b, addr); } void writew(u16 b, volatile void __iomem *addr) { - __raw_writew(b, addr); mb(); + __raw_writew(b, addr); } void writel(u32 b, volatile void __iomem *addr) { - __raw_writel(b, addr); mb(); + __raw_writel(b, addr); } void writeq(u64 b, volatile void __iomem *addr) { - __raw_writeq(b, addr); mb(); + __raw_writeq(b, addr); } EXPORT_SYMBOL(readb); diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 89faa6f4de47..6e921754c8fc 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -871,8 +871,7 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer, send a signal. Old exceptions are not signaled. */ fex = (exc >> IEEE_STATUS_TO_EXCSUM_SHIFT) & swcr; if (fex) { - siginfo_t info; - int si_code = 0; + int si_code = FPE_FLTUNK; if (fex & IEEE_TRAP_ENABLE_DNO) si_code = FPE_FLTUND; if (fex & IEEE_TRAP_ENABLE_INE) si_code = FPE_FLTRES; @@ -881,11 +880,9 @@ SYSCALL_DEFINE5(osf_setsysinfo, unsigned long, op, void __user *, buffer, if (fex & IEEE_TRAP_ENABLE_DZE) si_code = FPE_FLTDIV; if (fex & IEEE_TRAP_ENABLE_INV) si_code = FPE_FLTINV; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = NULL; /* FIXME */ - send_sig_info(SIGFPE, &info, current); + send_sig_fault(SIGFPE, si_code, + (void __user *)NULL, /* FIXME */ + 0, current); } return 0; } diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c index b6ebb65127a8..c7c5879869d3 100644 --- a/arch/alpha/kernel/pci-noop.c +++ b/arch/alpha/kernel/pci-noop.c @@ -102,36 +102,3 @@ SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn, else return -ENODEV; } - -static void *alpha_noop_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) -{ - void *ret; - - if (!dev || *dev->dma_mask >= 0xffffffffUL) - gfp &= ~GFP_DMA; - ret = (void *)__get_free_pages(gfp, get_order(size)); - if (ret) { - memset(ret, 0, size); - *dma_handle = virt_to_phys(ret); - } - return ret; -} - -static int alpha_noop_supported(struct device *dev, u64 mask) -{ - return mask < 0x00ffffffUL ? 0 : 1; -} - -const struct dma_map_ops alpha_noop_ops = { - .alloc = alpha_noop_alloc_coherent, - .free = dma_noop_free_coherent, - .map_page = dma_noop_map_page, - .map_sg = dma_noop_map_sg, - .mapping_error = dma_noop_mapping_error, - .dma_supported = alpha_noop_supported, -}; - -const struct dma_map_ops *dma_ops = &alpha_noop_ops; -EXPORT_SYMBOL(dma_ops); diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 83b34b9188ea..6923b0d9c1e1 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -950,6 +950,4 @@ const struct dma_map_ops alpha_pci_ops = { .mapping_error = alpha_pci_mapping_error, .dma_supported = alpha_pci_supported, }; - -const struct dma_map_ops *dma_ops = &alpha_pci_ops; -EXPORT_SYMBOL(dma_ops); +EXPORT_SYMBOL(alpha_pci_ops); diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index 9ebb3bcbc626..8c0c4ee0be6e 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -219,14 +219,8 @@ do_sigreturn(struct sigcontext __user *sc) /* Send SIGTRAP if we're single-stepping: */ if (ptrace_cancel_bpt (current)) { - siginfo_t info; - - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_BRKPT; - info.si_addr = (void __user *) regs->pc; - info.si_trapno = 0; - send_sig_info(SIGTRAP, &info, current); + send_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *) regs->pc, 0, + current); } return; @@ -253,14 +247,8 @@ do_rt_sigreturn(struct rt_sigframe __user *frame) /* Send SIGTRAP if we're single-stepping: */ if (ptrace_cancel_bpt (current)) { - siginfo_t info; - - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_BRKPT; - info.si_addr = (void __user *) regs->pc; - info.si_trapno = 0; - send_sig_info(SIGTRAP, &info, current); + send_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *) regs->pc, 0, + current); } return; diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index f43bd05dede2..bc9627698796 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -213,7 +213,6 @@ do_entArith(unsigned long summary, unsigned long write_mask, struct pt_regs *regs) { long si_code = FPE_FLTINV; - siginfo_t info; if (summary & 1) { /* Software-completion summary bit is set, so try to @@ -228,17 +227,12 @@ do_entArith(unsigned long summary, unsigned long write_mask, } die_if_kernel("Arithmetic fault", regs, 0, NULL); - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = (void __user *) regs->pc; - send_sig_info(SIGFPE, &info, current); + send_sig_fault(SIGFPE, si_code, (void __user *) regs->pc, 0, current); } asmlinkage void do_entIF(unsigned long type, struct pt_regs *regs) { - siginfo_t info; int signo, code; if ((regs->ps & ~IPL_MAX) == 0) { @@ -270,31 +264,20 @@ do_entIF(unsigned long type, struct pt_regs *regs) switch (type) { case 0: /* breakpoint */ - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_BRKPT; - info.si_trapno = 0; - info.si_addr = (void __user *) regs->pc; - if (ptrace_cancel_bpt(current)) { regs->pc -= 4; /* make pc point to former bpt */ } - send_sig_info(SIGTRAP, &info, current); + send_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0, + current); return; case 1: /* bugcheck */ - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_FIXME; - info.si_addr = (void __user *) regs->pc; - info.si_trapno = 0; - send_sig_info(SIGTRAP, &info, current); + send_sig_fault(SIGTRAP, TRAP_UNK, (void __user *) regs->pc, 0, + current); return; case 2: /* gentrap */ - info.si_addr = (void __user *) regs->pc; - info.si_trapno = regs->r16; switch ((long) regs->r16) { case GEN_INTOVF: signo = SIGFPE; @@ -326,7 +309,7 @@ do_entIF(unsigned long type, struct pt_regs *regs) break; case GEN_ROPRAND: signo = SIGFPE; - code = FPE_FIXME; + code = FPE_FLTUNK; break; case GEN_DECOVF: @@ -348,15 +331,12 @@ do_entIF(unsigned long type, struct pt_regs *regs) case GEN_SUBRNG7: default: signo = SIGTRAP; - code = TRAP_FIXME; + code = TRAP_UNK; break; } - info.si_signo = signo; - info.si_errno = 0; - info.si_code = code; - info.si_addr = (void __user *) regs->pc; - send_sig_info(signo, &info, current); + send_sig_fault(signo, code, (void __user *) regs->pc, regs->r16, + current); return; case 4: /* opDEC */ @@ -380,11 +360,9 @@ do_entIF(unsigned long type, struct pt_regs *regs) if (si_code == 0) return; if (si_code > 0) { - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = (void __user *) regs->pc; - send_sig_info(SIGFPE, &info, current); + send_sig_fault(SIGFPE, si_code, + (void __user *) regs->pc, 0, + current); return; } } @@ -409,11 +387,7 @@ do_entIF(unsigned long type, struct pt_regs *regs) ; } - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_ILLOPC; - info.si_addr = (void __user *) regs->pc; - send_sig_info(SIGILL, &info, current); + send_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0, current); } /* There is an ifdef in the PALcode in MILO that enables a @@ -426,15 +400,9 @@ do_entIF(unsigned long type, struct pt_regs *regs) asmlinkage void do_entDbg(struct pt_regs *regs) { - siginfo_t info; - die_if_kernel("Instruction fault", regs, 0, NULL); - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_ILLOPC; - info.si_addr = (void __user *) regs->pc; - force_sig_info(SIGILL, &info, current); + force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)regs->pc, 0, current); } @@ -758,7 +726,7 @@ do_entUnaUser(void __user * va, unsigned long opcode, unsigned long tmp1, tmp2, tmp3, tmp4; unsigned long fake_reg, *reg_addr = &fake_reg; - siginfo_t info; + int si_code; long error; /* Check the UAC bits to decide what the user wants us to do @@ -981,34 +949,27 @@ do_entUnaUser(void __user * va, unsigned long opcode, give_sigsegv: regs->pc -= 4; /* make pc point to faulting insn */ - info.si_signo = SIGSEGV; - info.si_errno = 0; /* We need to replicate some of the logic in mm/fault.c, since we don't have access to the fault code in the exception handling return path. */ if ((unsigned long)va >= TASK_SIZE) - info.si_code = SEGV_ACCERR; + si_code = SEGV_ACCERR; else { struct mm_struct *mm = current->mm; down_read(&mm->mmap_sem); if (find_vma(mm, (unsigned long)va)) - info.si_code = SEGV_ACCERR; + si_code = SEGV_ACCERR; else - info.si_code = SEGV_MAPERR; + si_code = SEGV_MAPERR; up_read(&mm->mmap_sem); } - info.si_addr = va; - send_sig_info(SIGSEGV, &info, current); + send_sig_fault(SIGSEGV, si_code, va, 0, current); return; give_sigbus: regs->pc -= 4; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRALN; - info.si_addr = va; - send_sig_info(SIGBUS, &info, current); + send_sig_fault(SIGBUS, BUS_ADRALN, va, 0, current); return; } diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index cd3c572ee912..de2bd217adad 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -88,7 +88,6 @@ do_page_fault(unsigned long address, unsigned long mmcsr, struct mm_struct *mm = current->mm; const struct exception_table_entry *fixup; int fault, si_code = SEGV_MAPERR; - siginfo_t info; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; /* As of EV6, a load into $31/$f31 is a prefetch, and never faults @@ -221,21 +220,13 @@ retry: up_read(&mm->mmap_sem); /* Send a sigbus, regardless of whether we were in kernel or user mode. */ - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *) address; - force_sig_info(SIGBUS, &info, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *) address, 0, current); if (!user_mode(regs)) goto no_context; return; do_sigsegv: - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = (void __user *) address; - force_sig_info(SIGSEGV, &info, current); + force_sig_fault(SIGSEGV, si_code, (void __user *) address, 0, current); return; #ifdef CONFIG_ALPHA_LARGE_VMALLOC diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index d76bf4a83740..89d47eac18b2 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -9,11 +9,15 @@ config ARC def_bool y select ARC_TIMERS + select ARCH_HAS_SYNC_DMA_FOR_CPU + select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_HAS_SG_CHAIN select ARCH_SUPPORTS_ATOMIC_RMW if ARC_HAS_LLSC select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK + select DMA_NONCOHERENT_OPS + select DMA_NONCOHERENT_MMAP select GENERIC_ATOMIC64 if !ISA_ARCV2 || !(ARC_HAS_LL64 && ARC_HAS_LLSC) select GENERIC_CLOCKEVENTS select GENERIC_FIND_FIRST_BIT @@ -453,16 +457,11 @@ config ARC_HAS_PAE40 default n depends on ISA_ARCV2 select HIGHMEM + select PHYS_ADDR_T_64BIT help Enable access to physical memory beyond 4G, only supported on ARC cores with 40 bit Physical Addressing support -config ARCH_PHYS_ADDR_T_64BIT - def_bool ARC_HAS_PAE40 - -config ARCH_DMA_ADDR_T_64BIT - bool - config ARC_KVADDR_SIZE int "Kernel Virtual Address Space size (MB)" range 0 512 diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index 4bd5d4369e05..feed50ce89fa 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -1,7 +1,9 @@ # SPDX-License-Identifier: GPL-2.0 generic-y += bugs.h +generic-y += compat.h generic-y += device.h generic-y += div64.h +generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += extable.h generic-y += fb.h diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h deleted file mode 100644 index 7a16824bfe98..000000000000 --- a/arch/arc/include/asm/dma-mapping.h +++ /dev/null @@ -1,21 +0,0 @@ -/* - * DMA Mapping glue for ARC - * - * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef ASM_ARC_DMA_MAPPING_H -#define ASM_ARC_DMA_MAPPING_H - -extern const struct dma_map_ops arc_dma_ops; - -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - return &arc_dma_ops; -} - -#endif diff --git a/arch/arc/include/asm/pci.h b/arch/arc/include/asm/pci.h index ba56c23c1b20..4ff53c041c64 100644 --- a/arch/arc/include/asm/pci.h +++ b/arch/arc/include/asm/pci.h @@ -16,12 +16,6 @@ #define PCIBIOS_MIN_MEM 0x100000 #define pcibios_assign_all_busses() 1 -/* - * The PCI address space does equal the physical memory address space. - * The networking and block device layers use this boolean for bounce - * buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS 1 #endif /* __KERNEL__ */ diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 1dcc404b5aec..8c1071840979 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -16,13 +16,12 @@ * The default DMA address == Phy address which is 0x8000_0000 based. */ -#include <linux/dma-mapping.h> +#include <linux/dma-noncoherent.h> #include <asm/cache.h> #include <asm/cacheflush.h> - -static void *arc_dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t gfp, unsigned long attrs) { unsigned long order = get_order(size); struct page *page; @@ -89,7 +88,7 @@ static void *arc_dma_alloc(struct device *dev, size_t size, return kvaddr; } -static void arc_dma_free(struct device *dev, size_t size, void *vaddr, +void arch_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { phys_addr_t paddr = dma_handle; @@ -105,9 +104,9 @@ static void arc_dma_free(struct device *dev, size_t size, void *vaddr, __free_pages(page, get_order(size)); } -static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) +int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, size_t size, + unsigned long attrs) { unsigned long user_count = vma_pages(vma); unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; @@ -130,149 +129,14 @@ static int arc_dma_mmap(struct device *dev, struct vm_area_struct *vma, return ret; } -/* - * streaming DMA Mapping API... - * CPU accesses page via normal paddr, thus needs to explicitly made - * consistent before each use - */ -static void _dma_cache_sync(phys_addr_t paddr, size_t size, - enum dma_data_direction dir) +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { - switch (dir) { - case DMA_FROM_DEVICE: - dma_cache_inv(paddr, size); - break; - case DMA_TO_DEVICE: - dma_cache_wback(paddr, size); - break; - case DMA_BIDIRECTIONAL: - dma_cache_wback_inv(paddr, size); - break; - default: - pr_err("Invalid DMA dir [%d] for OP @ %pa[p]\n", dir, &paddr); - } + dma_cache_wback(paddr, size); } -/* - * arc_dma_map_page - map a portion of a page for streaming DMA - * - * Ensure that any data held in the cache is appropriately discarded - * or written back. - * - * The device owns this memory once this call has completed. The CPU - * can regain ownership by calling dma_unmap_page(). - * - * Note: while it takes struct page as arg, caller can "abuse" it to pass - * a region larger than PAGE_SIZE, provided it is physically contiguous - * and this still works correctly - */ -static dma_addr_t arc_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { - phys_addr_t paddr = page_to_phys(page) + offset; - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - _dma_cache_sync(paddr, size, dir); - - return paddr; -} - -/* - * arc_dma_unmap_page - unmap a buffer previously mapped through dma_map_page() - * - * After this call, reads by the CPU to the buffer are guaranteed to see - * whatever the device wrote there. - * - * Note: historically this routine was not implemented for ARC - */ -static void arc_dma_unmap_page(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - phys_addr_t paddr = handle; - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - _dma_cache_sync(paddr, size, dir); + dma_cache_inv(paddr, size); } - -static int arc_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nents, i) - s->dma_address = dma_map_page(dev, sg_page(s), s->offset, - s->length, dir); - - return nents; -} - -static void arc_dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *s; - int i; - - for_each_sg(sg, s, nents, i) - arc_dma_unmap_page(dev, sg_dma_address(s), sg_dma_len(s), dir, - attrs); -} - -static void arc_dma_sync_single_for_cpu(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) -{ - _dma_cache_sync(dma_handle, size, DMA_FROM_DEVICE); -} - -static void arc_dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, enum dma_data_direction dir) -{ - _dma_cache_sync(dma_handle, size, DMA_TO_DEVICE); -} - -static void arc_dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sglist, int nelems, - enum dma_data_direction dir) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sglist, sg, nelems, i) - _dma_cache_sync(sg_phys(sg), sg->length, dir); -} - -static void arc_dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sglist, int nelems, - enum dma_data_direction dir) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sglist, sg, nelems, i) - _dma_cache_sync(sg_phys(sg), sg->length, dir); -} - -static int arc_dma_supported(struct device *dev, u64 dma_mask) -{ - /* Support 32 bit DMA mask exclusively */ - return dma_mask == DMA_BIT_MASK(32); -} - -const struct dma_map_ops arc_dma_ops = { - .alloc = arc_dma_alloc, - .free = arc_dma_free, - .mmap = arc_dma_mmap, - .map_page = arc_dma_map_page, - .unmap_page = arc_dma_unmap_page, - .map_sg = arc_dma_map_sg, - .unmap_sg = arc_dma_unmap_sg, - .sync_single_for_device = arc_dma_sync_single_for_device, - .sync_single_for_cpu = arc_dma_sync_single_for_cpu, - .sync_sg_for_cpu = arc_dma_sync_sg_for_cpu, - .sync_sg_for_device = arc_dma_sync_sg_for_device, - .dma_supported = arc_dma_supported, -}; -EXPORT_SYMBOL(arc_dma_ops); diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index a0b7bd6d030d..b884bbd6f354 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -70,6 +70,8 @@ void do_page_fault(unsigned long address, struct pt_regs *regs) int write = regs->ecr_cause & ECR_C_PROTV_STORE; /* ST/EX */ unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; + clear_siginfo(&info); + /* * We fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index a7f8e7f4b88f..c43f5bb55ac8 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -60,7 +60,6 @@ config ARM select HAVE_CONTEXT_TRACKING select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK - select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS if MMU select HAVE_DYNAMIC_FTRACE if (!XIP_KERNEL) && !CPU_ENDIAN_BE32 && MMU select HAVE_DYNAMIC_FTRACE_WITH_REGS if HAVE_DYNAMIC_FTRACE @@ -96,6 +95,7 @@ config ARM select HAVE_VIRT_CPU_ACCOUNTING_GEN select IRQ_FORCED_THREADING select MODULES_USE_ELF_REL + select NEED_DMA_MAP_STATE select NO_BOOTMEM select OF_EARLY_FLATTREE if OF select OF_RESERVED_MEM if OF @@ -119,9 +119,6 @@ config ARM_HAS_SG_CHAIN select ARCH_HAS_SG_CHAIN bool -config NEED_SG_DMA_LENGTH - bool - config ARM_DMA_USE_IOMMU bool select ARM_HAS_SG_CHAIN @@ -224,9 +221,6 @@ config ARCH_MAY_HAVE_PC_FDC config ZONE_DMA bool -config NEED_DMA_MAP_STATE - def_bool y - config ARCH_SUPPORTS_UPROBES def_bool y @@ -1778,12 +1772,6 @@ config SECCOMP and the task is only allowed to execute a few safe syscalls defined by each seccomp mode. -config SWIOTLB - def_bool y - -config IOMMU_HELPER - def_bool SWIOTLB - config PARAVIRT bool "Enable paravirtualization code" help @@ -1815,6 +1803,7 @@ config XEN depends on MMU select ARCH_DMA_ADDR_T_64BIT select ARM_PSCI + select SWIOTLB select SWIOTLB_XEN select PARAVIRT help diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 45a6b9b7af2a..6a4e7341ecd3 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -117,11 +117,9 @@ ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj) asflags-y := -DZIMAGE # Supply kernel BSS size to the decompressor via a linker symbol. -KBSS_SZ = $(shell $(CROSS_COMPILE)nm $(obj)/../../../../vmlinux | \ - perl -e 'while (<>) { \ - $$bss_start=hex($$1) if /^([[:xdigit:]]+) B __bss_start$$/; \ - $$bss_end=hex($$1) if /^([[:xdigit:]]+) B __bss_stop$$/; \ - }; printf "%d\n", $$bss_end - $$bss_start;') +KBSS_SZ = $(shell echo $$(($$($(CROSS_COMPILE)nm $(obj)/../../../../vmlinux | \ + sed -n -e 's/^\([^ ]*\) [AB] __bss_start$$/-0x\1/p' \ + -e 's/^\([^ ]*\) [AB] __bss_stop$$/+0x\1/p') )) ) LDFLAGS_vmlinux = --defsym _kernel_bss_size=$(KBSS_SZ) # Supply ZRELADDR to the decompressor via a linker symbol. ifneq ($(CONFIG_AUTO_ZRELADDR),y) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 45c8823c3750..517e0e18f0b8 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -29,19 +29,19 @@ #if defined(CONFIG_DEBUG_ICEDCC) #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7) - .macro loadsp, rb, tmp + .macro loadsp, rb, tmp1, tmp2 .endm .macro writeb, ch, rb mcr p14, 0, \ch, c0, c5, 0 .endm #elif defined(CONFIG_CPU_XSCALE) - .macro loadsp, rb, tmp + .macro loadsp, rb, tmp1, tmp2 .endm .macro writeb, ch, rb mcr p14, 0, \ch, c8, c0, 0 .endm #else - .macro loadsp, rb, tmp + .macro loadsp, rb, tmp1, tmp2 .endm .macro writeb, ch, rb mcr p14, 0, \ch, c1, c0, 0 @@ -57,7 +57,7 @@ .endm #if defined(CONFIG_ARCH_SA1100) - .macro loadsp, rb, tmp + .macro loadsp, rb, tmp1, tmp2 mov \rb, #0x80000000 @ physical base address #ifdef CONFIG_DEBUG_LL_SER3 add \rb, \rb, #0x00050000 @ Ser3 @@ -66,8 +66,8 @@ #endif .endm #else - .macro loadsp, rb, tmp - addruart \rb, \tmp + .macro loadsp, rb, tmp1, tmp2 + addruart \rb, \tmp1, \tmp2 .endm #endif #endif @@ -561,8 +561,6 @@ not_relocated: mov r0, #0 bl decompress_kernel bl cache_clean_flush bl cache_off - mov r1, r7 @ restore architecture number - mov r2, r8 @ restore atags pointer #ifdef CONFIG_ARM_VIRT_EXT mrs r0, spsr @ Get saved CPU boot mode @@ -1297,7 +1295,7 @@ phex: adr r3, phexbuf b 1b @ puts corrupts {r0, r1, r2, r3} -puts: loadsp r3, r1 +puts: loadsp r3, r2, r1 1: ldrb r2, [r0], #1 teq r2, #0 moveq pc, lr @@ -1314,8 +1312,8 @@ puts: loadsp r3, r1 @ putc corrupts {r0, r1, r2, r3} putc: mov r2, r0 + loadsp r3, r1, r0 mov r0, #0 - loadsp r3, r1 b 2b @ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr} @@ -1365,6 +1363,8 @@ __hyp_reentry_vectors: __enter_kernel: mov r0, #0 @ must be 0 + mov r1, r7 @ restore architecture number + mov r2, r8 @ restore atags pointer ARM( mov pc, r4 ) @ call kernel M_CLASS( add r4, r4, #1 ) @ enter in Thumb mode for M class THUMB( bx r4 ) @ entry point is always ARM for A/R classes diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi index 699fdf94d139..9fe4f5a6379e 100644 --- a/arch/arm/boot/dts/bcm-cygnus.dtsi +++ b/arch/arm/boot/dts/bcm-cygnus.dtsi @@ -69,7 +69,7 @@ timer@20200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x20200 0x100>; - interrupts = <GIC_PPI 11 IRQ_TYPE_LEVEL_HIGH>; + interrupts = <GIC_PPI 11 IRQ_TYPE_EDGE_RISING>; clocks = <&periph_clk>; }; diff --git a/arch/arm/boot/dts/da850-lcdk.dts b/arch/arm/boot/dts/da850-lcdk.dts index a1f4d6d5a569..0edf769ea95c 100644 --- a/arch/arm/boot/dts/da850-lcdk.dts +++ b/arch/arm/boot/dts/da850-lcdk.dts @@ -21,8 +21,8 @@ stdout-path = "serial2:115200n8"; }; - memory { - device_type = "memory"; + memory@c0000000 { + /* 128 MB DDR2 SDRAM @ 0xc0000000 */ reg = <0xc0000000 0x08000000>; }; diff --git a/arch/arm/boot/dts/da850.dtsi b/arch/arm/boot/dts/da850.dtsi index c66cf7895363..12010002dbdb 100644 --- a/arch/arm/boot/dts/da850.dtsi +++ b/arch/arm/boot/dts/da850.dtsi @@ -7,10 +7,19 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. */ -#include "skeleton.dtsi" #include <dt-bindings/interrupt-controller/irq.h> / { + #address-cells = <1>; + #size-cells = <1>; + chosen { }; + aliases { }; + + memory@c0000000 { + device_type = "memory"; + reg = <0xc0000000 0x0>; + }; + arm { #address-cells = <1>; #size-cells = <1>; @@ -46,8 +55,6 @@ pmx_core: pinmux@14120 { compatible = "pinctrl-single"; reg = <0x14120 0x50>; - #address-cells = <1>; - #size-cells = <0>; #pinctrl-cells = <2>; pinctrl-single,bit-per-mux; pinctrl-single,register-width = <32>; diff --git a/arch/arm/boot/dts/dm8148-evm.dts b/arch/arm/boot/dts/dm8148-evm.dts index d6657b3bae84..85d7b5148b0a 100644 --- a/arch/arm/boot/dts/dm8148-evm.dts +++ b/arch/arm/boot/dts/dm8148-evm.dts @@ -10,7 +10,7 @@ / { model = "DM8148 EVM"; - compatible = "ti,dm8148-evm", "ti,dm8148"; + compatible = "ti,dm8148-evm", "ti,dm8148", "ti,dm814"; memory@80000000 { device_type = "memory"; diff --git a/arch/arm/boot/dts/dm8148-t410.dts b/arch/arm/boot/dts/dm8148-t410.dts index 63883b3479f9..6418f9cdbe83 100644 --- a/arch/arm/boot/dts/dm8148-t410.dts +++ b/arch/arm/boot/dts/dm8148-t410.dts @@ -9,7 +9,7 @@ / { model = "HP t410 Smart Zero Client"; - compatible = "hp,t410", "ti,dm8148"; + compatible = "hp,t410", "ti,dm8148", "ti,dm814"; memory@80000000 { device_type = "memory"; diff --git a/arch/arm/boot/dts/dm8168-evm.dts b/arch/arm/boot/dts/dm8168-evm.dts index c72a2132aa82..1d030d567307 100644 --- a/arch/arm/boot/dts/dm8168-evm.dts +++ b/arch/arm/boot/dts/dm8168-evm.dts @@ -10,7 +10,7 @@ / { model = "DM8168 EVM"; - compatible = "ti,dm8168-evm", "ti,dm8168"; + compatible = "ti,dm8168-evm", "ti,dm8168", "ti,dm816"; memory@80000000 { device_type = "memory"; diff --git a/arch/arm/boot/dts/dra62x-j5eco-evm.dts b/arch/arm/boot/dts/dra62x-j5eco-evm.dts index fee0547f7302..31b824ad5d29 100644 --- a/arch/arm/boot/dts/dra62x-j5eco-evm.dts +++ b/arch/arm/boot/dts/dra62x-j5eco-evm.dts @@ -10,7 +10,7 @@ / { model = "DRA62x J5 Eco EVM"; - compatible = "ti,dra62x-j5eco-evm", "ti,dra62x", "ti,dm8148"; + compatible = "ti,dra62x-j5eco-evm", "ti,dra62x", "ti,dm8148", "ti,dm814"; memory@80000000 { device_type = "memory"; diff --git a/arch/arm/boot/dts/imx35.dtsi b/arch/arm/boot/dts/imx35.dtsi index bf343195697e..54111ed218b1 100644 --- a/arch/arm/boot/dts/imx35.dtsi +++ b/arch/arm/boot/dts/imx35.dtsi @@ -303,7 +303,7 @@ }; can1: can@53fe4000 { - compatible = "fsl,imx35-flexcan"; + compatible = "fsl,imx35-flexcan", "fsl,imx25-flexcan"; reg = <0x53fe4000 0x1000>; clocks = <&clks 33>, <&clks 33>; clock-names = "ipg", "per"; @@ -312,7 +312,7 @@ }; can2: can@53fe8000 { - compatible = "fsl,imx35-flexcan"; + compatible = "fsl,imx35-flexcan", "fsl,imx25-flexcan"; reg = <0x53fe8000 0x1000>; clocks = <&clks 34>, <&clks 34>; clock-names = "ipg", "per"; diff --git a/arch/arm/boot/dts/imx51-zii-rdu1.dts b/arch/arm/boot/dts/imx51-zii-rdu1.dts index 0c99ac04ad08..6464f2560e06 100644 --- a/arch/arm/boot/dts/imx51-zii-rdu1.dts +++ b/arch/arm/boot/dts/imx51-zii-rdu1.dts @@ -523,7 +523,7 @@ }; touchscreen@20 { - compatible = "syna,rmi4_i2c"; + compatible = "syna,rmi4-i2c"; reg = <0x20>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_ts>; @@ -541,8 +541,8 @@ rmi4-f11@11 { reg = <0x11>; - touch-inverted-y; - touch-swapped-x-y; + touchscreen-inverted-y; + touchscreen-swapped-x-y; syna,sensor-type = <1>; }; }; diff --git a/arch/arm/boot/dts/imx53.dtsi b/arch/arm/boot/dts/imx53.dtsi index 7d647d043f52..3d65c0192f69 100644 --- a/arch/arm/boot/dts/imx53.dtsi +++ b/arch/arm/boot/dts/imx53.dtsi @@ -551,7 +551,7 @@ }; can1: can@53fc8000 { - compatible = "fsl,imx53-flexcan"; + compatible = "fsl,imx53-flexcan", "fsl,imx25-flexcan"; reg = <0x53fc8000 0x4000>; interrupts = <82>; clocks = <&clks IMX5_CLK_CAN1_IPG_GATE>, @@ -561,7 +561,7 @@ }; can2: can@53fcc000 { - compatible = "fsl,imx53-flexcan"; + compatible = "fsl,imx53-flexcan", "fsl,imx25-flexcan"; reg = <0x53fcc000 0x4000>; interrupts = <83>; clocks = <&clks IMX5_CLK_CAN2_IPG_GATE>, diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 4d42335c0dee..ce85b3ca1a55 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -868,6 +868,7 @@ crypto: caam@30900000 { compatible = "fsl,sec-v4.0"; + fsl,sec-era = <8>; #address-cells = <1>; #size-cells = <1>; reg = <0x30900000 0x40000>; diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index b47cac23a04b..6fa7bba3e801 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -26,7 +26,7 @@ gpio = <&gpio1 3 0>; /* gpio_3 */ startup-delay-us = <70000>; enable-active-high; - vin-supply = <&vmmc2>; + vin-supply = <&vaux3>; }; /* HS USB Host PHY on PORT 1 */ @@ -82,6 +82,7 @@ twl_audio: audio { compatible = "ti,twl4030-audio"; codec { + ti,hs_extmute_gpio = <&gpio2 25 GPIO_ACTIVE_HIGH>; }; }; }; @@ -199,6 +200,7 @@ pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT | MUX_MODE0) /* i2c1_scl.i2c1_scl */ OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT | MUX_MODE0) /* i2c1_sda.i2c1_sda */ + OMAP3_CORE1_IOPAD(0x20ba, PIN_OUTPUT | MUX_MODE4) /* gpmc_ncs6.gpio_57 */ >; }; }; @@ -213,7 +215,7 @@ }; wl127x_gpio: pinmux_wl127x_gpio_pin { pinctrl-single,pins = < - OMAP3_WKUP_IOPAD(0x2a0c, PIN_INPUT | MUX_MODE4) /* sys_boot0.gpio_2 */ + OMAP3_WKUP_IOPAD(0x2a0a, PIN_INPUT | MUX_MODE4) /* sys_boot0.gpio_2 */ OMAP3_WKUP_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4) /* sys_boot1.gpio_3 */ >; }; @@ -260,6 +262,11 @@ #include "twl4030.dtsi" #include "twl4030_omap3.dtsi" +&vaux3 { + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; +}; + &twl { twl_power: power { compatible = "ti,twl4030-power-idle-osc-off", "ti,twl4030-power-idle"; diff --git a/arch/arm/boot/dts/r8a7790-lager.dts b/arch/arm/boot/dts/r8a7790-lager.dts index 063fdb65dc60..f07f9018c3e7 100644 --- a/arch/arm/boot/dts/r8a7790-lager.dts +++ b/arch/arm/boot/dts/r8a7790-lager.dts @@ -379,7 +379,7 @@ port@0 { reg = <0>; adv7511_in: endpoint { - remote-endpoint = <&du_out_lvds0>; + remote-endpoint = <&lvds0_out>; }; }; @@ -467,10 +467,8 @@ status = "okay"; clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 722>, - <&cpg CPG_MOD 726>, <&cpg CPG_MOD 725>, <&x13_clk>, <&x2_clk>; - clock-names = "du.0", "du.1", "du.2", "lvds.0", "lvds.1", - "dclkin.0", "dclkin.1"; + clock-names = "du.0", "du.1", "du.2", "dclkin.0", "dclkin.1"; ports { port@0 { @@ -478,12 +476,26 @@ remote-endpoint = <&adv7123_in>; }; }; + }; +}; + +&lvds0 { + status = "okay"; + + ports { port@1 { endpoint { remote-endpoint = <&adv7511_in>; }; }; - port@2 { + }; +}; + +&lvds1 { + status = "okay"; + + ports { + port@1 { lvds_connector: endpoint { }; }; diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi index e4367cecad18..05a0fc23ac88 100644 --- a/arch/arm/boot/dts/r8a7790.dtsi +++ b/arch/arm/boot/dts/r8a7790.dtsi @@ -1627,18 +1627,13 @@ du: display@feb00000 { compatible = "renesas,du-r8a7790"; - reg = <0 0xfeb00000 0 0x70000>, - <0 0xfeb90000 0 0x1c>, - <0 0xfeb94000 0 0x1c>; - reg-names = "du", "lvds.0", "lvds.1"; + reg = <0 0xfeb00000 0 0x70000>; interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 269 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, - <&cpg CPG_MOD 722>, <&cpg CPG_MOD 726>, - <&cpg CPG_MOD 725>; - clock-names = "du.0", "du.1", "du.2", "lvds.0", - "lvds.1"; + <&cpg CPG_MOD 722>; + clock-names = "du.0", "du.1", "du.2"; status = "disabled"; ports { @@ -1653,11 +1648,65 @@ port@1 { reg = <1>; du_out_lvds0: endpoint { + remote-endpoint = <&lvds0_in>; }; }; port@2 { reg = <2>; du_out_lvds1: endpoint { + remote-endpoint = <&lvds1_in>; + }; + }; + }; + }; + + lvds0: lvds@feb90000 { + compatible = "renesas,r8a7790-lvds"; + reg = <0 0xfeb90000 0 0x1c>; + clocks = <&cpg CPG_MOD 726>; + power-domains = <&sysc R8A7790_PD_ALWAYS_ON>; + resets = <&cpg 726>; + status = "disabled"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + lvds0_in: endpoint { + remote-endpoint = <&du_out_lvds0>; + }; + }; + port@1 { + reg = <1>; + lvds0_out: endpoint { + }; + }; + }; + }; + + lvds1: lvds@feb94000 { + compatible = "renesas,r8a7790-lvds"; + reg = <0 0xfeb94000 0 0x1c>; + clocks = <&cpg CPG_MOD 725>; + power-domains = <&sysc R8A7790_PD_ALWAYS_ON>; + resets = <&cpg 725>; + status = "disabled"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + lvds1_in: endpoint { + remote-endpoint = <&du_out_lvds1>; + }; + }; + port@1 { + reg = <1>; + lvds1_out: endpoint { }; }; }; diff --git a/arch/arm/boot/dts/r8a7791-koelsch.dts b/arch/arm/boot/dts/r8a7791-koelsch.dts index f40321a1c917..9d7213a0b8b8 100644 --- a/arch/arm/boot/dts/r8a7791-koelsch.dts +++ b/arch/arm/boot/dts/r8a7791-koelsch.dts @@ -468,10 +468,9 @@ pinctrl-names = "default"; status = "okay"; - clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 726>, + clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&x13_clk>, <&x2_clk>; - clock-names = "du.0", "du.1", "lvds.0", - "dclkin.0", "dclkin.1"; + clock-names = "du.0", "du.1", "dclkin.0", "dclkin.1"; ports { port@0 { @@ -479,6 +478,13 @@ remote-endpoint = <&adv7511_in>; }; }; + }; +}; + +&lvds0 { + status = "okay"; + + ports { port@1 { lvds_connector: endpoint { }; diff --git a/arch/arm/boot/dts/r8a7791-porter.dts b/arch/arm/boot/dts/r8a7791-porter.dts index c14e6fe9e4f6..ae9ed9ff53ef 100644 --- a/arch/arm/boot/dts/r8a7791-porter.dts +++ b/arch/arm/boot/dts/r8a7791-porter.dts @@ -441,10 +441,9 @@ pinctrl-names = "default"; status = "okay"; - clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 726>, + clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&x3_clk>, <&x16_clk>; - clock-names = "du.0", "du.1", "lvds.0", - "dclkin.0", "dclkin.1"; + clock-names = "du.0", "du.1", "dclkin.0", "dclkin.1"; ports { port@0 { @@ -455,6 +454,17 @@ }; }; +&lvds0 { + status = "okay"; + + ports { + port@1 { + lvds_connector: endpoint { + }; + }; + }; +}; + &rcar_sound { pinctrl-0 = <&ssi_pins &audio_clk_pins>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/r8a7791.dtsi b/arch/arm/boot/dts/r8a7791.dtsi index f11dab71b03a..506b20885413 100644 --- a/arch/arm/boot/dts/r8a7791.dtsi +++ b/arch/arm/boot/dts/r8a7791.dtsi @@ -1633,15 +1633,12 @@ du: display@feb00000 { compatible = "renesas,du-r8a7791"; - reg = <0 0xfeb00000 0 0x40000>, - <0 0xfeb90000 0 0x1c>; - reg-names = "du", "lvds.0"; + reg = <0 0xfeb00000 0 0x40000>; interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cpg CPG_MOD 724>, - <&cpg CPG_MOD 723>, - <&cpg CPG_MOD 726>; - clock-names = "du.0", "du.1", "lvds.0"; + <&cpg CPG_MOD 723>; + clock-names = "du.0", "du.1"; status = "disabled"; ports { @@ -1656,6 +1653,33 @@ port@1 { reg = <1>; du_out_lvds0: endpoint { + remote-endpoint = <&lvds0_in>; + }; + }; + }; + }; + + lvds0: lvds@feb90000 { + compatible = "renesas,r8a7791-lvds"; + reg = <0 0xfeb90000 0 0x1c>; + clocks = <&cpg CPG_MOD 726>; + power-domains = <&sysc R8A7791_PD_ALWAYS_ON>; + resets = <&cpg 726>; + status = "disabled"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + lvds0_in: endpoint { + remote-endpoint = <&du_out_lvds0>; + }; + }; + port@1 { + reg = <1>; + lvds0_out: endpoint { }; }; }; diff --git a/arch/arm/boot/dts/r8a7793-gose.dts b/arch/arm/boot/dts/r8a7793-gose.dts index 9ed6961f2d9a..96e117d8b2cc 100644 --- a/arch/arm/boot/dts/r8a7793-gose.dts +++ b/arch/arm/boot/dts/r8a7793-gose.dts @@ -447,10 +447,9 @@ pinctrl-names = "default"; status = "okay"; - clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 726>, + clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&x13_clk>, <&x2_clk>; - clock-names = "du.0", "du.1", "lvds.0", - "dclkin.0", "dclkin.1"; + clock-names = "du.0", "du.1", "dclkin.0", "dclkin.1"; ports { port@0 { @@ -458,6 +457,11 @@ remote-endpoint = <&adv7511_in>; }; }; + }; +}; + +&lvds0 { + ports { port@1 { lvds_connector: endpoint { }; diff --git a/arch/arm/boot/dts/r8a7793.dtsi b/arch/arm/boot/dts/r8a7793.dtsi index f9c5a557107d..4f526030dc7c 100644 --- a/arch/arm/boot/dts/r8a7793.dtsi +++ b/arch/arm/boot/dts/r8a7793.dtsi @@ -1292,15 +1292,12 @@ du: display@feb00000 { compatible = "renesas,du-r8a7793"; - reg = <0 0xfeb00000 0 0x40000>, - <0 0xfeb90000 0 0x1c>; - reg-names = "du", "lvds.0"; + reg = <0 0xfeb00000 0 0x40000>; interrupts = <GIC_SPI 256 IRQ_TYPE_LEVEL_HIGH>, <GIC_SPI 268 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cpg CPG_MOD 724>, - <&cpg CPG_MOD 723>, - <&cpg CPG_MOD 726>; - clock-names = "du.0", "du.1", "lvds.0"; + <&cpg CPG_MOD 723>; + clock-names = "du.0", "du.1"; status = "disabled"; ports { @@ -1315,6 +1312,34 @@ port@1 { reg = <1>; du_out_lvds0: endpoint { + remote-endpoint = <&lvds0_in>; + }; + }; + }; + }; + + lvds0: lvds@feb90000 { + compatible = "renesas,r8a7793-lvds"; + reg = <0 0xfeb90000 0 0x1c>; + clocks = <&cpg CPG_MOD 726>; + power-domains = <&sysc R8A7793_PD_ALWAYS_ON>; + resets = <&cpg 726>; + + status = "disabled"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + lvds0_in: endpoint { + remote-endpoint = <&du_out_lvds0>; + }; + }; + port@1 { + reg = <1>; + lvds0_out: endpoint { }; }; }; diff --git a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi index c0743305f31b..eb96ac3e6c1d 100644 --- a/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi +++ b/arch/arm/boot/dts/stm32mp157-pinctrl.dtsi @@ -12,6 +12,8 @@ #size-cells = <1>; compatible = "st,stm32mp157-pinctrl"; ranges = <0 0x50002000 0xa400>; + interrupt-parent = <&exti>; + st,syscfg = <&exti 0x60 0xff>; pins-are-numbered; gpioa: gpio@50002000 { @@ -166,6 +168,8 @@ compatible = "st,stm32mp157-z-pinctrl"; ranges = <0 0x54004000 0x400>; pins-are-numbered; + interrupt-parent = <&exti>; + st,syscfg = <&exti 0x60 0xff>; status = "disabled"; gpioz: gpio@54004000 { diff --git a/arch/arm/boot/dts/stm32mp157c.dtsi b/arch/arm/boot/dts/stm32mp157c.dtsi index 9e17e42b02b2..4fa0df853c8a 100644 --- a/arch/arm/boot/dts/stm32mp157c.dtsi +++ b/arch/arm/boot/dts/stm32mp157c.dtsi @@ -183,6 +183,13 @@ status = "disabled"; }; + exti: interrupt-controller@5000d000 { + compatible = "st,stm32mp1-exti", "syscon"; + interrupt-controller; + #interrupt-cells = <2>; + reg = <0x5000d000 0x400>; + }; + usart1: serial@5c000000 { compatible = "st,stm32h7-uart"; reg = <0x5c000000 0x400>; diff --git a/arch/arm/boot/dts/sun4i-a10.dtsi b/arch/arm/boot/dts/sun4i-a10.dtsi index 77e8436beed4..3a1c6b45c9a1 100644 --- a/arch/arm/boot/dts/sun4i-a10.dtsi +++ b/arch/arm/boot/dts/sun4i-a10.dtsi @@ -76,7 +76,7 @@ allwinner,pipeline = "de_fe0-de_be0-lcd0-hdmi"; clocks = <&ccu CLK_AHB_LCD0>, <&ccu CLK_AHB_HDMI0>, <&ccu CLK_AHB_DE_BE0>, <&ccu CLK_AHB_DE_FE0>, - <&ccu CLK_DE_BE0>, <&ccu CLK_AHB_DE_FE0>, + <&ccu CLK_DE_BE0>, <&ccu CLK_DE_FE0>, <&ccu CLK_TCON0_CH1>, <&ccu CLK_HDMI>, <&ccu CLK_DRAM_DE_FE0>, <&ccu CLK_DRAM_DE_BE0>; status = "disabled"; @@ -88,7 +88,7 @@ allwinner,pipeline = "de_fe0-de_be0-lcd0"; clocks = <&ccu CLK_AHB_LCD0>, <&ccu CLK_AHB_DE_BE0>, <&ccu CLK_AHB_DE_FE0>, <&ccu CLK_DE_BE0>, - <&ccu CLK_AHB_DE_FE0>, <&ccu CLK_TCON0_CH0>, + <&ccu CLK_DE_FE0>, <&ccu CLK_TCON0_CH0>, <&ccu CLK_DRAM_DE_FE0>, <&ccu CLK_DRAM_DE_BE0>; status = "disabled"; }; @@ -99,7 +99,7 @@ allwinner,pipeline = "de_fe0-de_be0-lcd0-tve0"; clocks = <&ccu CLK_AHB_TVE0>, <&ccu CLK_AHB_LCD0>, <&ccu CLK_AHB_DE_BE0>, <&ccu CLK_AHB_DE_FE0>, - <&ccu CLK_DE_BE0>, <&ccu CLK_AHB_DE_FE0>, + <&ccu CLK_DE_BE0>, <&ccu CLK_DE_FE0>, <&ccu CLK_TCON0_CH1>, <&ccu CLK_DRAM_TVE0>, <&ccu CLK_DRAM_DE_FE0>, <&ccu CLK_DRAM_DE_BE0>; status = "disabled"; diff --git a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts index 3328fe583c9b..232f124ce62c 100644 --- a/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts +++ b/arch/arm/boot/dts/sun8i-h3-orangepi-one.dts @@ -117,6 +117,7 @@ phy-handle = <&int_mii_phy>; phy-mode = "mii"; allwinner,leds-active-low; + status = "okay"; }; &hdmi { diff --git a/arch/arm/boot/dts/sun8i-v3s-licheepi-zero-dock.dts b/arch/arm/boot/dts/sun8i-v3s-licheepi-zero-dock.dts index d1311098ea45..ad173605b1b8 100644 --- a/arch/arm/boot/dts/sun8i-v3s-licheepi-zero-dock.dts +++ b/arch/arm/boot/dts/sun8i-v3s-licheepi-zero-dock.dts @@ -51,7 +51,7 @@ leds { /* The LEDs use PG0~2 pins, which conflict with MMC1 */ - status = "disbaled"; + status = "disabled"; }; }; diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 0a7136462a1a..983dd5c14794 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -741,7 +741,7 @@ phy_type = "ulpi"; clocks = <&tegra_car TEGRA20_CLK_USB2>, <&tegra_car TEGRA20_CLK_PLL_U>, - <&tegra_car TEGRA20_CLK_PLL_P_OUT4>; + <&tegra_car TEGRA20_CLK_CDEV2>; clock-names = "reg", "pll_u", "ulpi-link"; resets = <&tegra_car 58>, <&tegra_car 22>; reset-names = "usb", "utmi-pads"; diff --git a/arch/arm/crypto/sha1-armv4-large.S b/arch/arm/crypto/sha1-armv4-large.S index 99207c45ec10..f82cd8cf5a09 100644 --- a/arch/arm/crypto/sha1-armv4-large.S +++ b/arch/arm/crypto/sha1-armv4-large.S @@ -1,4 +1,14 @@ #define __ARM_ARCH__ __LINUX_ARM_ARCH__ +@ SPDX-License-Identifier: GPL-2.0 + +@ This code is taken from the OpenSSL project but the author (Andy Polyakov) +@ has relicensed it under the GPLv2. Therefore this program is free software; +@ you can redistribute it and/or modify it under the terms of the GNU General +@ Public License version 2 as published by the Free Software Foundation. +@ +@ The original headers, including the original license headers, are +@ included below for completeness. + @ ==================================================================== @ Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL @ project. The module is, however, dual licensed under OpenSSL and diff --git a/arch/arm/crypto/sha256-armv4.pl b/arch/arm/crypto/sha256-armv4.pl index fac0533ea633..b9ec44060ed3 100644 --- a/arch/arm/crypto/sha256-armv4.pl +++ b/arch/arm/crypto/sha256-armv4.pl @@ -1,12 +1,19 @@ #!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 + +# This code is taken from the OpenSSL project but the author (Andy Polyakov) +# has relicensed it under the GPLv2. Therefore this program is free software; +# you can redistribute it and/or modify it under the terms of the GNU General +# Public License version 2 as published by the Free Software Foundation. +# +# The original headers, including the original license headers, are +# included below for completeness. # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. -# -# Permission to use under GPL terms is granted. # ==================================================================== # SHA256 block procedure for ARMv4. May 2007. diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped index 555a1a8eec90..3b58300d611c 100644 --- a/arch/arm/crypto/sha256-core.S_shipped +++ b/arch/arm/crypto/sha256-core.S_shipped @@ -1,11 +1,18 @@ +@ SPDX-License-Identifier: GPL-2.0 + +@ This code is taken from the OpenSSL project but the author (Andy Polyakov) +@ has relicensed it under the GPLv2. Therefore this program is free software; +@ you can redistribute it and/or modify it under the terms of the GNU General +@ Public License version 2 as published by the Free Software Foundation. +@ +@ The original headers, including the original license headers, are +@ included below for completeness. @ ==================================================================== @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @ project. The module is, however, dual licensed under OpenSSL and @ CRYPTOGAMS licenses depending on where you obtain it. For further @ details see http://www.openssl.org/~appro/cryptogams/. -@ -@ Permission to use under GPL terms is granted. @ ==================================================================== @ SHA256 block procedure for ARMv4. May 2007. diff --git a/arch/arm/crypto/sha512-armv4.pl b/arch/arm/crypto/sha512-armv4.pl index a2b11a844357..fb5d15048c0b 100644 --- a/arch/arm/crypto/sha512-armv4.pl +++ b/arch/arm/crypto/sha512-armv4.pl @@ -1,12 +1,19 @@ #!/usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 + +# This code is taken from the OpenSSL project but the author (Andy Polyakov) +# has relicensed it under the GPLv2. Therefore this program is free software; +# you can redistribute it and/or modify it under the terms of the GNU General +# Public License version 2 as published by the Free Software Foundation. +# +# The original headers, including the original license headers, are +# included below for completeness. # ==================================================================== # Written by Andy Polyakov <appro@openssl.org> for the OpenSSL # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. -# -# Permission to use under GPL terms is granted. # ==================================================================== # SHA512 block procedure for ARMv4. September 2007. diff --git a/arch/arm/crypto/sha512-core.S_shipped b/arch/arm/crypto/sha512-core.S_shipped index 3694c4d4ca2b..b1c334a49cda 100644 --- a/arch/arm/crypto/sha512-core.S_shipped +++ b/arch/arm/crypto/sha512-core.S_shipped @@ -1,11 +1,18 @@ +@ SPDX-License-Identifier: GPL-2.0 + +@ This code is taken from the OpenSSL project but the author (Andy Polyakov) +@ has relicensed it under the GPLv2. Therefore this program is free software; +@ you can redistribute it and/or modify it under the terms of the GNU General +@ Public License version 2 as published by the Free Software Foundation. +@ +@ The original headers, including the original license headers, are +@ included below for completeness. @ ==================================================================== @ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL @ project. The module is, however, dual licensed under OpenSSL and @ CRYPTOGAMS licenses depending on where you obtain it. For further @ details see http://www.openssl.org/~appro/cryptogams/. -@ -@ Permission to use under GPL terms is granted. @ ==================================================================== @ SHA512 block procedure for ARMv4. September 2007. diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 873e3c189279..1d66db9c9db5 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -1,3 +1,4 @@ +generic-y += compat.h generic-y += current.h generic-y += early_ioremap.h generic-y += emergency-restart.h diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index bc8d4bbd82e2..9342904cccca 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -536,4 +536,14 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm +#ifdef CONFIG_KPROBES +#define _ASM_NOKPROBE(entry) \ + .pushsection "_kprobe_blacklist", "aw" ; \ + .balign 4 ; \ + .long entry; \ + .popsection +#else +#define _ASM_NOKPROBE(entry) +#endif + #endif /* __ASM_ASSEMBLER_H__ */ diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 707a1f06dc5d..f675162663f0 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -309,6 +309,22 @@ static inline unsigned int kvm_get_vmid_bits(void) return 8; } +/* + * We are not in the kvm->srcu critical section most of the time, so we take + * the SRCU read lock here. Since we copy the data from the user page, we + * can immediately drop the lock again. + */ +static inline int kvm_read_guest_lock(struct kvm *kvm, + gpa_t gpa, void *data, unsigned long len) +{ + int srcu_idx = srcu_read_lock(&kvm->srcu); + int ret = kvm_read_guest(kvm, gpa, data, len); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return ret; +} + static inline void *kvm_get_hyp_vector(void) { return kvm_ksym_ref(__kvm_hyp_vector); diff --git a/arch/arm/include/asm/pci.h b/arch/arm/include/asm/pci.h index 1f0de808d111..0abd389cf0ec 100644 --- a/arch/arm/include/asm/pci.h +++ b/arch/arm/include/asm/pci.h @@ -19,13 +19,6 @@ static inline int pci_proc_domain(struct pci_bus *bus) } #endif /* CONFIG_PCI_DOMAINS */ -/* - * The PCI address space does equal the physical memory address space. - * The networking and block device layers use this boolean for bounce - * buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (1) - #define HAVE_PCI_MMAP #define ARCH_GENERIC_PCI_MMAP_RESOURCE diff --git a/arch/arm/include/uapi/asm/siginfo.h b/arch/arm/include/uapi/asm/siginfo.h deleted file mode 100644 index d0513880be21..000000000000 --- a/arch/arm/include/uapi/asm/siginfo.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __ASM_SIGINFO_H -#define __ASM_SIGINFO_H - -#include <asm-generic/siginfo.h> - -/* - * SIGFPE si_codes - */ -#ifdef __KERNEL__ -#define FPE_FIXME 0 /* Broken dup of SI_USER */ -#endif /* __KERNEL__ */ - -#endif diff --git a/arch/arm/kernel/dma.c b/arch/arm/kernel/dma.c index e651c4d0a0d9..6739d37c2bc5 100644 --- a/arch/arm/kernel/dma.c +++ b/arch/arm/kernel/dma.c @@ -276,21 +276,9 @@ static int proc_dma_show(struct seq_file *m, void *v) return 0; } -static int proc_dma_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_dma_show, NULL); -} - -static const struct file_operations proc_dma_operations = { - .open = proc_dma_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_dma_init(void) { - proc_create("dma", 0, NULL, &proc_dma_operations); + proc_create_single("dma", 0, NULL, proc_dma_show); return 0; } diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 6b38d7a634c1..dd2eb5f76b9f 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -83,7 +83,7 @@ void machine_crash_nonpanic_core(void *unused) { struct pt_regs regs; - crash_setup_regs(®s, NULL); + crash_setup_regs(®s, get_irq_regs()); printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n", smp_processor_id()); crash_save_cpu(®s, smp_processor_id()); @@ -95,6 +95,27 @@ void machine_crash_nonpanic_core(void *unused) cpu_relax(); } +void crash_smp_send_stop(void) +{ + static int cpus_stopped; + unsigned long msecs; + + if (cpus_stopped) + return; + + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + smp_call_function(machine_crash_nonpanic_core, NULL, false); + msecs = 1000; /* Wait at most a second for the other cpus to stop */ + while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { + mdelay(1); + msecs--; + } + if (atomic_read(&waiting_for_crash_ipi) > 0) + pr_warn("Non-crashing CPUs did not react to IPI\n"); + + cpus_stopped = 1; +} + static void machine_kexec_mask_interrupts(void) { unsigned int i; @@ -120,19 +141,8 @@ static void machine_kexec_mask_interrupts(void) void machine_crash_shutdown(struct pt_regs *regs) { - unsigned long msecs; - local_irq_disable(); - - atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); - smp_call_function(machine_crash_nonpanic_core, NULL, false); - msecs = 1000; /* Wait at most a second for the other cpus to stop */ - while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { - mdelay(1); - msecs--; - } - if (atomic_read(&waiting_for_crash_ipi) > 0) - pr_warn("Non-crashing CPUs did not react to IPI\n"); + crash_smp_send_stop(); crash_save_cpu(regs, smp_processor_id()); machine_kexec_mask_interrupts(); diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 7724b0f661b3..36718a424358 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -205,6 +205,7 @@ void ptrace_break(struct task_struct *tsk, struct pt_regs *regs) { siginfo_t info; + clear_siginfo(&info); info.si_signo = SIGTRAP; info.si_errno = 0; info.si_code = TRAP_BRKPT; diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index fc40a2b40595..35ca494c028c 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -754,7 +754,7 @@ int __init arm_add_memory(u64 start, u64 size) else size -= aligned_start - start; -#ifndef CONFIG_ARCH_PHYS_ADDR_T_64BIT +#ifndef CONFIG_PHYS_ADDR_T_64BIT if (aligned_start > ULONG_MAX) { pr_crit("Ignoring memory at 0x%08llx outside 32-bit physical address space\n", (long long)start); diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 3bda08bee674..80517f293eb9 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -91,18 +91,6 @@ static int proc_status_show(struct seq_file *m, void *v) seq_printf(m, "Last process:\t\t%d\n", previous_pid); return 0; } - -static int proc_status_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_status_show, PDE_DATA(inode)); -} - -static const struct file_operations proc_status_fops = { - .open = proc_status_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /* @@ -112,6 +100,7 @@ static void set_segfault(struct pt_regs *regs, unsigned long addr) { siginfo_t info; + clear_siginfo(&info); down_read(¤t->mm->mmap_sem); if (find_vma(current->mm, addr) == NULL) info.si_code = SEGV_MAPERR; @@ -260,7 +249,8 @@ static int __init swp_emulation_init(void) return 0; #ifdef CONFIG_PROC_FS - if (!proc_create("cpu/swp_emulation", S_IRUGO, NULL, &proc_status_fops)) + if (!proc_create_single("cpu/swp_emulation", S_IRUGO, NULL, + proc_status_show)) return -ENOMEM; #endif /* CONFIG_PROC_FS */ diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 5e3633c24e63..badf02ca3693 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -19,6 +19,7 @@ #include <linux/uaccess.h> #include <linux/hardirq.h> #include <linux/kdebug.h> +#include <linux/kprobes.h> #include <linux/module.h> #include <linux/kexec.h> #include <linux/bug.h> @@ -417,7 +418,8 @@ void unregister_undef_hook(struct undef_hook *hook) raw_spin_unlock_irqrestore(&undef_lock, flags); } -static int call_undef_hook(struct pt_regs *regs, unsigned int instr) +static nokprobe_inline +int call_undef_hook(struct pt_regs *regs, unsigned int instr) { struct undef_hook *hook; unsigned long flags; @@ -439,6 +441,7 @@ asmlinkage void do_undefinstr(struct pt_regs *regs) siginfo_t info; void __user *pc; + clear_siginfo(&info); pc = (void __user *)instruction_pointer(regs); if (processor_mode(regs) == SVC_MODE) { @@ -490,6 +493,7 @@ die_sig: arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6); } +NOKPROBE_SYMBOL(do_undefinstr) /* * Handle FIQ similarly to NMI on x86 systems. @@ -537,6 +541,7 @@ static int bad_syscall(int n, struct pt_regs *regs) { siginfo_t info; + clear_siginfo(&info); if ((current->personality & PER_MASK) != PER_LINUX) { send_sig(SIGSEGV, current, 1); return regs->ARM_r0; @@ -604,6 +609,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) { siginfo_t info; + clear_siginfo(&info); if ((no >> 16) != (__ARM_NR_BASE>> 16)) return bad_syscall(no, regs); @@ -740,6 +746,8 @@ baddataabort(int code, unsigned long instr, struct pt_regs *regs) unsigned long addr = instruction_pointer(regs); siginfo_t info; + clear_siginfo(&info); + #ifdef CONFIG_DEBUG_USER if (user_debug & UDBG_BADABORT) { pr_err("[%d] %s: bad data abort: code %d instr 0x%08lx\n", diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index df73914e81c8..746e7801dcdf 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -38,6 +38,7 @@ ENTRY(__get_user_1) mov r0, #0 ret lr ENDPROC(__get_user_1) +_ASM_NOKPROBE(__get_user_1) ENTRY(__get_user_2) check_uaccess r0, 2, r1, r2, __get_user_bad @@ -58,6 +59,7 @@ rb .req r0 mov r0, #0 ret lr ENDPROC(__get_user_2) +_ASM_NOKPROBE(__get_user_2) ENTRY(__get_user_4) check_uaccess r0, 4, r1, r2, __get_user_bad @@ -65,6 +67,7 @@ ENTRY(__get_user_4) mov r0, #0 ret lr ENDPROC(__get_user_4) +_ASM_NOKPROBE(__get_user_4) ENTRY(__get_user_8) check_uaccess r0, 8, r1, r2, __get_user_bad8 @@ -78,6 +81,7 @@ ENTRY(__get_user_8) mov r0, #0 ret lr ENDPROC(__get_user_8) +_ASM_NOKPROBE(__get_user_8) #ifdef __ARMEB__ ENTRY(__get_user_32t_8) @@ -91,6 +95,7 @@ ENTRY(__get_user_32t_8) mov r0, #0 ret lr ENDPROC(__get_user_32t_8) +_ASM_NOKPROBE(__get_user_32t_8) ENTRY(__get_user_64t_1) check_uaccess r0, 1, r1, r2, __get_user_bad8 @@ -98,6 +103,7 @@ ENTRY(__get_user_64t_1) mov r0, #0 ret lr ENDPROC(__get_user_64t_1) +_ASM_NOKPROBE(__get_user_64t_1) ENTRY(__get_user_64t_2) check_uaccess r0, 2, r1, r2, __get_user_bad8 @@ -114,6 +120,7 @@ rb .req r0 mov r0, #0 ret lr ENDPROC(__get_user_64t_2) +_ASM_NOKPROBE(__get_user_64t_2) ENTRY(__get_user_64t_4) check_uaccess r0, 4, r1, r2, __get_user_bad8 @@ -121,6 +128,7 @@ ENTRY(__get_user_64t_4) mov r0, #0 ret lr ENDPROC(__get_user_64t_4) +_ASM_NOKPROBE(__get_user_64t_4) #endif __get_user_bad8: @@ -131,6 +139,8 @@ __get_user_bad: ret lr ENDPROC(__get_user_bad) ENDPROC(__get_user_bad8) +_ASM_NOKPROBE(__get_user_bad) +_ASM_NOKPROBE(__get_user_bad8) .pushsection __ex_table, "a" .long 1b, __get_user_bad diff --git a/arch/arm/mach-axxia/Kconfig b/arch/arm/mach-axxia/Kconfig index bb2ce1c63fd9..d3eae6037913 100644 --- a/arch/arm/mach-axxia/Kconfig +++ b/arch/arm/mach-axxia/Kconfig @@ -2,7 +2,6 @@ config ARCH_AXXIA bool "LSI Axxia platforms" depends on ARCH_MULTI_V7 && ARM_LPAE - select ARCH_DMA_ADDR_T_64BIT select ARM_AMBA select ARM_GIC select ARM_TIMER_SP804 diff --git a/arch/arm/mach-bcm/Kconfig b/arch/arm/mach-bcm/Kconfig index c2f3b0d216a4..c46a728df44e 100644 --- a/arch/arm/mach-bcm/Kconfig +++ b/arch/arm/mach-bcm/Kconfig @@ -211,7 +211,6 @@ config ARCH_BRCMSTB select BRCMSTB_L2_IRQ select BCM7120_L2_IRQ select ARCH_HAS_HOLES_MEMORYMODEL - select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE select ZONE_DMA if ARM_LPAE select SOC_BRCMSTB select SOC_BUS diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c index 004f9c8de032..d1e8ce7b4bd2 100644 --- a/arch/arm/mach-davinci/board-da830-evm.c +++ b/arch/arm/mach-davinci/board-da830-evm.c @@ -205,12 +205,17 @@ static const short da830_evm_mmc_sd_pins[] = { -1 }; +#define DA830_MMCSD_WP_PIN GPIO_TO_PIN(2, 1) +#define DA830_MMCSD_CD_PIN GPIO_TO_PIN(2, 2) + static struct gpiod_lookup_table mmc_gpios_table = { .dev_id = "da830-mmc.0", .table = { /* gpio chip 1 contains gpio range 32-63 */ - GPIO_LOOKUP("davinci_gpio.1", 2, "cd", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("davinci_gpio.1", 1, "wp", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", DA830_MMCSD_CD_PIN, "cd", + GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", DA830_MMCSD_WP_PIN, "wp", + GPIO_ACTIVE_LOW), }, }; diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index 3063478bcc36..158ed9a1483f 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -763,12 +763,17 @@ static const short da850_evm_mcasp_pins[] __initconst = { -1 }; +#define DA850_MMCSD_CD_PIN GPIO_TO_PIN(4, 0) +#define DA850_MMCSD_WP_PIN GPIO_TO_PIN(4, 1) + static struct gpiod_lookup_table mmc_gpios_table = { .dev_id = "da830-mmc.0", .table = { /* gpio chip 2 contains gpio range 64-95 */ - GPIO_LOOKUP("davinci_gpio.2", 0, "cd", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("davinci_gpio.2", 1, "wp", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", DA850_MMCSD_CD_PIN, "cd", + GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", DA850_MMCSD_WP_PIN, "wp", + GPIO_ACTIVE_LOW), }, }; diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c index cb30637d9eaf..23ab9e8bc04c 100644 --- a/arch/arm/mach-davinci/board-dm355-evm.c +++ b/arch/arm/mach-davinci/board-dm355-evm.c @@ -19,6 +19,7 @@ #include <linux/gpio.h> #include <linux/gpio/machine.h> #include <linux/clk.h> +#include <linux/dm9000.h> #include <linux/videodev2.h> #include <media/i2c/tvp514x.h> #include <linux/spi/spi.h> @@ -109,12 +110,15 @@ static struct platform_device davinci_nand_device = { }, }; +#define DM355_I2C_SDA_PIN GPIO_TO_PIN(0, 15) +#define DM355_I2C_SCL_PIN GPIO_TO_PIN(0, 14) + static struct gpiod_lookup_table i2c_recovery_gpiod_table = { - .dev_id = "i2c_davinci", + .dev_id = "i2c_davinci.1", .table = { - GPIO_LOOKUP("davinci_gpio", 15, "sda", + GPIO_LOOKUP("davinci_gpio.0", DM355_I2C_SDA_PIN, "sda", GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), - GPIO_LOOKUP("davinci_gpio", 14, "scl", + GPIO_LOOKUP("davinci_gpio.0", DM355_I2C_SCL_PIN, "scl", GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), }, }; @@ -179,11 +183,16 @@ static struct resource dm355evm_dm9000_rsrc[] = { }, }; +static struct dm9000_plat_data dm335evm_dm9000_platdata; + static struct platform_device dm355evm_dm9000 = { .name = "dm9000", .id = -1, .resource = dm355evm_dm9000_rsrc, .num_resources = ARRAY_SIZE(dm355evm_dm9000_rsrc), + .dev = { + .platform_data = &dm335evm_dm9000_platdata, + }, }; static struct tvp514x_platform_data tvp5146_pdata = { diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c index 95b55aae1366..509e64ab1994 100644 --- a/arch/arm/mach-davinci/board-dm644x-evm.c +++ b/arch/arm/mach-davinci/board-dm644x-evm.c @@ -17,6 +17,7 @@ #include <linux/i2c.h> #include <linux/platform_data/pcf857x.h> #include <linux/platform_data/at24.h> +#include <linux/platform_data/gpio-davinci.h> #include <linux/mtd/mtd.h> #include <linux/mtd/rawnand.h> #include <linux/mtd/partitions.h> @@ -596,12 +597,15 @@ static struct i2c_board_info __initdata i2c_info[] = { }, }; +#define DM644X_I2C_SDA_PIN GPIO_TO_PIN(2, 12) +#define DM644X_I2C_SCL_PIN GPIO_TO_PIN(2, 11) + static struct gpiod_lookup_table i2c_recovery_gpiod_table = { - .dev_id = "i2c_davinci", + .dev_id = "i2c_davinci.1", .table = { - GPIO_LOOKUP("davinci_gpio", 44, "sda", + GPIO_LOOKUP("davinci_gpio.0", DM644X_I2C_SDA_PIN, "sda", GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), - GPIO_LOOKUP("davinci_gpio", 43, "scl", + GPIO_LOOKUP("davinci_gpio.0", DM644X_I2C_SCL_PIN, "scl", GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), }, }; diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c index 2d37f5b0e1f5..a3c0d1e87647 100644 --- a/arch/arm/mach-davinci/board-dm646x-evm.c +++ b/arch/arm/mach-davinci/board-dm646x-evm.c @@ -532,11 +532,12 @@ static struct vpif_display_config dm646x_vpif_display_config = { .set_clock = set_vpif_clock, .subdevinfo = dm646x_vpif_subdev, .subdev_count = ARRAY_SIZE(dm646x_vpif_subdev), + .i2c_adapter_id = 1, .chan_config[0] = { .outputs = dm6467_ch0_outputs, .output_count = ARRAY_SIZE(dm6467_ch0_outputs), }, - .card_name = "DM646x EVM", + .card_name = "DM646x EVM Video Display", }; /** @@ -674,6 +675,7 @@ static struct vpif_capture_config dm646x_vpif_capture_cfg = { .setup_input_channel_mode = setup_vpif_input_channel_mode, .subdev_info = vpif_capture_sdev_info, .subdev_count = ARRAY_SIZE(vpif_capture_sdev_info), + .i2c_adapter_id = 1, .chan_config[0] = { .inputs = dm6467_ch0_inputs, .input_count = ARRAY_SIZE(dm6467_ch0_inputs), @@ -694,6 +696,7 @@ static struct vpif_capture_config dm646x_vpif_capture_cfg = { .fid_pol = 0, }, }, + .card_name = "DM646x EVM Video Capture", }; static void __init evm_init_video(void) diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c index 0d32042b728f..be8b892a6ea7 100644 --- a/arch/arm/mach-davinci/board-omapl138-hawk.c +++ b/arch/arm/mach-davinci/board-omapl138-hawk.c @@ -123,12 +123,16 @@ static const short hawk_mmcsd0_pins[] = { -1 }; +#define DA850_HAWK_MMCSD_CD_PIN GPIO_TO_PIN(3, 12) +#define DA850_HAWK_MMCSD_WP_PIN GPIO_TO_PIN(3, 13) + static struct gpiod_lookup_table mmc_gpios_table = { .dev_id = "da830-mmc.0", .table = { - /* CD: gpio3_12: gpio60: chip 1 contains gpio range 32-63*/ - GPIO_LOOKUP("davinci_gpio.0", 28, "cd", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("davinci_gpio.0", 29, "wp", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", DA850_HAWK_MMCSD_CD_PIN, "cd", + GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", DA850_HAWK_MMCSD_WP_PIN, "wp", + GPIO_ACTIVE_LOW), }, }; diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c index 109ab1fa0d2c..c32ca27ab343 100644 --- a/arch/arm/mach-davinci/dm646x.c +++ b/arch/arm/mach-davinci/dm646x.c @@ -488,7 +488,8 @@ static u8 dm646x_default_priorities[DAVINCI_N_AINTC_IRQ] = { [IRQ_DM646X_MCASP0TXINT] = 7, [IRQ_DM646X_MCASP0RXINT] = 7, [IRQ_DM646X_RESERVED_3] = 7, - [IRQ_DM646X_MCASP1TXINT] = 7, /* clockevent */ + [IRQ_DM646X_MCASP1TXINT] = 7, + [IRQ_TINT0_TINT12] = 7, /* clockevent */ [IRQ_TINT0_TINT34] = 7, /* clocksource */ [IRQ_TINT1_TINT12] = 7, /* DSP timer */ [IRQ_TINT1_TINT34] = 7, /* system tick */ diff --git a/arch/arm/mach-ep93xx/core.c b/arch/arm/mach-ep93xx/core.c index e70feec6fad5..0581ffbedddd 100644 --- a/arch/arm/mach-ep93xx/core.c +++ b/arch/arm/mach-ep93xx/core.c @@ -323,7 +323,7 @@ void __init ep93xx_register_eth(struct ep93xx_eth_data *data, int copy_addr) /* All EP93xx devices use the same two GPIO pins for I2C bit-banging */ static struct gpiod_lookup_table ep93xx_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { /* Use local offsets on gpiochip/port "G" */ GPIO_LOOKUP_IDX("G", 1, NULL, 0, diff --git a/arch/arm/mach-exynos/Kconfig b/arch/arm/mach-exynos/Kconfig index 647c319f9f5f..2ca405816846 100644 --- a/arch/arm/mach-exynos/Kconfig +++ b/arch/arm/mach-exynos/Kconfig @@ -112,7 +112,6 @@ config SOC_EXYNOS5440 bool "SAMSUNG EXYNOS5440" default y depends on ARCH_EXYNOS5 - select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE select HAVE_ARM_ARCH_TIMER select AUTO_ZRELADDR select PINCTRL_EXYNOS5440 diff --git a/arch/arm/mach-highbank/Kconfig b/arch/arm/mach-highbank/Kconfig index 81110ec34226..5552968f07f8 100644 --- a/arch/arm/mach-highbank/Kconfig +++ b/arch/arm/mach-highbank/Kconfig @@ -1,7 +1,6 @@ config ARCH_HIGHBANK bool "Calxeda ECX-1000/2000 (Highbank/Midway)" depends on ARCH_MULTI_V7 - select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE select ARCH_HAS_HOLES_MEMORYMODEL select ARCH_SUPPORTS_BIG_ENDIAN select ARM_AMBA diff --git a/arch/arm/mach-ixp4xx/avila-setup.c b/arch/arm/mach-ixp4xx/avila-setup.c index 77def6169f50..44cbbce6bda6 100644 --- a/arch/arm/mach-ixp4xx/avila-setup.c +++ b/arch/arm/mach-ixp4xx/avila-setup.c @@ -51,7 +51,7 @@ static struct platform_device avila_flash = { }; static struct gpiod_lookup_table avila_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", AVILA_SDA_PIN, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-ixp4xx/dsmg600-setup.c b/arch/arm/mach-ixp4xx/dsmg600-setup.c index 0f5c99941a7d..397190f3a8da 100644 --- a/arch/arm/mach-ixp4xx/dsmg600-setup.c +++ b/arch/arm/mach-ixp4xx/dsmg600-setup.c @@ -70,7 +70,7 @@ static struct platform_device dsmg600_flash = { }; static struct gpiod_lookup_table dsmg600_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", DSMG600_SDA_PIN, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-ixp4xx/fsg-setup.c b/arch/arm/mach-ixp4xx/fsg-setup.c index 033f79b35d51..f0a152e365b1 100644 --- a/arch/arm/mach-ixp4xx/fsg-setup.c +++ b/arch/arm/mach-ixp4xx/fsg-setup.c @@ -56,7 +56,7 @@ static struct platform_device fsg_flash = { }; static struct gpiod_lookup_table fsg_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", FSG_SDA_PIN, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach-ixp4xx/ixdp425-setup.c index b168e2fbdbeb..3ec829d52cdd 100644 --- a/arch/arm/mach-ixp4xx/ixdp425-setup.c +++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c @@ -124,7 +124,7 @@ static struct platform_device ixdp425_flash_nand = { #endif /* CONFIG_MTD_NAND_PLATFORM */ static struct gpiod_lookup_table ixdp425_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", IXDP425_SDA_PIN, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-ixp4xx/nas100d-setup.c b/arch/arm/mach-ixp4xx/nas100d-setup.c index 76dfff03cb71..4138d6aa4c52 100644 --- a/arch/arm/mach-ixp4xx/nas100d-setup.c +++ b/arch/arm/mach-ixp4xx/nas100d-setup.c @@ -102,7 +102,7 @@ static struct platform_device nas100d_leds = { }; static struct gpiod_lookup_table nas100d_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", NAS100D_SDA_PIN, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-ixp4xx/nslu2-setup.c b/arch/arm/mach-ixp4xx/nslu2-setup.c index 91da63a7d7b5..341b263482ef 100644 --- a/arch/arm/mach-ixp4xx/nslu2-setup.c +++ b/arch/arm/mach-ixp4xx/nslu2-setup.c @@ -70,7 +70,7 @@ static struct platform_device nslu2_flash = { }; static struct gpiod_lookup_table nslu2_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { GPIO_LOOKUP_IDX("IXP4XX_GPIO_CHIP", NSLU2_SDA_PIN, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-keystone/pm_domain.c b/arch/arm/mach-keystone/pm_domain.c index fe57e2692629..abca83d22ff3 100644 --- a/arch/arm/mach-keystone/pm_domain.c +++ b/arch/arm/mach-keystone/pm_domain.c @@ -29,6 +29,7 @@ static struct dev_pm_domain keystone_pm_domain = { static struct pm_clk_notifier_block platform_domain_notifier = { .pm_domain = &keystone_pm_domain, + .con_ids = { NULL }, }; static const struct of_device_id of_keystone_table[] = { diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c index 793a24a53c52..d7ca9e2b40d2 100644 --- a/arch/arm/mach-omap1/ams-delta-fiq.c +++ b/arch/arm/mach-omap1/ams-delta-fiq.c @@ -58,22 +58,24 @@ static irqreturn_t deferred_fiq(int irq, void *dev_id) irq_num = gpio_to_irq(gpio); fiq_count = fiq_buffer[FIQ_CNT_INT_00 + gpio]; - while (irq_counter[gpio] < fiq_count) { - if (gpio != AMS_DELTA_GPIO_PIN_KEYBRD_CLK) { - struct irq_data *d = irq_get_irq_data(irq_num); - - /* - * It looks like handle_edge_irq() that - * OMAP GPIO edge interrupts default to, - * expects interrupt already unmasked. - */ - if (irq_chip && irq_chip->irq_unmask) + if (irq_counter[gpio] < fiq_count && + gpio != AMS_DELTA_GPIO_PIN_KEYBRD_CLK) { + struct irq_data *d = irq_get_irq_data(irq_num); + + /* + * handle_simple_irq() that OMAP GPIO edge + * interrupts default to since commit 80ac93c27441 + * requires interrupt already acked and unmasked. + */ + if (irq_chip) { + if (irq_chip->irq_ack) + irq_chip->irq_ack(d); + if (irq_chip->irq_unmask) irq_chip->irq_unmask(d); } - generic_handle_irq(irq_num); - - irq_counter[gpio]++; } + for (; irq_counter[gpio] < fiq_count; irq_counter[gpio]++) + generic_handle_irq(irq_num); } return IRQ_HANDLED; } diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c index 76eb6ec5f157..1e6a967cd2d5 100644 --- a/arch/arm/mach-omap2/powerdomain.c +++ b/arch/arm/mach-omap2/powerdomain.c @@ -188,7 +188,7 @@ static int _pwrdm_state_switch(struct powerdomain *pwrdm, int flag) ((prev & OMAP_POWERSTATE_MASK) << 0)); trace_power_domain_target_rcuidle(pwrdm->name, trace_state, - smp_processor_id()); + raw_smp_processor_id()); } break; default: @@ -518,7 +518,7 @@ int pwrdm_set_next_pwrst(struct powerdomain *pwrdm, u8 pwrst) if (arch_pwrdm && arch_pwrdm->pwrdm_set_next_pwrst) { /* Trace the pwrdm desired target state */ trace_power_domain_target_rcuidle(pwrdm->name, pwrst, - smp_processor_id()); + raw_smp_processor_id()); /* Program the pwrdm desired target state */ ret = arch_pwrdm->pwrdm_set_next_pwrst(pwrdm, pwrst); } diff --git a/arch/arm/mach-pxa/palmz72.c b/arch/arm/mach-pxa/palmz72.c index 5877e547cecd..0adb1bd6208e 100644 --- a/arch/arm/mach-pxa/palmz72.c +++ b/arch/arm/mach-pxa/palmz72.c @@ -322,7 +322,7 @@ static struct soc_camera_link palmz72_iclink = { }; static struct gpiod_lookup_table palmz72_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { GPIO_LOOKUP_IDX("gpio-pxa", 118, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-pxa/viper.c b/arch/arm/mach-pxa/viper.c index 90d0f277de55..207dcc2e94e7 100644 --- a/arch/arm/mach-pxa/viper.c +++ b/arch/arm/mach-pxa/viper.c @@ -460,7 +460,7 @@ static struct platform_device smc91x_device = { /* i2c */ static struct gpiod_lookup_table viper_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.1", .table = { GPIO_LOOKUP_IDX("gpio-pxa", VIPER_RTC_I2C_SDA_GPIO, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), @@ -789,7 +789,7 @@ static int __init viper_tpm_setup(char *str) __setup("tpm=", viper_tpm_setup); struct gpiod_lookup_table viper_tpm_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.2", .table = { GPIO_LOOKUP_IDX("gpio-pxa", VIPER_TPM_I2C_SDA_GPIO, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-rockchip/Kconfig b/arch/arm/mach-rockchip/Kconfig index a4065966881a..fafd3d7f9f8c 100644 --- a/arch/arm/mach-rockchip/Kconfig +++ b/arch/arm/mach-rockchip/Kconfig @@ -3,7 +3,6 @@ config ARCH_ROCKCHIP depends on ARCH_MULTI_V7 select PINCTRL select PINCTRL_ROCKCHIP - select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE select ARCH_HAS_RESET_CONTROLLER select ARM_AMBA select ARM_GIC diff --git a/arch/arm/mach-rpc/ecard.c b/arch/arm/mach-rpc/ecard.c index bdb5ec1cf560..39aef4876ed4 100644 --- a/arch/arm/mach-rpc/ecard.c +++ b/arch/arm/mach-rpc/ecard.c @@ -657,25 +657,13 @@ static int ecard_devices_proc_show(struct seq_file *m, void *v) return 0; } -static int ecard_devices_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ecard_devices_proc_show, NULL); -} - -static const struct file_operations bus_ecard_proc_fops = { - .owner = THIS_MODULE, - .open = ecard_devices_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static struct proc_dir_entry *proc_bus_ecard_dir = NULL; static void ecard_proc_init(void) { proc_bus_ecard_dir = proc_mkdir("bus/ecard", NULL); - proc_create("devices", 0, proc_bus_ecard_dir, &bus_ecard_proc_fops); + proc_create_single("devices", 0, proc_bus_ecard_dir, + ecard_devices_proc_show); } #define ec_set_resource(ec,nr,st,sz) \ diff --git a/arch/arm/mach-sa1100/simpad.c b/arch/arm/mach-sa1100/simpad.c index ace010479eb6..f45aed2519ba 100644 --- a/arch/arm/mach-sa1100/simpad.c +++ b/arch/arm/mach-sa1100/simpad.c @@ -327,7 +327,7 @@ static struct platform_device simpad_gpio_leds = { * i2c */ static struct gpiod_lookup_table simpad_i2c_gpiod_table = { - .dev_id = "i2c-gpio", + .dev_id = "i2c-gpio.0", .table = { GPIO_LOOKUP_IDX("gpio", 21, NULL, 0, GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index 280e7312a9e1..fe60cd09a5ca 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -29,7 +29,6 @@ config ARCH_RMOBILE menuconfig ARCH_RENESAS bool "Renesas ARM SoCs" depends on ARCH_MULTI_V7 && MMU - select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE select ARCH_SHMOBILE select ARM_GIC select GPIOLIB diff --git a/arch/arm/mach-tegra/Kconfig b/arch/arm/mach-tegra/Kconfig index 1e0aeb47bac6..7f3b83e0d324 100644 --- a/arch/arm/mach-tegra/Kconfig +++ b/arch/arm/mach-tegra/Kconfig @@ -15,6 +15,5 @@ menuconfig ARCH_TEGRA select RESET_CONTROLLER select SOC_BUS select ZONE_DMA if ARM_LPAE - select ARCH_DMA_ADDR_T_64BIT if ARM_LPAE help This enables support for NVIDIA Tegra based systems. diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 7f14acf67caf..5a016bc80e26 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -661,6 +661,7 @@ config ARM_LPAE bool "Support for the Large Physical Address Extension" depends on MMU && CPU_32v7 && !CPU_32v6 && !CPU_32v5 && \ !CPU_32v4 && !CPU_32v3 + select PHYS_ADDR_T_64BIT help Say Y if you have an ARMv7 processor supporting the LPAE page table format and you would like to access memory beyond the @@ -673,12 +674,6 @@ config ARM_PV_FIXUP def_bool y depends on ARM_LPAE && ARM_PATCH_PHYS_VIRT && ARCH_KEYSTONE -config ARCH_PHYS_ADDR_T_64BIT - def_bool ARM_LPAE - -config ARCH_DMA_ADDR_T_64BIT - bool - config ARM_THUMB bool "Support Thumb user binaries" if !CPU_THUMBONLY && EXPERT depends on CPU_THUMB_CAPABLE diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 2c96190e018b..bd2c739d8083 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -950,6 +950,7 @@ do_alignment(unsigned long addr, unsigned int fsr, struct pt_regs *regs) if (ai_usermode & UM_SIGNAL) { siginfo_t si; + clear_siginfo(&si); si.si_signo = SIGBUS; si.si_errno = 0; si.si_code = BUS_ADRALN; diff --git a/arch/arm/mm/dma-mapping-nommu.c b/arch/arm/mm/dma-mapping-nommu.c index 619f24a42d09..f448a0663b10 100644 --- a/arch/arm/mm/dma-mapping-nommu.c +++ b/arch/arm/mm/dma-mapping-nommu.c @@ -241,12 +241,3 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, void arch_teardown_dma_ops(struct device *dev) { } - -#define PREALLOC_DMA_DEBUG_ENTRIES 4096 - -static int __init dma_debug_do_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - return 0; -} -core_initcall(dma_debug_do_init); diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 8c398fedbbb6..4b6613b5e042 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -466,12 +466,6 @@ void __init dma_contiguous_early_fixup(phys_addr_t base, unsigned long size) void __init dma_contiguous_remap(void) { int i; - - if (!dma_mmu_remap_num) - return; - - /* call flush_cache_all() since CMA area would be large enough */ - flush_cache_all(); for (i = 0; i < dma_mmu_remap_num; i++) { phys_addr_t start = dma_mmu_remap[i].base; phys_addr_t end = start + dma_mmu_remap[i].size; @@ -504,15 +498,7 @@ void __init dma_contiguous_remap(void) flush_tlb_kernel_range(__phys_to_virt(start), __phys_to_virt(end)); - /* - * All the memory in CMA region will be on ZONE_MOVABLE. - * If that zone is considered as highmem, the memory in CMA - * region is also considered as highmem even if it's - * physical address belong to lowmem. In this case, - * re-mapping isn't required. - */ - if (!is_highmem_idx(ZONE_MOVABLE)) - iotable_init(&map, 1); + iotable_init(&map, 1); } } @@ -1165,15 +1151,6 @@ int arm_dma_supported(struct device *dev, u64 mask) return __dma_supported(dev, mask, false); } -#define PREALLOC_DMA_DEBUG_ENTRIES 4096 - -static int __init dma_debug_do_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - return 0; -} -core_initcall(dma_debug_do_init); - #ifdef CONFIG_ARM_DMA_USE_IOMMU static int __dma_info_to_prot(enum dma_data_direction dir, unsigned long attrs) diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index b75eada23d0a..32034543f49c 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -163,6 +163,8 @@ __do_user_fault(struct task_struct *tsk, unsigned long addr, { struct siginfo si; + clear_siginfo(&si); + #ifdef CONFIG_DEBUG_USER if (((user_debug & UDBG_SEGV) && (sig == SIGSEGV)) || ((user_debug & UDBG_BUS) && (sig == SIGBUS))) { @@ -557,6 +559,7 @@ do_DataAbort(unsigned long addr, unsigned int fsr, struct pt_regs *regs) inf->name, fsr, addr); show_pte(current->mm, addr); + clear_siginfo(&info); info.si_signo = inf->sig; info.si_errno = 0; info.si_code = inf->code; @@ -589,6 +592,7 @@ do_PrefetchAbort(unsigned long addr, unsigned int ifsr, struct pt_regs *regs) pr_alert("Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n", inf->name, ifsr, addr); + clear_siginfo(&info); info.si_signo = inf->sig; info.si_errno = 0; info.si_code = inf->code; diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index bcdecc25461b..b2aa9b32bff2 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -165,13 +165,14 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { unsigned long flags; struct kprobe *p = &op->kp; - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe_ctlblk *kcb; /* Save skipped registers */ regs->ARM_pc = (unsigned long)op->kp.addr; regs->ARM_ORIG_r0 = ~0UL; local_irq_save(flags); + kcb = get_kprobe_ctlblk(); if (kprobe_running()) { kprobes_inc_nmissed_count(&op->kp); @@ -191,6 +192,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) local_irq_restore(flags); } +NOKPROBE_SYMBOL(optimized_callback) int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig) { diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 4c375e11ae95..35d0f823e823 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -218,8 +218,7 @@ static void vfp_raise_sigfpe(unsigned int sicode, struct pt_regs *regs) { siginfo_t info; - memset(&info, 0, sizeof(info)); - + clear_siginfo(&info); info.si_signo = SIGFPE; info.si_code = sicode; info.si_addr = (void __user *)(instruction_pointer(regs) - 4); @@ -257,7 +256,7 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_ if (exceptions == VFP_EXCEPTION_ERROR) { vfp_panic("unhandled bounce", inst); - vfp_raise_sigfpe(FPE_FIXME, regs); + vfp_raise_sigfpe(FPE_FLTINV, regs); return; } diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index eb2cf4938f6d..b25ed7834f6c 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -105,7 +105,6 @@ config ARM64 select HAVE_CONTEXT_TRACKING select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK - select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_EFFICIENT_UNALIGNED_ACCESS @@ -133,6 +132,8 @@ config ARM64 select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA select MULTI_IRQ_HANDLER + select NEED_DMA_MAP_STATE + select NEED_SG_DMA_LENGTH select NO_BOOTMEM select OF select OF_EARLY_FLATTREE @@ -142,6 +143,7 @@ config ARM64 select POWER_SUPPLY select REFCOUNT_FULL select SPARSE_IRQ + select SWIOTLB select SYSCTL_EXCEPTION_TRACE select THREAD_INFO_IN_TASK help @@ -150,9 +152,6 @@ config ARM64 config 64BIT def_bool y -config ARCH_PHYS_ADDR_T_64BIT - def_bool y - config MMU def_bool y @@ -237,24 +236,9 @@ config ZONE_DMA32 config HAVE_GENERIC_GUP def_bool y -config ARCH_DMA_ADDR_T_64BIT - def_bool y - -config NEED_DMA_MAP_STATE - def_bool y - -config NEED_SG_DMA_LENGTH - def_bool y - config SMP def_bool y -config SWIOTLB - def_bool y - -config IOMMU_HELPER - def_bool SWIOTLB - config KERNEL_MODE_NEON def_bool y diff --git a/arch/arm64/boot/dts/exynos/exynos5433.dtsi b/arch/arm64/boot/dts/exynos/exynos5433.dtsi index c0231d077fa6..1ad8677f6a0a 100644 --- a/arch/arm64/boot/dts/exynos/exynos5433.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos5433.dtsi @@ -1317,7 +1317,7 @@ reg = <0x14d60000 0x100>; dmas = <&pdma0 31 &pdma0 30>; dma-names = "tx", "rx"; - interrupts = <GIC_SPI 435 IRQ_TYPE_NONE>; + interrupts = <GIC_SPI 435 IRQ_TYPE_LEVEL_HIGH>; clocks = <&cmu_peric CLK_PCLK_I2S1>, <&cmu_peric CLK_PCLK_I2S1>, <&cmu_peric CLK_SCLK_I2S1>; diff --git a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts index 724a0d3b7683..edb4ee0b8896 100644 --- a/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts +++ b/arch/arm64/boot/dts/hisilicon/hi6220-hikey.dts @@ -299,7 +299,6 @@ /* GPIO blocks 16 thru 19 do not appear to be routed to pins */ dwmmc_0: dwmmc0@f723d000 { - max-frequency = <150000000>; cap-mmc-highspeed; mmc-hs200-1_8v; non-removable; diff --git a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi index 48cad7919efa..ed2f1237ea1e 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi @@ -38,9 +38,10 @@ compatible = "marvell,armada-7k-pp22"; reg = <0x0 0x100000>, <0x129000 0xb000>; clocks = <&CP110_LABEL(clk) 1 3>, <&CP110_LABEL(clk) 1 9>, - <&CP110_LABEL(clk) 1 5>, <&CP110_LABEL(clk) 1 18>; + <&CP110_LABEL(clk) 1 5>, <&CP110_LABEL(clk) 1 6>, + <&CP110_LABEL(clk) 1 18>; clock-names = "pp_clk", "gop_clk", - "mg_clk", "axi_clk"; + "mg_clk", "mg_core_clk", "axi_clk"; marvell,system-controller = <&CP110_LABEL(syscon0)>; status = "disabled"; dma-coherent; @@ -141,6 +142,8 @@ #size-cells = <0>; compatible = "marvell,xmdio"; reg = <0x12a600 0x10>; + clocks = <&CP110_LABEL(clk) 1 5>, + <&CP110_LABEL(clk) 1 6>, <&CP110_LABEL(clk) 1 18>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi index a8baad7b80df..13f57fff1477 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi @@ -46,7 +46,7 @@ compatible = "ethernet-phy-ieee802.3-c22"; reg = <0x0>; interrupt-parent = <&gpio>; - interrupts = <TEGRA_MAIN_GPIO(M, 5) IRQ_TYPE_LEVEL_HIGH>; + interrupts = <TEGRA_MAIN_GPIO(M, 5) IRQ_TYPE_LEVEL_LOW>; }; }; }; diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi index e62bda1cf2d9..c32dd3419c87 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi @@ -414,7 +414,7 @@ mmc-ddr-1_8v; mmc-hs200-1_8v; mmc-pwrseq = <&emmc_pwrseq>; - cdns,phy-input-delay-legacy = <4>; + cdns,phy-input-delay-legacy = <9>; cdns,phy-input-delay-mmc-highspeed = <2>; cdns,phy-input-delay-mmc-ddr = <3>; cdns,phy-dll-delay-sdclk = <21>; diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts index 2c1a92fafbfb..440c2e6a638b 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts +++ b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts @@ -67,3 +67,11 @@ reg = <0>; }; }; + +&pinctrl_ether_rgmii { + tx { + pins = "RGMII_TXCLK", "RGMII_TXD0", "RGMII_TXD1", + "RGMII_TXD2", "RGMII_TXD3", "RGMII_TXCTL"; + drive-strength = <9>; + }; +}; diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi index 9efe20d07589..3a5ed789c056 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi @@ -519,7 +519,7 @@ mmc-ddr-1_8v; mmc-hs200-1_8v; mmc-pwrseq = <&emmc_pwrseq>; - cdns,phy-input-delay-legacy = <4>; + cdns,phy-input-delay-legacy = <9>; cdns,phy-input-delay-mmc-highspeed = <2>; cdns,phy-input-delay-mmc-ddr = <3>; cdns,phy-dll-delay-sdclk = <21>; diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi index 7c8f710d9bfa..e85d6ddea3c2 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi @@ -334,7 +334,7 @@ mmc-ddr-1_8v; mmc-hs200-1_8v; mmc-pwrseq = <&emmc_pwrseq>; - cdns,phy-input-delay-legacy = <4>; + cdns,phy-input-delay-legacy = <9>; cdns,phy-input-delay-mmc-highspeed = <2>; cdns,phy-input-delay-mmc-ddr = <3>; cdns,phy-dll-delay-sdclk = <21>; diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index cb5a243110c4..e3fdb0fd6f70 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -47,6 +47,12 @@ config CRYPTO_SM3_ARM64_CE select CRYPTO_HASH select CRYPTO_SM3 +config CRYPTO_SM4_ARM64_CE + tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)" + depends on KERNEL_MODE_NEON + select CRYPTO_ALGAPI + select CRYPTO_SM4 + config CRYPTO_GHASH_ARM64_CE tristate "GHASH/AES-GCM using ARMv8 Crypto Extensions" depends on KERNEL_MODE_NEON diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index f35ac684b1c0..bcafd016618e 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -23,6 +23,9 @@ sha3-ce-y := sha3-ce-glue.o sha3-ce-core.o obj-$(CONFIG_CRYPTO_SM3_ARM64_CE) += sm3-ce.o sm3-ce-y := sm3-ce-glue.o sm3-ce-core.o +obj-$(CONFIG_CRYPTO_SM4_ARM64_CE) += sm4-ce.o +sm4-ce-y := sm4-ce-glue.o sm4-ce-core.o + obj-$(CONFIG_CRYPTO_GHASH_ARM64_CE) += ghash-ce.o ghash-ce-y := ghash-ce-glue.o ghash-ce-core.o diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S index e3a375c4cb83..88f5aef7934c 100644 --- a/arch/arm64/crypto/aes-ce-ccm-core.S +++ b/arch/arm64/crypto/aes-ce-ccm-core.S @@ -19,24 +19,33 @@ * u32 *macp, u8 const rk[], u32 rounds); */ ENTRY(ce_aes_ccm_auth_data) - ldr w8, [x3] /* leftover from prev round? */ + frame_push 7 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 + + ldr w25, [x22] /* leftover from prev round? */ ld1 {v0.16b}, [x0] /* load mac */ - cbz w8, 1f - sub w8, w8, #16 + cbz w25, 1f + sub w25, w25, #16 eor v1.16b, v1.16b, v1.16b -0: ldrb w7, [x1], #1 /* get 1 byte of input */ - subs w2, w2, #1 - add w8, w8, #1 +0: ldrb w7, [x20], #1 /* get 1 byte of input */ + subs w21, w21, #1 + add w25, w25, #1 ins v1.b[0], w7 ext v1.16b, v1.16b, v1.16b, #1 /* rotate in the input bytes */ beq 8f /* out of input? */ - cbnz w8, 0b + cbnz w25, 0b eor v0.16b, v0.16b, v1.16b -1: ld1 {v3.4s}, [x4] /* load first round key */ - prfm pldl1strm, [x1] - cmp w5, #12 /* which key size? */ - add x6, x4, #16 - sub w7, w5, #2 /* modified # of rounds */ +1: ld1 {v3.4s}, [x23] /* load first round key */ + prfm pldl1strm, [x20] + cmp w24, #12 /* which key size? */ + add x6, x23, #16 + sub w7, w24, #2 /* modified # of rounds */ bmi 2f bne 5f mov v5.16b, v3.16b @@ -55,33 +64,43 @@ ENTRY(ce_aes_ccm_auth_data) ld1 {v5.4s}, [x6], #16 /* load next round key */ bpl 3b aese v0.16b, v4.16b - subs w2, w2, #16 /* last data? */ + subs w21, w21, #16 /* last data? */ eor v0.16b, v0.16b, v5.16b /* final round */ bmi 6f - ld1 {v1.16b}, [x1], #16 /* load next input block */ + ld1 {v1.16b}, [x20], #16 /* load next input block */ eor v0.16b, v0.16b, v1.16b /* xor with mac */ - bne 1b -6: st1 {v0.16b}, [x0] /* store mac */ + beq 6f + + if_will_cond_yield_neon + st1 {v0.16b}, [x19] /* store mac */ + do_cond_yield_neon + ld1 {v0.16b}, [x19] /* reload mac */ + endif_yield_neon + + b 1b +6: st1 {v0.16b}, [x19] /* store mac */ beq 10f - adds w2, w2, #16 + adds w21, w21, #16 beq 10f - mov w8, w2 -7: ldrb w7, [x1], #1 + mov w25, w21 +7: ldrb w7, [x20], #1 umov w6, v0.b[0] eor w6, w6, w7 - strb w6, [x0], #1 - subs w2, w2, #1 + strb w6, [x19], #1 + subs w21, w21, #1 beq 10f ext v0.16b, v0.16b, v0.16b, #1 /* rotate out the mac bytes */ b 7b -8: mov w7, w8 - add w8, w8, #16 +8: mov w7, w25 + add w25, w25, #16 9: ext v1.16b, v1.16b, v1.16b, #1 adds w7, w7, #1 bne 9b eor v0.16b, v0.16b, v1.16b - st1 {v0.16b}, [x0] -10: str w8, [x3] + st1 {v0.16b}, [x19] +10: str w25, [x22] + + frame_pop ret ENDPROC(ce_aes_ccm_auth_data) @@ -126,19 +145,29 @@ ENTRY(ce_aes_ccm_final) ENDPROC(ce_aes_ccm_final) .macro aes_ccm_do_crypt,enc - ldr x8, [x6, #8] /* load lower ctr */ - ld1 {v0.16b}, [x5] /* load mac */ -CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ + frame_push 8 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 + mov x25, x6 + + ldr x26, [x25, #8] /* load lower ctr */ + ld1 {v0.16b}, [x24] /* load mac */ +CPU_LE( rev x26, x26 ) /* keep swabbed ctr in reg */ 0: /* outer loop */ - ld1 {v1.8b}, [x6] /* load upper ctr */ - prfm pldl1strm, [x1] - add x8, x8, #1 - rev x9, x8 - cmp w4, #12 /* which key size? */ - sub w7, w4, #2 /* get modified # of rounds */ + ld1 {v1.8b}, [x25] /* load upper ctr */ + prfm pldl1strm, [x20] + add x26, x26, #1 + rev x9, x26 + cmp w23, #12 /* which key size? */ + sub w7, w23, #2 /* get modified # of rounds */ ins v1.d[1], x9 /* no carry in lower ctr */ - ld1 {v3.4s}, [x3] /* load first round key */ - add x10, x3, #16 + ld1 {v3.4s}, [x22] /* load first round key */ + add x10, x22, #16 bmi 1f bne 4f mov v5.16b, v3.16b @@ -165,9 +194,9 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ bpl 2b aese v0.16b, v4.16b aese v1.16b, v4.16b - subs w2, w2, #16 - bmi 6f /* partial block? */ - ld1 {v2.16b}, [x1], #16 /* load next input block */ + subs w21, w21, #16 + bmi 7f /* partial block? */ + ld1 {v2.16b}, [x20], #16 /* load next input block */ .if \enc == 1 eor v2.16b, v2.16b, v5.16b /* final round enc+mac */ eor v1.16b, v1.16b, v2.16b /* xor with crypted ctr */ @@ -176,18 +205,29 @@ CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ eor v1.16b, v2.16b, v5.16b /* final round enc */ .endif eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ - st1 {v1.16b}, [x0], #16 /* write output block */ - bne 0b -CPU_LE( rev x8, x8 ) - st1 {v0.16b}, [x5] /* store mac */ - str x8, [x6, #8] /* store lsb end of ctr (BE) */ -5: ret - -6: eor v0.16b, v0.16b, v5.16b /* final round mac */ + st1 {v1.16b}, [x19], #16 /* write output block */ + beq 5f + + if_will_cond_yield_neon + st1 {v0.16b}, [x24] /* store mac */ + do_cond_yield_neon + ld1 {v0.16b}, [x24] /* reload mac */ + endif_yield_neon + + b 0b +5: +CPU_LE( rev x26, x26 ) + st1 {v0.16b}, [x24] /* store mac */ + str x26, [x25, #8] /* store lsb end of ctr (BE) */ + +6: frame_pop + ret + +7: eor v0.16b, v0.16b, v5.16b /* final round mac */ eor v1.16b, v1.16b, v5.16b /* final round enc */ - st1 {v0.16b}, [x5] /* store mac */ - add w2, w2, #16 /* process partial tail block */ -7: ldrb w9, [x1], #1 /* get 1 byte of input */ + st1 {v0.16b}, [x24] /* store mac */ + add w21, w21, #16 /* process partial tail block */ +8: ldrb w9, [x20], #1 /* get 1 byte of input */ umov w6, v1.b[0] /* get top crypted ctr byte */ umov w7, v0.b[0] /* get top mac byte */ .if \enc == 1 @@ -197,13 +237,13 @@ CPU_LE( rev x8, x8 ) eor w9, w9, w6 eor w7, w7, w9 .endif - strb w9, [x0], #1 /* store out byte */ - strb w7, [x5], #1 /* store mac byte */ - subs w2, w2, #1 - beq 5b + strb w9, [x19], #1 /* store out byte */ + strb w7, [x24], #1 /* store mac byte */ + subs w21, w21, #1 + beq 6b ext v0.16b, v0.16b, v0.16b, #1 /* shift out mac byte */ ext v1.16b, v1.16b, v1.16b, #1 /* shift out ctr byte */ - b 7b + b 8b .endm /* diff --git a/arch/arm64/crypto/aes-ce.S b/arch/arm64/crypto/aes-ce.S index 50330f5c3adc..623e74ed1c67 100644 --- a/arch/arm64/crypto/aes-ce.S +++ b/arch/arm64/crypto/aes-ce.S @@ -30,18 +30,21 @@ .endm /* prepare for encryption with key in rk[] */ - .macro enc_prepare, rounds, rk, ignore - load_round_keys \rounds, \rk + .macro enc_prepare, rounds, rk, temp + mov \temp, \rk + load_round_keys \rounds, \temp .endm /* prepare for encryption (again) but with new key in rk[] */ - .macro enc_switch_key, rounds, rk, ignore - load_round_keys \rounds, \rk + .macro enc_switch_key, rounds, rk, temp + mov \temp, \rk + load_round_keys \rounds, \temp .endm /* prepare for decryption with key in rk[] */ - .macro dec_prepare, rounds, rk, ignore - load_round_keys \rounds, \rk + .macro dec_prepare, rounds, rk, temp + mov \temp, \rk + load_round_keys \rounds, \temp .endm .macro do_enc_Nx, de, mc, k, i0, i1, i2, i3 diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index a68412e1e3a4..483a7130cf0e 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -14,12 +14,12 @@ .align 4 aes_encrypt_block4x: - encrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 + encrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7 ret ENDPROC(aes_encrypt_block4x) aes_decrypt_block4x: - decrypt_block4x v0, v1, v2, v3, w3, x2, x8, w7 + decrypt_block4x v0, v1, v2, v3, w22, x21, x8, w7 ret ENDPROC(aes_decrypt_block4x) @@ -31,57 +31,71 @@ ENDPROC(aes_decrypt_block4x) */ AES_ENTRY(aes_ecb_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 5 - enc_prepare w3, x2, x5 + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + +.Lecbencrestart: + enc_prepare w22, x21, x5 .LecbencloopNx: - subs w4, w4, #4 + subs w23, w23, #4 bmi .Lecbenc1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ + ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */ bl aes_encrypt_block4x - st1 {v0.16b-v3.16b}, [x0], #64 + st1 {v0.16b-v3.16b}, [x19], #64 + cond_yield_neon .Lecbencrestart b .LecbencloopNx .Lecbenc1x: - adds w4, w4, #4 + adds w23, w23, #4 beq .Lecbencout .Lecbencloop: - ld1 {v0.16b}, [x1], #16 /* get next pt block */ - encrypt_block v0, w3, x2, x5, w6 - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 + ld1 {v0.16b}, [x20], #16 /* get next pt block */ + encrypt_block v0, w22, x21, x5, w6 + st1 {v0.16b}, [x19], #16 + subs w23, w23, #1 bne .Lecbencloop .Lecbencout: - ldp x29, x30, [sp], #16 + frame_pop ret AES_ENDPROC(aes_ecb_encrypt) AES_ENTRY(aes_ecb_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 5 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 - dec_prepare w3, x2, x5 +.Lecbdecrestart: + dec_prepare w22, x21, x5 .LecbdecloopNx: - subs w4, w4, #4 + subs w23, w23, #4 bmi .Lecbdec1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */ bl aes_decrypt_block4x - st1 {v0.16b-v3.16b}, [x0], #64 + st1 {v0.16b-v3.16b}, [x19], #64 + cond_yield_neon .Lecbdecrestart b .LecbdecloopNx .Lecbdec1x: - adds w4, w4, #4 + adds w23, w23, #4 beq .Lecbdecout .Lecbdecloop: - ld1 {v0.16b}, [x1], #16 /* get next ct block */ - decrypt_block v0, w3, x2, x5, w6 - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 + ld1 {v0.16b}, [x20], #16 /* get next ct block */ + decrypt_block v0, w22, x21, x5, w6 + st1 {v0.16b}, [x19], #16 + subs w23, w23, #1 bne .Lecbdecloop .Lecbdecout: - ldp x29, x30, [sp], #16 + frame_pop ret AES_ENDPROC(aes_ecb_decrypt) @@ -94,78 +108,100 @@ AES_ENDPROC(aes_ecb_decrypt) */ AES_ENTRY(aes_cbc_encrypt) - ld1 {v4.16b}, [x5] /* get iv */ - enc_prepare w3, x2, x6 + frame_push 6 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 + +.Lcbcencrestart: + ld1 {v4.16b}, [x24] /* get iv */ + enc_prepare w22, x21, x6 .Lcbcencloop4x: - subs w4, w4, #4 + subs w23, w23, #4 bmi .Lcbcenc1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ + ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */ eor v0.16b, v0.16b, v4.16b /* ..and xor with iv */ - encrypt_block v0, w3, x2, x6, w7 + encrypt_block v0, w22, x21, x6, w7 eor v1.16b, v1.16b, v0.16b - encrypt_block v1, w3, x2, x6, w7 + encrypt_block v1, w22, x21, x6, w7 eor v2.16b, v2.16b, v1.16b - encrypt_block v2, w3, x2, x6, w7 + encrypt_block v2, w22, x21, x6, w7 eor v3.16b, v3.16b, v2.16b - encrypt_block v3, w3, x2, x6, w7 - st1 {v0.16b-v3.16b}, [x0], #64 + encrypt_block v3, w22, x21, x6, w7 + st1 {v0.16b-v3.16b}, [x19], #64 mov v4.16b, v3.16b + st1 {v4.16b}, [x24] /* return iv */ + cond_yield_neon .Lcbcencrestart b .Lcbcencloop4x .Lcbcenc1x: - adds w4, w4, #4 + adds w23, w23, #4 beq .Lcbcencout .Lcbcencloop: - ld1 {v0.16b}, [x1], #16 /* get next pt block */ + ld1 {v0.16b}, [x20], #16 /* get next pt block */ eor v4.16b, v4.16b, v0.16b /* ..and xor with iv */ - encrypt_block v4, w3, x2, x6, w7 - st1 {v4.16b}, [x0], #16 - subs w4, w4, #1 + encrypt_block v4, w22, x21, x6, w7 + st1 {v4.16b}, [x19], #16 + subs w23, w23, #1 bne .Lcbcencloop .Lcbcencout: - st1 {v4.16b}, [x5] /* return iv */ + st1 {v4.16b}, [x24] /* return iv */ + frame_pop ret AES_ENDPROC(aes_cbc_encrypt) AES_ENTRY(aes_cbc_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 6 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 - ld1 {v7.16b}, [x5] /* get iv */ - dec_prepare w3, x2, x6 +.Lcbcdecrestart: + ld1 {v7.16b}, [x24] /* get iv */ + dec_prepare w22, x21, x6 .LcbcdecloopNx: - subs w4, w4, #4 + subs w23, w23, #4 bmi .Lcbcdec1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */ mov v4.16b, v0.16b mov v5.16b, v1.16b mov v6.16b, v2.16b bl aes_decrypt_block4x - sub x1, x1, #16 + sub x20, x20, #16 eor v0.16b, v0.16b, v7.16b eor v1.16b, v1.16b, v4.16b - ld1 {v7.16b}, [x1], #16 /* reload 1 ct block */ + ld1 {v7.16b}, [x20], #16 /* reload 1 ct block */ eor v2.16b, v2.16b, v5.16b eor v3.16b, v3.16b, v6.16b - st1 {v0.16b-v3.16b}, [x0], #64 + st1 {v0.16b-v3.16b}, [x19], #64 + st1 {v7.16b}, [x24] /* return iv */ + cond_yield_neon .Lcbcdecrestart b .LcbcdecloopNx .Lcbcdec1x: - adds w4, w4, #4 + adds w23, w23, #4 beq .Lcbcdecout .Lcbcdecloop: - ld1 {v1.16b}, [x1], #16 /* get next ct block */ + ld1 {v1.16b}, [x20], #16 /* get next ct block */ mov v0.16b, v1.16b /* ...and copy to v0 */ - decrypt_block v0, w3, x2, x6, w7 + decrypt_block v0, w22, x21, x6, w7 eor v0.16b, v0.16b, v7.16b /* xor with iv => pt */ mov v7.16b, v1.16b /* ct is next iv */ - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 + st1 {v0.16b}, [x19], #16 + subs w23, w23, #1 bne .Lcbcdecloop .Lcbcdecout: - st1 {v7.16b}, [x5] /* return iv */ - ldp x29, x30, [sp], #16 + st1 {v7.16b}, [x24] /* return iv */ + frame_pop ret AES_ENDPROC(aes_cbc_decrypt) @@ -176,19 +212,26 @@ AES_ENDPROC(aes_cbc_decrypt) */ AES_ENTRY(aes_ctr_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 6 - enc_prepare w3, x2, x6 - ld1 {v4.16b}, [x5] + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 + +.Lctrrestart: + enc_prepare w22, x21, x6 + ld1 {v4.16b}, [x24] umov x6, v4.d[1] /* keep swabbed ctr in reg */ rev x6, x6 - cmn w6, w4 /* 32 bit overflow? */ - bcs .Lctrloop .LctrloopNx: - subs w4, w4, #4 + subs w23, w23, #4 bmi .Lctr1x + cmn w6, #4 /* 32 bit overflow? */ + bcs .Lctr1x ldr q8, =0x30000000200000001 /* addends 1,2,3[,0] */ dup v7.4s, w6 mov v0.16b, v4.16b @@ -200,25 +243,27 @@ AES_ENTRY(aes_ctr_encrypt) mov v1.s[3], v8.s[0] mov v2.s[3], v8.s[1] mov v3.s[3], v8.s[2] - ld1 {v5.16b-v7.16b}, [x1], #48 /* get 3 input blocks */ + ld1 {v5.16b-v7.16b}, [x20], #48 /* get 3 input blocks */ bl aes_encrypt_block4x eor v0.16b, v5.16b, v0.16b - ld1 {v5.16b}, [x1], #16 /* get 1 input block */ + ld1 {v5.16b}, [x20], #16 /* get 1 input block */ eor v1.16b, v6.16b, v1.16b eor v2.16b, v7.16b, v2.16b eor v3.16b, v5.16b, v3.16b - st1 {v0.16b-v3.16b}, [x0], #64 + st1 {v0.16b-v3.16b}, [x19], #64 add x6, x6, #4 rev x7, x6 ins v4.d[1], x7 - cbz w4, .Lctrout + cbz w23, .Lctrout + st1 {v4.16b}, [x24] /* return next CTR value */ + cond_yield_neon .Lctrrestart b .LctrloopNx .Lctr1x: - adds w4, w4, #4 + adds w23, w23, #4 beq .Lctrout .Lctrloop: mov v0.16b, v4.16b - encrypt_block v0, w3, x2, x8, w7 + encrypt_block v0, w22, x21, x8, w7 adds x6, x6, #1 /* increment BE ctr */ rev x7, x6 @@ -226,22 +271,22 @@ AES_ENTRY(aes_ctr_encrypt) bcs .Lctrcarry /* overflow? */ .Lctrcarrydone: - subs w4, w4, #1 + subs w23, w23, #1 bmi .Lctrtailblock /* blocks <0 means tail block */ - ld1 {v3.16b}, [x1], #16 + ld1 {v3.16b}, [x20], #16 eor v3.16b, v0.16b, v3.16b - st1 {v3.16b}, [x0], #16 + st1 {v3.16b}, [x19], #16 bne .Lctrloop .Lctrout: - st1 {v4.16b}, [x5] /* return next CTR value */ - ldp x29, x30, [sp], #16 + st1 {v4.16b}, [x24] /* return next CTR value */ +.Lctrret: + frame_pop ret .Lctrtailblock: - st1 {v0.16b}, [x0] - ldp x29, x30, [sp], #16 - ret + st1 {v0.16b}, [x19] + b .Lctrret .Lctrcarry: umov x7, v4.d[0] /* load upper word of ctr */ @@ -274,10 +319,16 @@ CPU_LE( .quad 1, 0x87 ) CPU_BE( .quad 0x87, 1 ) AES_ENTRY(aes_xts_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 6 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x6 - ld1 {v4.16b}, [x6] + ld1 {v4.16b}, [x24] cbz w7, .Lxtsencnotfirst enc_prepare w3, x5, x8 @@ -286,15 +337,17 @@ AES_ENTRY(aes_xts_encrypt) ldr q7, .Lxts_mul_x b .LxtsencNx +.Lxtsencrestart: + ld1 {v4.16b}, [x24] .Lxtsencnotfirst: - enc_prepare w3, x2, x8 + enc_prepare w22, x21, x8 .LxtsencloopNx: ldr q7, .Lxts_mul_x next_tweak v4, v4, v7, v8 .LxtsencNx: - subs w4, w4, #4 + subs w23, w23, #4 bmi .Lxtsenc1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 pt blocks */ + ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 pt blocks */ next_tweak v5, v4, v7, v8 eor v0.16b, v0.16b, v4.16b next_tweak v6, v5, v7, v8 @@ -307,35 +360,43 @@ AES_ENTRY(aes_xts_encrypt) eor v0.16b, v0.16b, v4.16b eor v1.16b, v1.16b, v5.16b eor v2.16b, v2.16b, v6.16b - st1 {v0.16b-v3.16b}, [x0], #64 + st1 {v0.16b-v3.16b}, [x19], #64 mov v4.16b, v7.16b - cbz w4, .Lxtsencout + cbz w23, .Lxtsencout + st1 {v4.16b}, [x24] + cond_yield_neon .Lxtsencrestart b .LxtsencloopNx .Lxtsenc1x: - adds w4, w4, #4 + adds w23, w23, #4 beq .Lxtsencout .Lxtsencloop: - ld1 {v1.16b}, [x1], #16 + ld1 {v1.16b}, [x20], #16 eor v0.16b, v1.16b, v4.16b - encrypt_block v0, w3, x2, x8, w7 + encrypt_block v0, w22, x21, x8, w7 eor v0.16b, v0.16b, v4.16b - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 + st1 {v0.16b}, [x19], #16 + subs w23, w23, #1 beq .Lxtsencout next_tweak v4, v4, v7, v8 b .Lxtsencloop .Lxtsencout: - st1 {v4.16b}, [x6] - ldp x29, x30, [sp], #16 + st1 {v4.16b}, [x24] + frame_pop ret AES_ENDPROC(aes_xts_encrypt) AES_ENTRY(aes_xts_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 6 - ld1 {v4.16b}, [x6] + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x6 + + ld1 {v4.16b}, [x24] cbz w7, .Lxtsdecnotfirst enc_prepare w3, x5, x8 @@ -344,15 +405,17 @@ AES_ENTRY(aes_xts_decrypt) ldr q7, .Lxts_mul_x b .LxtsdecNx +.Lxtsdecrestart: + ld1 {v4.16b}, [x24] .Lxtsdecnotfirst: - dec_prepare w3, x2, x8 + dec_prepare w22, x21, x8 .LxtsdecloopNx: ldr q7, .Lxts_mul_x next_tweak v4, v4, v7, v8 .LxtsdecNx: - subs w4, w4, #4 + subs w23, w23, #4 bmi .Lxtsdec1x - ld1 {v0.16b-v3.16b}, [x1], #64 /* get 4 ct blocks */ + ld1 {v0.16b-v3.16b}, [x20], #64 /* get 4 ct blocks */ next_tweak v5, v4, v7, v8 eor v0.16b, v0.16b, v4.16b next_tweak v6, v5, v7, v8 @@ -365,26 +428,28 @@ AES_ENTRY(aes_xts_decrypt) eor v0.16b, v0.16b, v4.16b eor v1.16b, v1.16b, v5.16b eor v2.16b, v2.16b, v6.16b - st1 {v0.16b-v3.16b}, [x0], #64 + st1 {v0.16b-v3.16b}, [x19], #64 mov v4.16b, v7.16b - cbz w4, .Lxtsdecout + cbz w23, .Lxtsdecout + st1 {v4.16b}, [x24] + cond_yield_neon .Lxtsdecrestart b .LxtsdecloopNx .Lxtsdec1x: - adds w4, w4, #4 + adds w23, w23, #4 beq .Lxtsdecout .Lxtsdecloop: - ld1 {v1.16b}, [x1], #16 + ld1 {v1.16b}, [x20], #16 eor v0.16b, v1.16b, v4.16b - decrypt_block v0, w3, x2, x8, w7 + decrypt_block v0, w22, x21, x8, w7 eor v0.16b, v0.16b, v4.16b - st1 {v0.16b}, [x0], #16 - subs w4, w4, #1 + st1 {v0.16b}, [x19], #16 + subs w23, w23, #1 beq .Lxtsdecout next_tweak v4, v4, v7, v8 b .Lxtsdecloop .Lxtsdecout: - st1 {v4.16b}, [x6] - ldp x29, x30, [sp], #16 + st1 {v4.16b}, [x24] + frame_pop ret AES_ENDPROC(aes_xts_decrypt) @@ -393,43 +458,61 @@ AES_ENDPROC(aes_xts_decrypt) * int blocks, u8 dg[], int enc_before, int enc_after) */ AES_ENTRY(aes_mac_update) - ld1 {v0.16b}, [x4] /* get dg */ + frame_push 6 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x6 + + ld1 {v0.16b}, [x23] /* get dg */ enc_prepare w2, x1, x7 cbz w5, .Lmacloop4x encrypt_block v0, w2, x1, x7, w8 .Lmacloop4x: - subs w3, w3, #4 + subs w22, w22, #4 bmi .Lmac1x - ld1 {v1.16b-v4.16b}, [x0], #64 /* get next pt block */ + ld1 {v1.16b-v4.16b}, [x19], #64 /* get next pt block */ eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ - encrypt_block v0, w2, x1, x7, w8 + encrypt_block v0, w21, x20, x7, w8 eor v0.16b, v0.16b, v2.16b - encrypt_block v0, w2, x1, x7, w8 + encrypt_block v0, w21, x20, x7, w8 eor v0.16b, v0.16b, v3.16b - encrypt_block v0, w2, x1, x7, w8 + encrypt_block v0, w21, x20, x7, w8 eor v0.16b, v0.16b, v4.16b - cmp w3, wzr - csinv x5, x6, xzr, eq + cmp w22, wzr + csinv x5, x24, xzr, eq cbz w5, .Lmacout - encrypt_block v0, w2, x1, x7, w8 + encrypt_block v0, w21, x20, x7, w8 + st1 {v0.16b}, [x23] /* return dg */ + cond_yield_neon .Lmacrestart b .Lmacloop4x .Lmac1x: - add w3, w3, #4 + add w22, w22, #4 .Lmacloop: - cbz w3, .Lmacout - ld1 {v1.16b}, [x0], #16 /* get next pt block */ + cbz w22, .Lmacout + ld1 {v1.16b}, [x19], #16 /* get next pt block */ eor v0.16b, v0.16b, v1.16b /* ..and xor with dg */ - subs w3, w3, #1 - csinv x5, x6, xzr, eq + subs w22, w22, #1 + csinv x5, x24, xzr, eq cbz w5, .Lmacout - encrypt_block v0, w2, x1, x7, w8 +.Lmacenc: + encrypt_block v0, w21, x20, x7, w8 b .Lmacloop .Lmacout: - st1 {v0.16b}, [x4] /* return dg */ + st1 {v0.16b}, [x23] /* return dg */ + frame_pop ret + +.Lmacrestart: + ld1 {v0.16b}, [x23] /* get dg */ + enc_prepare w21, x20, x0 + b .Lmacloop4x AES_ENDPROC(aes_mac_update) diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S index ca0472500433..e613a87f8b53 100644 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -565,54 +565,61 @@ ENDPROC(aesbs_decrypt8) * int blocks) */ .macro __ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 5 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 99: mov x5, #1 - lsl x5, x5, x4 - subs w4, w4, #8 - csel x4, x4, xzr, pl + lsl x5, x5, x23 + subs w23, w23, #8 + csel x23, x23, xzr, pl csel x5, x5, xzr, mi - ld1 {v0.16b}, [x1], #16 + ld1 {v0.16b}, [x20], #16 tbnz x5, #1, 0f - ld1 {v1.16b}, [x1], #16 + ld1 {v1.16b}, [x20], #16 tbnz x5, #2, 0f - ld1 {v2.16b}, [x1], #16 + ld1 {v2.16b}, [x20], #16 tbnz x5, #3, 0f - ld1 {v3.16b}, [x1], #16 + ld1 {v3.16b}, [x20], #16 tbnz x5, #4, 0f - ld1 {v4.16b}, [x1], #16 + ld1 {v4.16b}, [x20], #16 tbnz x5, #5, 0f - ld1 {v5.16b}, [x1], #16 + ld1 {v5.16b}, [x20], #16 tbnz x5, #6, 0f - ld1 {v6.16b}, [x1], #16 + ld1 {v6.16b}, [x20], #16 tbnz x5, #7, 0f - ld1 {v7.16b}, [x1], #16 + ld1 {v7.16b}, [x20], #16 -0: mov bskey, x2 - mov rounds, x3 +0: mov bskey, x21 + mov rounds, x22 bl \do8 - st1 {\o0\().16b}, [x0], #16 + st1 {\o0\().16b}, [x19], #16 tbnz x5, #1, 1f - st1 {\o1\().16b}, [x0], #16 + st1 {\o1\().16b}, [x19], #16 tbnz x5, #2, 1f - st1 {\o2\().16b}, [x0], #16 + st1 {\o2\().16b}, [x19], #16 tbnz x5, #3, 1f - st1 {\o3\().16b}, [x0], #16 + st1 {\o3\().16b}, [x19], #16 tbnz x5, #4, 1f - st1 {\o4\().16b}, [x0], #16 + st1 {\o4\().16b}, [x19], #16 tbnz x5, #5, 1f - st1 {\o5\().16b}, [x0], #16 + st1 {\o5\().16b}, [x19], #16 tbnz x5, #6, 1f - st1 {\o6\().16b}, [x0], #16 + st1 {\o6\().16b}, [x19], #16 tbnz x5, #7, 1f - st1 {\o7\().16b}, [x0], #16 + st1 {\o7\().16b}, [x19], #16 - cbnz x4, 99b + cbz x23, 1f + cond_yield_neon + b 99b -1: ldp x29, x30, [sp], #16 +1: frame_pop ret .endm @@ -632,43 +639,49 @@ ENDPROC(aesbs_ecb_decrypt) */ .align 4 ENTRY(aesbs_cbc_decrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp + frame_push 6 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 99: mov x6, #1 - lsl x6, x6, x4 - subs w4, w4, #8 - csel x4, x4, xzr, pl + lsl x6, x6, x23 + subs w23, w23, #8 + csel x23, x23, xzr, pl csel x6, x6, xzr, mi - ld1 {v0.16b}, [x1], #16 + ld1 {v0.16b}, [x20], #16 mov v25.16b, v0.16b tbnz x6, #1, 0f - ld1 {v1.16b}, [x1], #16 + ld1 {v1.16b}, [x20], #16 mov v26.16b, v1.16b tbnz x6, #2, 0f - ld1 {v2.16b}, [x1], #16 + ld1 {v2.16b}, [x20], #16 mov v27.16b, v2.16b tbnz x6, #3, 0f - ld1 {v3.16b}, [x1], #16 + ld1 {v3.16b}, [x20], #16 mov v28.16b, v3.16b tbnz x6, #4, 0f - ld1 {v4.16b}, [x1], #16 + ld1 {v4.16b}, [x20], #16 mov v29.16b, v4.16b tbnz x6, #5, 0f - ld1 {v5.16b}, [x1], #16 + ld1 {v5.16b}, [x20], #16 mov v30.16b, v5.16b tbnz x6, #6, 0f - ld1 {v6.16b}, [x1], #16 + ld1 {v6.16b}, [x20], #16 mov v31.16b, v6.16b tbnz x6, #7, 0f - ld1 {v7.16b}, [x1] + ld1 {v7.16b}, [x20] -0: mov bskey, x2 - mov rounds, x3 +0: mov bskey, x21 + mov rounds, x22 bl aesbs_decrypt8 - ld1 {v24.16b}, [x5] // load IV + ld1 {v24.16b}, [x24] // load IV eor v1.16b, v1.16b, v25.16b eor v6.16b, v6.16b, v26.16b @@ -679,34 +692,36 @@ ENTRY(aesbs_cbc_decrypt) eor v3.16b, v3.16b, v30.16b eor v5.16b, v5.16b, v31.16b - st1 {v0.16b}, [x0], #16 + st1 {v0.16b}, [x19], #16 mov v24.16b, v25.16b tbnz x6, #1, 1f - st1 {v1.16b}, [x0], #16 + st1 {v1.16b}, [x19], #16 mov v24.16b, v26.16b tbnz x6, #2, 1f - st1 {v6.16b}, [x0], #16 + st1 {v6.16b}, [x19], #16 mov v24.16b, v27.16b tbnz x6, #3, 1f - st1 {v4.16b}, [x0], #16 + st1 {v4.16b}, [x19], #16 mov v24.16b, v28.16b tbnz x6, #4, 1f - st1 {v2.16b}, [x0], #16 + st1 {v2.16b}, [x19], #16 mov v24.16b, v29.16b tbnz x6, #5, 1f - st1 {v7.16b}, [x0], #16 + st1 {v7.16b}, [x19], #16 mov v24.16b, v30.16b tbnz x6, #6, 1f - st1 {v3.16b}, [x0], #16 + st1 {v3.16b}, [x19], #16 mov v24.16b, v31.16b tbnz x6, #7, 1f - ld1 {v24.16b}, [x1], #16 - st1 {v5.16b}, [x0], #16 -1: st1 {v24.16b}, [x5] // store IV + ld1 {v24.16b}, [x20], #16 + st1 {v5.16b}, [x19], #16 +1: st1 {v24.16b}, [x24] // store IV - cbnz x4, 99b + cbz x23, 2f + cond_yield_neon + b 99b - ldp x29, x30, [sp], #16 +2: frame_pop ret ENDPROC(aesbs_cbc_decrypt) @@ -731,87 +746,93 @@ CPU_BE( .quad 0x87, 1 ) */ __xts_crypt8: mov x6, #1 - lsl x6, x6, x4 - subs w4, w4, #8 - csel x4, x4, xzr, pl + lsl x6, x6, x23 + subs w23, w23, #8 + csel x23, x23, xzr, pl csel x6, x6, xzr, mi - ld1 {v0.16b}, [x1], #16 + ld1 {v0.16b}, [x20], #16 next_tweak v26, v25, v30, v31 eor v0.16b, v0.16b, v25.16b tbnz x6, #1, 0f - ld1 {v1.16b}, [x1], #16 + ld1 {v1.16b}, [x20], #16 next_tweak v27, v26, v30, v31 eor v1.16b, v1.16b, v26.16b tbnz x6, #2, 0f - ld1 {v2.16b}, [x1], #16 + ld1 {v2.16b}, [x20], #16 next_tweak v28, v27, v30, v31 eor v2.16b, v2.16b, v27.16b tbnz x6, #3, 0f - ld1 {v3.16b}, [x1], #16 + ld1 {v3.16b}, [x20], #16 next_tweak v29, v28, v30, v31 eor v3.16b, v3.16b, v28.16b tbnz x6, #4, 0f - ld1 {v4.16b}, [x1], #16 - str q29, [sp, #16] + ld1 {v4.16b}, [x20], #16 + str q29, [sp, #.Lframe_local_offset] eor v4.16b, v4.16b, v29.16b next_tweak v29, v29, v30, v31 tbnz x6, #5, 0f - ld1 {v5.16b}, [x1], #16 - str q29, [sp, #32] + ld1 {v5.16b}, [x20], #16 + str q29, [sp, #.Lframe_local_offset + 16] eor v5.16b, v5.16b, v29.16b next_tweak v29, v29, v30, v31 tbnz x6, #6, 0f - ld1 {v6.16b}, [x1], #16 - str q29, [sp, #48] + ld1 {v6.16b}, [x20], #16 + str q29, [sp, #.Lframe_local_offset + 32] eor v6.16b, v6.16b, v29.16b next_tweak v29, v29, v30, v31 tbnz x6, #7, 0f - ld1 {v7.16b}, [x1], #16 - str q29, [sp, #64] + ld1 {v7.16b}, [x20], #16 + str q29, [sp, #.Lframe_local_offset + 48] eor v7.16b, v7.16b, v29.16b next_tweak v29, v29, v30, v31 -0: mov bskey, x2 - mov rounds, x3 +0: mov bskey, x21 + mov rounds, x22 br x7 ENDPROC(__xts_crypt8) .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 - stp x29, x30, [sp, #-80]! - mov x29, sp + frame_push 6, 64 - ldr q30, .Lxts_mul_x - ld1 {v25.16b}, [x5] + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 + +0: ldr q30, .Lxts_mul_x + ld1 {v25.16b}, [x24] 99: adr x7, \do8 bl __xts_crypt8 - ldp q16, q17, [sp, #16] - ldp q18, q19, [sp, #48] + ldp q16, q17, [sp, #.Lframe_local_offset] + ldp q18, q19, [sp, #.Lframe_local_offset + 32] eor \o0\().16b, \o0\().16b, v25.16b eor \o1\().16b, \o1\().16b, v26.16b eor \o2\().16b, \o2\().16b, v27.16b eor \o3\().16b, \o3\().16b, v28.16b - st1 {\o0\().16b}, [x0], #16 + st1 {\o0\().16b}, [x19], #16 mov v25.16b, v26.16b tbnz x6, #1, 1f - st1 {\o1\().16b}, [x0], #16 + st1 {\o1\().16b}, [x19], #16 mov v25.16b, v27.16b tbnz x6, #2, 1f - st1 {\o2\().16b}, [x0], #16 + st1 {\o2\().16b}, [x19], #16 mov v25.16b, v28.16b tbnz x6, #3, 1f - st1 {\o3\().16b}, [x0], #16 + st1 {\o3\().16b}, [x19], #16 mov v25.16b, v29.16b tbnz x6, #4, 1f @@ -820,18 +841,22 @@ ENDPROC(__xts_crypt8) eor \o6\().16b, \o6\().16b, v18.16b eor \o7\().16b, \o7\().16b, v19.16b - st1 {\o4\().16b}, [x0], #16 + st1 {\o4\().16b}, [x19], #16 tbnz x6, #5, 1f - st1 {\o5\().16b}, [x0], #16 + st1 {\o5\().16b}, [x19], #16 tbnz x6, #6, 1f - st1 {\o6\().16b}, [x0], #16 + st1 {\o6\().16b}, [x19], #16 tbnz x6, #7, 1f - st1 {\o7\().16b}, [x0], #16 + st1 {\o7\().16b}, [x19], #16 + + cbz x23, 1f + st1 {v25.16b}, [x24] - cbnz x4, 99b + cond_yield_neon 0b + b 99b -1: st1 {v25.16b}, [x5] - ldp x29, x30, [sp], #80 +1: st1 {v25.16b}, [x24] + frame_pop ret .endm @@ -856,24 +881,31 @@ ENDPROC(aesbs_xts_decrypt) * int rounds, int blocks, u8 iv[], u8 final[]) */ ENTRY(aesbs_ctr_encrypt) - stp x29, x30, [sp, #-16]! - mov x29, sp - - cmp x6, #0 - cset x10, ne - add x4, x4, x10 // do one extra block if final - - ldp x7, x8, [x5] - ld1 {v0.16b}, [x5] + frame_push 8 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 + mov x25, x6 + + cmp x25, #0 + cset x26, ne + add x23, x23, x26 // do one extra block if final + +98: ldp x7, x8, [x24] + ld1 {v0.16b}, [x24] CPU_LE( rev x7, x7 ) CPU_LE( rev x8, x8 ) adds x8, x8, #1 adc x7, x7, xzr 99: mov x9, #1 - lsl x9, x9, x4 - subs w4, w4, #8 - csel x4, x4, xzr, pl + lsl x9, x9, x23 + subs w23, w23, #8 + csel x23, x23, xzr, pl csel x9, x9, xzr, le tbnz x9, #1, 0f @@ -891,82 +923,85 @@ CPU_LE( rev x8, x8 ) tbnz x9, #7, 0f next_ctr v7 -0: mov bskey, x2 - mov rounds, x3 +0: mov bskey, x21 + mov rounds, x22 bl aesbs_encrypt8 - lsr x9, x9, x10 // disregard the extra block + lsr x9, x9, x26 // disregard the extra block tbnz x9, #0, 0f - ld1 {v8.16b}, [x1], #16 + ld1 {v8.16b}, [x20], #16 eor v0.16b, v0.16b, v8.16b - st1 {v0.16b}, [x0], #16 + st1 {v0.16b}, [x19], #16 tbnz x9, #1, 1f - ld1 {v9.16b}, [x1], #16 + ld1 {v9.16b}, [x20], #16 eor v1.16b, v1.16b, v9.16b - st1 {v1.16b}, [x0], #16 + st1 {v1.16b}, [x19], #16 tbnz x9, #2, 2f - ld1 {v10.16b}, [x1], #16 + ld1 {v10.16b}, [x20], #16 eor v4.16b, v4.16b, v10.16b - st1 {v4.16b}, [x0], #16 + st1 {v4.16b}, [x19], #16 tbnz x9, #3, 3f - ld1 {v11.16b}, [x1], #16 + ld1 {v11.16b}, [x20], #16 eor v6.16b, v6.16b, v11.16b - st1 {v6.16b}, [x0], #16 + st1 {v6.16b}, [x19], #16 tbnz x9, #4, 4f - ld1 {v12.16b}, [x1], #16 + ld1 {v12.16b}, [x20], #16 eor v3.16b, v3.16b, v12.16b - st1 {v3.16b}, [x0], #16 + st1 {v3.16b}, [x19], #16 tbnz x9, #5, 5f - ld1 {v13.16b}, [x1], #16 + ld1 {v13.16b}, [x20], #16 eor v7.16b, v7.16b, v13.16b - st1 {v7.16b}, [x0], #16 + st1 {v7.16b}, [x19], #16 tbnz x9, #6, 6f - ld1 {v14.16b}, [x1], #16 + ld1 {v14.16b}, [x20], #16 eor v2.16b, v2.16b, v14.16b - st1 {v2.16b}, [x0], #16 + st1 {v2.16b}, [x19], #16 tbnz x9, #7, 7f - ld1 {v15.16b}, [x1], #16 + ld1 {v15.16b}, [x20], #16 eor v5.16b, v5.16b, v15.16b - st1 {v5.16b}, [x0], #16 + st1 {v5.16b}, [x19], #16 8: next_ctr v0 - cbnz x4, 99b + st1 {v0.16b}, [x24] + cbz x23, 0f + + cond_yield_neon 98b + b 99b -0: st1 {v0.16b}, [x5] - ldp x29, x30, [sp], #16 +0: frame_pop ret /* * If we are handling the tail of the input (x6 != NULL), return the * final keystream block back to the caller. */ -1: cbz x6, 8b - st1 {v1.16b}, [x6] +1: cbz x25, 8b + st1 {v1.16b}, [x25] b 8b -2: cbz x6, 8b - st1 {v4.16b}, [x6] +2: cbz x25, 8b + st1 {v4.16b}, [x25] b 8b -3: cbz x6, 8b - st1 {v6.16b}, [x6] +3: cbz x25, 8b + st1 {v6.16b}, [x25] b 8b -4: cbz x6, 8b - st1 {v3.16b}, [x6] +4: cbz x25, 8b + st1 {v3.16b}, [x25] b 8b -5: cbz x6, 8b - st1 {v7.16b}, [x6] +5: cbz x25, 8b + st1 {v7.16b}, [x25] b 8b -6: cbz x6, 8b - st1 {v2.16b}, [x6] +6: cbz x25, 8b + st1 {v2.16b}, [x25] b 8b -7: cbz x6, 8b - st1 {v5.16b}, [x6] +7: cbz x25, 8b + st1 {v5.16b}, [x25] b 8b ENDPROC(aesbs_ctr_encrypt) diff --git a/arch/arm64/crypto/crc32-ce-core.S b/arch/arm64/crypto/crc32-ce-core.S index 16ed3c7ebd37..8061bf0f9c66 100644 --- a/arch/arm64/crypto/crc32-ce-core.S +++ b/arch/arm64/crypto/crc32-ce-core.S @@ -100,9 +100,10 @@ dCONSTANT .req d0 qCONSTANT .req q0 - BUF .req x0 - LEN .req x1 - CRC .req x2 + BUF .req x19 + LEN .req x20 + CRC .req x21 + CONST .req x22 vzr .req v9 @@ -123,7 +124,14 @@ ENTRY(crc32_pmull_le) ENTRY(crc32c_pmull_le) adr_l x3, .Lcrc32c_constants -0: bic LEN, LEN, #15 +0: frame_push 4, 64 + + mov BUF, x0 + mov LEN, x1 + mov CRC, x2 + mov CONST, x3 + + bic LEN, LEN, #15 ld1 {v1.16b-v4.16b}, [BUF], #0x40 movi vzr.16b, #0 fmov dCONSTANT, CRC @@ -132,7 +140,7 @@ ENTRY(crc32c_pmull_le) cmp LEN, #0x40 b.lt less_64 - ldr qCONSTANT, [x3] + ldr qCONSTANT, [CONST] loop_64: /* 64 bytes Full cache line folding */ sub LEN, LEN, #0x40 @@ -162,10 +170,21 @@ loop_64: /* 64 bytes Full cache line folding */ eor v4.16b, v4.16b, v8.16b cmp LEN, #0x40 - b.ge loop_64 + b.lt less_64 + + if_will_cond_yield_neon + stp q1, q2, [sp, #.Lframe_local_offset] + stp q3, q4, [sp, #.Lframe_local_offset + 32] + do_cond_yield_neon + ldp q1, q2, [sp, #.Lframe_local_offset] + ldp q3, q4, [sp, #.Lframe_local_offset + 32] + ldr qCONSTANT, [CONST] + movi vzr.16b, #0 + endif_yield_neon + b loop_64 less_64: /* Folding cache line into 128bit */ - ldr qCONSTANT, [x3, #16] + ldr qCONSTANT, [CONST, #16] pmull2 v5.1q, v1.2d, vCONSTANT.2d pmull v1.1q, v1.1d, vCONSTANT.1d @@ -204,8 +223,8 @@ fold_64: eor v1.16b, v1.16b, v2.16b /* final 32-bit fold */ - ldr dCONSTANT, [x3, #32] - ldr d3, [x3, #40] + ldr dCONSTANT, [CONST, #32] + ldr d3, [CONST, #40] ext v2.16b, v1.16b, vzr.16b, #4 and v1.16b, v1.16b, v3.16b @@ -213,7 +232,7 @@ fold_64: eor v1.16b, v1.16b, v2.16b /* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */ - ldr qCONSTANT, [x3, #48] + ldr qCONSTANT, [CONST, #48] and v2.16b, v1.16b, v3.16b ext v2.16b, vzr.16b, v2.16b, #8 @@ -223,6 +242,7 @@ fold_64: eor v1.16b, v1.16b, v2.16b mov w0, v1.s[1] + frame_pop ret ENDPROC(crc32_pmull_le) ENDPROC(crc32c_pmull_le) diff --git a/arch/arm64/crypto/crct10dif-ce-core.S b/arch/arm64/crypto/crct10dif-ce-core.S index f179c01bd55c..663ea71cdb38 100644 --- a/arch/arm64/crypto/crct10dif-ce-core.S +++ b/arch/arm64/crypto/crct10dif-ce-core.S @@ -74,13 +74,19 @@ .text .cpu generic+crypto - arg1_low32 .req w0 - arg2 .req x1 - arg3 .req x2 + arg1_low32 .req w19 + arg2 .req x20 + arg3 .req x21 vzr .req v13 ENTRY(crc_t10dif_pmull) + frame_push 3, 128 + + mov arg1_low32, w0 + mov arg2, x1 + mov arg3, x2 + movi vzr.16b, #0 // init zero register // adjust the 16-bit initial_crc value, scale it to 32 bits @@ -175,8 +181,25 @@ CPU_LE( ext v12.16b, v12.16b, v12.16b, #8 ) subs arg3, arg3, #128 // check if there is another 64B in the buffer to be able to fold - b.ge _fold_64_B_loop + b.lt _fold_64_B_end + + if_will_cond_yield_neon + stp q0, q1, [sp, #.Lframe_local_offset] + stp q2, q3, [sp, #.Lframe_local_offset + 32] + stp q4, q5, [sp, #.Lframe_local_offset + 64] + stp q6, q7, [sp, #.Lframe_local_offset + 96] + do_cond_yield_neon + ldp q0, q1, [sp, #.Lframe_local_offset] + ldp q2, q3, [sp, #.Lframe_local_offset + 32] + ldp q4, q5, [sp, #.Lframe_local_offset + 64] + ldp q6, q7, [sp, #.Lframe_local_offset + 96] + ldr_l q10, rk3, x8 + movi vzr.16b, #0 // init zero register + endif_yield_neon + + b _fold_64_B_loop +_fold_64_B_end: // at this point, the buffer pointer is pointing at the last y Bytes // of the buffer the 64B of folded data is in 4 of the vector // registers: v0, v1, v2, v3 @@ -304,6 +327,7 @@ _barrett: _cleanup: // scale the result back to 16 bits lsr x0, x0, #16 + frame_pop ret _less_than_128: diff --git a/arch/arm64/crypto/ghash-ce-core.S b/arch/arm64/crypto/ghash-ce-core.S index 11ebf1ae248a..dcffb9e77589 100644 --- a/arch/arm64/crypto/ghash-ce-core.S +++ b/arch/arm64/crypto/ghash-ce-core.S @@ -213,22 +213,31 @@ .endm .macro __pmull_ghash, pn - ld1 {SHASH.2d}, [x3] - ld1 {XL.2d}, [x1] + frame_push 5 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + +0: ld1 {SHASH.2d}, [x22] + ld1 {XL.2d}, [x20] ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 eor SHASH2.16b, SHASH2.16b, SHASH.16b __pmull_pre_\pn /* do the head block first, if supplied */ - cbz x4, 0f - ld1 {T1.2d}, [x4] - b 1f + cbz x23, 1f + ld1 {T1.2d}, [x23] + mov x23, xzr + b 2f -0: ld1 {T1.2d}, [x2], #16 - sub w0, w0, #1 +1: ld1 {T1.2d}, [x21], #16 + sub w19, w19, #1 -1: /* multiply XL by SHASH in GF(2^128) */ +2: /* multiply XL by SHASH in GF(2^128) */ CPU_LE( rev64 T1.16b, T1.16b ) ext T2.16b, XL.16b, XL.16b, #8 @@ -250,9 +259,18 @@ CPU_LE( rev64 T1.16b, T1.16b ) eor T2.16b, T2.16b, XH.16b eor XL.16b, XL.16b, T2.16b - cbnz w0, 0b + cbz w19, 3f + + if_will_cond_yield_neon + st1 {XL.2d}, [x20] + do_cond_yield_neon + b 0b + endif_yield_neon + + b 1b - st1 {XL.2d}, [x1] +3: st1 {XL.2d}, [x20] + frame_pop ret .endm @@ -304,38 +322,55 @@ ENDPROC(pmull_ghash_update_p8) .endm .macro pmull_gcm_do_crypt, enc - ld1 {SHASH.2d}, [x4] - ld1 {XL.2d}, [x1] - ldr x8, [x5, #8] // load lower counter + frame_push 10 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + mov x23, x4 + mov x24, x5 + mov x25, x6 + mov x26, x7 + .if \enc == 1 + ldr x27, [sp, #96] // first stacked arg + .endif + + ldr x28, [x24, #8] // load lower counter +CPU_LE( rev x28, x28 ) + +0: mov x0, x25 + load_round_keys w26, x0 + ld1 {SHASH.2d}, [x23] + ld1 {XL.2d}, [x20] movi MASK.16b, #0xe1 ext SHASH2.16b, SHASH.16b, SHASH.16b, #8 -CPU_LE( rev x8, x8 ) shl MASK.2d, MASK.2d, #57 eor SHASH2.16b, SHASH2.16b, SHASH.16b .if \enc == 1 - ld1 {KS.16b}, [x7] + ld1 {KS.16b}, [x27] .endif -0: ld1 {CTR.8b}, [x5] // load upper counter - ld1 {INP.16b}, [x3], #16 - rev x9, x8 - add x8, x8, #1 - sub w0, w0, #1 +1: ld1 {CTR.8b}, [x24] // load upper counter + ld1 {INP.16b}, [x22], #16 + rev x9, x28 + add x28, x28, #1 + sub w19, w19, #1 ins CTR.d[1], x9 // set lower counter .if \enc == 1 eor INP.16b, INP.16b, KS.16b // encrypt input - st1 {INP.16b}, [x2], #16 + st1 {INP.16b}, [x21], #16 .endif rev64 T1.16b, INP.16b - cmp w6, #12 - b.ge 2f // AES-192/256? + cmp w26, #12 + b.ge 4f // AES-192/256? -1: enc_round CTR, v21 +2: enc_round CTR, v21 ext T2.16b, XL.16b, XL.16b, #8 ext IN1.16b, T1.16b, T1.16b, #8 @@ -390,27 +425,39 @@ CPU_LE( rev x8, x8 ) .if \enc == 0 eor INP.16b, INP.16b, KS.16b - st1 {INP.16b}, [x2], #16 + st1 {INP.16b}, [x21], #16 .endif - cbnz w0, 0b + cbz w19, 3f -CPU_LE( rev x8, x8 ) - st1 {XL.2d}, [x1] - str x8, [x5, #8] // store lower counter + if_will_cond_yield_neon + st1 {XL.2d}, [x20] + .if \enc == 1 + st1 {KS.16b}, [x27] + .endif + do_cond_yield_neon + b 0b + endif_yield_neon + b 1b + +3: st1 {XL.2d}, [x20] .if \enc == 1 - st1 {KS.16b}, [x7] + st1 {KS.16b}, [x27] .endif +CPU_LE( rev x28, x28 ) + str x28, [x24, #8] // store lower counter + + frame_pop ret -2: b.eq 3f // AES-192? +4: b.eq 5f // AES-192? enc_round CTR, v17 enc_round CTR, v18 -3: enc_round CTR, v19 +5: enc_round CTR, v19 enc_round CTR, v20 - b 1b + b 2b .endm /* diff --git a/arch/arm64/crypto/ghash-ce-glue.c b/arch/arm64/crypto/ghash-ce-glue.c index cfc9c92814fd..7cf0b1aa6ea8 100644 --- a/arch/arm64/crypto/ghash-ce-glue.c +++ b/arch/arm64/crypto/ghash-ce-glue.c @@ -63,11 +63,12 @@ static void (*pmull_ghash_update)(int blocks, u64 dg[], const char *src, asmlinkage void pmull_gcm_encrypt(int blocks, u64 dg[], u8 dst[], const u8 src[], struct ghash_key const *k, - u8 ctr[], int rounds, u8 ks[]); + u8 ctr[], u32 const rk[], int rounds, + u8 ks[]); asmlinkage void pmull_gcm_decrypt(int blocks, u64 dg[], u8 dst[], const u8 src[], struct ghash_key const *k, - u8 ctr[], int rounds); + u8 ctr[], u32 const rk[], int rounds); asmlinkage void pmull_gcm_encrypt_block(u8 dst[], u8 const src[], u32 const rk[], int rounds); @@ -368,26 +369,29 @@ static int gcm_encrypt(struct aead_request *req) pmull_gcm_encrypt_block(ks, iv, NULL, num_rounds(&ctx->aes_key)); put_unaligned_be32(3, iv + GCM_IV_SIZE); + kernel_neon_end(); - err = skcipher_walk_aead_encrypt(&walk, req, true); + err = skcipher_walk_aead_encrypt(&walk, req, false); while (walk.nbytes >= AES_BLOCK_SIZE) { int blocks = walk.nbytes / AES_BLOCK_SIZE; + kernel_neon_begin(); pmull_gcm_encrypt(blocks, dg, walk.dst.virt.addr, walk.src.virt.addr, &ctx->ghash_key, - iv, num_rounds(&ctx->aes_key), ks); + iv, ctx->aes_key.key_enc, + num_rounds(&ctx->aes_key), ks); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); } - kernel_neon_end(); } else { __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, num_rounds(&ctx->aes_key)); put_unaligned_be32(2, iv + GCM_IV_SIZE); - err = skcipher_walk_aead_encrypt(&walk, req, true); + err = skcipher_walk_aead_encrypt(&walk, req, false); while (walk.nbytes >= AES_BLOCK_SIZE) { int blocks = walk.nbytes / AES_BLOCK_SIZE; @@ -467,15 +471,19 @@ static int gcm_decrypt(struct aead_request *req) pmull_gcm_encrypt_block(tag, iv, ctx->aes_key.key_enc, num_rounds(&ctx->aes_key)); put_unaligned_be32(2, iv + GCM_IV_SIZE); + kernel_neon_end(); - err = skcipher_walk_aead_decrypt(&walk, req, true); + err = skcipher_walk_aead_decrypt(&walk, req, false); while (walk.nbytes >= AES_BLOCK_SIZE) { int blocks = walk.nbytes / AES_BLOCK_SIZE; + kernel_neon_begin(); pmull_gcm_decrypt(blocks, dg, walk.dst.virt.addr, walk.src.virt.addr, &ctx->ghash_key, - iv, num_rounds(&ctx->aes_key)); + iv, ctx->aes_key.key_enc, + num_rounds(&ctx->aes_key)); + kernel_neon_end(); err = skcipher_walk_done(&walk, walk.nbytes % AES_BLOCK_SIZE); @@ -483,14 +491,12 @@ static int gcm_decrypt(struct aead_request *req) if (walk.nbytes) pmull_gcm_encrypt_block(iv, iv, NULL, num_rounds(&ctx->aes_key)); - - kernel_neon_end(); } else { __aes_arm64_encrypt(ctx->aes_key.key_enc, tag, iv, num_rounds(&ctx->aes_key)); put_unaligned_be32(2, iv + GCM_IV_SIZE); - err = skcipher_walk_aead_decrypt(&walk, req, true); + err = skcipher_walk_aead_decrypt(&walk, req, false); while (walk.nbytes >= AES_BLOCK_SIZE) { int blocks = walk.nbytes / AES_BLOCK_SIZE; diff --git a/arch/arm64/crypto/sha1-ce-core.S b/arch/arm64/crypto/sha1-ce-core.S index 46049850727d..78eb35fb5056 100644 --- a/arch/arm64/crypto/sha1-ce-core.S +++ b/arch/arm64/crypto/sha1-ce-core.S @@ -69,30 +69,36 @@ * int blocks) */ ENTRY(sha1_ce_transform) + frame_push 3 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + /* load round constants */ - loadrc k0.4s, 0x5a827999, w6 +0: loadrc k0.4s, 0x5a827999, w6 loadrc k1.4s, 0x6ed9eba1, w6 loadrc k2.4s, 0x8f1bbcdc, w6 loadrc k3.4s, 0xca62c1d6, w6 /* load state */ - ld1 {dgav.4s}, [x0] - ldr dgb, [x0, #16] + ld1 {dgav.4s}, [x19] + ldr dgb, [x19, #16] /* load sha1_ce_state::finalize */ ldr_l w4, sha1_ce_offsetof_finalize, x4 - ldr w4, [x0, x4] + ldr w4, [x19, x4] /* load input */ -0: ld1 {v8.4s-v11.4s}, [x1], #64 - sub w2, w2, #1 +1: ld1 {v8.4s-v11.4s}, [x20], #64 + sub w21, w21, #1 CPU_LE( rev32 v8.16b, v8.16b ) CPU_LE( rev32 v9.16b, v9.16b ) CPU_LE( rev32 v10.16b, v10.16b ) CPU_LE( rev32 v11.16b, v11.16b ) -1: add t0.4s, v8.4s, k0.4s +2: add t0.4s, v8.4s, k0.4s mov dg0v.16b, dgav.16b add_update c, ev, k0, 8, 9, 10, 11, dgb @@ -123,16 +129,25 @@ CPU_LE( rev32 v11.16b, v11.16b ) add dgbv.2s, dgbv.2s, dg1v.2s add dgav.4s, dgav.4s, dg0v.4s - cbnz w2, 0b + cbz w21, 3f + + if_will_cond_yield_neon + st1 {dgav.4s}, [x19] + str dgb, [x19, #16] + do_cond_yield_neon + b 0b + endif_yield_neon + + b 1b /* * Final block: add padding and total bit count. * Skip if the input size was not a round multiple of the block size, * the padding is handled by the C code in that case. */ - cbz x4, 3f +3: cbz x4, 4f ldr_l w4, sha1_ce_offsetof_count, x4 - ldr x4, [x0, x4] + ldr x4, [x19, x4] movi v9.2d, #0 mov x8, #0x80000000 movi v10.2d, #0 @@ -141,10 +156,11 @@ CPU_LE( rev32 v11.16b, v11.16b ) mov x4, #0 mov v11.d[0], xzr mov v11.d[1], x7 - b 1b + b 2b /* store new state */ -3: st1 {dgav.4s}, [x0] - str dgb, [x0, #16] +4: st1 {dgav.4s}, [x19] + str dgb, [x19, #16] + frame_pop ret ENDPROC(sha1_ce_transform) diff --git a/arch/arm64/crypto/sha2-ce-core.S b/arch/arm64/crypto/sha2-ce-core.S index 4c3c89b812ce..cd8b36412469 100644 --- a/arch/arm64/crypto/sha2-ce-core.S +++ b/arch/arm64/crypto/sha2-ce-core.S @@ -79,30 +79,36 @@ */ .text ENTRY(sha2_ce_transform) + frame_push 3 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + /* load round constants */ - adr_l x8, .Lsha2_rcon +0: adr_l x8, .Lsha2_rcon ld1 { v0.4s- v3.4s}, [x8], #64 ld1 { v4.4s- v7.4s}, [x8], #64 ld1 { v8.4s-v11.4s}, [x8], #64 ld1 {v12.4s-v15.4s}, [x8] /* load state */ - ld1 {dgav.4s, dgbv.4s}, [x0] + ld1 {dgav.4s, dgbv.4s}, [x19] /* load sha256_ce_state::finalize */ ldr_l w4, sha256_ce_offsetof_finalize, x4 - ldr w4, [x0, x4] + ldr w4, [x19, x4] /* load input */ -0: ld1 {v16.4s-v19.4s}, [x1], #64 - sub w2, w2, #1 +1: ld1 {v16.4s-v19.4s}, [x20], #64 + sub w21, w21, #1 CPU_LE( rev32 v16.16b, v16.16b ) CPU_LE( rev32 v17.16b, v17.16b ) CPU_LE( rev32 v18.16b, v18.16b ) CPU_LE( rev32 v19.16b, v19.16b ) -1: add t0.4s, v16.4s, v0.4s +2: add t0.4s, v16.4s, v0.4s mov dg0v.16b, dgav.16b mov dg1v.16b, dgbv.16b @@ -131,16 +137,24 @@ CPU_LE( rev32 v19.16b, v19.16b ) add dgbv.4s, dgbv.4s, dg1v.4s /* handled all input blocks? */ - cbnz w2, 0b + cbz w21, 3f + + if_will_cond_yield_neon + st1 {dgav.4s, dgbv.4s}, [x19] + do_cond_yield_neon + b 0b + endif_yield_neon + + b 1b /* * Final block: add padding and total bit count. * Skip if the input size was not a round multiple of the block size, * the padding is handled by the C code in that case. */ - cbz x4, 3f +3: cbz x4, 4f ldr_l w4, sha256_ce_offsetof_count, x4 - ldr x4, [x0, x4] + ldr x4, [x19, x4] movi v17.2d, #0 mov x8, #0x80000000 movi v18.2d, #0 @@ -149,9 +163,10 @@ CPU_LE( rev32 v19.16b, v19.16b ) mov x4, #0 mov v19.d[0], xzr mov v19.d[1], x7 - b 1b + b 2b /* store new state */ -3: st1 {dgav.4s, dgbv.4s}, [x0] +4: st1 {dgav.4s, dgbv.4s}, [x19] + frame_pop ret ENDPROC(sha2_ce_transform) diff --git a/arch/arm64/crypto/sha256-core.S_shipped b/arch/arm64/crypto/sha256-core.S_shipped index 3ce82cc860bc..7c7ce2e3bad6 100644 --- a/arch/arm64/crypto/sha256-core.S_shipped +++ b/arch/arm64/crypto/sha256-core.S_shipped @@ -1,3 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +// This code is taken from the OpenSSL project but the author (Andy Polyakov) +// has relicensed it under the GPLv2. Therefore this program is free software; +// you can redistribute it and/or modify it under the terms of the GNU General +// Public License version 2 as published by the Free Software Foundation. +// +// The original headers, including the original license headers, are +// included below for completeness. + // Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. // // Licensed under the OpenSSL license (the "License"). You may not use @@ -10,8 +20,6 @@ // project. The module is, however, dual licensed under OpenSSL and // CRYPTOGAMS licenses depending on where you obtain it. For further // details see http://www.openssl.org/~appro/cryptogams/. -// -// Permission to use under GPLv2 terms is granted. // ==================================================================== // // SHA256/512 for ARMv8. diff --git a/arch/arm64/crypto/sha3-ce-core.S b/arch/arm64/crypto/sha3-ce-core.S index 332ad7530690..a7d587fa54f6 100644 --- a/arch/arm64/crypto/sha3-ce-core.S +++ b/arch/arm64/crypto/sha3-ce-core.S @@ -41,9 +41,16 @@ */ .text ENTRY(sha3_ce_transform) - /* load state */ - add x8, x0, #32 - ld1 { v0.1d- v3.1d}, [x0] + frame_push 4 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + mov x22, x3 + +0: /* load state */ + add x8, x19, #32 + ld1 { v0.1d- v3.1d}, [x19] ld1 { v4.1d- v7.1d}, [x8], #32 ld1 { v8.1d-v11.1d}, [x8], #32 ld1 {v12.1d-v15.1d}, [x8], #32 @@ -51,13 +58,13 @@ ENTRY(sha3_ce_transform) ld1 {v20.1d-v23.1d}, [x8], #32 ld1 {v24.1d}, [x8] -0: sub w2, w2, #1 +1: sub w21, w21, #1 mov w8, #24 adr_l x9, .Lsha3_rcon /* load input */ - ld1 {v25.8b-v28.8b}, [x1], #32 - ld1 {v29.8b-v31.8b}, [x1], #24 + ld1 {v25.8b-v28.8b}, [x20], #32 + ld1 {v29.8b-v31.8b}, [x20], #24 eor v0.8b, v0.8b, v25.8b eor v1.8b, v1.8b, v26.8b eor v2.8b, v2.8b, v27.8b @@ -66,10 +73,10 @@ ENTRY(sha3_ce_transform) eor v5.8b, v5.8b, v30.8b eor v6.8b, v6.8b, v31.8b - tbnz x3, #6, 2f // SHA3-512 + tbnz x22, #6, 3f // SHA3-512 - ld1 {v25.8b-v28.8b}, [x1], #32 - ld1 {v29.8b-v30.8b}, [x1], #16 + ld1 {v25.8b-v28.8b}, [x20], #32 + ld1 {v29.8b-v30.8b}, [x20], #16 eor v7.8b, v7.8b, v25.8b eor v8.8b, v8.8b, v26.8b eor v9.8b, v9.8b, v27.8b @@ -77,34 +84,34 @@ ENTRY(sha3_ce_transform) eor v11.8b, v11.8b, v29.8b eor v12.8b, v12.8b, v30.8b - tbnz x3, #4, 1f // SHA3-384 or SHA3-224 + tbnz x22, #4, 2f // SHA3-384 or SHA3-224 // SHA3-256 - ld1 {v25.8b-v28.8b}, [x1], #32 + ld1 {v25.8b-v28.8b}, [x20], #32 eor v13.8b, v13.8b, v25.8b eor v14.8b, v14.8b, v26.8b eor v15.8b, v15.8b, v27.8b eor v16.8b, v16.8b, v28.8b - b 3f + b 4f -1: tbz x3, #2, 3f // bit 2 cleared? SHA-384 +2: tbz x22, #2, 4f // bit 2 cleared? SHA-384 // SHA3-224 - ld1 {v25.8b-v28.8b}, [x1], #32 - ld1 {v29.8b}, [x1], #8 + ld1 {v25.8b-v28.8b}, [x20], #32 + ld1 {v29.8b}, [x20], #8 eor v13.8b, v13.8b, v25.8b eor v14.8b, v14.8b, v26.8b eor v15.8b, v15.8b, v27.8b eor v16.8b, v16.8b, v28.8b eor v17.8b, v17.8b, v29.8b - b 3f + b 4f // SHA3-512 -2: ld1 {v25.8b-v26.8b}, [x1], #16 +3: ld1 {v25.8b-v26.8b}, [x20], #16 eor v7.8b, v7.8b, v25.8b eor v8.8b, v8.8b, v26.8b -3: sub w8, w8, #1 +4: sub w8, w8, #1 eor3 v29.16b, v4.16b, v9.16b, v14.16b eor3 v26.16b, v1.16b, v6.16b, v11.16b @@ -183,17 +190,33 @@ ENTRY(sha3_ce_transform) eor v0.16b, v0.16b, v31.16b - cbnz w8, 3b - cbnz w2, 0b + cbnz w8, 4b + cbz w21, 5f + + if_will_cond_yield_neon + add x8, x19, #32 + st1 { v0.1d- v3.1d}, [x19] + st1 { v4.1d- v7.1d}, [x8], #32 + st1 { v8.1d-v11.1d}, [x8], #32 + st1 {v12.1d-v15.1d}, [x8], #32 + st1 {v16.1d-v19.1d}, [x8], #32 + st1 {v20.1d-v23.1d}, [x8], #32 + st1 {v24.1d}, [x8] + do_cond_yield_neon + b 0b + endif_yield_neon + + b 1b /* save state */ - st1 { v0.1d- v3.1d}, [x0], #32 - st1 { v4.1d- v7.1d}, [x0], #32 - st1 { v8.1d-v11.1d}, [x0], #32 - st1 {v12.1d-v15.1d}, [x0], #32 - st1 {v16.1d-v19.1d}, [x0], #32 - st1 {v20.1d-v23.1d}, [x0], #32 - st1 {v24.1d}, [x0] +5: st1 { v0.1d- v3.1d}, [x19], #32 + st1 { v4.1d- v7.1d}, [x19], #32 + st1 { v8.1d-v11.1d}, [x19], #32 + st1 {v12.1d-v15.1d}, [x19], #32 + st1 {v16.1d-v19.1d}, [x19], #32 + st1 {v20.1d-v23.1d}, [x19], #32 + st1 {v24.1d}, [x19] + frame_pop ret ENDPROC(sha3_ce_transform) diff --git a/arch/arm64/crypto/sha512-armv8.pl b/arch/arm64/crypto/sha512-armv8.pl index c55efb308544..2d8655d5b1af 100644 --- a/arch/arm64/crypto/sha512-armv8.pl +++ b/arch/arm64/crypto/sha512-armv8.pl @@ -1,4 +1,14 @@ #! /usr/bin/env perl +# SPDX-License-Identifier: GPL-2.0 + +# This code is taken from the OpenSSL project but the author (Andy Polyakov) +# has relicensed it under the GPLv2. Therefore this program is free software; +# you can redistribute it and/or modify it under the terms of the GNU General +# Public License version 2 as published by the Free Software Foundation. +# +# The original headers, including the original license headers, are +# included below for completeness. + # Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. # # Licensed under the OpenSSL license (the "License"). You may not use @@ -11,8 +21,6 @@ # project. The module is, however, dual licensed under OpenSSL and # CRYPTOGAMS licenses depending on where you obtain it. For further # details see http://www.openssl.org/~appro/cryptogams/. -# -# Permission to use under GPLv2 terms is granted. # ==================================================================== # # SHA256/512 for ARMv8. diff --git a/arch/arm64/crypto/sha512-ce-core.S b/arch/arm64/crypto/sha512-ce-core.S index 7f3bca5c59a2..ce65e3abe4f2 100644 --- a/arch/arm64/crypto/sha512-ce-core.S +++ b/arch/arm64/crypto/sha512-ce-core.S @@ -107,17 +107,23 @@ */ .text ENTRY(sha512_ce_transform) + frame_push 3 + + mov x19, x0 + mov x20, x1 + mov x21, x2 + /* load state */ - ld1 {v8.2d-v11.2d}, [x0] +0: ld1 {v8.2d-v11.2d}, [x19] /* load first 4 round constants */ adr_l x3, .Lsha512_rcon ld1 {v20.2d-v23.2d}, [x3], #64 /* load input */ -0: ld1 {v12.2d-v15.2d}, [x1], #64 - ld1 {v16.2d-v19.2d}, [x1], #64 - sub w2, w2, #1 +1: ld1 {v12.2d-v15.2d}, [x20], #64 + ld1 {v16.2d-v19.2d}, [x20], #64 + sub w21, w21, #1 CPU_LE( rev64 v12.16b, v12.16b ) CPU_LE( rev64 v13.16b, v13.16b ) @@ -196,9 +202,18 @@ CPU_LE( rev64 v19.16b, v19.16b ) add v11.2d, v11.2d, v3.2d /* handled all input blocks? */ - cbnz w2, 0b + cbz w21, 3f + + if_will_cond_yield_neon + st1 {v8.2d-v11.2d}, [x19] + do_cond_yield_neon + b 0b + endif_yield_neon + + b 1b /* store new state */ -3: st1 {v8.2d-v11.2d}, [x0] +3: st1 {v8.2d-v11.2d}, [x19] + frame_pop ret ENDPROC(sha512_ce_transform) diff --git a/arch/arm64/crypto/sha512-core.S_shipped b/arch/arm64/crypto/sha512-core.S_shipped index bd0f59f06c9d..e063a6106720 100644 --- a/arch/arm64/crypto/sha512-core.S_shipped +++ b/arch/arm64/crypto/sha512-core.S_shipped @@ -1,3 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 + +// This code is taken from the OpenSSL project but the author (Andy Polyakov) +// has relicensed it under the GPLv2. Therefore this program is free software; +// you can redistribute it and/or modify it under the terms of the GNU General +// Public License version 2 as published by the Free Software Foundation. +// +// The original headers, including the original license headers, are +// included below for completeness. + // Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved. // // Licensed under the OpenSSL license (the "License"). You may not use @@ -10,8 +20,6 @@ // project. The module is, however, dual licensed under OpenSSL and // CRYPTOGAMS licenses depending on where you obtain it. For further // details see http://www.openssl.org/~appro/cryptogams/. -// -// Permission to use under GPLv2 terms is granted. // ==================================================================== // // SHA256/512 for ARMv8. diff --git a/arch/arm64/crypto/sm4-ce-core.S b/arch/arm64/crypto/sm4-ce-core.S new file mode 100644 index 000000000000..af3bfbc3f4d4 --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-core.S @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/linkage.h> +#include <asm/assembler.h> + + .irp b, 0, 1, 2, 3, 4, 5, 6, 7, 8 + .set .Lv\b\().4s, \b + .endr + + .macro sm4e, rd, rn + .inst 0xcec08400 | .L\rd | (.L\rn << 5) + .endm + + /* + * void sm4_ce_do_crypt(const u32 *rk, u32 *out, const u32 *in); + */ + .text +ENTRY(sm4_ce_do_crypt) + ld1 {v8.4s}, [x2] + ld1 {v0.4s-v3.4s}, [x0], #64 +CPU_LE( rev32 v8.16b, v8.16b ) + ld1 {v4.4s-v7.4s}, [x0] + sm4e v8.4s, v0.4s + sm4e v8.4s, v1.4s + sm4e v8.4s, v2.4s + sm4e v8.4s, v3.4s + sm4e v8.4s, v4.4s + sm4e v8.4s, v5.4s + sm4e v8.4s, v6.4s + sm4e v8.4s, v7.4s + rev64 v8.4s, v8.4s + ext v8.16b, v8.16b, v8.16b, #8 +CPU_LE( rev32 v8.16b, v8.16b ) + st1 {v8.4s}, [x1] + ret +ENDPROC(sm4_ce_do_crypt) diff --git a/arch/arm64/crypto/sm4-ce-glue.c b/arch/arm64/crypto/sm4-ce-glue.c new file mode 100644 index 000000000000..b7fb5274b250 --- /dev/null +++ b/arch/arm64/crypto/sm4-ce-glue.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <asm/neon.h> +#include <asm/simd.h> +#include <crypto/sm4.h> +#include <linux/module.h> +#include <linux/cpufeature.h> +#include <linux/crypto.h> +#include <linux/types.h> + +MODULE_ALIAS_CRYPTO("sm4"); +MODULE_ALIAS_CRYPTO("sm4-ce"); +MODULE_DESCRIPTION("SM4 symmetric cipher using ARMv8 Crypto Extensions"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); + +asmlinkage void sm4_ce_do_crypt(const u32 *rk, void *out, const void *in); + +static void sm4_ce_encrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!may_use_simd()) { + crypto_sm4_encrypt(tfm, out, in); + } else { + kernel_neon_begin(); + sm4_ce_do_crypt(ctx->rkey_enc, out, in); + kernel_neon_end(); + } +} + +static void sm4_ce_decrypt(struct crypto_tfm *tfm, u8 *out, const u8 *in) +{ + const struct crypto_sm4_ctx *ctx = crypto_tfm_ctx(tfm); + + if (!may_use_simd()) { + crypto_sm4_decrypt(tfm, out, in); + } else { + kernel_neon_begin(); + sm4_ce_do_crypt(ctx->rkey_dec, out, in); + kernel_neon_end(); + } +} + +static struct crypto_alg sm4_ce_alg = { + .cra_name = "sm4", + .cra_driver_name = "sm4-ce", + .cra_priority = 200, + .cra_flags = CRYPTO_ALG_TYPE_CIPHER, + .cra_blocksize = SM4_BLOCK_SIZE, + .cra_ctxsize = sizeof(struct crypto_sm4_ctx), + .cra_module = THIS_MODULE, + .cra_u.cipher = { + .cia_min_keysize = SM4_KEY_SIZE, + .cia_max_keysize = SM4_KEY_SIZE, + .cia_setkey = crypto_sm4_set_key, + .cia_encrypt = sm4_ce_encrypt, + .cia_decrypt = sm4_ce_decrypt + } +}; + +static int __init sm4_ce_mod_init(void) +{ + return crypto_register_alg(&sm4_ce_alg); +} + +static void __exit sm4_ce_mod_fini(void) +{ + crypto_unregister_alg(&sm4_ce_alg); +} + +module_cpu_feature_match(SM3, sm4_ce_mod_init); +module_exit(sm4_ce_mod_fini); diff --git a/arch/arm64/include/asm/atomic_lse.h b/arch/arm64/include/asm/atomic_lse.h index 9ef0797380cb..f9b0b09153e0 100644 --- a/arch/arm64/include/asm/atomic_lse.h +++ b/arch/arm64/include/asm/atomic_lse.h @@ -117,7 +117,7 @@ static inline void atomic_and(int i, atomic_t *v) /* LSE atomics */ " mvn %w[i], %w[i]\n" " stclr %w[i], %[v]") - : [i] "+r" (w0), [v] "+Q" (v->counter) + : [i] "+&r" (w0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -135,7 +135,7 @@ static inline int atomic_fetch_and##name(int i, atomic_t *v) \ /* LSE atomics */ \ " mvn %w[i], %w[i]\n" \ " ldclr" #mb " %w[i], %w[i], %[v]") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -161,7 +161,7 @@ static inline void atomic_sub(int i, atomic_t *v) /* LSE atomics */ " neg %w[i], %w[i]\n" " stadd %w[i], %[v]") - : [i] "+r" (w0), [v] "+Q" (v->counter) + : [i] "+&r" (w0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -180,7 +180,7 @@ static inline int atomic_sub_return##name(int i, atomic_t *v) \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], w30, %[v]\n" \ " add %w[i], %w[i], w30") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS , ##cl); \ \ @@ -207,7 +207,7 @@ static inline int atomic_fetch_sub##name(int i, atomic_t *v) \ /* LSE atomics */ \ " neg %w[i], %w[i]\n" \ " ldadd" #mb " %w[i], %w[i], %[v]") \ - : [i] "+r" (w0), [v] "+Q" (v->counter) \ + : [i] "+&r" (w0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -314,7 +314,7 @@ static inline void atomic64_and(long i, atomic64_t *v) /* LSE atomics */ " mvn %[i], %[i]\n" " stclr %[i], %[v]") - : [i] "+r" (x0), [v] "+Q" (v->counter) + : [i] "+&r" (x0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -332,7 +332,7 @@ static inline long atomic64_fetch_and##name(long i, atomic64_t *v) \ /* LSE atomics */ \ " mvn %[i], %[i]\n" \ " ldclr" #mb " %[i], %[i], %[v]") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -358,7 +358,7 @@ static inline void atomic64_sub(long i, atomic64_t *v) /* LSE atomics */ " neg %[i], %[i]\n" " stadd %[i], %[v]") - : [i] "+r" (x0), [v] "+Q" (v->counter) + : [i] "+&r" (x0), [v] "+Q" (v->counter) : "r" (x1) : __LL_SC_CLOBBERS); } @@ -377,7 +377,7 @@ static inline long atomic64_sub_return##name(long i, atomic64_t *v) \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], x30, %[v]\n" \ " add %[i], %[i], x30") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -404,7 +404,7 @@ static inline long atomic64_fetch_sub##name(long i, atomic64_t *v) \ /* LSE atomics */ \ " neg %[i], %[i]\n" \ " ldadd" #mb " %[i], %[i], %[v]") \ - : [i] "+r" (x0), [v] "+Q" (v->counter) \ + : [i] "+&r" (x0), [v] "+Q" (v->counter) \ : "r" (x1) \ : __LL_SC_CLOBBERS, ##cl); \ \ @@ -435,7 +435,7 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) " sub x30, x30, %[ret]\n" " cbnz x30, 1b\n" "2:") - : [ret] "+r" (x0), [v] "+Q" (v->counter) + : [ret] "+&r" (x0), [v] "+Q" (v->counter) : : __LL_SC_CLOBBERS, "cc", "memory"); @@ -516,7 +516,7 @@ static inline long __cmpxchg_double##name(unsigned long old1, \ " eor %[old1], %[old1], %[oldval1]\n" \ " eor %[old2], %[old2], %[oldval2]\n" \ " orr %[old1], %[old1], %[old2]") \ - : [old1] "+r" (x0), [old2] "+r" (x1), \ + : [old1] "+&r" (x0), [old2] "+&r" (x1), \ [v] "+Q" (*(unsigned long *)ptr) \ : [new1] "r" (x2), [new2] "r" (x3), [ptr] "r" (x4), \ [oldval1] "r" (oldval1), [oldval2] "r" (oldval2) \ diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index c00c62e1a4a3..1a037b94eba1 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -34,7 +34,6 @@ typedef u32 compat_size_t; typedef s32 compat_ssize_t; -typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; typedef u16 __compat_uid_t; @@ -66,16 +65,6 @@ typedef u32 compat_ulong_t; typedef u64 compat_u64; typedef u32 compat_uptr_t; -struct compat_timespec { - compat_time_t tv_sec; - s32 tv_nsec; -}; - -struct compat_timeval { - compat_time_t tv_sec; - s32 tv_usec; -}; - struct compat_stat { #ifdef __AARCH64EB__ short st_dev; @@ -192,10 +181,10 @@ struct compat_ipc64_perm { struct compat_semid64_ds { struct compat_ipc64_perm sem_perm; - compat_time_t sem_otime; - compat_ulong_t __unused1; - compat_time_t sem_ctime; - compat_ulong_t __unused2; + compat_ulong_t sem_otime; + compat_ulong_t sem_otime_high; + compat_ulong_t sem_ctime; + compat_ulong_t sem_ctime_high; compat_ulong_t sem_nsems; compat_ulong_t __unused3; compat_ulong_t __unused4; @@ -203,12 +192,12 @@ struct compat_semid64_ds { struct compat_msqid64_ds { struct compat_ipc64_perm msg_perm; - compat_time_t msg_stime; - compat_ulong_t __unused1; - compat_time_t msg_rtime; - compat_ulong_t __unused2; - compat_time_t msg_ctime; - compat_ulong_t __unused3; + compat_ulong_t msg_stime; + compat_ulong_t msg_stime_high; + compat_ulong_t msg_rtime; + compat_ulong_t msg_rtime_high; + compat_ulong_t msg_ctime; + compat_ulong_t msg_ctime_high; compat_ulong_t msg_cbytes; compat_ulong_t msg_qnum; compat_ulong_t msg_qbytes; @@ -221,12 +210,12 @@ struct compat_msqid64_ds { struct compat_shmid64_ds { struct compat_ipc64_perm shm_perm; compat_size_t shm_segsz; - compat_time_t shm_atime; - compat_ulong_t __unused1; - compat_time_t shm_dtime; - compat_ulong_t __unused2; - compat_time_t shm_ctime; - compat_ulong_t __unused3; + compat_ulong_t shm_atime; + compat_ulong_t shm_atime_high; + compat_ulong_t shm_dtime; + compat_ulong_t shm_dtime_high; + compat_ulong_t shm_ctime; + compat_ulong_t shm_ctime_high; compat_pid_t shm_cpid; compat_pid_t shm_lpid; compat_ulong_t shm_nattch; diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 30014a9f8f2b..ea690b3562af 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -75,6 +75,7 @@ #define ARM_CPU_IMP_CAVIUM 0x43 #define ARM_CPU_IMP_BRCM 0x42 #define ARM_CPU_IMP_QCOM 0x51 +#define ARM_CPU_IMP_NVIDIA 0x4E #define ARM_CPU_PART_AEM_V8 0xD0F #define ARM_CPU_PART_FOUNDATION 0xD00 @@ -99,6 +100,9 @@ #define QCOM_CPU_PART_FALKOR 0xC00 #define QCOM_CPU_PART_KRYO 0x200 +#define NVIDIA_CPU_PART_DENVER 0x003 +#define NVIDIA_CPU_PART_CARMEL 0x004 + #define MIDR_CORTEX_A53 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A53) #define MIDR_CORTEX_A57 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A57) #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) @@ -114,6 +118,8 @@ #define MIDR_QCOM_FALKOR_V1 MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR_V1) #define MIDR_QCOM_FALKOR MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_FALKOR) #define MIDR_QCOM_KRYO MIDR_CPU_MODEL(ARM_CPU_IMP_QCOM, QCOM_CPU_PART_KRYO) +#define MIDR_NVIDIA_DENVER MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_DENVER) +#define MIDR_NVIDIA_CARMEL MIDR_CPU_MODEL(ARM_CPU_IMP_NVIDIA, NVIDIA_CPU_PART_CARMEL) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 23b33e8ea03a..1dab3a984608 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -333,7 +333,7 @@ static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu) } else { u64 sctlr = vcpu_read_sys_reg(vcpu, SCTLR_EL1); sctlr |= (1 << 25); - vcpu_write_sys_reg(vcpu, SCTLR_EL1, sctlr); + vcpu_write_sys_reg(vcpu, sctlr, SCTLR_EL1); } } diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 082110993647..6128992c2ded 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -360,6 +360,22 @@ static inline unsigned int kvm_get_vmid_bits(void) return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; } +/* + * We are not in the kvm->srcu critical section most of the time, so we take + * the SRCU read lock here. Since we copy the data from the user page, we + * can immediately drop the lock again. + */ +static inline int kvm_read_guest_lock(struct kvm *kvm, + gpa_t gpa, void *data, unsigned long len) +{ + int srcu_idx = srcu_read_lock(&kvm->srcu); + int ret = kvm_read_guest(kvm, gpa, data, len); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return ret; +} + #ifdef CONFIG_KVM_INDIRECT_VECTORS /* * EL2 vectors can be mapped and rerouted in a number of ways, diff --git a/arch/arm64/include/asm/pci.h b/arch/arm64/include/asm/pci.h index 8747f7c5e0e7..9e690686e8aa 100644 --- a/arch/arm64/include/asm/pci.h +++ b/arch/arm64/include/asm/pci.h @@ -18,11 +18,6 @@ #define pcibios_assign_all_busses() \ (pci_has_flag(PCI_REASSIGN_ALL_BUS)) -/* - * PCI address space differs from physical memory address space - */ -#define PCI_DMA_BUS_IS_PHYS (0) - #define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 extern int isa_dma_bridge_buggy; diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index ebdae15d665d..26c5bd7d88d8 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -122,11 +122,6 @@ static inline int arch_spin_value_unlocked(arch_spinlock_t lock) static inline int arch_spin_is_locked(arch_spinlock_t *lock) { - /* - * Ensure prior spin_lock operations to other locks have completed - * on this CPU before we test whether "lock" is locked. - */ - smp_mb(); /* ^^^ */ return !arch_spin_value_unlocked(READ_ONCE(*lock)); } diff --git a/arch/arm64/include/asm/stat.h b/arch/arm64/include/asm/stat.h index 15e35598ac40..eab738019707 100644 --- a/arch/arm64/include/asm/stat.h +++ b/arch/arm64/include/asm/stat.h @@ -20,6 +20,7 @@ #ifdef CONFIG_COMPAT +#include <linux/compat_time.h> #include <asm/compat.h> /* diff --git a/arch/arm64/kernel/arm64ksyms.c b/arch/arm64/kernel/arm64ksyms.c index 66be504edb6c..d894a20b70b2 100644 --- a/arch/arm64/kernel/arm64ksyms.c +++ b/arch/arm64/kernel/arm64ksyms.c @@ -75,3 +75,11 @@ NOKPROBE_SYMBOL(_mcount); /* arm-smccc */ EXPORT_SYMBOL(__arm_smccc_smc); EXPORT_SYMBOL(__arm_smccc_hvc); + + /* tishift.S */ +extern long long __ashlti3(long long a, int b); +EXPORT_SYMBOL(__ashlti3); +extern long long __ashrti3(long long a, int b); +EXPORT_SYMBOL(__ashrti3); +extern long long __lshrti3(long long a, int b); +EXPORT_SYMBOL(__lshrti3); diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a900befadfe8..e4a1182deff7 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -316,6 +316,7 @@ static const struct midr_range arm64_bp_harden_smccc_cpus[] = { MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), + MIDR_ALL_VERSIONS(MIDR_NVIDIA_DENVER), {}, }; diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 87a35364e750..4bcdd0318729 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -882,7 +882,7 @@ asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) si_code = FPE_FLTRES; } - memset(&info, 0, sizeof(info)); + clear_siginfo(&info); info.si_signo = SIGFPE; info.si_code = si_code; info.si_addr = (void __user *)instruction_pointer(regs); diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c index 74bb56f656ef..413dbe530da8 100644 --- a/arch/arm64/kernel/hw_breakpoint.c +++ b/arch/arm64/kernel/hw_breakpoint.c @@ -30,7 +30,6 @@ #include <linux/smp.h> #include <linux/uaccess.h> -#include <asm/compat.h> #include <asm/current.h> #include <asm/debug-monitors.h> #include <asm/hw_breakpoint.h> diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 1d091d048d04..0bbac612146e 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/compat.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/perf_event.h> #include <linux/bug.h> #include <linux/sched/task_stack.h> -#include <asm/compat.h> #include <asm/perf_regs.h> #include <asm/ptrace.h> diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 93ab57dcfc14..a6109825eeb9 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -112,6 +112,7 @@ long compat_arm_syscall(struct pt_regs *regs) break; } + clear_siginfo(&info); info.si_signo = SIGILL; info.si_errno = 0; info.si_code = ILL_ILLTRP; diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 8bbdc17e49df..d399d459397b 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -635,6 +635,7 @@ asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) siginfo_t info; void __user *pc = (void __user *)instruction_pointer(regs); + clear_siginfo(&info); info.si_signo = SIGILL; info.si_errno = 0; info.si_code = ILL_ILLOPC; diff --git a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c index 86801b6055d6..39be799d0417 100644 --- a/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c +++ b/arch/arm64/kvm/hyp/vgic-v2-cpuif-proxy.c @@ -18,11 +18,20 @@ #include <linux/compiler.h> #include <linux/irqchip/arm-gic.h> #include <linux/kvm_host.h> +#include <linux/swab.h> #include <asm/kvm_emulate.h> #include <asm/kvm_hyp.h> #include <asm/kvm_mmu.h> +static bool __hyp_text __is_be(struct kvm_vcpu *vcpu) +{ + if (vcpu_mode_is_32bit(vcpu)) + return !!(read_sysreg_el2(spsr) & COMPAT_PSR_E_BIT); + + return !!(read_sysreg(SCTLR_EL1) & SCTLR_ELx_EE); +} + /* * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the * guest. @@ -64,14 +73,19 @@ int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) addr += fault_ipa - vgic->vgic_cpu_base; if (kvm_vcpu_dabt_iswrite(vcpu)) { - u32 data = vcpu_data_guest_to_host(vcpu, - vcpu_get_reg(vcpu, rd), - sizeof(u32)); + u32 data = vcpu_get_reg(vcpu, rd); + if (__is_be(vcpu)) { + /* guest pre-swabbed data, undo this for writel() */ + data = swab32(data); + } writel_relaxed(data, addr); } else { u32 data = readl_relaxed(addr); - vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data, - sizeof(u32))); + if (__is_be(vcpu)) { + /* guest expects swabbed data */ + data = swab32(data); + } + vcpu_set_reg(vcpu, rd, data); } return 1; diff --git a/arch/arm64/lib/tishift.S b/arch/arm64/lib/tishift.S index d3db9b2cd479..0fdff97794de 100644 --- a/arch/arm64/lib/tishift.S +++ b/arch/arm64/lib/tishift.S @@ -1,17 +1,6 @@ -/* - * Copyright (C) 2017 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause) * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see <http://www.gnu.org/licenses/>. + * Copyright (C) 2017-2018 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. */ #include <linux/linkage.h> diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index a96ec0181818..db01f2709842 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -508,16 +508,6 @@ static int __init arm64_dma_init(void) } arch_initcall(arm64_dma_init); -#define PREALLOC_DMA_DEBUG_ENTRIES 4096 - -static int __init dma_debug_do_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - return 0; -} -fs_initcall(dma_debug_do_init); - - #ifdef CONFIG_IOMMU_DMA #include <linux/dma-iommu.h> #include <linux/platform_device.h> diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 4165485e8b6e..576f15153080 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -293,6 +293,57 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, static void __do_user_fault(struct siginfo *info, unsigned int esr) { current->thread.fault_address = (unsigned long)info->si_addr; + + /* + * If the faulting address is in the kernel, we must sanitize the ESR. + * From userspace's point of view, kernel-only mappings don't exist + * at all, so we report them as level 0 translation faults. + * (This is not quite the way that "no mapping there at all" behaves: + * an alignment fault not caused by the memory type would take + * precedence over translation fault for a real access to empty + * space. Unfortunately we can't easily distinguish "alignment fault + * not caused by memory type" from "alignment fault caused by memory + * type", so we ignore this wrinkle and just return the translation + * fault.) + */ + if (current->thread.fault_address >= TASK_SIZE) { + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_DABT_LOW: + /* + * These bits provide only information about the + * faulting instruction, which userspace knows already. + * We explicitly clear bits which are architecturally + * RES0 in case they are given meanings in future. + * We always report the ESR as if the fault was taken + * to EL1 and so ISV and the bits in ISS[23:14] are + * clear. (In fact it always will be a fault to EL1.) + */ + esr &= ESR_ELx_EC_MASK | ESR_ELx_IL | + ESR_ELx_CM | ESR_ELx_WNR; + esr |= ESR_ELx_FSC_FAULT; + break; + case ESR_ELx_EC_IABT_LOW: + /* + * Claim a level 0 translation fault. + * All other bits are architecturally RES0 for faults + * reported with that DFSC value, so we clear them. + */ + esr &= ESR_ELx_EC_MASK | ESR_ELx_IL; + esr |= ESR_ELx_FSC_FAULT; + break; + default: + /* + * This should never happen (entry.S only brings us + * into this code for insn and data aborts from a lower + * exception level). Fail safe by not providing an ESR + * context record at all. + */ + WARN(1, "ESR 0x%x is not DABT or IABT from EL0\n", esr); + esr = 0; + break; + } + } + current->thread.fault_code = esr; arm64_force_sig_info(info, esr_to_fault_info(esr)->name, current); } @@ -305,11 +356,12 @@ static void do_bad_area(unsigned long addr, unsigned int esr, struct pt_regs *re */ if (user_mode(regs)) { const struct fault_info *inf = esr_to_fault_info(esr); - struct siginfo si = { - .si_signo = inf->sig, - .si_code = inf->code, - .si_addr = (void __user *)addr, - }; + struct siginfo si; + + clear_siginfo(&si); + si.si_signo = inf->sig; + si.si_code = inf->code; + si.si_addr = (void __user *)addr; __do_user_fault(&si, esr); } else { @@ -583,6 +635,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) nmi_exit(); } + clear_siginfo(&info); info.si_signo = inf->sig; info.si_errno = 0; info.si_code = inf->code; @@ -687,6 +740,7 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, show_pte(addr); } + clear_siginfo(&info); info.si_signo = inf->sig; info.si_errno = 0; info.si_code = inf->code; @@ -729,6 +783,7 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr, local_irq_enable(); } + clear_siginfo(&info); info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRALN; @@ -772,7 +827,6 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, struct pt_regs *regs) { const struct fault_info *inf = debug_fault_info + DBG_ESR_EVT(esr); - struct siginfo info; int rv; /* @@ -788,6 +842,9 @@ asmlinkage int __exception do_debug_exception(unsigned long addr, if (!inf->fn(addr, esr, regs)) { rv = 1; } else { + struct siginfo info; + + clear_siginfo(&info); info.si_signo = inf->sig; info.si_errno = 0; info.si_code = inf->code; diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 9f3c47acf8ff..1b18b4722420 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -646,8 +646,10 @@ static int keep_initrd __initdata; void __init free_initrd_mem(unsigned long start, unsigned long end) { - if (!keep_initrd) + if (!keep_initrd) { free_reserved_area((void *)start, (void *)end, 0, "initrd"); + memblock_free(__virt_to_phys(start), end - start); + } } static int __init keepinitrd_setup(char *__unused) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 2dbb2c9f1ec1..493ff75670ff 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -933,13 +933,15 @@ int pud_set_huge(pud_t *pudp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PUD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + pud_t new_pud = pfn_pud(__phys_to_pfn(phys), sect_prot); - /* ioremap_page_range doesn't honour BBM */ - if (pud_present(READ_ONCE(*pudp))) + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pud_val(*pudp)), + pud_val(new_pud))) return 0; BUG_ON(phys & ~PUD_MASK); - set_pud(pudp, pfn_pud(__phys_to_pfn(phys), sect_prot)); + set_pud(pudp, new_pud); return 1; } @@ -947,13 +949,15 @@ int pmd_set_huge(pmd_t *pmdp, phys_addr_t phys, pgprot_t prot) { pgprot_t sect_prot = __pgprot(PMD_TYPE_SECT | pgprot_val(mk_sect_prot(prot))); + pmd_t new_pmd = pfn_pmd(__phys_to_pfn(phys), sect_prot); - /* ioremap_page_range doesn't honour BBM */ - if (pmd_present(READ_ONCE(*pmdp))) + /* Only allow permission changes for now */ + if (!pgattr_change_is_safe(READ_ONCE(pmd_val(*pmdp)), + pmd_val(new_pmd))) return 0; BUG_ON(phys & ~PMD_MASK); - set_pmd(pmdp, pfn_pmd(__phys_to_pfn(phys), sect_prot)); + set_pmd(pmdp, new_pmd); return 1; } diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig index c6b4dd1418b4..bf59855628ac 100644 --- a/arch/c6x/Kconfig +++ b/arch/c6x/Kconfig @@ -6,11 +6,13 @@ config C6X def_bool y + select ARCH_HAS_SYNC_DMA_FOR_CPU + select ARCH_HAS_SYNC_DMA_FOR_DEVICE select CLKDEV_LOOKUP + select DMA_NONCOHERENT_OPS select GENERIC_ATOMIC64 select GENERIC_IRQ_SHOW select HAVE_ARCH_TRACEHOOK - select HAVE_DMA_API_DEBUG select HAVE_MEMBLOCK select SPARSE_IRQ select IRQ_DOMAIN diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index fd4c840de837..33a2c94fed0d 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -1,10 +1,12 @@ generic-y += atomic.h generic-y += barrier.h generic-y += bugs.h +generic-y += compat.h generic-y += current.h generic-y += device.h generic-y += div64.h generic-y += dma.h +generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += exec.h generic-y += extable.h diff --git a/arch/c6x/include/asm/dma-mapping.h b/arch/c6x/include/asm/dma-mapping.h deleted file mode 100644 index 05daf1038111..000000000000 --- a/arch/c6x/include/asm/dma-mapping.h +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Port on Texas Instruments TMS320C6x architecture - * - * Copyright (C) 2004, 2009, 2010, 2011 Texas Instruments Incorporated - * Author: Aurelien Jacquiot <aurelien.jacquiot@ti.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ -#ifndef _ASM_C6X_DMA_MAPPING_H -#define _ASM_C6X_DMA_MAPPING_H - -extern const struct dma_map_ops c6x_dma_ops; - -static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - return &c6x_dma_ops; -} - -extern void coherent_mem_init(u32 start, u32 size); -void *c6x_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, - gfp_t gfp, unsigned long attrs); -void c6x_dma_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle, unsigned long attrs); - -#endif /* _ASM_C6X_DMA_MAPPING_H */ diff --git a/arch/c6x/include/asm/setup.h b/arch/c6x/include/asm/setup.h index 852afb209afb..350f34debb19 100644 --- a/arch/c6x/include/asm/setup.h +++ b/arch/c6x/include/asm/setup.h @@ -28,5 +28,7 @@ extern unsigned char c6x_fuse_mac[6]; extern void machine_init(unsigned long dt_ptr); extern void time_init(void); +extern void coherent_mem_init(u32 start, u32 size); + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_C6X_SETUP_H */ diff --git a/arch/c6x/kernel/Makefile b/arch/c6x/kernel/Makefile index 02f340d7b8fe..fbe74174de87 100644 --- a/arch/c6x/kernel/Makefile +++ b/arch/c6x/kernel/Makefile @@ -8,6 +8,6 @@ extra-y := head.o vmlinux.lds obj-y := process.o traps.o irq.o signal.o ptrace.o obj-y += setup.o sys_c6x.o time.o devicetree.o obj-y += switch_to.o entry.o vectors.o c6x_ksyms.o -obj-y += soc.o dma.o +obj-y += soc.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/c6x/kernel/dma.c b/arch/c6x/kernel/dma.c deleted file mode 100644 index 9fff8be75f58..000000000000 --- a/arch/c6x/kernel/dma.c +++ /dev/null @@ -1,149 +0,0 @@ -/* - * Copyright (C) 2011 Texas Instruments Incorporated - * Author: Mark Salter <msalter@redhat.com> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#include <linux/module.h> -#include <linux/dma-mapping.h> -#include <linux/mm.h> -#include <linux/mm_types.h> -#include <linux/scatterlist.h> - -#include <asm/cacheflush.h> - -static void c6x_dma_sync(dma_addr_t handle, size_t size, - enum dma_data_direction dir) -{ - unsigned long paddr = handle; - - BUG_ON(!valid_dma_direction(dir)); - - switch (dir) { - case DMA_FROM_DEVICE: - L2_cache_block_invalidate(paddr, paddr + size); - break; - case DMA_TO_DEVICE: - L2_cache_block_writeback(paddr, paddr + size); - break; - case DMA_BIDIRECTIONAL: - L2_cache_block_writeback_invalidate(paddr, paddr + size); - break; - default: - break; - } -} - -static dma_addr_t c6x_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t handle = virt_to_phys(page_address(page) + offset); - - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - c6x_dma_sync(handle, size, dir); - - return handle; -} - -static void c6x_dma_unmap_page(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - c6x_dma_sync(handle, size, dir); -} - -static int c6x_dma_map_sg(struct device *dev, struct scatterlist *sglist, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sglist, sg, nents, i) { - sg->dma_address = sg_phys(sg); - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - c6x_dma_sync(sg->dma_address, sg->length, dir); - } - - return nents; -} - -static void c6x_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - if (attrs & DMA_ATTR_SKIP_CPU_SYNC) - return; - - for_each_sg(sglist, sg, nents, i) - c6x_dma_sync(sg_dma_address(sg), sg->length, dir); -} - -static void c6x_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) -{ - c6x_dma_sync(handle, size, dir); - -} - -static void c6x_dma_sync_single_for_device(struct device *dev, - dma_addr_t handle, size_t size, enum dma_data_direction dir) -{ - c6x_dma_sync(handle, size, dir); - -} - -static void c6x_dma_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sglist, int nents, - enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sglist, sg, nents, i) - c6x_dma_sync_single_for_cpu(dev, sg_dma_address(sg), - sg->length, dir); - -} - -static void c6x_dma_sync_sg_for_device(struct device *dev, - struct scatterlist *sglist, int nents, - enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sglist, sg, nents, i) - c6x_dma_sync_single_for_device(dev, sg_dma_address(sg), - sg->length, dir); - -} - -const struct dma_map_ops c6x_dma_ops = { - .alloc = c6x_dma_alloc, - .free = c6x_dma_free, - .map_page = c6x_dma_map_page, - .unmap_page = c6x_dma_unmap_page, - .map_sg = c6x_dma_map_sg, - .unmap_sg = c6x_dma_unmap_sg, - .sync_single_for_device = c6x_dma_sync_single_for_device, - .sync_single_for_cpu = c6x_dma_sync_single_for_cpu, - .sync_sg_for_device = c6x_dma_sync_sg_for_device, - .sync_sg_for_cpu = c6x_dma_sync_sg_for_cpu, -}; -EXPORT_SYMBOL(c6x_dma_ops); - -/* Number of entries preallocated for DMA-API debugging */ -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) - -static int __init dma_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - - return 0; -} -fs_initcall(dma_init); diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c index 4c1d4b84dd2b..5c60aea3b75a 100644 --- a/arch/c6x/kernel/traps.c +++ b/arch/c6x/kernel/traps.c @@ -244,7 +244,6 @@ static struct exception_info eexcept_table[128] = { static void do_trap(struct exception_info *except_info, struct pt_regs *regs) { unsigned long addr = instruction_pointer(regs); - siginfo_t info; if (except_info->code != TRAP_BRKPT) pr_err("TRAP: %s PC[0x%lx] signo[%d] code[%d]\n", @@ -253,12 +252,8 @@ static void do_trap(struct exception_info *except_info, struct pt_regs *regs) die_if_kernel(except_info->kernel_str, regs, addr); - info.si_signo = except_info->signo; - info.si_errno = 0; - info.si_code = except_info->code; - info.si_addr = (void __user *)addr; - - force_sig_info(except_info->signo, &info, current); + force_sig_fault(except_info->signo, except_info->code, + (void __user *)addr, current); } /* diff --git a/arch/c6x/mm/dma-coherent.c b/arch/c6x/mm/dma-coherent.c index 95e38ad27c69..d0a8e0c4b27e 100644 --- a/arch/c6x/mm/dma-coherent.c +++ b/arch/c6x/mm/dma-coherent.c @@ -19,10 +19,12 @@ #include <linux/bitops.h> #include <linux/module.h> #include <linux/interrupt.h> -#include <linux/dma-mapping.h> +#include <linux/dma-noncoherent.h> #include <linux/memblock.h> +#include <asm/cacheflush.h> #include <asm/page.h> +#include <asm/setup.h> /* * DMA coherent memory management, can be redefined using the memdma= @@ -73,7 +75,7 @@ static void __free_dma_pages(u32 addr, int order) * Allocate DMA coherent memory space and return both the kernel * virtual and DMA address for that space. */ -void *c6x_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, unsigned long attrs) { u32 paddr; @@ -98,7 +100,7 @@ void *c6x_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, /* * Free DMA coherent memory as defined by the above mapping. */ -void c6x_dma_free(struct device *dev, size_t size, void *vaddr, +void arch_dma_free(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, unsigned long attrs) { int order; @@ -139,3 +141,35 @@ void __init coherent_mem_init(phys_addr_t start, u32 size) dma_bitmap = phys_to_virt(bitmap_phys); memset(dma_bitmap, 0, dma_pages * PAGE_SIZE); } + +static void c6x_dma_sync(struct device *dev, phys_addr_t paddr, size_t size, + enum dma_data_direction dir) +{ + BUG_ON(!valid_dma_direction(dir)); + + switch (dir) { + case DMA_FROM_DEVICE: + L2_cache_block_invalidate(paddr, paddr + size); + break; + case DMA_TO_DEVICE: + L2_cache_block_writeback(paddr, paddr + size); + break; + case DMA_BIDIRECTIONAL: + L2_cache_block_writeback_invalidate(paddr, paddr + size); + break; + default: + break; + } +} + +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) +{ + return c6x_dma_sync(dev, paddr, size, dir); +} + +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) +{ + return c6x_dma_sync(dev, paddr, size, dir); +} diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 14bac06b7116..a5d0b2991f47 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += barrier.h generic-y += bugs.h generic-y += cacheflush.h generic-y += checksum.h +generic-y += compat.h generic-y += current.h generic-y += delay.h generic-y += device.h diff --git a/arch/h8300/include/asm/pci.h b/arch/h8300/include/asm/pci.h index 7c9e55d62215..d4d345a52092 100644 --- a/arch/h8300/include/asm/pci.h +++ b/arch/h8300/include/asm/pci.h @@ -15,6 +15,4 @@ static inline void pcibios_penalize_isa_irq(int irq, int active) /* We don't do dynamic PCI IRQ allocation */ } -#define PCI_DMA_BUS_IS_PHYS (1) - #endif /* _ASM_H8300_PCI_H */ diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig index 76d2f20d525e..37adb2003033 100644 --- a/arch/hexagon/Kconfig +++ b/arch/hexagon/Kconfig @@ -19,6 +19,7 @@ config HEXAGON select GENERIC_IRQ_SHOW select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK + select NEED_SG_DMA_LENGTH select NO_IOPORT_MAP select GENERIC_IOMAP select GENERIC_SMP_IDLE_THREAD @@ -63,9 +64,6 @@ config GENERIC_CSUM config GENERIC_IRQ_PROBE def_bool y -config NEED_SG_DMA_LENGTH - def_bool y - config RWSEM_GENERIC_SPINLOCK def_bool n diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index e9743f689fb8..dd2fd9c0d292 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -2,6 +2,7 @@ generic-y += barrier.h generic-y += bug.h generic-y += bugs.h +generic-y += compat.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/hexagon/include/asm/io.h b/arch/hexagon/include/asm/io.h index 9e8621d94ee9..e17262ad125e 100644 --- a/arch/hexagon/include/asm/io.h +++ b/arch/hexagon/include/asm/io.h @@ -216,6 +216,12 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, memcpy((void *) dst, src, count); } +static inline void memset_io(volatile void __iomem *addr, int value, + size_t size) +{ + memset((void __force *)addr, value, size); +} + #define PCI_IO_ADDR (volatile void __iomem *) /* diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c index ad8347c29dcf..77459df34e2e 100644 --- a/arch/hexagon/kernel/dma.c +++ b/arch/hexagon/kernel/dma.c @@ -208,7 +208,6 @@ const struct dma_map_ops hexagon_dma_ops = { .sync_single_for_cpu = hexagon_sync_single_for_cpu, .sync_single_for_device = hexagon_sync_single_for_device, .mapping_error = hexagon_mapping_error, - .is_phys = 1, }; void __init hexagon_dma_init(void) diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 2942a9204a9a..91ee04842c22 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -412,10 +412,6 @@ void do_trap0(struct pt_regs *regs) case TRAP_DEBUG: /* Trap0 0xdb is debug breakpoint */ if (user_mode(regs)) { - struct siginfo info; - - info.si_signo = SIGTRAP; - info.si_errno = 0; /* * Some architecures add some per-thread state * to distinguish between breakpoint traps and @@ -423,9 +419,8 @@ void do_trap0(struct pt_regs *regs) * set the si_code value appropriately, or we * may want to use a different trap0 flavor. */ - info.si_code = TRAP_BRKPT; - info.si_addr = (void __user *) pt_elr(regs); - force_sig_info(SIGTRAP, &info, current); + force_sig_fault(SIGTRAP, TRAP_BRKPT, + (void __user *) pt_elr(regs), current); } else { #ifdef CONFIG_KGDB kgdb_handle_exception(pt_cause(regs), SIGTRAP, diff --git a/arch/hexagon/lib/checksum.c b/arch/hexagon/lib/checksum.c index 617506d1a559..7cd0a2259269 100644 --- a/arch/hexagon/lib/checksum.c +++ b/arch/hexagon/lib/checksum.c @@ -199,3 +199,4 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum) memcpy(dst, src, len); return csum_partial(dst, len, sum); } +EXPORT_SYMBOL(csum_partial_copy_nocheck); diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index 3eec33c5cfd7..933bbcef5363 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -50,7 +50,7 @@ void do_page_fault(unsigned long address, long cause, struct pt_regs *regs) { struct vm_area_struct *vma; struct mm_struct *mm = current->mm; - siginfo_t info; + int si_signo; int si_code = SEGV_MAPERR; int fault; const struct exception_table_entry *fixup; @@ -140,28 +140,22 @@ good_area: * unable to fix up the page fault. */ if (fault & VM_FAULT_SIGBUS) { - info.si_signo = SIGBUS; - info.si_code = BUS_ADRERR; + si_signo = SIGBUS; + si_code = BUS_ADRERR; } /* Address is not in the memory map */ else { - info.si_signo = SIGSEGV; - info.si_code = SEGV_ACCERR; + si_signo = SIGSEGV; + si_code = SEGV_ACCERR; } - info.si_errno = 0; - info.si_addr = (void __user *)address; - force_sig_info(info.si_signo, &info, current); + force_sig_fault(si_signo, si_code, (void __user *)address, current); return; bad_area: up_read(&mm->mmap_sem); if (user_mode(regs)) { - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = (void *)address; - force_sig_info(info.si_signo, &info, current); + force_sig_fault(SIGSEGV, si_code, (void __user *)address, current); return; } /* Kernel-mode fault falls through */ diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index bbe12a038d21..792437d526c6 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -29,7 +29,6 @@ config IA64 select HAVE_FUNCTION_TRACER select TTY select HAVE_ARCH_TRACEHOOK - select HAVE_DMA_API_DEBUG select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP select HAVE_VIRT_CPU_ACCOUNTING @@ -54,6 +53,8 @@ config IA64 select MODULES_USE_ELF_RELA select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_AUDITSYSCALL + select NEED_DMA_MAP_STATE + select NEED_SG_DMA_LENGTH default y help The Itanium Processor Family is Intel's 64-bit successor to @@ -78,18 +79,6 @@ config MMU bool default y -config ARCH_DMA_ADDR_T_64BIT - def_bool y - -config NEED_DMA_MAP_STATE - def_bool y - -config NEED_SG_DMA_LENGTH - def_bool y - -config SWIOTLB - bool - config STACKTRACE_SUPPORT def_bool y @@ -146,7 +135,6 @@ config IA64_GENERIC bool "generic" select NUMA select ACPI_NUMA - select DMA_DIRECT_OPS select SWIOTLB select PCI_MSI help @@ -167,7 +155,6 @@ config IA64_GENERIC config IA64_DIG bool "DIG-compliant" - select DMA_DIRECT_OPS select SWIOTLB config IA64_DIG_VTD @@ -183,7 +170,6 @@ config IA64_HP_ZX1 config IA64_HP_ZX1_SWIOTLB bool "HP-zx1/sx1000 with software I/O TLB" - select DMA_DIRECT_OPS select SWIOTLB help Build a kernel that runs on HP zx1 and sx1000 systems even when they @@ -207,7 +193,6 @@ config IA64_SGI_UV bool "SGI-UV" select NUMA select ACPI_NUMA - select DMA_DIRECT_OPS select SWIOTLB help Selecting this option will optimize the kernel for use on UV based @@ -218,7 +203,6 @@ config IA64_SGI_UV config IA64_HP_SIM bool "Ski-simulator" - select DMA_DIRECT_OPS select SWIOTLB depends on !PM @@ -397,7 +381,7 @@ config ARCH_DISCONTIGMEM_ENABLE Say Y to support efficient handling of discontiguous physical memory, for architectures which are either NUMA (Non-Uniform Memory Access) or have huge holes in the physical address space for other reasons. - See <file:Documentation/vm/numa> for more. + See <file:Documentation/vm/numa.rst> for more. config ARCH_FLATMEM_ENABLE def_bool y @@ -613,6 +597,3 @@ source "security/Kconfig" source "crypto/Kconfig" source "lib/Kconfig" - -config IOMMU_HELPER - def_bool (IA64_HP_ZX1 || IA64_HP_ZX1_SWIOTLB || IA64_GENERIC || SWIOTLB) diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index aec4a3354abe..ee5b652d320a 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -1845,9 +1845,6 @@ static void ioc_init(unsigned long hpa, struct ioc *ioc) ioc_resource_init(ioc); ioc_sac_init(ioc); - if ((long) ~iovp_mask > (long) ia64_max_iommu_merge_mask) - ia64_max_iommu_merge_mask = ~iovp_mask; - printk(KERN_INFO PFX "%s %d.%d HPA 0x%lx IOVA space %dMb at 0x%lx\n", ioc->name, (ioc->rev >> 4) & 0xF, ioc->rev & 0xF, @@ -1942,19 +1939,6 @@ static const struct seq_operations ioc_seq_ops = { .show = ioc_show }; -static int -ioc_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &ioc_seq_ops); -} - -static const struct file_operations ioc_fops = { - .open = ioc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; - static void __init ioc_proc_init(void) { @@ -1964,7 +1948,7 @@ ioc_proc_init(void) if (!dir) return; - proc_create(ioc_list->name, 0, dir, &ioc_fops); + proc_create_seq(ioc_list->name, 0, dir, &ioc_seq_ops); } #endif diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index a419ccf33cde..663388a73d4e 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -435,19 +435,6 @@ static int rs_proc_show(struct seq_file *m, void *v) return 0; } -static int rs_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, rs_proc_show, NULL); -} - -static const struct file_operations rs_proc_fops = { - .owner = THIS_MODULE, - .open = rs_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static const struct tty_operations hp_ops = { .open = rs_open, .close = rs_close, @@ -462,7 +449,7 @@ static const struct tty_operations hp_ops = { .unthrottle = rs_unthrottle, .send_xchar = rs_send_xchar, .hangup = rs_hangup, - .proc_fops = &rs_proc_fops, + .proc_show = rs_proc_show, }; static const struct tty_port_operations hp_port_ops = { diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index 6dd867873364..557bbc8ba9f5 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -1,3 +1,4 @@ +generic-y += compat.h generic-y += exec.h generic-y += irq_work.h generic-y += mcs_spinlock.h diff --git a/arch/ia64/include/asm/hardirq.h b/arch/ia64/include/asm/hardirq.h index bdc4669c71c3..ccde7c2ba00f 100644 --- a/arch/ia64/include/asm/hardirq.h +++ b/arch/ia64/include/asm/hardirq.h @@ -13,7 +13,7 @@ #define __ARCH_IRQ_STAT 1 -#define local_softirq_pending() (local_cpu_data->softirq_pending) +#define local_softirq_pending_ref ia64_cpu_info.softirq_pending #include <linux/threads.h> #include <linux/irq.h> diff --git a/arch/ia64/include/asm/pci.h b/arch/ia64/include/asm/pci.h index b1d04e8bafc8..780e8744ba85 100644 --- a/arch/ia64/include/asm/pci.h +++ b/arch/ia64/include/asm/pci.h @@ -30,23 +30,6 @@ struct pci_vector_struct { #define PCIBIOS_MIN_IO 0x1000 #define PCIBIOS_MIN_MEM 0x10000000 -/* - * PCI_DMA_BUS_IS_PHYS should be set to 1 if there is _necessarily_ a direct - * correspondence between device bus addresses and CPU physical addresses. - * Platforms with a hardware I/O MMU _must_ turn this off to suppress the - * bounce buffer handling code in the block and network device layers. - * Platforms with separate bus address spaces _must_ turn this off and provide - * a device DMA mapping implementation that takes care of the necessary - * address translation. - * - * For now, the ia64 platforms which may have separate/multiple bus address - * spaces all have I/O MMUs which support the merging of physically - * discontiguous buffers, so we can use that as the sole factor to determine - * the setting of PCI_DMA_BUS_IS_PHYS. - */ -extern unsigned long ia64_max_iommu_merge_mask; -#define PCI_DMA_BUS_IS_PHYS (ia64_max_iommu_merge_mask == ~0UL) - #define HAVE_PCI_MMAP #define ARCH_GENERIC_PCI_MMAP_RESOURCE #define arch_can_pci_mmap_wc() 1 diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild index c0527cfc48f0..3982e673e967 100644 --- a/arch/ia64/include/uapi/asm/Kbuild +++ b/arch/ia64/include/uapi/asm/Kbuild @@ -2,5 +2,9 @@ include include/uapi/asm-generic/Kbuild.asm generic-y += bpf_perf_event.h +generic-y += ipcbuf.h generic-y += kvm_para.h +generic-y += msgbuf.h generic-y += poll.h +generic-y += sembuf.h +generic-y += shmbuf.h diff --git a/arch/ia64/include/uapi/asm/ipcbuf.h b/arch/ia64/include/uapi/asm/ipcbuf.h deleted file mode 100644 index 90d6445a14df..000000000000 --- a/arch/ia64/include/uapi/asm/ipcbuf.h +++ /dev/null @@ -1,2 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#include <asm-generic/ipcbuf.h> diff --git a/arch/ia64/include/uapi/asm/msgbuf.h b/arch/ia64/include/uapi/asm/msgbuf.h deleted file mode 100644 index aa25df92d9dc..000000000000 --- a/arch/ia64/include/uapi/asm/msgbuf.h +++ /dev/null @@ -1,28 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_MSGBUF_H -#define _ASM_IA64_MSGBUF_H - -/* - * The msqid64_ds structure for IA-64 architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * Pad space is left for: - * - 2 miscellaneous 64-bit values - */ - -struct msqid64_ds { - struct ipc64_perm msg_perm; - __kernel_time_t msg_stime; /* last msgsnd time */ - __kernel_time_t msg_rtime; /* last msgrcv time */ - __kernel_time_t msg_ctime; /* last change time */ - unsigned long msg_cbytes; /* current number of bytes on queue */ - unsigned long msg_qnum; /* number of messages in queue */ - unsigned long msg_qbytes; /* max number of bytes on queue */ - __kernel_pid_t msg_lspid; /* pid of last msgsnd */ - __kernel_pid_t msg_lrpid; /* last receive pid */ - unsigned long __unused1; - unsigned long __unused2; -}; - -#endif /* _ASM_IA64_MSGBUF_H */ diff --git a/arch/ia64/include/uapi/asm/sembuf.h b/arch/ia64/include/uapi/asm/sembuf.h deleted file mode 100644 index 6ed058760afc..000000000000 --- a/arch/ia64/include/uapi/asm/sembuf.h +++ /dev/null @@ -1,23 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_SEMBUF_H -#define _ASM_IA64_SEMBUF_H - -/* - * The semid64_ds structure for IA-64 architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * Pad space is left for: - * - 2 miscellaneous 64-bit values - */ - -struct semid64_ds { - struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ - __kernel_time_t sem_otime; /* last semop time */ - __kernel_time_t sem_ctime; /* last change time */ - unsigned long sem_nsems; /* no. of semaphores in array */ - unsigned long __unused1; - unsigned long __unused2; -}; - -#endif /* _ASM_IA64_SEMBUF_H */ diff --git a/arch/ia64/include/uapi/asm/shmbuf.h b/arch/ia64/include/uapi/asm/shmbuf.h deleted file mode 100644 index 6ef57cb70dee..000000000000 --- a/arch/ia64/include/uapi/asm/shmbuf.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _ASM_IA64_SHMBUF_H -#define _ASM_IA64_SHMBUF_H - -/* - * The shmid64_ds structure for IA-64 architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * Pad space is left for: - * - 2 miscellaneous 64-bit values - */ - -struct shmid64_ds { - struct ipc64_perm shm_perm; /* operation perms */ - size_t shm_segsz; /* size of segment (bytes) */ - __kernel_time_t shm_atime; /* last attach time */ - __kernel_time_t shm_dtime; /* last detach time */ - __kernel_time_t shm_ctime; /* last change time */ - __kernel_pid_t shm_cpid; /* pid of creator */ - __kernel_pid_t shm_lpid; /* pid of last operator */ - unsigned long shm_nattch; /* no. of current attaches */ - unsigned long __unused1; - unsigned long __unused2; -}; - -struct shminfo64 { - unsigned long shmmax; - unsigned long shmmin; - unsigned long shmmni; - unsigned long shmseg; - unsigned long shmall; - unsigned long __unused1; - unsigned long __unused2; - unsigned long __unused3; - unsigned long __unused4; -}; - -#endif /* _ASM_IA64_SHMBUF_H */ diff --git a/arch/ia64/include/uapi/asm/siginfo.h b/arch/ia64/include/uapi/asm/siginfo.h index 5aa454ed89db..52b5af424511 100644 --- a/arch/ia64/include/uapi/asm/siginfo.h +++ b/arch/ia64/include/uapi/asm/siginfo.h @@ -27,11 +27,4 @@ #define __ISR_VALID_BIT 0 #define __ISR_VALID (1 << __ISR_VALID_BIT) -/* - * SIGFPE si_codes - */ -#ifdef __KERNEL__ -#define FPE_FIXME 0 /* Broken dup of SI_USER */ -#endif /* __KERNEL__ */ - #endif /* _UAPI_ASM_IA64_SIGINFO_H */ diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c index 9bcc908bc85e..a61f6c6a36f8 100644 --- a/arch/ia64/kernel/brl_emu.c +++ b/arch/ia64/kernel/brl_emu.c @@ -62,6 +62,7 @@ ia64_emulate_brl (struct pt_regs *regs, unsigned long ar_ec) struct illegal_op_return rv; long tmp_taken, unimplemented_address; + clear_siginfo(&siginfo); rv.fkt = (unsigned long) -1; /* diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c index f2d57e66fd86..7a471d8d67d4 100644 --- a/arch/ia64/kernel/dma-mapping.c +++ b/arch/ia64/kernel/dma-mapping.c @@ -9,16 +9,6 @@ int iommu_detected __read_mostly; const struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) - -static int __init dma_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - - return 0; -} -fs_initcall(dma_init); - const struct dma_map_ops *dma_get_ops(struct device *dev) { return dma_ops; diff --git a/arch/ia64/kernel/palinfo.c b/arch/ia64/kernel/palinfo.c index b6e597860888..f4a94241265c 100644 --- a/arch/ia64/kernel/palinfo.c +++ b/arch/ia64/kernel/palinfo.c @@ -920,18 +920,6 @@ static int proc_palinfo_show(struct seq_file *m, void *v) return 0; } -static int proc_palinfo_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_palinfo_show, PDE_DATA(inode)); -} - -static const struct file_operations proc_palinfo_fops = { - .open = proc_palinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int palinfo_add_proc(unsigned int cpu) { pal_func_cpu_u_t f; @@ -948,8 +936,8 @@ static int palinfo_add_proc(unsigned int cpu) for (j=0; j < NR_PALINFO_ENTRIES; j++) { f.func_id = j; - proc_create_data(palinfo_entries[j].name, 0, cpu_dir, - &proc_palinfo_fops, (void *)f.value); + proc_create_single_data(palinfo_entries[j].name, 0, cpu_dir, + proc_palinfo_show, (void *)f.value); } return 0; } diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 8fb280e33114..3b38c717008a 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -5708,13 +5708,6 @@ const struct seq_operations pfm_seq_ops = { .show = pfm_proc_show }; -static int -pfm_proc_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &pfm_seq_ops); -} - - /* * we come here as soon as local_cpu_data->pfm_syst_wide is set. this happens * during pfm_enable() hence before pfm_start(). We cannot assume monitoring @@ -6537,13 +6530,6 @@ found: return 0; } -static const struct file_operations pfm_proc_fops = { - .open = pfm_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - int __init pfm_init(void) { @@ -6615,7 +6601,7 @@ pfm_init(void) /* * create /proc/perfmon (mostly for debugging purposes) */ - perfmon_dir = proc_create("perfmon", S_IRUGO, NULL, &pfm_proc_fops); + perfmon_dir = proc_create_seq("perfmon", S_IRUGO, NULL, &pfm_seq_ops); if (perfmon_dir == NULL) { printk(KERN_ERR "perfmon: cannot create /proc entry, perfmon disabled\n"); pmu_conf = NULL; diff --git a/arch/ia64/kernel/salinfo.c b/arch/ia64/kernel/salinfo.c index 52c404b08904..aba1f463a8dd 100644 --- a/arch/ia64/kernel/salinfo.c +++ b/arch/ia64/kernel/salinfo.c @@ -54,8 +54,6 @@ MODULE_AUTHOR("Jesse Barnes <jbarnes@sgi.com>"); MODULE_DESCRIPTION("/proc interface to IA-64 SAL features"); MODULE_LICENSE("GPL"); -static const struct file_operations proc_salinfo_fops; - typedef struct { const char *name; /* name of the proc entry */ unsigned long feature; /* feature bit */ @@ -578,6 +576,17 @@ static int salinfo_cpu_pre_down(unsigned int cpu) return 0; } +/* + * 'data' contains an integer that corresponds to the feature we're + * testing + */ +static int proc_salinfo_show(struct seq_file *m, void *v) +{ + unsigned long data = (unsigned long)v; + seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n"); + return 0; +} + static int __init salinfo_init(void) { @@ -593,9 +602,9 @@ salinfo_init(void) for (i=0; i < NR_SALINFO_ENTRIES; i++) { /* pass the feature bit in question as misc data */ - *sdir++ = proc_create_data(salinfo_entries[i].name, 0, salinfo_dir, - &proc_salinfo_fops, - (void *)salinfo_entries[i].feature); + *sdir++ = proc_create_single_data(salinfo_entries[i].name, 0, + salinfo_dir, proc_salinfo_show, + (void *)salinfo_entries[i].feature); } for (i = 0; i < ARRAY_SIZE(salinfo_log_name); i++) { @@ -633,27 +642,4 @@ salinfo_init(void) return 0; } -/* - * 'data' contains an integer that corresponds to the feature we're - * testing - */ -static int proc_salinfo_show(struct seq_file *m, void *v) -{ - unsigned long data = (unsigned long)v; - seq_puts(m, (sal_platform_features & data) ? "1\n" : "0\n"); - return 0; -} - -static int proc_salinfo_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_salinfo_show, PDE_DATA(inode)); -} - -static const struct file_operations proc_salinfo_fops = { - .open = proc_salinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - module_init(salinfo_init); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index dee56bcb993d..ad43cbf70628 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -124,18 +124,6 @@ unsigned long ia64_i_cache_stride_shift = ~0; unsigned long ia64_cache_stride_shift = ~0; /* - * The merge_mask variable needs to be set to (max(iommu_page_size(iommu)) - 1). This - * mask specifies a mask of address bits that must be 0 in order for two buffers to be - * mergeable by the I/O MMU (i.e., the end address of the first buffer and the start - * address of the second buffer must be aligned to (merge_mask+1) in order to be - * mergeable). By default, we assume there is no I/O MMU which can merge physically - * discontiguous buffers, so we set the merge_mask to ~0UL, which corresponds to a iommu - * page-size of 2^64. - */ -unsigned long ia64_max_iommu_merge_mask = ~0UL; -EXPORT_SYMBOL(ia64_max_iommu_merge_mask); - -/* * We use a special marker for the end of memory and it uses the extra (+1) slot */ struct rsvd_region rsvd_region[IA64_MAX_RSVD_REGIONS + 1] __initdata; diff --git a/arch/ia64/kernel/signal.c b/arch/ia64/kernel/signal.c index 54547c7cf8a2..d1234a5ba4c5 100644 --- a/arch/ia64/kernel/signal.c +++ b/arch/ia64/kernel/signal.c @@ -153,6 +153,7 @@ ia64_rt_sigreturn (struct sigscratch *scr) return retval; give_sigsegv: + clear_siginfo(&si); si.si_signo = SIGSEGV; si.si_errno = 0; si.si_code = SI_KERNEL; @@ -236,6 +237,7 @@ force_sigsegv_info (int sig, void __user *addr) unsigned long flags; struct siginfo si; + clear_siginfo(&si); if (sig == SIGSEGV) { /* * Acquiring siglock around the sa_handler-update is almost diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 6d4e76a4267f..c6f4932073a1 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -104,6 +104,7 @@ __kprobes ia64_bad_break (unsigned long break_num, struct pt_regs *regs) int sig, code; /* SIGILL, SIGFPE, SIGSEGV, and SIGBUS want these field initialized: */ + clear_siginfo(&siginfo); siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); siginfo.si_imm = break_num; siginfo.si_flags = 0; /* clear __ISR_VALID */ @@ -293,7 +294,6 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) { long exception, bundle[2]; unsigned long fault_ip; - struct siginfo siginfo; fault_ip = regs->cr_iip; if (!fp_fault && (ia64_psr(regs)->ri == 0)) @@ -344,13 +344,16 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) printk(KERN_ERR "handle_fpu_swa: fp_emulate() returned -1\n"); return -1; } else { + struct siginfo siginfo; + /* is next instruction a trap? */ if (exception & 2) { ia64_increment_ip(regs); } + clear_siginfo(&siginfo); siginfo.si_signo = SIGFPE; siginfo.si_errno = 0; - siginfo.si_code = FPE_FIXME; /* default code */ + siginfo.si_code = FPE_FLTUNK; /* default code */ siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); if (isr & 0x11) { siginfo.si_code = FPE_FLTINV; @@ -372,9 +375,12 @@ handle_fpu_swa (int fp_fault, struct pt_regs *regs, unsigned long isr) return -1; } else if (exception != 0) { /* raise exception */ + struct siginfo siginfo; + + clear_siginfo(&siginfo); siginfo.si_signo = SIGFPE; siginfo.si_errno = 0; - siginfo.si_code = FPE_FIXME; /* default code */ + siginfo.si_code = FPE_FLTUNK; /* default code */ siginfo.si_addr = (void __user *) (regs->cr_iip + ia64_psr(regs)->ri); if (isr & 0x880) { siginfo.si_code = FPE_FLTOVF; @@ -420,7 +426,7 @@ ia64_illegal_op_fault (unsigned long ec, long arg1, long arg2, long arg3, if (die_if_kernel(buf, ®s, 0)) return rv; - memset(&si, 0, sizeof(si)); + clear_siginfo(&si); si.si_signo = SIGILL; si.si_code = ILL_ILLOPC; si.si_addr = (void __user *) (regs.cr_iip + ia64_psr(®s)->ri); @@ -434,7 +440,6 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, long arg7, struct pt_regs regs) { unsigned long code, error = isr, iip; - struct siginfo siginfo; char buf[128]; int result, sig; static const char *reason[] = { @@ -485,6 +490,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, case 26: /* NaT Consumption */ if (user_mode(®s)) { + struct siginfo siginfo; void __user *addr; if (((isr >> 4) & 0xf) == 2) { @@ -499,6 +505,7 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, addr = (void __user *) (regs.cr_iip + ia64_psr(®s)->ri); } + clear_siginfo(&siginfo); siginfo.si_signo = sig; siginfo.si_code = code; siginfo.si_errno = 0; @@ -515,6 +522,9 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, case 31: /* Unsupported Data Reference */ if (user_mode(®s)) { + struct siginfo siginfo; + + clear_siginfo(&siginfo); siginfo.si_signo = SIGILL; siginfo.si_code = ILL_ILLOPN; siginfo.si_errno = 0; @@ -531,6 +541,10 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, case 29: /* Debug */ case 35: /* Taken Branch Trap */ case 36: /* Single Step Trap */ + { + struct siginfo siginfo; + + clear_siginfo(&siginfo); if (fsys_mode(current, ®s)) { extern char __kernel_syscall_via_break[]; /* @@ -578,11 +592,15 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, siginfo.si_isr = isr; force_sig_info(SIGTRAP, &siginfo, current); return; + } case 32: /* fp fault */ case 33: /* fp trap */ result = handle_fpu_swa((vector == 32) ? 1 : 0, ®s, isr); if ((result < 0) || (current->thread.flags & IA64_THREAD_FPEMU_SIGFPE)) { + struct siginfo siginfo; + + clear_siginfo(&siginfo); siginfo.si_signo = SIGFPE; siginfo.si_errno = 0; siginfo.si_code = FPE_FLTINV; @@ -616,6 +634,9 @@ ia64_fault (unsigned long vector, unsigned long isr, unsigned long ifa, } else { /* Unimplemented Instr. Address Trap */ if (user_mode(®s)) { + struct siginfo siginfo; + + clear_siginfo(&siginfo); siginfo.si_signo = SIGILL; siginfo.si_code = ILL_BADIADDR; siginfo.si_errno = 0; diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 72e9b4242564..e309f9859acc 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -1537,6 +1537,7 @@ ia64_handle_unaligned (unsigned long ifa, struct pt_regs *regs) /* NOT_REACHED */ } force_sigbus: + clear_siginfo(&si); si.si_signo = SIGBUS; si.si_errno = 0; si.si_code = BUS_ADRALN; diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index dfdc152d6737..817fa120645f 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -85,7 +85,6 @@ ia64_do_page_fault (unsigned long address, unsigned long isr, struct pt_regs *re int signal = SIGSEGV, code = SEGV_MAPERR; struct vm_area_struct *vma, *prev_vma; struct mm_struct *mm = current->mm; - struct siginfo si; unsigned long mask; int fault; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -249,6 +248,9 @@ retry: return; } if (user_mode(regs)) { + struct siginfo si; + + clear_siginfo(&si); si.si_signo = signal; si.si_errno = 0; si.si_code = code; diff --git a/arch/ia64/sn/kernel/io_common.c b/arch/ia64/sn/kernel/io_common.c index 11f2275570fb..8479e9a7ce16 100644 --- a/arch/ia64/sn/kernel/io_common.c +++ b/arch/ia64/sn/kernel/io_common.c @@ -480,11 +480,6 @@ sn_io_early_init(void) tioca_init_provider(); tioce_init_provider(); - /* - * This is needed to avoid bounce limit checks in the blk layer - */ - ia64_max_iommu_merge_mask = ~PAGE_MASK; - sn_irq_lh_init(); INIT_LIST_HEAD(&sn_sysdata_list); sn_init_cpei_timer(); diff --git a/arch/ia64/sn/kernel/sn2/prominfo_proc.c b/arch/ia64/sn/kernel/sn2/prominfo_proc.c index ec4de2b09653..e15457bf21ac 100644 --- a/arch/ia64/sn/kernel/sn2/prominfo_proc.c +++ b/arch/ia64/sn/kernel/sn2/prominfo_proc.c @@ -140,18 +140,6 @@ static int proc_fit_show(struct seq_file *m, void *v) return 0; } -static int proc_fit_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_fit_show, PDE_DATA(inode)); -} - -static const struct file_operations proc_fit_fops = { - .open = proc_fit_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int proc_version_show(struct seq_file *m, void *v) { unsigned long nasid = (unsigned long)m->private; @@ -174,18 +162,6 @@ static int proc_version_show(struct seq_file *m, void *v) return 0; } -static int proc_version_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_version_show, PDE_DATA(inode)); -} - -static const struct file_operations proc_version_fops = { - .open = proc_version_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /* module entry points */ int __init prominfo_init(void); void __exit prominfo_exit(void); @@ -217,10 +193,10 @@ int __init prominfo_init(void) if (!dir) continue; nasid = cnodeid_to_nasid(cnodeid); - proc_create_data("fit", 0, dir, - &proc_fit_fops, (void *)nasid); - proc_create_data("version", 0, dir, - &proc_version_fops, (void *)nasid); + proc_create_single_data("fit", 0, dir, proc_fit_show, + (void *)nasid); + proc_create_single_data("version", 0, dir, proc_version_show, + (void *)nasid); } return 0; } diff --git a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c index 29cf8f8c08e9..c2a4d84297b0 100644 --- a/arch/ia64/sn/kernel/sn2/sn_proc_fs.c +++ b/arch/ia64/sn/kernel/sn2/sn_proc_fs.c @@ -18,33 +18,18 @@ static int partition_id_show(struct seq_file *s, void *p) return 0; } -static int partition_id_open(struct inode *inode, struct file *file) -{ - return single_open(file, partition_id_show, NULL); -} - static int system_serial_number_show(struct seq_file *s, void *p) { seq_printf(s, "%s\n", sn_system_serial_number()); return 0; } -static int system_serial_number_open(struct inode *inode, struct file *file) -{ - return single_open(file, system_serial_number_show, NULL); -} - static int licenseID_show(struct seq_file *s, void *p) { seq_printf(s, "0x%llx\n", sn_partition_serial_number_val()); return 0; } -static int licenseID_open(struct inode *inode, struct file *file) -{ - return single_open(file, licenseID_show, NULL); -} - static int coherence_id_show(struct seq_file *s, void *p) { seq_printf(s, "%d\n", partition_coherence_id()); @@ -52,43 +37,10 @@ static int coherence_id_show(struct seq_file *s, void *p) return 0; } -static int coherence_id_open(struct inode *inode, struct file *file) -{ - return single_open(file, coherence_id_show, NULL); -} - /* /proc/sgi_sn/sn_topology uses seq_file, see sn_hwperf.c */ extern int sn_topology_open(struct inode *, struct file *); extern int sn_topology_release(struct inode *, struct file *); -static const struct file_operations proc_partition_id_fops = { - .open = partition_id_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static const struct file_operations proc_system_sn_fops = { - .open = system_serial_number_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static const struct file_operations proc_license_id_fops = { - .open = licenseID_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static const struct file_operations proc_coherence_id_fops = { - .open = coherence_id_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static const struct file_operations proc_sn_topo_fops = { .open = sn_topology_open, .read = seq_read, @@ -104,13 +56,13 @@ void register_sn_procfs(void) if (!(sgi_proc_dir = proc_mkdir("sgi_sn", NULL))) return; - proc_create("partition_id", 0444, sgi_proc_dir, - &proc_partition_id_fops); - proc_create("system_serial_number", 0444, sgi_proc_dir, - &proc_system_sn_fops); - proc_create("licenseID", 0444, sgi_proc_dir, &proc_license_id_fops); - proc_create("coherence_id", 0444, sgi_proc_dir, - &proc_coherence_id_fops); + proc_create_single("partition_id", 0444, sgi_proc_dir, + partition_id_show); + proc_create_single("system_serial_number", 0444, sgi_proc_dir, + system_serial_number_show); + proc_create_single("licenseID", 0444, sgi_proc_dir, licenseID_show); + proc_create_single("coherence_id", 0444, sgi_proc_dir, + coherence_id_show); proc_create("sn_topology", 0444, sgi_proc_dir, &proc_sn_topo_fops); } diff --git a/arch/m68k/68000/timers.c b/arch/m68k/68000/timers.c index 252455bce144..71ddb4c98726 100644 --- a/arch/m68k/68000/timers.c +++ b/arch/m68k/68000/timers.c @@ -125,7 +125,9 @@ int m68328_hwclk(int set, struct rtc_time *t) { if (!set) { long now = RTCTIME; - t->tm_year = t->tm_mon = t->tm_mday = 1; + t->tm_year = 1; + t->tm_mon = 0; + t->tm_mday = 1; t->tm_hour = (now >> 24) % 24; t->tm_min = (now >> 16) % 60; t->tm_sec = now % 60; diff --git a/arch/m68k/Kconfig.bus b/arch/m68k/Kconfig.bus index d5e66ec136db..aef698fa50e5 100644 --- a/arch/m68k/Kconfig.bus +++ b/arch/m68k/Kconfig.bus @@ -59,6 +59,10 @@ config ATARI_ROM_ISA config GENERIC_ISA_DMA def_bool ISA +source "drivers/zorro/Kconfig" + +endif + config PCI bool "PCI support" depends on M54xx @@ -66,10 +70,8 @@ config PCI Enable the PCI bus. Support for the PCI bus hardware built into the ColdFire 547x and 548x processors. +if PCI source "drivers/pci/Kconfig" - -source "drivers/zorro/Kconfig" - endif if !MMU diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c index 0d27706f14d4..b2a6bc63f8cd 100644 --- a/arch/m68k/apollo/config.c +++ b/arch/m68k/apollo/config.c @@ -221,8 +221,10 @@ int dn_dummy_hwclk(int op, struct rtc_time *t) { t->tm_hour=rtc->hours; t->tm_mday=rtc->day_of_month; t->tm_wday=rtc->day_of_week; - t->tm_mon=rtc->month; + t->tm_mon = rtc->month - 1; t->tm_year=rtc->year; + if (t->tm_year < 70) + t->tm_year += 100; } else { rtc->second=t->tm_sec; rtc->minute=t->tm_min; @@ -230,8 +232,8 @@ int dn_dummy_hwclk(int op, struct rtc_time *t) { rtc->day_of_month=t->tm_mday; if(t->tm_wday!=-1) rtc->day_of_week=t->tm_wday; - rtc->month=t->tm_mon; - rtc->year=t->tm_year; + rtc->month = t->tm_mon + 1; + rtc->year = t->tm_year % 100; } return 0; diff --git a/arch/m68k/coldfire/pci.c b/arch/m68k/coldfire/pci.c index 3097fa2ca746..62b0eb6cf69a 100644 --- a/arch/m68k/coldfire/pci.c +++ b/arch/m68k/coldfire/pci.c @@ -23,20 +23,10 @@ /* * Memory and IO mappings. We use a 1:1 mapping for local host memory to - * PCI bus memory (no reason not to really). IO space doesn't matter, we - * always use access functions for that. The device configuration space is - * mapped over the IO map space when we enable it in the PCICAR register. + * PCI bus memory (no reason not to really). IO space is mapped in its own + * separate address region. The device configuration space is mapped over + * the IO map space when we enable it in the PCICAR register. */ -#define PCI_MEM_PA 0xf0000000 /* Host physical address */ -#define PCI_MEM_BA 0xf0000000 /* Bus physical address */ -#define PCI_MEM_SIZE 0x08000000 /* 128 MB */ -#define PCI_MEM_MASK (PCI_MEM_SIZE - 1) - -#define PCI_IO_PA 0xf8000000 /* Host physical address */ -#define PCI_IO_BA 0x00000000 /* Bus physical address */ -#define PCI_IO_SIZE 0x00010000 /* 64k */ -#define PCI_IO_MASK (PCI_IO_SIZE - 1) - static struct pci_bus *rootbus; static unsigned long iospace; @@ -56,13 +46,6 @@ static unsigned char mcf_host_irq[] = { 0, 69, 69, 71, 71, }; - -static inline void syncio(void) -{ - /* The ColdFire "nop" instruction waits for all bus IO to complete */ - __asm__ __volatile__ ("nop"); -} - /* * Configuration space access functions. Configuration space access is * through the IO mapping window, enabling it via the PCICAR register. @@ -84,9 +67,9 @@ static int mcf_pci_readconfig(struct pci_bus *bus, unsigned int devfn, return PCIBIOS_SUCCESSFUL; } - syncio(); addr = mcf_mk_pcicar(bus->number, devfn, where); __raw_writel(PCICAR_E | addr, PCICAR); + __raw_readl(PCICAR); addr = iospace + (where & 0x3); switch (size) { @@ -101,8 +84,8 @@ static int mcf_pci_readconfig(struct pci_bus *bus, unsigned int devfn, break; } - syncio(); __raw_writel(0, PCICAR); + __raw_readl(PCICAR); return PCIBIOS_SUCCESSFUL; } @@ -116,9 +99,9 @@ static int mcf_pci_writeconfig(struct pci_bus *bus, unsigned int devfn, return PCIBIOS_SUCCESSFUL; } - syncio(); addr = mcf_mk_pcicar(bus->number, devfn, where); __raw_writel(PCICAR_E | addr, PCICAR); + __raw_readl(PCICAR); addr = iospace + (where & 0x3); switch (size) { @@ -133,8 +116,8 @@ static int mcf_pci_writeconfig(struct pci_bus *bus, unsigned int devfn, break; } - syncio(); __raw_writel(0, PCICAR); + __raw_readl(PCICAR); return PCIBIOS_SUCCESSFUL; } @@ -144,89 +127,6 @@ static struct pci_ops mcf_pci_ops = { }; /* - * IO address space access functions. Pretty strait forward, these are - * directly mapped in to the IO mapping window. And that is mapped into - * virtual address space. - */ -u8 mcf_pci_inb(u32 addr) -{ - return __raw_readb(iospace + (addr & PCI_IO_MASK)); -} -EXPORT_SYMBOL(mcf_pci_inb); - -u16 mcf_pci_inw(u32 addr) -{ - return le16_to_cpu(__raw_readw(iospace + (addr & PCI_IO_MASK))); -} -EXPORT_SYMBOL(mcf_pci_inw); - -u32 mcf_pci_inl(u32 addr) -{ - return le32_to_cpu(__raw_readl(iospace + (addr & PCI_IO_MASK))); -} -EXPORT_SYMBOL(mcf_pci_inl); - -void mcf_pci_insb(u32 addr, u8 *buf, u32 len) -{ - for (; len; len--) - *buf++ = mcf_pci_inb(addr); -} -EXPORT_SYMBOL(mcf_pci_insb); - -void mcf_pci_insw(u32 addr, u16 *buf, u32 len) -{ - for (; len; len--) - *buf++ = mcf_pci_inw(addr); -} -EXPORT_SYMBOL(mcf_pci_insw); - -void mcf_pci_insl(u32 addr, u32 *buf, u32 len) -{ - for (; len; len--) - *buf++ = mcf_pci_inl(addr); -} -EXPORT_SYMBOL(mcf_pci_insl); - -void mcf_pci_outb(u8 v, u32 addr) -{ - __raw_writeb(v, iospace + (addr & PCI_IO_MASK)); -} -EXPORT_SYMBOL(mcf_pci_outb); - -void mcf_pci_outw(u16 v, u32 addr) -{ - __raw_writew(cpu_to_le16(v), iospace + (addr & PCI_IO_MASK)); -} -EXPORT_SYMBOL(mcf_pci_outw); - -void mcf_pci_outl(u32 v, u32 addr) -{ - __raw_writel(cpu_to_le32(v), iospace + (addr & PCI_IO_MASK)); -} -EXPORT_SYMBOL(mcf_pci_outl); - -void mcf_pci_outsb(u32 addr, const u8 *buf, u32 len) -{ - for (; len; len--) - mcf_pci_outb(*buf++, addr); -} -EXPORT_SYMBOL(mcf_pci_outsb); - -void mcf_pci_outsw(u32 addr, const u16 *buf, u32 len) -{ - for (; len; len--) - mcf_pci_outw(*buf++, addr); -} -EXPORT_SYMBOL(mcf_pci_outsw); - -void mcf_pci_outsl(u32 addr, const u32 *buf, u32 len) -{ - for (; len; len--) - mcf_pci_outl(*buf++, addr); -} -EXPORT_SYMBOL(mcf_pci_outsl); - -/* * Initialize the PCI bus registers, and scan the bus. */ static struct resource mcf_pci_mem = { diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index 37a8e5ab8728..a874e54404d1 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -98,8 +98,8 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=m -CONFIG_NF_TABLES_NETDEV=m +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_RT=m @@ -204,7 +204,7 @@ CONFIG_NF_SOCKET_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -233,12 +233,12 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_SOCKET_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m +CONFIG_NFT_DUP_IPV6=m +CONFIG_NFT_FIB_IPV6=m +CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -259,7 +259,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_NFT_BRIDGE_META=m CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_LOG_BRIDGE=m @@ -310,7 +310,6 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_NSH=m -CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m @@ -414,6 +413,7 @@ CONFIG_ARIADNE=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NI is not set CONFIG_HYDRA=y CONFIG_APNE=y CONFIG_ZORRO8390=y @@ -485,6 +485,7 @@ CONFIG_RTC_DRV_MSM6242=m CONFIG_RTC_DRV_RP5C01=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set +CONFIG_DAX=m CONFIG_HEARTBEAT=y CONFIG_PROC_HARDWARE=y CONFIG_AMIGA_BUILTIN_SERIAL=y @@ -621,6 +622,7 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m @@ -647,6 +649,8 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SPECK=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index 6a466266b852..8ce39e23aa42 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -96,8 +96,8 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=m -CONFIG_NF_TABLES_NETDEV=m +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_RT=m @@ -202,7 +202,7 @@ CONFIG_NF_SOCKET_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -231,12 +231,12 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_SOCKET_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m +CONFIG_NFT_DUP_IPV6=m +CONFIG_NFT_FIB_IPV6=m +CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -257,7 +257,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_NFT_BRIDGE_META=m CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_LOG_BRIDGE=m @@ -308,7 +308,6 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_NSH=m -CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m @@ -392,6 +391,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_NATSEMI is not set # CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NI is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set # CONFIG_NET_VENDOR_ROCKER is not set @@ -446,6 +446,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set +CONFIG_DAX=m CONFIG_HEARTBEAT=y CONFIG_PROC_HARDWARE=y CONFIG_EXT4_FS=y @@ -580,6 +581,7 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m @@ -606,6 +608,8 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SPECK=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index b0691a7a3345..346c4e75edf8 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -96,8 +96,8 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=m -CONFIG_NF_TABLES_NETDEV=m +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_RT=m @@ -202,7 +202,7 @@ CONFIG_NF_SOCKET_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -231,12 +231,12 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_SOCKET_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m +CONFIG_NFT_DUP_IPV6=m +CONFIG_NFT_FIB_IPV6=m +CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -257,7 +257,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_NFT_BRIDGE_META=m CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_LOG_BRIDGE=m @@ -308,7 +308,6 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_NSH=m -CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m @@ -401,6 +400,7 @@ CONFIG_ATARILANCE=y # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NI is not set CONFIG_NE2000=y # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set @@ -461,6 +461,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set +CONFIG_DAX=m CONFIG_HEARTBEAT=y CONFIG_PROC_HARDWARE=y CONFIG_NATFEAT=y @@ -602,6 +603,7 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m @@ -628,6 +630,8 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SPECK=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index 6f6470fa9a50..fca9c7aa71a3 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -94,8 +94,8 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=m -CONFIG_NF_TABLES_NETDEV=m +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_RT=m @@ -200,7 +200,7 @@ CONFIG_NF_SOCKET_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -229,12 +229,12 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_SOCKET_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m +CONFIG_NFT_DUP_IPV6=m +CONFIG_NFT_FIB_IPV6=m +CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -255,7 +255,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_NFT_BRIDGE_META=m CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_LOG_BRIDGE=m @@ -306,7 +306,6 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_NSH=m -CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m @@ -391,6 +390,7 @@ CONFIG_BVME6000_NET=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_NATSEMI is not set # CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NI is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set # CONFIG_NET_VENDOR_ROCKER is not set @@ -439,6 +439,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set +CONFIG_DAX=m CONFIG_PROC_HARDWARE=y CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m @@ -572,6 +573,7 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m @@ -598,6 +600,8 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SPECK=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 31a1a2b5e860..f9eab174915c 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -96,8 +96,8 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=m -CONFIG_NF_TABLES_NETDEV=m +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_RT=m @@ -202,7 +202,7 @@ CONFIG_NF_SOCKET_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -231,12 +231,12 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_SOCKET_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m +CONFIG_NFT_DUP_IPV6=m +CONFIG_NFT_FIB_IPV6=m +CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -257,7 +257,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_NFT_BRIDGE_META=m CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_LOG_BRIDGE=m @@ -308,7 +308,6 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_NSH=m -CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m @@ -393,6 +392,7 @@ CONFIG_HPLANCE=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_NATSEMI is not set # CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NI is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set # CONFIG_NET_VENDOR_ROCKER is not set @@ -449,6 +449,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set +CONFIG_DAX=m CONFIG_PROC_HARDWARE=y CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m @@ -582,6 +583,7 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m @@ -608,6 +610,8 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SPECK=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index 390d4a87441c..b52e597899eb 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -95,8 +95,8 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=m -CONFIG_NF_TABLES_NETDEV=m +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_RT=m @@ -201,7 +201,7 @@ CONFIG_NF_SOCKET_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -230,12 +230,12 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_SOCKET_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m +CONFIG_NFT_DUP_IPV6=m +CONFIG_NFT_FIB_IPV6=m +CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -256,7 +256,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_NFT_BRIDGE_META=m CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_LOG_BRIDGE=m @@ -310,7 +310,6 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_NSH=m -CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m @@ -410,6 +409,7 @@ CONFIG_MAC89x0=y # CONFIG_NET_VENDOR_MICREL is not set CONFIG_MACSONIC=y # CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NI is not set CONFIG_MAC8390=y # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set @@ -471,6 +471,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set +CONFIG_DAX=m CONFIG_PROC_HARDWARE=y CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m @@ -604,6 +605,7 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m @@ -630,6 +632,8 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SPECK=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 77be97d82dc3..2a84eeec5b02 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -105,8 +105,8 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=m -CONFIG_NF_TABLES_NETDEV=m +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_RT=m @@ -211,7 +211,7 @@ CONFIG_NF_SOCKET_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -240,12 +240,12 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_SOCKET_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m +CONFIG_NFT_DUP_IPV6=m +CONFIG_NFT_FIB_IPV6=m +CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -266,7 +266,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_NFT_BRIDGE_META=m CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_LOG_BRIDGE=m @@ -320,7 +320,6 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_NSH=m -CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m @@ -452,6 +451,7 @@ CONFIG_MVME16x_NET=y # CONFIG_NET_VENDOR_MICREL is not set CONFIG_MACSONIC=y # CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NI is not set CONFIG_HYDRA=y CONFIG_MAC8390=y CONFIG_NE2000=y @@ -541,6 +541,7 @@ CONFIG_RTC_DRV_RP5C01=m CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set +CONFIG_DAX=m CONFIG_HEARTBEAT=y CONFIG_PROC_HARDWARE=y CONFIG_NATFEAT=y @@ -684,6 +685,7 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m @@ -710,6 +712,8 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SPECK=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 2ca140757b0f..476e69994340 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -93,8 +93,8 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=m -CONFIG_NF_TABLES_NETDEV=m +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_RT=m @@ -199,7 +199,7 @@ CONFIG_NF_SOCKET_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -228,12 +228,12 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_SOCKET_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m +CONFIG_NFT_DUP_IPV6=m +CONFIG_NFT_FIB_IPV6=m +CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -254,7 +254,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_NFT_BRIDGE_META=m CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_LOG_BRIDGE=m @@ -305,7 +305,6 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_NSH=m -CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m @@ -391,6 +390,7 @@ CONFIG_MVME147_NET=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_NATSEMI is not set # CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NI is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set # CONFIG_NET_VENDOR_ROCKER is not set @@ -439,6 +439,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set +CONFIG_DAX=m CONFIG_PROC_HARDWARE=y CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m @@ -572,6 +573,7 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m @@ -598,6 +600,8 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SPECK=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 6a3b4dcc5aab..1477cda9146e 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -94,8 +94,8 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=m -CONFIG_NF_TABLES_NETDEV=m +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_RT=m @@ -200,7 +200,7 @@ CONFIG_NF_SOCKET_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -229,12 +229,12 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_SOCKET_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m +CONFIG_NFT_DUP_IPV6=m +CONFIG_NFT_FIB_IPV6=m +CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -255,7 +255,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_NFT_BRIDGE_META=m CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_LOG_BRIDGE=m @@ -306,7 +306,6 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_NSH=m -CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m @@ -391,6 +390,7 @@ CONFIG_MVME16x_NET=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_NATSEMI is not set # CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NI is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set # CONFIG_NET_VENDOR_ROCKER is not set @@ -439,6 +439,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set +CONFIG_DAX=m CONFIG_PROC_HARDWARE=y CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m @@ -572,6 +573,7 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m @@ -598,6 +600,8 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SPECK=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index 2a3e29c97652..b3a543dc48a0 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -94,8 +94,8 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=m -CONFIG_NF_TABLES_NETDEV=m +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_RT=m @@ -200,7 +200,7 @@ CONFIG_NF_SOCKET_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -229,12 +229,12 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_SOCKET_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m +CONFIG_NFT_DUP_IPV6=m +CONFIG_NFT_FIB_IPV6=m +CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -255,7 +255,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_NFT_BRIDGE_META=m CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_LOG_BRIDGE=m @@ -306,7 +306,6 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_NSH=m -CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m @@ -400,6 +399,7 @@ CONFIG_VETH=m # CONFIG_NET_VENDOR_MARVELL is not set # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NI is not set CONFIG_NE2000=y # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set @@ -461,6 +461,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set +CONFIG_DAX=m CONFIG_HEARTBEAT=y CONFIG_PROC_HARDWARE=y CONFIG_EXT4_FS=y @@ -595,6 +596,7 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m @@ -621,6 +623,8 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SPECK=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index cba2494c99b2..d543ed5dfa96 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -91,8 +91,8 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=m -CONFIG_NF_TABLES_NETDEV=m +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_RT=m @@ -197,7 +197,7 @@ CONFIG_NF_SOCKET_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -226,12 +226,12 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_SOCKET_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m +CONFIG_NFT_DUP_IPV6=m +CONFIG_NFT_FIB_IPV6=m +CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -252,7 +252,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_NFT_BRIDGE_META=m CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_LOG_BRIDGE=m @@ -303,7 +303,6 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_NSH=m -CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m @@ -388,6 +387,7 @@ CONFIG_SUN3_82586=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_NATSEMI is not set # CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NI is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set # CONFIG_NET_VENDOR_ROCKER is not set @@ -441,6 +441,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set +CONFIG_DAX=m CONFIG_PROC_HARDWARE=y CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m @@ -573,6 +574,7 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m @@ -599,6 +601,8 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SPECK=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index d911561137fd..a67e54246023 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -91,8 +91,8 @@ CONFIG_NF_CONNTRACK_SANE=m CONFIG_NF_CONNTRACK_SIP=m CONFIG_NF_CONNTRACK_TFTP=m CONFIG_NF_TABLES=m -CONFIG_NF_TABLES_INET=m -CONFIG_NF_TABLES_NETDEV=m +CONFIG_NF_TABLES_INET=y +CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_EXTHDR=m CONFIG_NFT_META=m CONFIG_NFT_RT=m @@ -197,7 +197,7 @@ CONFIG_NF_SOCKET_IPV4=m CONFIG_NFT_CHAIN_ROUTE_IPV4=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NFT_CHAIN_NAT_IPV4=m @@ -226,12 +226,12 @@ CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m CONFIG_NF_SOCKET_IPV6=m CONFIG_NFT_CHAIN_ROUTE_IPV6=m -CONFIG_NFT_DUP_IPV6=m -CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_MASQ_IPV6=m CONFIG_NFT_REDIR_IPV6=m +CONFIG_NFT_DUP_IPV6=m +CONFIG_NFT_FIB_IPV6=m +CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -252,7 +252,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m CONFIG_IP6_NF_TARGET_NPT=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_NFT_BRIDGE_META=m CONFIG_NFT_BRIDGE_REJECT=m CONFIG_NF_LOG_BRIDGE=m @@ -303,7 +303,6 @@ CONFIG_NET_MPLS_GSO=m CONFIG_MPLS_ROUTING=m CONFIG_MPLS_IPTUNNEL=m CONFIG_NET_NSH=m -CONFIG_NET_L3_MASTER_DEV=y CONFIG_AF_KCM=m # CONFIG_WIRELESS is not set CONFIG_PSAMPLE=m @@ -389,6 +388,7 @@ CONFIG_SUN3LANCE=y # CONFIG_NET_VENDOR_MICREL is not set # CONFIG_NET_VENDOR_NATSEMI is not set # CONFIG_NET_VENDOR_NETRONOME is not set +# CONFIG_NET_VENDOR_NI is not set # CONFIG_NET_VENDOR_QUALCOMM is not set # CONFIG_NET_VENDOR_RENESAS is not set # CONFIG_NET_VENDOR_ROCKER is not set @@ -441,6 +441,7 @@ CONFIG_RTC_CLASS=y CONFIG_RTC_DRV_GENERIC=m # CONFIG_VIRTIO_MENU is not set # CONFIG_IOMMU_SUPPORT is not set +CONFIG_DAX=m CONFIG_PROC_HARDWARE=y CONFIG_EXT4_FS=y CONFIG_REISERFS_FS=m @@ -574,6 +575,7 @@ CONFIG_CRYPTO_CRYPTD=m CONFIG_CRYPTO_MCRYPTD=m CONFIG_CRYPTO_TEST=m CONFIG_CRYPTO_CHACHA20POLY1305=m +CONFIG_CRYPTO_CFB=m CONFIG_CRYPTO_LRW=m CONFIG_CRYPTO_PCBC=m CONFIG_CRYPTO_KEYWRAP=m @@ -600,6 +602,8 @@ CONFIG_CRYPTO_KHAZAD=m CONFIG_CRYPTO_SALSA20=m CONFIG_CRYPTO_SEED=m CONFIG_CRYPTO_SERPENT=m +CONFIG_CRYPTO_SM4=m +CONFIG_CRYPTO_SPECK=m CONFIG_CRYPTO_TEA=m CONFIG_CRYPTO_TWOFISH=m CONFIG_CRYPTO_LZO=m diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 88a9d27df1ac..4d8d68c4e3dd 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -1,4 +1,5 @@ generic-y += barrier.h +generic-y += compat.h generic-y += device.h generic-y += emergency-restart.h generic-y += exec.h diff --git a/arch/m68k/include/asm/atarihw.h b/arch/m68k/include/asm/atarihw.h index 972c8f33f055..9000b249d225 100644 --- a/arch/m68k/include/asm/atarihw.h +++ b/arch/m68k/include/asm/atarihw.h @@ -23,6 +23,7 @@ #include <linux/types.h> #include <asm/bootinfo-atari.h> #include <asm/raw_io.h> +#include <asm/kmap.h> extern u_long atari_mch_cookie; extern u_long atari_mch_type; diff --git a/arch/m68k/include/asm/delay.h b/arch/m68k/include/asm/delay.h index 7f474121e4ca..751712f8beea 100644 --- a/arch/m68k/include/asm/delay.h +++ b/arch/m68k/include/asm/delay.h @@ -49,8 +49,6 @@ extern void __bad_udelay(void); * The simpler m68k and ColdFire processors do not have a 32*32->64 * multiply instruction. So we need to handle them a little differently. * We use a bit of shifting and a single 32*32->32 multiply to get close. - * This is a macro so that the const version can factor out the first - * multiply and shift. */ #define HZSCALE (268435456 / (1000000 / HZ)) @@ -115,6 +113,13 @@ static inline void __udelay(unsigned long usecs) */ #define HZSCALE (268435456 / (1000000 / HZ)) -#define ndelay(n) __delay(DIV_ROUND_UP((n) * ((((HZSCALE) >> 11) * (loops_per_jiffy >> 11)) >> 6), 1000)) +static inline void ndelay(unsigned long nsec) +{ + __delay(DIV_ROUND_UP(nsec * + ((((HZSCALE) >> 11) * + (loops_per_jiffy >> 11)) >> 6), + 1000)); +} +#define ndelay(n) ndelay(n) #endif /* defined(_M68K_DELAY_H) */ diff --git a/arch/m68k/include/asm/io.h b/arch/m68k/include/asm/io.h index 756089cc019c..ca2849afb087 100644 --- a/arch/m68k/include/asm/io.h +++ b/arch/m68k/include/asm/io.h @@ -1,14 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifdef __uClinux__ +#if defined(__uClinux__) || defined(CONFIG_COLDFIRE) #include <asm/io_no.h> #else #include <asm/io_mm.h> #endif - -#define readb_relaxed(addr) readb(addr) -#define readw_relaxed(addr) readw(addr) -#define readl_relaxed(addr) readl(addr) - -#define writeb_relaxed(b, addr) writeb(b, addr) -#define writew_relaxed(b, addr) writew(b, addr) -#define writel_relaxed(b, addr) writel(b, addr) diff --git a/arch/m68k/include/asm/io_mm.h b/arch/m68k/include/asm/io_mm.h index ed5333e87879..fe485f4f5fac 100644 --- a/arch/m68k/include/asm/io_mm.h +++ b/arch/m68k/include/asm/io_mm.h @@ -26,6 +26,7 @@ #include <linux/compiler.h> #include <asm/raw_io.h> #include <asm/virtconvert.h> +#include <asm/kmap.h> #include <asm-generic/iomap.h> @@ -85,53 +86,7 @@ #endif /* ATARI_ROM_ISA */ -#if defined(CONFIG_PCI) && defined(CONFIG_COLDFIRE) - -#define HAVE_ARCH_PIO_SIZE -#define PIO_OFFSET 0 -#define PIO_MASK 0xffff -#define PIO_RESERVED 0x10000 - -u8 mcf_pci_inb(u32 addr); -u16 mcf_pci_inw(u32 addr); -u32 mcf_pci_inl(u32 addr); -void mcf_pci_insb(u32 addr, u8 *buf, u32 len); -void mcf_pci_insw(u32 addr, u16 *buf, u32 len); -void mcf_pci_insl(u32 addr, u32 *buf, u32 len); - -void mcf_pci_outb(u8 v, u32 addr); -void mcf_pci_outw(u16 v, u32 addr); -void mcf_pci_outl(u32 v, u32 addr); -void mcf_pci_outsb(u32 addr, const u8 *buf, u32 len); -void mcf_pci_outsw(u32 addr, const u16 *buf, u32 len); -void mcf_pci_outsl(u32 addr, const u32 *buf, u32 len); - -#define inb mcf_pci_inb -#define inb_p mcf_pci_inb -#define inw mcf_pci_inw -#define inw_p mcf_pci_inw -#define inl mcf_pci_inl -#define inl_p mcf_pci_inl -#define insb mcf_pci_insb -#define insw mcf_pci_insw -#define insl mcf_pci_insl - -#define outb mcf_pci_outb -#define outb_p mcf_pci_outb -#define outw mcf_pci_outw -#define outw_p mcf_pci_outw -#define outl mcf_pci_outl -#define outl_p mcf_pci_outl -#define outsb mcf_pci_outsb -#define outsw mcf_pci_outsw -#define outsl mcf_pci_outsl - -#define readb(addr) in_8(addr) -#define writeb(v, addr) out_8((addr), (v)) -#define readw(addr) in_le16(addr) -#define writew(v, addr) out_le16((addr), (v)) - -#elif defined(CONFIG_ISA) || defined(CONFIG_ATARI_ROM_ISA) +#if defined(CONFIG_ISA) || defined(CONFIG_ATARI_ROM_ISA) #if MULTI_ISA == 0 #undef MULTI_ISA @@ -414,8 +369,7 @@ static inline void isa_delay(void) #define writew(val, addr) out_le16((addr), (val)) #endif /* CONFIG_ATARI_ROM_ISA */ -#if !defined(CONFIG_ISA) && !defined(CONFIG_ATARI_ROM_ISA) && \ - !(defined(CONFIG_PCI) && defined(CONFIG_COLDFIRE)) +#if !defined(CONFIG_ISA) && !defined(CONFIG_ATARI_ROM_ISA) /* * We need to define dummy functions for GENERIC_IOMAP support. */ @@ -461,39 +415,6 @@ static inline void isa_delay(void) #define mmiowb() -static inline void __iomem *ioremap(unsigned long physaddr, unsigned long size) -{ - return __ioremap(physaddr, size, IOMAP_NOCACHE_SER); -} -static inline void __iomem *ioremap_nocache(unsigned long physaddr, unsigned long size) -{ - return __ioremap(physaddr, size, IOMAP_NOCACHE_SER); -} -#define ioremap_uc ioremap_nocache -static inline void __iomem *ioremap_wt(unsigned long physaddr, - unsigned long size) -{ - return __ioremap(physaddr, size, IOMAP_WRITETHROUGH); -} -static inline void __iomem *ioremap_fullcache(unsigned long physaddr, - unsigned long size) -{ - return __ioremap(physaddr, size, IOMAP_FULL_CACHING); -} - -static inline void memset_io(volatile void __iomem *addr, unsigned char val, int count) -{ - __builtin_memset((void __force *) addr, val, count); -} -static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, int count) -{ - __builtin_memcpy(dst, (void __force *) src, count); -} -static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int count) -{ - __builtin_memcpy((void __force *) dst, src, count); -} - #ifndef CONFIG_SUN3 #define IO_SPACE_LIMIT 0xffff #else @@ -515,13 +436,12 @@ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, int */ #define xlate_dev_kmem_ptr(p) p -static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) -{ - return (void __iomem *) port; -} +#define readb_relaxed(addr) readb(addr) +#define readw_relaxed(addr) readw(addr) +#define readl_relaxed(addr) readl(addr) -static inline void ioport_unmap(void __iomem *p) -{ -} +#define writeb_relaxed(b, addr) writeb(b, addr) +#define writew_relaxed(b, addr) writew(b, addr) +#define writel_relaxed(b, addr) writel(b, addr) #endif /* _IO_H */ diff --git a/arch/m68k/include/asm/io_no.h b/arch/m68k/include/asm/io_no.h index 86f45b403bcc..83a0a6d449f4 100644 --- a/arch/m68k/include/asm/io_no.h +++ b/arch/m68k/include/asm/io_no.h @@ -2,191 +2,148 @@ #ifndef _M68KNOMMU_IO_H #define _M68KNOMMU_IO_H -#ifdef __KERNEL__ - -#define ARCH_HAS_IOREMAP_WT - -#include <asm/virtconvert.h> -#include <asm-generic/iomap.h> - /* - * These are for ISA/PCI shared memory _only_ and should never be used - * on any other type of memory, including Zorro memory. They are meant to - * access the bus in the bus byte order which is little-endian!. - * - * readX/writeX() are used to access memory mapped devices. On some - * architectures the memory mapped IO stuff needs to be accessed - * differently. On the m68k architecture, we just read/write the - * memory location directly. - */ -/* ++roman: The assignments to temp. vars avoid that gcc sometimes generates - * two accesses to memory, which may be undesirable for some devices. + * Convert a physical memory address into a IO memory address. + * For us this is trivially a type cast. */ +#define iomem(a) ((void __iomem *) (a)) /* - * swap functions are sometimes needed to interface little-endian hardware + * The non-MMU m68k and ColdFire IO and memory mapped hardware access + * functions have always worked in CPU native endian. We need to define + * that behavior here first before we include asm-generic/io.h. */ -static inline unsigned short _swapw(volatile unsigned short v) -{ - return ((v << 8) | (v >> 8)); -} - -static inline unsigned int _swapl(volatile unsigned long v) -{ - return ((v << 24) | ((v & 0xff00) << 8) | ((v & 0xff0000) >> 8) | (v >> 24)); -} - -#define readb(addr) \ +#define __raw_readb(addr) \ ({ unsigned char __v = (*(volatile unsigned char *) (addr)); __v; }) -#define readw(addr) \ +#define __raw_readw(addr) \ ({ unsigned short __v = (*(volatile unsigned short *) (addr)); __v; }) -#define readl(addr) \ +#define __raw_readl(addr) \ ({ unsigned int __v = (*(volatile unsigned int *) (addr)); __v; }) -#define writeb(b,addr) (void)((*(volatile unsigned char *) (addr)) = (b)) -#define writew(b,addr) (void)((*(volatile unsigned short *) (addr)) = (b)) -#define writel(b,addr) (void)((*(volatile unsigned int *) (addr)) = (b)) +#define __raw_writeb(b, addr) (void)((*(volatile unsigned char *) (addr)) = (b)) +#define __raw_writew(b, addr) (void)((*(volatile unsigned short *) (addr)) = (b)) +#define __raw_writel(b, addr) (void)((*(volatile unsigned int *) (addr)) = (b)) -#define __raw_readb readb -#define __raw_readw readw -#define __raw_readl readl -#define __raw_writeb writeb -#define __raw_writew writew -#define __raw_writel writel +#if defined(CONFIG_COLDFIRE) +/* + * For ColdFire platforms we may need to do some extra checks for what + * type of address range we are accessing. Include the ColdFire platform + * definitions so we can figure out if need to do something special. + */ +#include <asm/byteorder.h> +#include <asm/coldfire.h> +#include <asm/mcfsim.h> +#endif /* CONFIG_COLDFIRE */ -static inline void io_outsb(unsigned int addr, const void *buf, int len) +#if defined(IOMEMBASE) +/* + * The ColdFire SoC internal peripherals are mapped into virtual address + * space using the ACR registers of the cache control unit. This means we + * are using a 1:1 physical:virtual mapping for them. We can quickly + * determine if we are accessing an internal peripheral device given the + * physical or vitrual address using the same range check. This check logic + * applies just the same of there is no MMU but something like a PCI bus + * is present. + */ +static int __cf_internalio(unsigned long addr) { - volatile unsigned char *ap = (volatile unsigned char *) addr; - unsigned char *bp = (unsigned char *) buf; - while (len--) - *ap = *bp++; + return (addr >= IOMEMBASE) && (addr <= IOMEMBASE + IOMEMSIZE - 1); } -static inline void io_outsw(unsigned int addr, const void *buf, int len) +static int cf_internalio(const volatile void __iomem *addr) { - volatile unsigned short *ap = (volatile unsigned short *) addr; - unsigned short *bp = (unsigned short *) buf; - while (len--) - *ap = _swapw(*bp++); + return __cf_internalio((unsigned long) addr); } -static inline void io_outsl(unsigned int addr, const void *buf, int len) +/* + * We need to treat built-in peripherals and bus based address ranges + * differently. Local built-in peripherals (and the ColdFire SoC parts + * have quite a lot of them) are always native endian - which is big + * endian on m68k/ColdFire. Bus based address ranges, like the PCI bus, + * are accessed little endian - so we need to byte swap those. + */ +#define readw readw +static inline u16 readw(const volatile void __iomem *addr) { - volatile unsigned int *ap = (volatile unsigned int *) addr; - unsigned int *bp = (unsigned int *) buf; - while (len--) - *ap = _swapl(*bp++); + if (cf_internalio(addr)) + return __raw_readw(addr); + return __le16_to_cpu(__raw_readw(addr)); } -static inline void io_insb(unsigned int addr, void *buf, int len) +#define readl readl +static inline u32 readl(const volatile void __iomem *addr) { - volatile unsigned char *ap = (volatile unsigned char *) addr; - unsigned char *bp = (unsigned char *) buf; - while (len--) - *bp++ = *ap; + if (cf_internalio(addr)) + return __raw_readl(addr); + return __le32_to_cpu(__raw_readl(addr)); } -static inline void io_insw(unsigned int addr, void *buf, int len) +#define writew writew +static inline void writew(u16 value, volatile void __iomem *addr) { - volatile unsigned short *ap = (volatile unsigned short *) addr; - unsigned short *bp = (unsigned short *) buf; - while (len--) - *bp++ = _swapw(*ap); + if (cf_internalio(addr)) + __raw_writew(value, addr); + else + __raw_writew(__cpu_to_le16(value), addr); } -static inline void io_insl(unsigned int addr, void *buf, int len) +#define writel writel +static inline void writel(u32 value, volatile void __iomem *addr) { - volatile unsigned int *ap = (volatile unsigned int *) addr; - unsigned int *bp = (unsigned int *) buf; - while (len--) - *bp++ = _swapl(*ap); + if (cf_internalio(addr)) + __raw_writel(value, addr); + else + __raw_writel(__cpu_to_le32(value), addr); } -#define mmiowb() - -/* - * make the short names macros so specific devices - * can override them as required - */ - -#define memset_io(a,b,c) memset((void *)(a),(b),(c)) -#define memcpy_fromio(a,b,c) memcpy((a),(void *)(b),(c)) -#define memcpy_toio(a,b,c) memcpy((void *)(a),(b),(c)) - -#define inb(addr) readb(addr) -#define inw(addr) readw(addr) -#define inl(addr) readl(addr) -#define outb(x,addr) ((void) writeb(x,addr)) -#define outw(x,addr) ((void) writew(x,addr)) -#define outl(x,addr) ((void) writel(x,addr)) - -#define inb_p(addr) inb(addr) -#define inw_p(addr) inw(addr) -#define inl_p(addr) inl(addr) -#define outb_p(x,addr) outb(x,addr) -#define outw_p(x,addr) outw(x,addr) -#define outl_p(x,addr) outl(x,addr) +#else -#define outsb(a,b,l) io_outsb(a,b,l) -#define outsw(a,b,l) io_outsw(a,b,l) -#define outsl(a,b,l) io_outsl(a,b,l) +#define readb __raw_readb +#define readw __raw_readw +#define readl __raw_readl +#define writeb __raw_writeb +#define writew __raw_writew +#define writel __raw_writel -#define insb(a,b,l) io_insb(a,b,l) -#define insw(a,b,l) io_insw(a,b,l) -#define insl(a,b,l) io_insl(a,b,l) - -#define IO_SPACE_LIMIT 0xffffffff - - -/* Values for nocacheflag and cmode */ -#define IOMAP_FULL_CACHING 0 -#define IOMAP_NOCACHE_SER 1 -#define IOMAP_NOCACHE_NONSER 2 -#define IOMAP_WRITETHROUGH 3 - -static inline void *__ioremap(unsigned long physaddr, unsigned long size, int cacheflag) -{ - return (void *) physaddr; -} -static inline void *ioremap(unsigned long physaddr, unsigned long size) -{ - return __ioremap(physaddr, size, IOMAP_NOCACHE_SER); -} -static inline void *ioremap_nocache(unsigned long physaddr, unsigned long size) -{ - return __ioremap(physaddr, size, IOMAP_NOCACHE_SER); -} -static inline void *ioremap_wt(unsigned long physaddr, unsigned long size) -{ - return __ioremap(physaddr, size, IOMAP_WRITETHROUGH); -} -static inline void *ioremap_fullcache(unsigned long physaddr, unsigned long size) -{ - return __ioremap(physaddr, size, IOMAP_FULL_CACHING); -} - -#define iounmap(addr) do { } while(0) +#endif /* IOMEMBASE */ +#if defined(CONFIG_PCI) /* - * Convert a physical pointer to a virtual kernel pointer for /dev/mem - * access + * Support for PCI bus access uses the asm-generic access functions. + * We need to supply the base address and masks for the normal memory + * and IO address space mappings. */ -#define xlate_dev_mem_ptr(p) __va(p) +#define PCI_MEM_PA 0xf0000000 /* Host physical address */ +#define PCI_MEM_BA 0xf0000000 /* Bus physical address */ +#define PCI_MEM_SIZE 0x08000000 /* 128 MB */ +#define PCI_MEM_MASK (PCI_MEM_SIZE - 1) + +#define PCI_IO_PA 0xf8000000 /* Host physical address */ +#define PCI_IO_BA 0x00000000 /* Bus physical address */ +#define PCI_IO_SIZE 0x00010000 /* 64k */ +#define PCI_IO_MASK (PCI_IO_SIZE - 1) + +#define HAVE_ARCH_PIO_SIZE +#define PIO_OFFSET 0 +#define PIO_MASK 0xffff +#define PIO_RESERVED 0x10000 +#define PCI_IOBASE ((void __iomem *) PCI_IO_PA) +#define PCI_SPACE_LIMIT PCI_IO_MASK +#endif /* CONFIG_PCI */ /* - * Convert a virtual cached pointer to an uncached pointer + * These are defined in kmap.h as static inline functions. To maintain + * previous behavior we put these define guards here so io_mm.h doesn't + * see them. */ -#define xlate_dev_kmem_ptr(p) p +#ifdef CONFIG_MMU +#define memset_io memset_io +#define memcpy_fromio memcpy_fromio +#define memcpy_toio memcpy_toio +#endif -static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) -{ - return (void __iomem *) port; -} - -static inline void ioport_unmap(void __iomem *p) -{ -} - -#endif /* __KERNEL__ */ +#include <asm/kmap.h> +#include <asm/virtconvert.h> +#include <asm-generic/io.h> #endif /* _M68KNOMMU_IO_H */ diff --git a/arch/m68k/include/asm/kmap.h b/arch/m68k/include/asm/kmap.h new file mode 100644 index 000000000000..84b8333db8ad --- /dev/null +++ b/arch/m68k/include/asm/kmap.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _KMAP_H +#define _KMAP_H + +#ifdef CONFIG_MMU + +/* Values for nocacheflag and cmode */ +#define IOMAP_FULL_CACHING 0 +#define IOMAP_NOCACHE_SER 1 +#define IOMAP_NOCACHE_NONSER 2 +#define IOMAP_WRITETHROUGH 3 + +/* + * These functions exported by arch/m68k/mm/kmap.c. + * Only needed on MMU enabled systems. + */ +extern void __iomem *__ioremap(unsigned long physaddr, unsigned long size, + int cacheflag); +extern void iounmap(void __iomem *addr); +extern void __iounmap(void *addr, unsigned long size); + +#define ioremap ioremap +static inline void __iomem *ioremap(unsigned long physaddr, unsigned long size) +{ + return __ioremap(physaddr, size, IOMAP_NOCACHE_SER); +} + +#define ioremap_nocache ioremap_nocache +static inline void __iomem *ioremap_nocache(unsigned long physaddr, + unsigned long size) +{ + return __ioremap(physaddr, size, IOMAP_NOCACHE_SER); +} + +#define ioremap_uc ioremap_nocache +static inline void __iomem *ioremap_wt(unsigned long physaddr, + unsigned long size) +{ + return __ioremap(physaddr, size, IOMAP_WRITETHROUGH); +} + +#define ioremap_fillcache ioremap_fullcache +static inline void __iomem *ioremap_fullcache(unsigned long physaddr, + unsigned long size) +{ + return __ioremap(physaddr, size, IOMAP_FULL_CACHING); +} + +static inline void memset_io(volatile void __iomem *addr, unsigned char val, + int count) +{ + __builtin_memset((void __force *) addr, val, count); +} + +static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, + int count) +{ + __builtin_memcpy(dst, (void __force *) src, count); +} + +static inline void memcpy_toio(volatile void __iomem *dst, const void *src, + int count) +{ + __builtin_memcpy((void __force *) dst, src, count); +} + +#endif /* CONFIG_MMU */ + +#define ioport_map ioport_map +static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) +{ + return (void __iomem *) port; +} + +#define ioport_unmap ioport_unmap +static inline void ioport_unmap(void __iomem *p) +{ +} + +#endif /* _KMAP_H */ diff --git a/arch/m68k/include/asm/nubus.h b/arch/m68k/include/asm/nubus.h index d0d2039e434e..c2281da6c51a 100644 --- a/arch/m68k/include/asm/nubus.h +++ b/arch/m68k/include/asm/nubus.h @@ -3,6 +3,7 @@ #define _ASM_M68K_NUBUS_H #include <asm/raw_io.h> +#include <asm/kmap.h> #define nubus_readb raw_inb #define nubus_readw raw_inw diff --git a/arch/m68k/include/asm/pci.h b/arch/m68k/include/asm/pci.h index ef26fae8cf0b..5a4bc223743b 100644 --- a/arch/m68k/include/asm/pci.h +++ b/arch/m68k/include/asm/pci.h @@ -4,12 +4,6 @@ #include <asm-generic/pci.h> -/* The PCI address space does equal the physical memory - * address space. The networking and block device layers use - * this boolean for bounce buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (1) - #define pcibios_assign_all_busses() 1 #define PCIBIOS_MIN_IO 0x00000100 diff --git a/arch/m68k/include/asm/q40_master.h b/arch/m68k/include/asm/q40_master.h index 3a89c088800c..9b00fb8079e6 100644 --- a/arch/m68k/include/asm/q40_master.h +++ b/arch/m68k/include/asm/q40_master.h @@ -8,7 +8,7 @@ #define _Q40_MASTER_H #include <asm/raw_io.h> - +#include <asm/kmap.h> #define q40_master_addr 0xff000000 diff --git a/arch/m68k/include/asm/raw_io.h b/arch/m68k/include/asm/raw_io.h index 05e940c29b54..85761255dde5 100644 --- a/arch/m68k/include/asm/raw_io.h +++ b/arch/m68k/include/asm/raw_io.h @@ -13,20 +13,6 @@ #include <asm/byteorder.h> - -/* Values for nocacheflag and cmode */ -#define IOMAP_FULL_CACHING 0 -#define IOMAP_NOCACHE_SER 1 -#define IOMAP_NOCACHE_NONSER 2 -#define IOMAP_WRITETHROUGH 3 - -extern void iounmap(void __iomem *addr); - -extern void __iomem *__ioremap(unsigned long physaddr, unsigned long size, - int cacheflag); -extern void __iounmap(void *addr, unsigned long size); - - /* ++roman: The assignments to temp. vars avoid that gcc sometimes generates * two accesses to memory, which may be undesirable for some devices. */ diff --git a/arch/m68k/include/asm/uaccess_mm.h b/arch/m68k/include/asm/uaccess_mm.h index 75c172e909ac..c4cb889660aa 100644 --- a/arch/m68k/include/asm/uaccess_mm.h +++ b/arch/m68k/include/asm/uaccess_mm.h @@ -141,10 +141,12 @@ asm volatile ("\n" \ case 4: \ __get_user_asm(__gu_err, x, ptr, u32, l, r, -EFAULT); \ break; \ -/* case 8: disabled because gcc-4.1 has a broken typeof \ - { \ - const void *__gu_ptr = (ptr); \ - u64 __gu_val; \ + case 8: { \ + const void *__gu_ptr = (ptr); \ + union { \ + u64 l; \ + __typeof__(*(ptr)) t; \ + } __gu_val; \ asm volatile ("\n" \ "1: "MOVES".l (%2)+,%1\n" \ "2: "MOVES".l (%2),%R1\n" \ @@ -162,13 +164,13 @@ asm volatile ("\n" \ " .long 1b,10b\n" \ " .long 2b,10b\n" \ " .previous" \ - : "+d" (__gu_err), "=&r" (__gu_val), \ + : "+d" (__gu_err), "=&r" (__gu_val.l), \ "+a" (__gu_ptr) \ : "i" (-EFAULT) \ : "memory"); \ - (x) = (__force typeof(*(ptr)))__gu_val; \ + (x) = __gu_val.t; \ break; \ - } */ \ + } \ default: \ __gu_err = __get_user_bad(); \ break; \ diff --git a/arch/m68k/include/asm/vga.h b/arch/m68k/include/asm/vga.h index 010a624d1b39..4742e6bc3ab8 100644 --- a/arch/m68k/include/asm/vga.h +++ b/arch/m68k/include/asm/vga.h @@ -2,7 +2,15 @@ #ifndef _ASM_M68K_VGA_H #define _ASM_M68K_VGA_H +/* + * Some ColdFire platforms do in fact have a PCI bus. So for those we want + * to use the real IO access functions, don't fake them out or redirect them + * for that case. + */ +#ifndef CONFIG_PCI + #include <asm/raw_io.h> +#include <asm/kmap.h> /* * FIXME @@ -25,4 +33,5 @@ #define writeb raw_outb #define writew raw_outw +#endif /* CONFIG_PCI */ #endif /* _ASM_M68K_VGA_H */ diff --git a/arch/m68k/include/asm/virtconvert.h b/arch/m68k/include/asm/virtconvert.h index 4aea6be7b220..dfe43083b579 100644 --- a/arch/m68k/include/asm/virtconvert.h +++ b/arch/m68k/include/asm/virtconvert.h @@ -16,11 +16,13 @@ /* * Change virtual addresses to physical addresses and vv. */ +#define virt_to_phys virt_to_phys static inline unsigned long virt_to_phys(void *address) { return __pa(address); } +#define phys_to_virt phys_to_virt static inline void *phys_to_virt(unsigned long address) { return __va(address); diff --git a/arch/m68k/include/asm/zorro.h b/arch/m68k/include/asm/zorro.h index 96f64bf7bcaa..60fc4b6f294d 100644 --- a/arch/m68k/include/asm/zorro.h +++ b/arch/m68k/include/asm/zorro.h @@ -3,6 +3,7 @@ #define _ASM_M68K_ZORRO_H #include <asm/raw_io.h> +#include <asm/kmap.h> #define z_readb raw_inb #define z_readw raw_inw diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c index c01b9b8f97bf..463572c4943f 100644 --- a/arch/m68k/kernel/dma.c +++ b/arch/m68k/kernel/dma.c @@ -9,6 +9,7 @@ #include <linux/dma-mapping.h> #include <linux/device.h> #include <linux/kernel.h> +#include <linux/platform_device.h> #include <linux/scatterlist.h> #include <linux/slab.h> #include <linux/vmalloc.h> @@ -165,3 +166,12 @@ const struct dma_map_ops m68k_dma_ops = { .sync_sg_for_device = m68k_dma_sync_sg_for_device, }; EXPORT_SYMBOL(m68k_dma_ops); + +void arch_setup_pdev_archdata(struct platform_device *pdev) +{ + if (pdev->dev.coherent_dma_mask == DMA_MASK_NONE && + pdev->dev.dma_mask == NULL) { + pdev->dev.coherent_dma_mask = DMA_BIT_MASK(32); + pdev->dev.dma_mask = &pdev->dev.coherent_dma_mask; + } +} diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index dd25bfc22fb4..f35e3ebd6331 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -527,21 +527,9 @@ static int hardware_proc_show(struct seq_file *m, void *v) return 0; } -static int hardware_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, hardware_proc_show, NULL); -} - -static const struct file_operations hardware_proc_fops = { - .open = hardware_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init proc_hardware_init(void) { - proc_create("hardware", 0, NULL, &hardware_proc_fops); + proc_create_single("hardware", 0, NULL, hardware_proc_show); return 0; } module_init(proc_hardware_init); diff --git a/arch/m68k/kernel/signal.c b/arch/m68k/kernel/signal.c index f7cd5ecfacd3..72850b85ecf8 100644 --- a/arch/m68k/kernel/signal.c +++ b/arch/m68k/kernel/signal.c @@ -576,41 +576,42 @@ static inline int rt_save_fpu_state(struct ucontext __user *uc, struct pt_regs * static inline void siginfo_build_tests(void) { - /* This needs to be tested on m68k as it has a lesser - * alignment requirment than x86 and that can cause surprises. + /* + * This needs to be tested on m68k as it has a lesser + * alignment requirement than x86 and that can cause surprises. */ /* This is part of the ABI and can never change in size: */ BUILD_BUG_ON(sizeof(siginfo_t) != 128); - /* Ensure the know fields never change in location */ + /* Ensure the known fields never change in location */ BUILD_BUG_ON(offsetof(siginfo_t, si_signo) != 0); BUILD_BUG_ON(offsetof(siginfo_t, si_errno) != 4); BUILD_BUG_ON(offsetof(siginfo_t, si_code) != 8); /* _kill */ - BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x0C); + BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x0c); BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x10); /* _timer */ - BUILD_BUG_ON(offsetof(siginfo_t, si_tid) != 0x0C); + BUILD_BUG_ON(offsetof(siginfo_t, si_tid) != 0x0c); BUILD_BUG_ON(offsetof(siginfo_t, si_overrun) != 0x10); BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x14); /* _rt */ - BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x0C); + BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x0c); BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x10); BUILD_BUG_ON(offsetof(siginfo_t, si_value) != 0x14); /* _sigchld */ - BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x0C); + BUILD_BUG_ON(offsetof(siginfo_t, si_pid) != 0x0c); BUILD_BUG_ON(offsetof(siginfo_t, si_uid) != 0x10); BUILD_BUG_ON(offsetof(siginfo_t, si_status) != 0x14); BUILD_BUG_ON(offsetof(siginfo_t, si_utime) != 0x18); - BUILD_BUG_ON(offsetof(siginfo_t, si_stime) != 0x1C); + BUILD_BUG_ON(offsetof(siginfo_t, si_stime) != 0x1c); /* _sigfault */ - BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x0C); + BUILD_BUG_ON(offsetof(siginfo_t, si_addr) != 0x0c); /* _sigfault._mcerr */ BUILD_BUG_ON(offsetof(siginfo_t, si_addr_lsb) != 0x10); @@ -623,11 +624,11 @@ static inline void siginfo_build_tests(void) BUILD_BUG_ON(offsetof(siginfo_t, si_pkey) != 0x12); /* _sigpoll */ - BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x0C); + BUILD_BUG_ON(offsetof(siginfo_t, si_band) != 0x0c); BUILD_BUG_ON(offsetof(siginfo_t, si_fd) != 0x10); /* _sigsys */ - BUILD_BUG_ON(offsetof(siginfo_t, si_call_addr) != 0x0C); + BUILD_BUG_ON(offsetof(siginfo_t, si_call_addr) != 0x0c); BUILD_BUG_ON(offsetof(siginfo_t, si_syscall) != 0x10); BUILD_BUG_ON(offsetof(siginfo_t, si_arch) != 0x14); diff --git a/arch/m68k/kernel/time.c b/arch/m68k/kernel/time.c index 97dd4e26f234..3a8b47f8f97b 100644 --- a/arch/m68k/kernel/time.c +++ b/arch/m68k/kernel/time.c @@ -71,23 +71,26 @@ static irqreturn_t timer_interrupt(int irq, void *dummy) return IRQ_HANDLED; } -void read_persistent_clock(struct timespec *ts) +#ifdef CONFIG_M68KCLASSIC +#if !IS_BUILTIN(CONFIG_RTC_DRV_GENERIC) +void read_persistent_clock64(struct timespec64 *ts) { struct rtc_time time; + ts->tv_sec = 0; ts->tv_nsec = 0; - if (mach_hwclk) { - mach_hwclk(0, &time); + if (!mach_hwclk) + return; - if ((time.tm_year += 1900) < 1970) - time.tm_year += 100; - ts->tv_sec = mktime(time.tm_year, time.tm_mon, time.tm_mday, - time.tm_hour, time.tm_min, time.tm_sec); - } + mach_hwclk(0, &time); + + ts->tv_sec = mktime64(time.tm_year + 1900, time.tm_mon + 1, time.tm_mday, + time.tm_hour, time.tm_min, time.tm_sec); } +#endif -#if defined(CONFIG_ARCH_USES_GETTIMEOFFSET) && IS_ENABLED(CONFIG_RTC_DRV_GENERIC) +#if IS_ENABLED(CONFIG_RTC_DRV_GENERIC) static int rtc_generic_get_time(struct device *dev, struct rtc_time *tm) { mach_hwclk(0, tm); @@ -145,8 +148,8 @@ static int __init rtc_init(void) } module_init(rtc_init); - -#endif /* CONFIG_ARCH_USES_GETTIMEOFFSET */ +#endif /* CONFIG_RTC_DRV_GENERIC */ +#endif /* CONFIG M68KCLASSIC */ void __init time_init(void) { diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index c1cc4e99aa94..b2fd000b9285 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -1007,9 +1007,9 @@ void bad_super_trap (struct frame *fp) asmlinkage void trap_c(struct frame *fp) { - int sig; + int sig, si_code; + void __user *addr; int vector = (fp->ptregs.vector >> 2) & 0xff; - siginfo_t info; if (fp->ptregs.sr & PS_S) { if (vector == VEC_TRACE) { @@ -1029,21 +1029,21 @@ asmlinkage void trap_c(struct frame *fp) /* send the appropriate signal to the user program */ switch (vector) { case VEC_ADDRERR: - info.si_code = BUS_ADRALN; + si_code = BUS_ADRALN; sig = SIGBUS; break; case VEC_ILLEGAL: case VEC_LINE10: case VEC_LINE11: - info.si_code = ILL_ILLOPC; + si_code = ILL_ILLOPC; sig = SIGILL; break; case VEC_PRIV: - info.si_code = ILL_PRVOPC; + si_code = ILL_PRVOPC; sig = SIGILL; break; case VEC_COPROC: - info.si_code = ILL_COPROC; + si_code = ILL_COPROC; sig = SIGILL; break; case VEC_TRAP1: @@ -1060,76 +1060,74 @@ asmlinkage void trap_c(struct frame *fp) case VEC_TRAP12: case VEC_TRAP13: case VEC_TRAP14: - info.si_code = ILL_ILLTRP; + si_code = ILL_ILLTRP; sig = SIGILL; break; case VEC_FPBRUC: case VEC_FPOE: case VEC_FPNAN: - info.si_code = FPE_FLTINV; + si_code = FPE_FLTINV; sig = SIGFPE; break; case VEC_FPIR: - info.si_code = FPE_FLTRES; + si_code = FPE_FLTRES; sig = SIGFPE; break; case VEC_FPDIVZ: - info.si_code = FPE_FLTDIV; + si_code = FPE_FLTDIV; sig = SIGFPE; break; case VEC_FPUNDER: - info.si_code = FPE_FLTUND; + si_code = FPE_FLTUND; sig = SIGFPE; break; case VEC_FPOVER: - info.si_code = FPE_FLTOVF; + si_code = FPE_FLTOVF; sig = SIGFPE; break; case VEC_ZERODIV: - info.si_code = FPE_INTDIV; + si_code = FPE_INTDIV; sig = SIGFPE; break; case VEC_CHK: case VEC_TRAP: - info.si_code = FPE_INTOVF; + si_code = FPE_INTOVF; sig = SIGFPE; break; case VEC_TRACE: /* ptrace single step */ - info.si_code = TRAP_TRACE; + si_code = TRAP_TRACE; sig = SIGTRAP; break; case VEC_TRAP15: /* breakpoint */ - info.si_code = TRAP_BRKPT; + si_code = TRAP_BRKPT; sig = SIGTRAP; break; default: - info.si_code = ILL_ILLOPC; + si_code = ILL_ILLOPC; sig = SIGILL; break; } - info.si_signo = sig; - info.si_errno = 0; switch (fp->ptregs.format) { default: - info.si_addr = (void *) fp->ptregs.pc; + addr = (void __user *) fp->ptregs.pc; break; case 2: - info.si_addr = (void *) fp->un.fmt2.iaddr; + addr = (void __user *) fp->un.fmt2.iaddr; break; case 7: - info.si_addr = (void *) fp->un.fmt7.effaddr; + addr = (void __user *) fp->un.fmt7.effaddr; break; case 9: - info.si_addr = (void *) fp->un.fmt9.iaddr; + addr = (void __user *) fp->un.fmt9.iaddr; break; case 10: - info.si_addr = (void *) fp->un.fmta.daddr; + addr = (void __user *) fp->un.fmta.daddr; break; case 11: - info.si_addr = (void *) fp->un.fmtb.daddr; + addr = (void __user*) fp->un.fmtb.daddr; break; } - force_sig_info (sig, &info, current); + force_sig_fault(sig, si_code, addr, current); } void die_if_kernel (char *str, struct pt_regs *fp, int nr) @@ -1161,12 +1159,6 @@ asmlinkage void fpsp040_die(void) #ifdef CONFIG_M68KFPU_EMU asmlinkage void fpemu_signal(int signal, int code, void *addr) { - siginfo_t info; - - info.si_signo = signal; - info.si_errno = 0; - info.si_code = code; - info.si_addr = addr; - force_sig_info(signal, &info, current); + force_sig_fault(signal, code, addr, current); } #endif diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 0c3275aa0197..e522307db47c 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -1005,7 +1005,7 @@ int __init mac_platform_init(void) struct resource swim_rsrc = { .flags = IORESOURCE_MEM, .start = (resource_size_t)swim_base, - .end = (resource_size_t)swim_base + 0x2000, + .end = (resource_size_t)swim_base + 0x1FFF, }; platform_device_register_simple("swim", -1, &swim_rsrc, 1); diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 03253c4f8e6a..f2ff3779875a 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -21,35 +21,32 @@ extern void die_if_kernel(char *, struct pt_regs *, long); int send_fault_sig(struct pt_regs *regs) { - siginfo_t siginfo; + int signo, si_code; + void __user *addr; - clear_siginfo(&siginfo); - siginfo.si_signo = current->thread.signo; - siginfo.si_code = current->thread.code; - siginfo.si_addr = (void *)current->thread.faddr; - pr_debug("send_fault_sig: %p,%d,%d\n", siginfo.si_addr, - siginfo.si_signo, siginfo.si_code); + signo = current->thread.signo; + si_code = current->thread.code; + addr = (void __user *)current->thread.faddr; + pr_debug("send_fault_sig: %p,%d,%d\n", addr, signo, si_code); if (user_mode(regs)) { - force_sig_info(siginfo.si_signo, - &siginfo, current); + force_sig_fault(signo, si_code, addr, current); } else { if (fixup_exception(regs)) return -1; - //if (siginfo.si_signo == SIGBUS) - // force_sig_info(siginfo.si_signo, - // &siginfo, current); + //if (signo == SIGBUS) + // force_sig_fault(si_signo, si_code, addr, current); /* * Oops. The kernel tried to access some bad page. We'll have to * terminate things with extreme prejudice. */ - if ((unsigned long)siginfo.si_addr < PAGE_SIZE) + if ((unsigned long)addr < PAGE_SIZE) pr_alert("Unable to handle kernel NULL pointer dereference"); else pr_alert("Unable to handle kernel access"); - pr_cont(" at virtual address %p\n", siginfo.si_addr); + pr_cont(" at virtual address %p\n", addr); die_if_kernel("Oops", regs, 0 /*error_code*/); do_exit(SIGKILL); } diff --git a/arch/m68k/mm/kmap.c b/arch/m68k/mm/kmap.c index c2a38321c96d..40a3b327da07 100644 --- a/arch/m68k/mm/kmap.c +++ b/arch/m68k/mm/kmap.c @@ -89,7 +89,8 @@ static inline void free_io_area(void *addr) for (p = &iolist ; (tmp = *p) ; p = &tmp->next) { if (tmp->addr == addr) { *p = tmp->next; - __iounmap(tmp->addr, tmp->size); + /* remove gap added in get_io_area() */ + __iounmap(tmp->addr, tmp->size - IO_SIZE); kfree(tmp); return; } @@ -125,6 +126,10 @@ void __iomem *__ioremap(unsigned long physaddr, unsigned long size, int cachefla return (void __iomem *)physaddr; } #endif +#ifdef CONFIG_COLDFIRE + if (__cf_internalio(physaddr)) + return (void __iomem *) physaddr; +#endif #ifdef DEBUG printk("ioremap: 0x%lx,0x%lx(%d) - ", physaddr, size, cacheflag); @@ -235,6 +240,10 @@ void iounmap(void __iomem *addr) ((unsigned long)addr > 0x60000000))) free_io_area((__force void *)addr); #else +#ifdef CONFIG_COLDFIRE + if (cf_internalio(addr)) + return; +#endif free_io_area((__force void *)addr); #endif } diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index 8778612d1f31..f8a710fd84cd 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -153,12 +153,14 @@ int mvme147_hwclk(int op, struct rtc_time *t) if (!op) { m147_rtc->ctrl = RTC_READ; t->tm_year = bcd2int (m147_rtc->bcd_year); - t->tm_mon = bcd2int (m147_rtc->bcd_mth); + t->tm_mon = bcd2int(m147_rtc->bcd_mth) - 1; t->tm_mday = bcd2int (m147_rtc->bcd_dom); t->tm_hour = bcd2int (m147_rtc->bcd_hr); t->tm_min = bcd2int (m147_rtc->bcd_min); t->tm_sec = bcd2int (m147_rtc->bcd_sec); m147_rtc->ctrl = 0; + if (t->tm_year < 70) + t->tm_year += 100; } return 0; } diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index 6fa06d4d16bf..4ffd9ef98de4 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -400,12 +400,14 @@ int mvme16x_hwclk(int op, struct rtc_time *t) if (!op) { rtc->ctrl = RTC_READ; t->tm_year = bcd2int (rtc->bcd_year); - t->tm_mon = bcd2int (rtc->bcd_mth); + t->tm_mon = bcd2int(rtc->bcd_mth) - 1; t->tm_mday = bcd2int (rtc->bcd_dom); t->tm_hour = bcd2int (rtc->bcd_hr); t->tm_min = bcd2int (rtc->bcd_min); t->tm_sec = bcd2int (rtc->bcd_sec); rtc->ctrl = 0; + if (t->tm_year < 70) + t->tm_year += 100; } return 0; } diff --git a/arch/m68k/sun3/intersil.c b/arch/m68k/sun3/intersil.c index 2cd0bcbe6f30..d911070af02a 100644 --- a/arch/m68k/sun3/intersil.c +++ b/arch/m68k/sun3/intersil.c @@ -48,9 +48,9 @@ int sun3_hwclk(int set, struct rtc_time *t) todintersil->hour = t->tm_hour; todintersil->minute = t->tm_min; todintersil->second = t->tm_sec; - todintersil->month = t->tm_mon; + todintersil->month = t->tm_mon + 1; todintersil->day = t->tm_mday; - todintersil->year = t->tm_year - 68; + todintersil->year = (t->tm_year - 68) % 100; todintersil->weekday = t->tm_wday; } else { /* read clock */ @@ -58,10 +58,12 @@ int sun3_hwclk(int set, struct rtc_time *t) t->tm_hour = todintersil->hour; t->tm_min = todintersil->minute; t->tm_sec = todintersil->second; - t->tm_mon = todintersil->month; + t->tm_mon = todintersil->month - 1; t->tm_mday = todintersil->day; t->tm_year = todintersil->year + 68; t->tm_wday = todintersil->weekday; + if (t->tm_year < 70) + t->tm_year += 100; } intersil_clock->cmd_reg = START_VAL; diff --git a/arch/m68k/sun3x/time.c b/arch/m68k/sun3x/time.c index 7a2c53d9f779..047e2bcee3d7 100644 --- a/arch/m68k/sun3x/time.c +++ b/arch/m68k/sun3x/time.c @@ -52,8 +52,8 @@ int sun3x_hwclk(int set, struct rtc_time *t) h->hour = bin2bcd(t->tm_hour); h->wday = bin2bcd(t->tm_wday); h->mday = bin2bcd(t->tm_mday); - h->month = bin2bcd(t->tm_mon); - h->year = bin2bcd(t->tm_year); + h->month = bin2bcd(t->tm_mon + 1); + h->year = bin2bcd(t->tm_year % 100); h->csr &= ~C_WRITE; } else { h->csr |= C_READ; @@ -62,9 +62,11 @@ int sun3x_hwclk(int set, struct rtc_time *t) t->tm_hour = bcd2bin(h->hour); t->tm_wday = bcd2bin(h->wday); t->tm_mday = bcd2bin(h->mday); - t->tm_mon = bcd2bin(h->month); + t->tm_mon = bcd2bin(h->month) - 1; t->tm_year = bcd2bin(h->year); h->csr &= ~C_READ; + if (t->tm_year < 70) + t->tm_year += 100; } local_irq_restore(flags); diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 3817a3e2146c..d14782100088 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -19,7 +19,6 @@ config MICROBLAZE select HAVE_ARCH_HASH select HAVE_ARCH_KGDB select HAVE_DEBUG_KMEMLEAK - select HAVE_DMA_API_DEBUG select HAVE_DYNAMIC_FTRACE select HAVE_FTRACE_MCOUNT_RECORD select HAVE_FUNCTION_GRAPH_TRACER diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 3c80a5a308ed..fe6a6c6e5003 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -2,6 +2,7 @@ generic-y += barrier.h generic-y += bitops.h generic-y += bug.h generic-y += bugs.h +generic-y += compat.h generic-y += device.h generic-y += div64.h generic-y += emergency-restart.h diff --git a/arch/microblaze/include/asm/pci.h b/arch/microblaze/include/asm/pci.h index 5de871eb4a59..00337861472e 100644 --- a/arch/microblaze/include/asm/pci.h +++ b/arch/microblaze/include/asm/pci.h @@ -62,12 +62,6 @@ extern int pci_mmap_legacy_page_range(struct pci_bus *bus, #define HAVE_PCI_LEGACY 1 -/* The PCI address space does equal the physical memory - * address space (no IOMMU). The IDE and SCSI device layers use - * this boolean for bounce buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (1) - extern void pcibios_claim_one_bus(struct pci_bus *b); extern void pcibios_finish_adding_to_bus(struct pci_bus *bus); diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c index c91e8cef98dd..3145e7dc8ab1 100644 --- a/arch/microblaze/kernel/dma.c +++ b/arch/microblaze/kernel/dma.c @@ -184,14 +184,3 @@ const struct dma_map_ops dma_nommu_ops = { .sync_sg_for_device = dma_nommu_sync_sg_for_device, }; EXPORT_SYMBOL(dma_nommu_ops); - -/* Number of entries preallocated for DMA-API debugging */ -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) - -static int __init dma_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - - return 0; -} -fs_initcall(dma_init); diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c index e6f338d0496b..eafff21fcb0e 100644 --- a/arch/microblaze/kernel/exceptions.c +++ b/arch/microblaze/kernel/exceptions.c @@ -60,16 +60,10 @@ asmlinkage void sw_exception(struct pt_regs *regs) void _exception(int signr, struct pt_regs *regs, int code, unsigned long addr) { - siginfo_t info; - if (kernel_mode(regs)) die("Exception in kernel mode", regs, signr); - info.si_signo = signr; - info.si_errno = 0; - info.si_code = code; - info.si_addr = (void __user *) addr; - force_sig_info(signr, &info, current); + force_sig_fault(signr, code, (void __user *)addr, current); } asmlinkage void full_exception(struct pt_regs *regs, unsigned int type, diff --git a/arch/microblaze/mm/fault.c b/arch/microblaze/mm/fault.c index f91b30f8aaa8..af607447c683 100644 --- a/arch/microblaze/mm/fault.c +++ b/arch/microblaze/mm/fault.c @@ -88,7 +88,6 @@ void do_page_fault(struct pt_regs *regs, unsigned long address, { struct vm_area_struct *vma; struct mm_struct *mm = current->mm; - siginfo_t info; int code = SEGV_MAPERR; int is_write = error_code & ESR_S; int fault; @@ -269,11 +268,6 @@ bad_area_nosemaphore: /* User mode accesses cause a SIGSEGV */ if (user_mode(regs)) { _exception(SIGSEGV, regs, code, address); -/* info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = code; - info.si_addr = (void *) address; - force_sig_info(SIGSEGV, &info, current);*/ return; } @@ -295,11 +289,7 @@ out_of_memory: do_sigbus: up_read(&mm->mmap_sem); if (user_mode(regs)) { - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current); return; } bad_page_fault(regs, address, SIGBUS); diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 225c95da23ce..7074b2215f36 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -42,7 +42,6 @@ config MIPS select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW - select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_EXIT_THREAD @@ -132,7 +131,7 @@ config MIPS_GENERIC config MIPS_ALCHEMY bool "Alchemy processor based machines" - select ARCH_PHYS_ADDR_T_64BIT + select PHYS_ADDR_T_64BIT select CEVT_R4K select CSRC_R4K select IRQ_MIPS_CPU @@ -890,7 +889,7 @@ config CAVIUM_OCTEON_SOC bool "Cavium Networks Octeon SoC based boards" select CEVT_R4K select ARCH_HAS_PHYS_TO_DMA - select ARCH_PHYS_ADDR_T_64BIT + select PHYS_ADDR_T_64BIT select DMA_COHERENT select SYS_SUPPORTS_64BIT_KERNEL select SYS_SUPPORTS_BIG_ENDIAN @@ -912,6 +911,7 @@ config CAVIUM_OCTEON_SOC select MIPS_NR_CPU_NR_MAP_1024 select BUILTIN_DTB select MTD_COMPLEX_MAPPINGS + select SWIOTLB select SYS_SUPPORTS_RELOCATABLE help This option supports all of the Octeon reference boards from Cavium @@ -936,7 +936,7 @@ config NLM_XLR_BOARD select SWAP_IO_SPACE select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL - select ARCH_PHYS_ADDR_T_64BIT + select PHYS_ADDR_T_64BIT select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_HIGHMEM select DMA_COHERENT @@ -962,7 +962,7 @@ config NLM_XLP_BOARD select HW_HAS_PCI select SYS_SUPPORTS_32BIT_KERNEL select SYS_SUPPORTS_64BIT_KERNEL - select ARCH_PHYS_ADDR_T_64BIT + select PHYS_ADDR_T_64BIT select GPIOLIB select SYS_SUPPORTS_BIG_ENDIAN select SYS_SUPPORTS_LITTLE_ENDIAN @@ -1101,9 +1101,6 @@ config GPIO_TXX9 config FW_CFE bool -config ARCH_DMA_ADDR_T_64BIT - def_bool (HIGHMEM && ARCH_PHYS_ADDR_T_64BIT) || 64BIT - config ARCH_SUPPORTS_UPROBES bool @@ -1122,9 +1119,6 @@ config DMA_NONCOHERENT bool select NEED_DMA_MAP_STATE -config NEED_DMA_MAP_STATE - bool - config SYS_HAS_EARLY_PRINTK bool @@ -1373,6 +1367,7 @@ config CPU_LOONGSON3 select MIPS_PGD_C0_CONTEXT select MIPS_L1_CACHE_SHIFT_6 select GPIOLIB + select SWIOTLB help The Loongson 3 processor implements the MIPS64R2 instruction set with many extensions. @@ -1770,7 +1765,7 @@ config CPU_MIPS32_R5_XPA depends on SYS_SUPPORTS_HIGHMEM select XPA select HIGHMEM - select ARCH_PHYS_ADDR_T_64BIT + select PHYS_ADDR_T_64BIT default n help Choose this option if you want to enable the Extended Physical @@ -2402,9 +2397,6 @@ config SB1_PASS_2_1_WORKAROUNDS default y -config ARCH_PHYS_ADDR_T_64BIT - bool - choice prompt "SmartMIPS or microMIPS ASE support" @@ -2556,7 +2548,7 @@ config ARCH_DISCONTIGMEM_ENABLE Say Y to support efficient handling of discontiguous physical memory, for architectures which are either NUMA (Non-Uniform Memory Access) or have huge holes in the physical address space for other reasons. - See <file:Documentation/vm/numa> for more. + See <file:Documentation/vm/numa.rst> for more. config ARCH_SPARSEMEM_ENABLE bool diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c index b3043c08f769..aee8d7b8f091 100644 --- a/arch/mips/boot/compressed/uart-16550.c +++ b/arch/mips/boot/compressed/uart-16550.c @@ -18,9 +18,9 @@ #define PORT(offset) (CKSEG1ADDR(AR7_REGS_UART0) + (4 * offset)) #endif -#if defined(CONFIG_MACH_JZ4740) || defined(CONFIG_MACH_JZ4780) -#include <asm/mach-jz4740/base.h> -#define PORT(offset) (CKSEG1ADDR(JZ4740_UART0_BASE_ADDR) + (4 * offset)) +#ifdef CONFIG_MACH_INGENIC +#define INGENIC_UART0_BASE_ADDR 0x10030000 +#define PORT(offset) (CKSEG1ADDR(INGENIC_UART0_BASE_ADDR) + (4 * offset)) #endif #ifdef CONFIG_CPU_XLR diff --git a/arch/mips/boot/dts/xilfpga/Makefile b/arch/mips/boot/dts/xilfpga/Makefile index 9987e0e378c5..69ca00590b8d 100644 --- a/arch/mips/boot/dts/xilfpga/Makefile +++ b/arch/mips/boot/dts/xilfpga/Makefile @@ -1,4 +1,2 @@ # SPDX-License-Identifier: GPL-2.0 dtb-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += nexys4ddr.dtb - -obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) diff --git a/arch/mips/cavium-octeon/Kconfig b/arch/mips/cavium-octeon/Kconfig index b5eee1a57d6c..4984e462be30 100644 --- a/arch/mips/cavium-octeon/Kconfig +++ b/arch/mips/cavium-octeon/Kconfig @@ -67,18 +67,6 @@ config CAVIUM_OCTEON_LOCK_L2_MEMCPY help Lock the kernel's implementation of memcpy() into L2. -config IOMMU_HELPER - bool - -config NEED_SG_DMA_LENGTH - bool - -config SWIOTLB - def_bool y - select DMA_DIRECT_OPS - select IOMMU_HELPER - select NEED_SG_DMA_LENGTH - config OCTEON_ILM tristate "Module to measure interrupt latency using Octeon CIU Timer" help diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform index b51432dd10b6..0dd0d5d460a5 100644 --- a/arch/mips/generic/Platform +++ b/arch/mips/generic/Platform @@ -16,3 +16,4 @@ all-$(CONFIG_MIPS_GENERIC) := vmlinux.gz.itb its-y := vmlinux.its.S its-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += board-boston.its.S its-$(CONFIG_FIT_IMAGE_FDT_NI169445) += board-ni169445.its.S +its-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += board-xilfpga.its.S diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h index 9a0fa66b81ac..78675f19440f 100644 --- a/arch/mips/include/asm/compat.h +++ b/arch/mips/include/asm/compat.h @@ -14,7 +14,6 @@ typedef u32 compat_size_t; typedef s32 compat_ssize_t; -typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_suseconds_t; @@ -38,24 +37,16 @@ typedef struct { typedef s32 compat_timer_t; typedef s32 compat_key_t; +typedef s16 compat_short_t; typedef s32 compat_int_t; typedef s32 compat_long_t; typedef s64 compat_s64; +typedef u16 compat_ushort_t; typedef u32 compat_uint_t; typedef u32 compat_ulong_t; typedef u64 compat_u64; typedef u32 compat_uptr_t; -struct compat_timespec { - compat_time_t tv_sec; - s32 tv_nsec; -}; - -struct compat_timeval { - compat_time_t tv_sec; - s32 tv_usec; -}; - struct compat_stat { compat_dev_t st_dev; s32 st_pad1[3]; @@ -168,35 +159,35 @@ struct compat_ipc64_perm { struct compat_semid64_ds { struct compat_ipc64_perm sem_perm; - compat_time_t sem_otime; - compat_time_t sem_ctime; + compat_ulong_t sem_otime; + compat_ulong_t sem_ctime; compat_ulong_t sem_nsems; - compat_ulong_t __unused1; - compat_ulong_t __unused2; + compat_ulong_t sem_otime_high; + compat_ulong_t sem_ctime_high; }; struct compat_msqid64_ds { struct compat_ipc64_perm msg_perm; #ifndef CONFIG_CPU_LITTLE_ENDIAN - compat_ulong_t __unused1; + compat_ulong_t msg_stime_high; #endif - compat_time_t msg_stime; + compat_ulong_t msg_stime; #ifdef CONFIG_CPU_LITTLE_ENDIAN - compat_ulong_t __unused1; + compat_ulong_t msg_stime_high; #endif #ifndef CONFIG_CPU_LITTLE_ENDIAN - compat_ulong_t __unused2; + compat_ulong_t msg_rtime_high; #endif - compat_time_t msg_rtime; + compat_ulong_t msg_rtime; #ifdef CONFIG_CPU_LITTLE_ENDIAN - compat_ulong_t __unused2; + compat_ulong_t msg_rtime_high; #endif #ifndef CONFIG_CPU_LITTLE_ENDIAN - compat_ulong_t __unused3; + compat_ulong_t msg_ctime_high; #endif - compat_time_t msg_ctime; + compat_ulong_t msg_ctime; #ifdef CONFIG_CPU_LITTLE_ENDIAN - compat_ulong_t __unused3; + compat_ulong_t msg_ctime_high; #endif compat_ulong_t msg_cbytes; compat_ulong_t msg_qnum; @@ -210,14 +201,16 @@ struct compat_msqid64_ds { struct compat_shmid64_ds { struct compat_ipc64_perm shm_perm; compat_size_t shm_segsz; - compat_time_t shm_atime; - compat_time_t shm_dtime; - compat_time_t shm_ctime; + compat_ulong_t shm_atime; + compat_ulong_t shm_dtime; + compat_ulong_t shm_ctime; compat_pid_t shm_cpid; compat_pid_t shm_lpid; compat_ulong_t shm_nattch; - compat_ulong_t __unused1; - compat_ulong_t __unused2; + compat_ushort_t shm_atime_high; + compat_ushort_t shm_dtime_high; + compat_ushort_t shm_ctime_high; + compat_ushort_t __unused2; }; /* MIPS has unusual order of fields in stack_t */ diff --git a/arch/mips/include/asm/pci.h b/arch/mips/include/asm/pci.h index 2339f42f047a..436099883022 100644 --- a/arch/mips/include/asm/pci.h +++ b/arch/mips/include/asm/pci.h @@ -121,13 +121,6 @@ extern unsigned long PCIBIOS_MIN_MEM; #include <linux/string.h> #include <asm/io.h> -/* - * The PCI address space does equal the physical memory address space. - * The networking and block device layers use this boolean for bounce - * buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (1) - #ifdef CONFIG_PCI_DOMAINS_GENERIC static inline int pci_proc_domain(struct pci_bus *bus) { diff --git a/arch/mips/include/uapi/asm/msgbuf.h b/arch/mips/include/uapi/asm/msgbuf.h index eb4d0f9d7364..46aa15b13e4e 100644 --- a/arch/mips/include/uapi/asm/msgbuf.h +++ b/arch/mips/include/uapi/asm/msgbuf.h @@ -9,33 +9,15 @@ * between kernel and user space. * * Pad space is left for: - * - extension of time_t to 64-bit on 32-bitsystem to solve the y2038 problem * - 2 miscellaneous unsigned long values */ +#if defined(__mips64) struct msqid64_ds { struct ipc64_perm msg_perm; -#if !defined(__mips64) && defined(__MIPSEB__) - unsigned long __unused1; -#endif __kernel_time_t msg_stime; /* last msgsnd time */ -#if !defined(__mips64) && defined(__MIPSEL__) - unsigned long __unused1; -#endif -#if !defined(__mips64) && defined(__MIPSEB__) - unsigned long __unused2; -#endif __kernel_time_t msg_rtime; /* last msgrcv time */ -#if !defined(__mips64) && defined(__MIPSEL__) - unsigned long __unused2; -#endif -#if !defined(__mips64) && defined(__MIPSEB__) - unsigned long __unused3; -#endif __kernel_time_t msg_ctime; /* last change time */ -#if !defined(__mips64) && defined(__MIPSEL__) - unsigned long __unused3; -#endif unsigned long msg_cbytes; /* current number of bytes on queue */ unsigned long msg_qnum; /* number of messages in queue */ unsigned long msg_qbytes; /* max number of bytes on queue */ @@ -44,5 +26,42 @@ struct msqid64_ds { unsigned long __unused4; unsigned long __unused5; }; +#elif defined (__MIPSEB__) +struct msqid64_ds { + struct ipc64_perm msg_perm; + unsigned long msg_stime_high; + unsigned long msg_stime; /* last msgsnd time */ + unsigned long msg_rtime_high; + unsigned long msg_rtime; /* last msgrcv time */ + unsigned long msg_ctime_high; + unsigned long msg_ctime; /* last change time */ + unsigned long msg_cbytes; /* current number of bytes on queue */ + unsigned long msg_qnum; /* number of messages in queue */ + unsigned long msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + unsigned long __unused4; + unsigned long __unused5; +}; +#elif defined (__MIPSEL__) +struct msqid64_ds { + struct ipc64_perm msg_perm; + unsigned long msg_stime; /* last msgsnd time */ + unsigned long msg_stime_high; + unsigned long msg_rtime; /* last msgrcv time */ + unsigned long msg_rtime_high; + unsigned long msg_ctime; /* last change time */ + unsigned long msg_ctime_high; + unsigned long msg_cbytes; /* current number of bytes on queue */ + unsigned long msg_qnum; /* number of messages in queue */ + unsigned long msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + unsigned long __unused4; + unsigned long __unused5; +}; +#else +#warning no endianess set +#endif #endif /* _ASM_MSGBUF_H */ diff --git a/arch/mips/include/uapi/asm/sembuf.h b/arch/mips/include/uapi/asm/sembuf.h index 2c0f507ab7d1..60c89e6cb25b 100644 --- a/arch/mips/include/uapi/asm/sembuf.h +++ b/arch/mips/include/uapi/asm/sembuf.h @@ -7,10 +7,11 @@ * Note extra padding because this structure is passed back and forth * between kernel and user space. * - * Pad space is left for: - * - 2 miscellaneous 64-bit values + * Pad space is left for 2 miscellaneous 64-bit values on mips64, + * but used for the upper 32 bit of the time values on mips32. */ +#ifdef __mips64 struct semid64_ds { struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ __kernel_time_t sem_otime; /* last semop time */ @@ -19,5 +20,15 @@ struct semid64_ds { unsigned long __unused1; unsigned long __unused2; }; +#else +struct semid64_ds { + struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ + unsigned long sem_otime; /* last semop time */ + unsigned long sem_ctime; /* last change time */ + unsigned long sem_nsems; /* no. of semaphores in array */ + unsigned long sem_otime_high; + unsigned long sem_ctime_high; +}; +#endif #endif /* _ASM_SEMBUF_H */ diff --git a/arch/mips/include/uapi/asm/shmbuf.h b/arch/mips/include/uapi/asm/shmbuf.h index 379e6bca518b..9b9bba3401f2 100644 --- a/arch/mips/include/uapi/asm/shmbuf.h +++ b/arch/mips/include/uapi/asm/shmbuf.h @@ -7,10 +7,13 @@ * Note extra padding because this structure is passed back and forth * between kernel and user space. * - * Pad space is left for: - * - 2 miscellaneous 32-bit rsp. 64-bit values + * As MIPS was lacking proper padding after shm_?time, we use 48 bits + * of the padding at the end to store a few additional bits of the time. + * libc implementations need to take care to convert this into a proper + * data structure when moving to 64-bit time_t. */ +#ifdef __mips64 struct shmid64_ds { struct ipc64_perm shm_perm; /* operation perms */ size_t shm_segsz; /* size of segment (bytes) */ @@ -23,6 +26,22 @@ struct shmid64_ds { unsigned long __unused1; unsigned long __unused2; }; +#else +struct shmid64_ds { + struct ipc64_perm shm_perm; /* operation perms */ + size_t shm_segsz; /* size of segment (bytes) */ + unsigned long shm_atime; /* last attach time */ + unsigned long shm_dtime; /* last detach time */ + unsigned long shm_ctime; /* last change time */ + __kernel_pid_t shm_cpid; /* pid of creator */ + __kernel_pid_t shm_lpid; /* pid of last operator */ + unsigned long shm_nattch; /* no. of current attaches */ + unsigned short shm_atime_high; + unsigned short shm_dtime_high; + unsigned short shm_ctime_high; + unsigned short __unused1; +}; +#endif struct shminfo64 { unsigned long shmmax; diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index b9e9bf628849..3775a8d694fb 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -721,6 +721,10 @@ int mips_set_process_fp_mode(struct task_struct *task, unsigned int value) if (value & ~known_bits) return -EOPNOTSUPP; + /* Setting FRE without FR is not supported. */ + if ((value & (PR_FP_MODE_FR | PR_FP_MODE_FRE)) == PR_FP_MODE_FRE) + return -EOPNOTSUPP; + /* Avoid inadvertently triggering emulation */ if ((value & PR_FP_MODE_FR) && raw_cpu_has_fpu && !(raw_current_cpu_data.fpu_id & MIPS_FPIR_F64)) diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 0b23b1ad99e6..0c0c23c9c9f5 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -463,7 +463,7 @@ static int fpr_get_msa(struct task_struct *target, /* * Copy the floating-point context to the supplied NT_PRFPREG buffer. * Choose the appropriate helper for general registers, and then copy - * the FCSR register separately. + * the FCSR and FIR registers separately. */ static int fpr_get(struct task_struct *target, const struct user_regset *regset, @@ -471,6 +471,7 @@ static int fpr_get(struct task_struct *target, void *kbuf, void __user *ubuf) { const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); + const int fir_pos = fcr31_pos + sizeof(u32); int err; if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) @@ -483,6 +484,12 @@ static int fpr_get(struct task_struct *target, err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.fpu.fcr31, fcr31_pos, fcr31_pos + sizeof(u32)); + if (err) + return err; + + err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &boot_cpu_data.fpu_id, + fir_pos, fir_pos + sizeof(u32)); return err; } @@ -531,7 +538,8 @@ static int fpr_set_msa(struct task_struct *target, /* * Copy the supplied NT_PRFPREG buffer to the floating-point context. * Choose the appropriate helper for general registers, and then copy - * the FCSR register separately. + * the FCSR register separately. Ignore the incoming FIR register + * contents though, as the register is read-only. * * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0', * which is supposed to have been guaranteed by the kernel before @@ -545,6 +553,7 @@ static int fpr_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); + const int fir_pos = fcr31_pos + sizeof(u32); u32 fcr31; int err; @@ -572,6 +581,11 @@ static int fpr_set(struct task_struct *target, ptrace_setfcr31(target, fcr31); } + if (count > 0) + err = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + fir_pos, + fir_pos + sizeof(u32)); + return err; } @@ -793,7 +807,7 @@ long arch_ptrace(struct task_struct *child, long request, fregs = get_fpu_regs(child); #ifdef CONFIG_32BIT - if (test_thread_flag(TIF_32BIT_FPREGS)) { + if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even @@ -804,7 +818,7 @@ long arch_ptrace(struct task_struct *child, long request, break; } #endif - tmp = get_fpr32(&fregs[addr - FPR_BASE], 0); + tmp = get_fpr64(&fregs[addr - FPR_BASE], 0); break; case PC: tmp = regs->cp0_epc; @@ -888,7 +902,7 @@ long arch_ptrace(struct task_struct *child, long request, init_fp_ctx(child); #ifdef CONFIG_32BIT - if (test_thread_flag(TIF_32BIT_FPREGS)) { + if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 2b9260f92ccd..f30c381d3e1c 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -99,7 +99,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, break; } fregs = get_fpu_regs(child); - if (test_thread_flag(TIF_32BIT_FPREGS)) { + if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even @@ -109,7 +109,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, addr & 1); break; } - tmp = get_fpr32(&fregs[addr - FPR_BASE], 0); + tmp = get_fpr64(&fregs[addr - FPR_BASE], 0); break; case PC: tmp = regs->cp0_epc; @@ -212,7 +212,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, sizeof(child->thread.fpu)); child->thread.fpu.fcr31 = 0; } - if (test_thread_flag(TIF_32BIT_FPREGS)) { + if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even diff --git a/arch/mips/kernel/signal32.c b/arch/mips/kernel/signal32.c index c4db910a8794..b5d9e1784aff 100644 --- a/arch/mips/kernel/signal32.c +++ b/arch/mips/kernel/signal32.c @@ -8,13 +8,13 @@ * Copyright (C) 1999, 2000 Silicon Graphics, Inc. * Copyright (C) 2016, Imagination Technologies Ltd. */ +#include <linux/compat.h> #include <linux/compiler.h> #include <linux/errno.h> #include <linux/kernel.h> #include <linux/signal.h> #include <linux/syscalls.h> -#include <asm/compat.h> #include <asm/compat-signal.h> #include <linux/uaccess.h> #include <asm/unistd.h> diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 967e9e4e795e..d67fa74622ee 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -699,17 +699,11 @@ static int simulate_sync(struct pt_regs *regs, unsigned int opcode) asmlinkage void do_ov(struct pt_regs *regs) { enum ctx_state prev_state; - siginfo_t info; - - clear_siginfo(&info); - info.si_signo = SIGFPE; - info.si_code = FPE_INTOVF; - info.si_addr = (void __user *)regs->cp0_epc; prev_state = exception_enter(); die_if_kernel("Integer overflow", regs); - force_sig_info(SIGFPE, &info, current); + force_sig_fault(SIGFPE, FPE_INTOVF, (void __user *)regs->cp0_epc, current); exception_exit(prev_state); } @@ -722,32 +716,27 @@ asmlinkage void do_ov(struct pt_regs *regs) void force_fcr31_sig(unsigned long fcr31, void __user *fault_addr, struct task_struct *tsk) { - struct siginfo si; - - clear_siginfo(&si); - si.si_addr = fault_addr; - si.si_signo = SIGFPE; + int si_code = FPE_FLTUNK; if (fcr31 & FPU_CSR_INV_X) - si.si_code = FPE_FLTINV; + si_code = FPE_FLTINV; else if (fcr31 & FPU_CSR_DIV_X) - si.si_code = FPE_FLTDIV; + si_code = FPE_FLTDIV; else if (fcr31 & FPU_CSR_OVF_X) - si.si_code = FPE_FLTOVF; + si_code = FPE_FLTOVF; else if (fcr31 & FPU_CSR_UDF_X) - si.si_code = FPE_FLTUND; + si_code = FPE_FLTUND; else if (fcr31 & FPU_CSR_INE_X) - si.si_code = FPE_FLTRES; + si_code = FPE_FLTRES; - force_sig_info(SIGFPE, &si, tsk); + force_sig_fault(SIGFPE, si_code, fault_addr, tsk); } int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) { - struct siginfo si; + int si_code; struct vm_area_struct *vma; - clear_siginfo(&si); switch (sig) { case 0: return 0; @@ -757,23 +746,18 @@ int process_fpemu_return(int sig, void __user *fault_addr, unsigned long fcr31) return 1; case SIGBUS: - si.si_addr = fault_addr; - si.si_signo = sig; - si.si_code = BUS_ADRERR; - force_sig_info(sig, &si, current); + force_sig_fault(SIGBUS, BUS_ADRERR, fault_addr, current); return 1; case SIGSEGV: - si.si_addr = fault_addr; - si.si_signo = sig; down_read(¤t->mm->mmap_sem); vma = find_vma(current->mm, (unsigned long)fault_addr); if (vma && (vma->vm_start <= (unsigned long)fault_addr)) - si.si_code = SEGV_ACCERR; + si_code = SEGV_ACCERR; else - si.si_code = SEGV_MAPERR; + si_code = SEGV_MAPERR; up_read(¤t->mm->mmap_sem); - force_sig_info(sig, &si, current); + force_sig_fault(SIGSEGV, si_code, fault_addr, current); return 1; default: @@ -896,10 +880,8 @@ out: void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code, const char *str) { - siginfo_t info; char b[40]; - clear_siginfo(&info); #ifdef CONFIG_KGDB_LOW_LEVEL_TRAP if (kgdb_ll_trap(DIE_TRAP, str, regs, code, current->thread.trap_nr, SIGTRAP) == NOTIFY_STOP) @@ -921,13 +903,9 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code, case BRK_DIVZERO: scnprintf(b, sizeof(b), "%s instruction in kernel code", str); die_if_kernel(b, regs); - if (code == BRK_DIVZERO) - info.si_code = FPE_INTDIV; - else - info.si_code = FPE_INTOVF; - info.si_signo = SIGFPE; - info.si_addr = (void __user *) regs->cp0_epc; - force_sig_info(SIGFPE, &info, current); + force_sig_fault(SIGFPE, + code == BRK_DIVZERO ? FPE_INTDIV : FPE_INTOVF, + (void __user *) regs->cp0_epc, current); break; case BRK_BUG: die_if_kernel("Kernel bug detected", regs); @@ -952,9 +930,7 @@ void do_trap_or_bp(struct pt_regs *regs, unsigned int code, int si_code, scnprintf(b, sizeof(b), "%s instruction in kernel code", str); die_if_kernel(b, regs); if (si_code) { - info.si_signo = SIGTRAP; - info.si_code = si_code; - force_sig_info(SIGTRAP, &info, current); + force_sig_fault(SIGTRAP, si_code, NULL, current); } else { force_sig(SIGTRAP, current); } @@ -1506,13 +1482,8 @@ asmlinkage void do_mdmx(struct pt_regs *regs) */ asmlinkage void do_watch(struct pt_regs *regs) { - siginfo_t info; enum ctx_state prev_state; - clear_siginfo(&info); - info.si_signo = SIGTRAP; - info.si_code = TRAP_HWBKPT; - prev_state = exception_enter(); /* * Clear WP (bit 22) bit of cause register so we don't loop @@ -1528,7 +1499,7 @@ asmlinkage void do_watch(struct pt_regs *regs) if (test_tsk_thread_flag(current, TIF_LOAD_WATCH)) { mips_read_watch_registers(); local_irq_enable(); - force_sig_info(SIGTRAP, &info, current); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, NULL, current); } else { mips_clear_watch_registers(); local_irq_enable(); diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 2549fdd27ee1..0f725e9cee8f 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -45,7 +45,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "cache", VCPU_STAT(cache_exits), KVM_STAT_VCPU }, { "signal", VCPU_STAT(signal_exits), KVM_STAT_VCPU }, { "interrupt", VCPU_STAT(int_exits), KVM_STAT_VCPU }, - { "cop_unsuable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU }, + { "cop_unusable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU }, { "tlbmod", VCPU_STAT(tlbmod_exits), KVM_STAT_VCPU }, { "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits), KVM_STAT_VCPU }, { "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits), KVM_STAT_VCPU }, diff --git a/arch/mips/loongson64/Kconfig b/arch/mips/loongson64/Kconfig index 72af0c183969..c79e6a565572 100644 --- a/arch/mips/loongson64/Kconfig +++ b/arch/mips/loongson64/Kconfig @@ -130,21 +130,6 @@ config LOONGSON_UART_BASE default y depends on EARLY_PRINTK || SERIAL_8250 -config IOMMU_HELPER - bool - -config NEED_SG_DMA_LENGTH - bool - -config SWIOTLB - bool "Soft IOMMU Support for All-Memory DMA" - default y - depends on CPU_LOONGSON3 - select DMA_DIRECT_OPS - select IOMMU_HELPER - select NEED_SG_DMA_LENGTH - select NEED_DMA_MAP_STATE - config PHYS48_TO_HT40 bool default y if CPU_LOONGSON3 diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 6f534b209971..e12dfa48b478 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -851,9 +851,12 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) /* * Either no secondary cache or the available caches don't have the * subset property so we have to flush the primary caches - * explicitly + * explicitly. + * If we would need IPI to perform an INDEX-type operation, then + * we have to use the HIT-type alternative as IPI cannot be used + * here due to interrupts possibly being disabled. */ - if (size >= dcache_size) { + if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; @@ -890,7 +893,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) return; } - if (size >= dcache_size) { + if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index dcafa43613b6..f9fef0028ca2 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -402,13 +402,3 @@ static const struct dma_map_ops mips_default_dma_map_ops = { const struct dma_map_ops *mips_dma_map_ops = &mips_default_dma_map_ops; EXPORT_SYMBOL(mips_dma_map_ops); - -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) - -static int __init mips_dma_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - - return 0; -} -fs_initcall(mips_dma_init); diff --git a/arch/mips/mm/fault.c b/arch/mips/mm/fault.c index 4f8f5bf46977..5f71f2b903b7 100644 --- a/arch/mips/mm/fault.c +++ b/arch/mips/mm/fault.c @@ -42,7 +42,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, struct task_struct *tsk = current; struct mm_struct *mm = tsk->mm; const int field = sizeof(unsigned long) * 2; - siginfo_t info; + int si_code; int fault; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -63,7 +63,7 @@ static void __kprobes __do_page_fault(struct pt_regs *regs, unsigned long write, return; #endif - info.si_code = SEGV_MAPERR; + si_code = SEGV_MAPERR; /* * We fault-in kernel-space virtual memory on-demand. The @@ -112,7 +112,7 @@ retry: * we can handle it.. */ good_area: - info.si_code = SEGV_ACCERR; + si_code = SEGV_ACCERR; if (write) { if (!(vma->vm_flags & VM_WRITE)) @@ -223,11 +223,7 @@ bad_area_nosemaphore: pr_cont("\n"); } current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f; - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* info.si_code has been set above */ - info.si_addr = (void __user *) address; - force_sig_info(SIGSEGV, &info, tsk); + force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk); return; } @@ -283,11 +279,7 @@ do_sigbus: #endif current->thread.trap_nr = (regs->cp0_cause >> 2) & 0x1f; tsk->thread.cp0_badvaddr = address; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *) address; - force_sig_info(SIGBUS, &info, tsk); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk); return; #ifndef CONFIG_64BIT diff --git a/arch/mips/netlogic/Kconfig b/arch/mips/netlogic/Kconfig index 7fcfc7fe9f14..412351c5acc6 100644 --- a/arch/mips/netlogic/Kconfig +++ b/arch/mips/netlogic/Kconfig @@ -83,10 +83,4 @@ endif config NLM_COMMON bool -config IOMMU_HELPER - bool - -config NEED_SG_DMA_LENGTH - bool - endif diff --git a/arch/mips/pci/ops-pmcmsp.c b/arch/mips/pci/ops-pmcmsp.c index dd2d9f7e9412..7649372103af 100644 --- a/arch/mips/pci/ops-pmcmsp.c +++ b/arch/mips/pci/ops-pmcmsp.c @@ -83,18 +83,6 @@ static int show_msp_pci_counts(struct seq_file *m, void *v) return 0; } -static int msp_pci_rd_cnt_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_msp_pci_counts, NULL); -} - -static const struct file_operations msp_pci_rd_cnt_fops = { - .open = msp_pci_rd_cnt_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /***************************************************************************** * * FUNCTION: gen_pci_cfg_wr_show @@ -160,18 +148,6 @@ static int gen_pci_cfg_wr_show(struct seq_file *m, void *v) return 0; } -static int gen_pci_cfg_wr_open(struct inode *inode, struct file *file) -{ - return single_open(file, gen_pci_cfg_wr_show, NULL); -} - -static const struct file_operations gen_pci_cfg_wr_fops = { - .open = gen_pci_cfg_wr_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - /***************************************************************************** * * FUNCTION: pci_proc_init @@ -188,8 +164,8 @@ static const struct file_operations gen_pci_cfg_wr_fops = { ****************************************************************************/ static void pci_proc_init(void) { - proc_create("pmc_msp_pci_rd_cnt", 0, NULL, &msp_pci_rd_cnt_fops); - proc_create("pmc_msp_pci_cfg_wr", 0, NULL, &gen_pci_cfg_wr_fops); + proc_create_single("pmc_msp_pci_rd_cnt", 0, NULL, show_msp_pci_counts); + proc_create_single("pmc_msp_pci_cfg_wr", 0, NULL, gen_pci_cfg_wr_show); } #endif /* CONFIG_PROC_FS && PCI_COUNTERS */ diff --git a/arch/mips/sibyte/common/bus_watcher.c b/arch/mips/sibyte/common/bus_watcher.c index a4e55999ecb4..4bb85de9229b 100644 --- a/arch/mips/sibyte/common/bus_watcher.c +++ b/arch/mips/sibyte/common/bus_watcher.c @@ -142,24 +142,12 @@ static int bw_proc_show(struct seq_file *m, void *v) return 0; } -static int bw_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, bw_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations bw_proc_fops = { - .open = bw_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void create_proc_decoder(struct bw_stats_struct *stats) { struct proc_dir_entry *ent; - ent = proc_create_data("bus_watcher", S_IWUSR | S_IRUGO, NULL, - &bw_proc_fops, stats); + ent = proc_create_single_data("bus_watcher", S_IWUSR | S_IRUGO, NULL, + bw_proc_show, stats); if (!ent) { printk(KERN_INFO "Unable to initialize bus_watcher /proc entry\n"); return; diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig index 249f38d3388f..6aed974276d8 100644 --- a/arch/nds32/Kconfig +++ b/arch/nds32/Kconfig @@ -5,10 +5,19 @@ config NDS32 def_bool y + select ARCH_HAS_SYNC_DMA_FOR_CPU + select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_WANT_FRAME_POINTERS if FTRACE select CLKSRC_MMIO select CLONE_BACKWARDS select COMMON_CLK + select DMA_NONCOHERENT_OPS + select GENERIC_ASHLDI3 + select GENERIC_ASHRDI3 + select GENERIC_LSHRDI3 + select GENERIC_CMPDI2 + select GENERIC_MULDI3 + select GENERIC_UCMPDI2 select GENERIC_ATOMIC64 select GENERIC_CPU_DEVICES select GENERIC_CLOCKEVENTS @@ -82,6 +91,7 @@ endmenu menu "Kernel Features" source "kernel/Kconfig.preempt" +source "kernel/Kconfig.freezer" source "mm/Kconfig" source "kernel/Kconfig.hz" endmenu diff --git a/arch/nds32/Kconfig.cpu b/arch/nds32/Kconfig.cpu index ba44cc539da9..b8c8984d1456 100644 --- a/arch/nds32/Kconfig.cpu +++ b/arch/nds32/Kconfig.cpu @@ -1,10 +1,11 @@ comment "Processor Features" config CPU_BIG_ENDIAN - bool "Big endian" + def_bool !CPU_LITTLE_ENDIAN config CPU_LITTLE_ENDIAN - def_bool !CPU_BIG_ENDIAN + bool "Little endian" + default y config HWZOL bool "hardware zero overhead loop support" diff --git a/arch/nds32/Makefile b/arch/nds32/Makefile index 91f933d5a962..513bb2e9baf9 100644 --- a/arch/nds32/Makefile +++ b/arch/nds32/Makefile @@ -23,9 +23,6 @@ export TEXTADDR # If we have a machine-specific directory, then include it in the build. core-y += arch/nds32/kernel/ arch/nds32/mm/ libs-y += arch/nds32/lib/ -LIBGCC_PATH := \ - $(shell $(CC) $(KBUILD_CFLAGS) $(KCFLAGS) -print-libgcc-file-name) -libs-y += $(LIBGCC_PATH) ifneq '$(CONFIG_NDS32_BUILTIN_DTB)' '""' BUILTIN_DTB := y @@ -35,8 +32,12 @@ endif ifdef CONFIG_CPU_LITTLE_ENDIAN KBUILD_CFLAGS += $(call cc-option, -EL) +KBUILD_AFLAGS += $(call cc-option, -EL) +LDFLAGS += $(call cc-option, -EL) else KBUILD_CFLAGS += $(call cc-option, -EB) +KBUILD_AFLAGS += $(call cc-option, -EB) +LDFLAGS += $(call cc-option, -EB) endif boot := arch/nds32/boot diff --git a/arch/nds32/include/asm/Kbuild b/arch/nds32/include/asm/Kbuild index 06bdf8167f5a..dbc4e5422550 100644 --- a/arch/nds32/include/asm/Kbuild +++ b/arch/nds32/include/asm/Kbuild @@ -9,13 +9,16 @@ generic-y += checksum.h generic-y += clkdev.h generic-y += cmpxchg.h generic-y += cmpxchg-local.h +generic-y += compat.h generic-y += cputime.h generic-y += device.h generic-y += div64.h generic-y += dma.h +generic-y += dma-mapping.h generic-y += emergency-restart.h generic-y += errno.h generic-y += exec.h +generic-y += export.h generic-y += fb.h generic-y += fcntl.h generic-y += ftrace.h @@ -49,6 +52,7 @@ generic-y += switch_to.h generic-y += timex.h generic-y += topology.h generic-y += trace_clock.h +generic-y += xor.h generic-y += unaligned.h generic-y += user.h generic-y += vga.h diff --git a/arch/nds32/include/asm/bitfield.h b/arch/nds32/include/asm/bitfield.h index c73f71d67744..8e84fc385b94 100644 --- a/arch/nds32/include/asm/bitfield.h +++ b/arch/nds32/include/asm/bitfield.h @@ -336,7 +336,7 @@ #define INT_MASK_mskIDIVZE ( 0x1 << INT_MASK_offIDIVZE ) #define INT_MASK_mskDSSIM ( 0x1 << INT_MASK_offDSSIM ) -#define INT_MASK_INITAIAL_VAL 0x10003 +#define INT_MASK_INITAIAL_VAL (INT_MASK_mskDSSIM|INT_MASK_mskIDIVZE) /****************************************************************************** * ir15: INT_PEND (Interrupt Pending Register) @@ -396,6 +396,7 @@ #define MMU_CTL_D8KB 1 #define MMU_CTL_UNA ( 0x1 << MMU_CTL_offUNA ) +#define MMU_CTL_CACHEABLE_NON 0 #define MMU_CTL_CACHEABLE_WB 2 #define MMU_CTL_CACHEABLE_WT 3 diff --git a/arch/nds32/include/asm/cacheflush.h b/arch/nds32/include/asm/cacheflush.h index 1240f148ec0f..10b48f0d8e85 100644 --- a/arch/nds32/include/asm/cacheflush.h +++ b/arch/nds32/include/asm/cacheflush.h @@ -32,6 +32,8 @@ void flush_anon_page(struct vm_area_struct *vma, #define ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE void flush_kernel_dcache_page(struct page *page); +void flush_kernel_vmap_range(void *addr, int size); +void invalidate_kernel_vmap_range(void *addr, int size); void flush_icache_range(unsigned long start, unsigned long end); void flush_icache_page(struct vm_area_struct *vma, struct page *page); #define flush_dcache_mmap_lock(mapping) xa_lock_irq(&(mapping)->i_pages) diff --git a/arch/nds32/include/asm/dma-mapping.h b/arch/nds32/include/asm/dma-mapping.h deleted file mode 100644 index 2dd47d245c25..000000000000 --- a/arch/nds32/include/asm/dma-mapping.h +++ /dev/null @@ -1,14 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -// Copyright (C) 2005-2017 Andes Technology Corporation - -#ifndef ASMNDS32_DMA_MAPPING_H -#define ASMNDS32_DMA_MAPPING_H - -extern struct dma_map_ops nds32_dma_ops; - -static inline struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) -{ - return &nds32_dma_ops; -} - -#endif diff --git a/arch/nds32/include/asm/io.h b/arch/nds32/include/asm/io.h index 966e71b3c960..71cd226d6863 100644 --- a/arch/nds32/include/asm/io.h +++ b/arch/nds32/include/asm/io.h @@ -4,6 +4,8 @@ #ifndef __ASM_NDS32_IO_H #define __ASM_NDS32_IO_H +#include <linux/types.h> + extern void iounmap(volatile void __iomem *addr); #define __raw_writeb __raw_writeb static inline void __raw_writeb(u8 val, volatile void __iomem *addr) diff --git a/arch/nds32/include/asm/page.h b/arch/nds32/include/asm/page.h index e27365c097b6..947f0491c9a7 100644 --- a/arch/nds32/include/asm/page.h +++ b/arch/nds32/include/asm/page.h @@ -27,6 +27,9 @@ extern void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma); extern void clear_user_highpage(struct page *page, unsigned long vaddr); +void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, + struct page *to); +void clear_user_page(void *addr, unsigned long vaddr, struct page *page); #define __HAVE_ARCH_COPY_USER_HIGHPAGE #define clear_user_highpage clear_user_highpage #else diff --git a/arch/nds32/include/asm/pgtable.h b/arch/nds32/include/asm/pgtable.h index 6783937edbeb..d3e19a55cf53 100644 --- a/arch/nds32/include/asm/pgtable.h +++ b/arch/nds32/include/asm/pgtable.h @@ -152,6 +152,7 @@ extern void __pgd_error(const char *file, int line, unsigned long val); #define PAGE_CACHE_L1 __pgprot(_HAVE_PAGE_L | _PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE) #define PAGE_MEMORY __pgprot(_HAVE_PAGE_L | _PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE_SHRD) #define PAGE_KERNEL __pgprot(_PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_E | _PAGE_G | _PAGE_CACHE_SHRD) +#define PAGE_SHARED __pgprot(_PAGE_V | _PAGE_M_URW_KRW | _PAGE_D | _PAGE_CACHE_SHRD) #define PAGE_DEVICE __pgprot(_PAGE_V | _PAGE_M_KRW | _PAGE_D | _PAGE_G | _PAGE_C_DEV) #endif /* __ASSEMBLY__ */ diff --git a/arch/nds32/kernel/dma.c b/arch/nds32/kernel/dma.c index d291800fc621..d0dbd4fe9645 100644 --- a/arch/nds32/kernel/dma.c +++ b/arch/nds32/kernel/dma.c @@ -3,17 +3,14 @@ #include <linux/types.h> #include <linux/mm.h> -#include <linux/export.h> #include <linux/string.h> -#include <linux/scatterlist.h> -#include <linux/dma-mapping.h> +#include <linux/dma-noncoherent.h> #include <linux/io.h> #include <linux/cache.h> #include <linux/highmem.h> #include <linux/slab.h> #include <asm/cacheflush.h> #include <asm/tlbflush.h> -#include <asm/dma-mapping.h> #include <asm/proc-fns.h> /* @@ -22,11 +19,6 @@ static pte_t *consistent_pte; static DEFINE_RAW_SPINLOCK(consistent_lock); -enum master_type { - FOR_CPU = 0, - FOR_DEVICE = 1, -}; - /* * VM region handling support. * @@ -124,10 +116,8 @@ out: return c; } -/* FIXME: attrs is not used. */ -static void *nds32_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t * handle, gfp_t gfp, - unsigned long attrs) +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *handle, + gfp_t gfp, unsigned long attrs) { struct page *page; struct arch_vm_region *c; @@ -232,8 +222,8 @@ no_page: return NULL; } -static void nds32_dma_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t handle, unsigned long attrs) +void arch_dma_free(struct device *dev, size_t size, void *cpu_addr, + dma_addr_t handle, unsigned long attrs) { struct arch_vm_region *c; unsigned long flags, addr; @@ -333,145 +323,69 @@ static int __init consistent_init(void) } core_initcall(consistent_init); -static void consistent_sync(void *vaddr, size_t size, int direction, int master_type); -static dma_addr_t nds32_dma_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - consistent_sync((void *)(page_address(page) + offset), size, dir, FOR_DEVICE); - return page_to_phys(page) + offset; -} - -static void nds32_dma_unmap_page(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - consistent_sync(phys_to_virt(handle), size, dir, FOR_CPU); -} - -/* - * Make an area consistent for devices. - */ -static void consistent_sync(void *vaddr, size_t size, int direction, int master_type) -{ - unsigned long start = (unsigned long)vaddr; - unsigned long end = start + size; - - if (master_type == FOR_CPU) { - switch (direction) { - case DMA_TO_DEVICE: - break; - case DMA_FROM_DEVICE: - case DMA_BIDIRECTIONAL: - cpu_dma_inval_range(start, end); - break; - default: - BUG(); - } - } else { - /* FOR_DEVICE */ - switch (direction) { - case DMA_FROM_DEVICE: - break; - case DMA_TO_DEVICE: - case DMA_BIDIRECTIONAL: - cpu_dma_wb_range(start, end); - break; - default: - BUG(); - } - } -} -static int nds32_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir, - unsigned long attrs) +static inline void cache_op(phys_addr_t paddr, size_t size, + void (*fn)(unsigned long start, unsigned long end)) { - int i; + struct page *page = pfn_to_page(paddr >> PAGE_SHIFT); + unsigned offset = paddr & ~PAGE_MASK; + size_t left = size; + unsigned long start; - for (i = 0; i < nents; i++, sg++) { - void *virt; - unsigned long pfn; - struct page *page = sg_page(sg); + do { + size_t len = left; - sg->dma_address = sg_phys(sg); - pfn = page_to_pfn(page) + sg->offset / PAGE_SIZE; - page = pfn_to_page(pfn); if (PageHighMem(page)) { - virt = kmap_atomic(page); - consistent_sync(virt, sg->length, dir, FOR_CPU); - kunmap_atomic(virt); + void *addr; + + if (offset + len > PAGE_SIZE) { + if (offset >= PAGE_SIZE) { + page += offset >> PAGE_SHIFT; + offset &= ~PAGE_MASK; + } + len = PAGE_SIZE - offset; + } + + addr = kmap_atomic(page); + start = (unsigned long)(addr + offset); + fn(start, start + len); + kunmap_atomic(addr); } else { - if (sg->offset > PAGE_SIZE) - panic("sg->offset:%08x > PAGE_SIZE\n", - sg->offset); - virt = page_address(page) + sg->offset; - consistent_sync(virt, sg->length, dir, FOR_CPU); + start = (unsigned long)phys_to_virt(paddr); + fn(start, start + size); } - } - return nents; + offset = 0; + page++; + left -= len; + } while (left); } -static void nds32_dma_unmap_sg(struct device *dev, struct scatterlist *sg, - int nhwentries, enum dma_data_direction dir, - unsigned long attrs) +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { -} - -static void -nds32_dma_sync_single_for_cpu(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) -{ - consistent_sync((void *)phys_to_virt(handle), size, dir, FOR_CPU); -} - -static void -nds32_dma_sync_single_for_device(struct device *dev, dma_addr_t handle, - size_t size, enum dma_data_direction dir) -{ - consistent_sync((void *)phys_to_virt(handle), size, dir, FOR_DEVICE); -} - -static void -nds32_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents, - enum dma_data_direction dir) -{ - int i; - - for (i = 0; i < nents; i++, sg++) { - char *virt = - page_address((struct page *)sg->page_link) + sg->offset; - consistent_sync(virt, sg->length, dir, FOR_CPU); + switch (dir) { + case DMA_FROM_DEVICE: + break; + case DMA_TO_DEVICE: + case DMA_BIDIRECTIONAL: + cache_op(paddr, size, cpu_dma_wb_range); + break; + default: + BUG(); } } -static void -nds32_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nents, enum dma_data_direction dir) +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { - int i; - - for (i = 0; i < nents; i++, sg++) { - char *virt = - page_address((struct page *)sg->page_link) + sg->offset; - consistent_sync(virt, sg->length, dir, FOR_DEVICE); + switch (dir) { + case DMA_TO_DEVICE: + break; + case DMA_FROM_DEVICE: + case DMA_BIDIRECTIONAL: + cache_op(paddr, size, cpu_dma_inval_range); + break; + default: + BUG(); } } - -struct dma_map_ops nds32_dma_ops = { - .alloc = nds32_dma_alloc_coherent, - .free = nds32_dma_free, - .map_page = nds32_dma_map_page, - .unmap_page = nds32_dma_unmap_page, - .map_sg = nds32_dma_map_sg, - .unmap_sg = nds32_dma_unmap_sg, - .sync_single_for_device = nds32_dma_sync_single_for_device, - .sync_single_for_cpu = nds32_dma_sync_single_for_cpu, - .sync_sg_for_cpu = nds32_dma_sync_sg_for_cpu, - .sync_sg_for_device = nds32_dma_sync_sg_for_device, -}; - -EXPORT_SYMBOL(nds32_dma_ops); diff --git a/arch/nds32/kernel/ex-entry.S b/arch/nds32/kernel/ex-entry.S index a72e83d804f5..b8ae4e9a6b93 100644 --- a/arch/nds32/kernel/ex-entry.S +++ b/arch/nds32/kernel/ex-entry.S @@ -118,7 +118,7 @@ common_exception_handler: /* interrupt */ 2: #ifdef CONFIG_TRACE_IRQFLAGS - jal arch_trace_hardirqs_off + jal trace_hardirqs_off #endif move $r0, $sp sethi $lp, hi20(ret_from_intr) diff --git a/arch/nds32/kernel/head.S b/arch/nds32/kernel/head.S index 71f57bd70f3b..c5fdae174ced 100644 --- a/arch/nds32/kernel/head.S +++ b/arch/nds32/kernel/head.S @@ -57,14 +57,32 @@ _nodtb: isb mtsr $r4, $L1_PPTB ! load page table pointer\n" -/* set NTC0 cacheable/writeback, mutliple page size in use */ +#ifdef CONFIG_CPU_DCACHE_DISABLE + #define MMU_CTL_NTCC MMU_CTL_CACHEABLE_NON +#else + #ifdef CONFIG_CPU_DCACHE_WRITETHROUGH + #define MMU_CTL_NTCC MMU_CTL_CACHEABLE_WT + #else + #define MMU_CTL_NTCC MMU_CTL_CACHEABLE_WB + #endif +#endif + +/* set NTC cacheability, mutliple page size in use */ mfsr $r3, $MMU_CTL - li $r0, #~MMU_CTL_mskNTC0 - and $r3, $r3, $r0 +#if CONFIG_MEMORY_START >= 0xc0000000 + ori $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC3) +#elif CONFIG_MEMORY_START >= 0x80000000 + ori $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC2) +#elif CONFIG_MEMORY_START >= 0x40000000 + ori $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC1) +#else + ori $r3, $r3, (MMU_CTL_NTCC << MMU_CTL_offNTC0) +#endif + #ifdef CONFIG_ANDES_PAGE_SIZE_4KB - ori $r3, $r3, #(MMU_CTL_mskMPZIU|(MMU_CTL_CACHEABLE_WB << MMU_CTL_offNTC0)) + ori $r3, $r3, #(MMU_CTL_mskMPZIU) #else - ori $r3, $r3, #(MMU_CTL_mskMPZIU|(MMU_CTL_CACHEABLE_WB << MMU_CTL_offNTC0)|MMU_CTL_D8KB) + ori $r3, $r3, #(MMU_CTL_mskMPZIU|MMU_CTL_D8KB) #endif #ifdef CONFIG_HW_SUPPORT_UNALIGNMENT_ACCESS li $r0, #MMU_CTL_UNA diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c index ba910e9e4ecb..2f5b2ccebe47 100644 --- a/arch/nds32/kernel/setup.c +++ b/arch/nds32/kernel/setup.c @@ -293,6 +293,9 @@ void __init setup_arch(char **cmdline_p) /* paging_init() sets up the MMU and marks all pages as reserved */ paging_init(); + /* invalidate all TLB entries because the new mapping is created */ + __nds32__tlbop_flua(); + /* use generic way to parse */ parse_early_param(); diff --git a/arch/nds32/kernel/stacktrace.c b/arch/nds32/kernel/stacktrace.c index bc70113c0e84..8b231e910ea6 100644 --- a/arch/nds32/kernel/stacktrace.c +++ b/arch/nds32/kernel/stacktrace.c @@ -9,6 +9,7 @@ void save_stack_trace(struct stack_trace *trace) { save_stack_trace_tsk(current, trace); } +EXPORT_SYMBOL_GPL(save_stack_trace); void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) { @@ -45,3 +46,4 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) fpn = (unsigned long *)fpp; } } +EXPORT_SYMBOL_GPL(save_stack_trace_tsk); diff --git a/arch/nds32/kernel/traps.c b/arch/nds32/kernel/traps.c index 6e34eb9824a4..a6205fd4db52 100644 --- a/arch/nds32/kernel/traps.c +++ b/arch/nds32/kernel/traps.c @@ -222,19 +222,13 @@ void die_if_kernel(const char *str, struct pt_regs *regs, int err) int bad_syscall(int n, struct pt_regs *regs) { - siginfo_t info; - if (current->personality != PER_LINUX) { send_sig(SIGSEGV, current, 1); return regs->uregs[0]; } - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_ILLTRP; - info.si_addr = (void __user *)instruction_pointer(regs) - 4; - - force_sig_info(SIGILL, &info, current); + force_sig_fault(SIGILL, ILL_ILLTRP, + (void __user *)instruction_pointer(regs) - 4, current); die_if_kernel("Oops - bad syscall", regs, n); return regs->uregs[0]; } @@ -287,16 +281,11 @@ void __init early_trap_init(void) void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code, int si_code) { - struct siginfo info; - tsk->thread.trap_no = ENTRY_DEBUG_RELATED; tsk->thread.error_code = error_code; - memset(&info, 0, sizeof(info)); - info.si_signo = SIGTRAP; - info.si_code = si_code; - info.si_addr = (void __user *)instruction_pointer(regs); - force_sig_info(SIGTRAP, &info, tsk); + force_sig_fault(SIGTRAP, si_code, + (void __user *)instruction_pointer(regs), tsk); } void do_debug_trap(unsigned long entry, unsigned long addr, @@ -318,29 +307,22 @@ void do_debug_trap(unsigned long entry, unsigned long addr, void unhandled_interruption(struct pt_regs *regs) { - siginfo_t si; pr_emerg("unhandled_interruption\n"); show_regs(regs); if (!user_mode(regs)) do_exit(SIGKILL); - si.si_signo = SIGKILL; - si.si_errno = 0; - force_sig_info(SIGKILL, &si, current); + force_sig(SIGKILL, current); } void unhandled_exceptions(unsigned long entry, unsigned long addr, unsigned long type, struct pt_regs *regs) { - siginfo_t si; pr_emerg("Unhandled Exception: entry: %lx addr:%lx itype:%lx\n", entry, addr, type); show_regs(regs); if (!user_mode(regs)) do_exit(SIGKILL); - si.si_signo = SIGKILL; - si.si_errno = 0; - si.si_addr = (void *)addr; - force_sig_info(SIGKILL, &si, current); + force_sig(SIGKILL, current); } extern int do_page_fault(unsigned long entry, unsigned long addr, @@ -363,14 +345,11 @@ void do_dispatch_tlb_misc(unsigned long entry, unsigned long addr, void do_revinsn(struct pt_regs *regs) { - siginfo_t si; pr_emerg("Reserved Instruction\n"); show_regs(regs); if (!user_mode(regs)) do_exit(SIGILL); - si.si_signo = SIGILL; - si.si_errno = 0; - force_sig_info(SIGILL, &si, current); + force_sig(SIGILL, current); } #ifdef CONFIG_ALIGNMENT_TRAP diff --git a/arch/nds32/kernel/vdso.c b/arch/nds32/kernel/vdso.c index f1198d7a5654..016f15891f6d 100644 --- a/arch/nds32/kernel/vdso.c +++ b/arch/nds32/kernel/vdso.c @@ -23,7 +23,7 @@ #include <asm/vdso_timer_info.h> #include <asm/cache_info.h> extern struct cache_info L1_cache_info[2]; -extern char vdso_start, vdso_end; +extern char vdso_start[], vdso_end[]; static unsigned long vdso_pages __ro_after_init; static unsigned long timer_mapping_base; @@ -66,16 +66,16 @@ static int __init vdso_init(void) int i; struct page **vdso_pagelist; - if (memcmp(&vdso_start, "\177ELF", 4)) { + if (memcmp(vdso_start, "\177ELF", 4)) { pr_err("vDSO is not a valid ELF object!\n"); return -EINVAL; } /* Creat a timer io mapping to get clock cycles counter */ get_timer_node_info(); - vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; + vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT; pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n", - vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data); + vdso_pages + 1, vdso_pages, vdso_start, 1L, vdso_data); /* Allocate the vDSO pagelist */ vdso_pagelist = kcalloc(vdso_pages, sizeof(struct page *), GFP_KERNEL); @@ -83,7 +83,7 @@ static int __init vdso_init(void) return -ENOMEM; for (i = 0; i < vdso_pages; i++) - vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE); + vdso_pagelist[i] = virt_to_page(vdso_start + i * PAGE_SIZE); vdso_spec[1].pages = &vdso_pagelist[0]; return 0; diff --git a/arch/nds32/lib/copy_page.S b/arch/nds32/lib/copy_page.S index 4a2ff85f17ee..f8701ed161a8 100644 --- a/arch/nds32/lib/copy_page.S +++ b/arch/nds32/lib/copy_page.S @@ -2,6 +2,7 @@ // Copyright (C) 2005-2017 Andes Technology Corporation #include <linux/linkage.h> +#include <asm/export.h> #include <asm/page.h> .text @@ -16,6 +17,7 @@ ENTRY(copy_page) popm $r2, $r10 ret ENDPROC(copy_page) +EXPORT_SYMBOL(copy_page) ENTRY(clear_page) pushm $r1, $r9 @@ -35,3 +37,4 @@ ENTRY(clear_page) popm $r1, $r9 ret ENDPROC(clear_page) +EXPORT_SYMBOL(clear_page) diff --git a/arch/nds32/mm/alignment.c b/arch/nds32/mm/alignment.c index b96a01b10ca7..e1aed9dc692d 100644 --- a/arch/nds32/mm/alignment.c +++ b/arch/nds32/mm/alignment.c @@ -19,7 +19,7 @@ #define RA(inst) (((inst) >> 15) & 0x1FUL) #define RB(inst) (((inst) >> 10) & 0x1FUL) #define SV(inst) (((inst) >> 8) & 0x3UL) -#define IMM(inst) (((inst) >> 0) & 0x3FFFUL) +#define IMM(inst) (((inst) >> 0) & 0x7FFFUL) #define RA3(inst) (((inst) >> 3) & 0x7UL) #define RT3(inst) (((inst) >> 6) & 0x7UL) @@ -28,6 +28,9 @@ #define RA5(inst) (((inst) >> 0) & 0x1FUL) #define RT4(inst) (((inst) >> 5) & 0xFUL) +#define GET_IMMSVAL(imm_value) \ + (((imm_value >> 14) & 0x1) ? (imm_value - 0x8000) : imm_value) + #define __get8_data(val,addr,err) \ __asm__( \ "1: lbi.bi %1, [%2], #1\n" \ @@ -467,7 +470,7 @@ static inline int do_32(unsigned long inst, struct pt_regs *regs) } if (imm) - shift = IMM(inst) * len; + shift = GET_IMMSVAL(IMM(inst)) * len; else shift = *idx_to_addr(regs, RB(inst)) << SV(inst); @@ -552,7 +555,7 @@ static struct ctl_table alignment_tbl[3] = { static struct ctl_table nds32_sysctl_table[2] = { { - .procname = "unaligned_acess", + .procname = "unaligned_access", .mode = 0555, .child = alignment_tbl}, {} diff --git a/arch/nds32/mm/cacheflush.c b/arch/nds32/mm/cacheflush.c index 6eb786a399a2..ce8fd34497bf 100644 --- a/arch/nds32/mm/cacheflush.c +++ b/arch/nds32/mm/cacheflush.c @@ -147,6 +147,25 @@ void flush_cache_vunmap(unsigned long start, unsigned long end) cpu_icache_inval_all(); } +void copy_user_page(void *vto, void *vfrom, unsigned long vaddr, + struct page *to) +{ + cpu_dcache_wbinval_page((unsigned long)vaddr); + cpu_icache_inval_page((unsigned long)vaddr); + copy_page(vto, vfrom); + cpu_dcache_wbinval_page((unsigned long)vto); + cpu_icache_inval_page((unsigned long)vto); +} + +void clear_user_page(void *addr, unsigned long vaddr, struct page *page) +{ + cpu_dcache_wbinval_page((unsigned long)vaddr); + cpu_icache_inval_page((unsigned long)vaddr); + clear_page(addr); + cpu_dcache_wbinval_page((unsigned long)addr); + cpu_icache_inval_page((unsigned long)addr); +} + void copy_user_highpage(struct page *to, struct page *from, unsigned long vaddr, struct vm_area_struct *vma) { @@ -156,11 +175,9 @@ void copy_user_highpage(struct page *to, struct page *from, pto = page_to_phys(to); pfrom = page_to_phys(from); + local_irq_save(flags); if (aliasing(vaddr, (unsigned long)kfrom)) cpu_dcache_wb_page((unsigned long)kfrom); - if (aliasing(vaddr, (unsigned long)kto)) - cpu_dcache_inval_page((unsigned long)kto); - local_irq_save(flags); vto = kremap0(vaddr, pto); vfrom = kremap1(vaddr, pfrom); copy_page((void *)vto, (void *)vfrom); @@ -198,21 +215,25 @@ void flush_dcache_page(struct page *page) if (mapping && !mapping_mapped(mapping)) set_bit(PG_dcache_dirty, &page->flags); else { - int i, pc; - unsigned long vto, kaddr, flags; + unsigned long kaddr, flags; + kaddr = (unsigned long)page_address(page); - cpu_dcache_wbinval_page(kaddr); - pc = CACHE_SET(DCACHE) * CACHE_LINE_SIZE(DCACHE) / PAGE_SIZE; local_irq_save(flags); - for (i = 0; i < pc; i++) { - vto = - kremap0(kaddr + i * PAGE_SIZE, page_to_phys(page)); - cpu_dcache_wbinval_page(vto); - kunmap01(vto); + cpu_dcache_wbinval_page(kaddr); + if (mapping) { + unsigned long vaddr, kto; + + vaddr = page->index << PAGE_SHIFT; + if (aliasing(vaddr, kaddr)) { + kto = kremap0(vaddr, page_to_phys(page)); + cpu_dcache_wbinval_page(kto); + kunmap01(kto); + } } local_irq_restore(flags); } } +EXPORT_SYMBOL(flush_dcache_page); void copy_to_user_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr, void *dst, void *src, int len) @@ -251,7 +272,7 @@ void copy_from_user_page(struct vm_area_struct *vma, struct page *page, void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vaddr) { - unsigned long flags; + unsigned long kaddr, flags, ktmp; if (!PageAnon(page)) return; @@ -261,7 +282,12 @@ void flush_anon_page(struct vm_area_struct *vma, local_irq_save(flags); if (vma->vm_flags & VM_EXEC) cpu_icache_inval_page(vaddr & PAGE_MASK); - cpu_dcache_wbinval_page((unsigned long)page_address(page)); + kaddr = (unsigned long)page_address(page); + if (aliasing(vaddr, kaddr)) { + ktmp = kremap0(vaddr, page_to_phys(page)); + cpu_dcache_wbinval_page(ktmp); + kunmap01(ktmp); + } local_irq_restore(flags); } @@ -272,6 +298,25 @@ void flush_kernel_dcache_page(struct page *page) cpu_dcache_wbinval_page((unsigned long)page_address(page)); local_irq_restore(flags); } +EXPORT_SYMBOL(flush_kernel_dcache_page); + +void flush_kernel_vmap_range(void *addr, int size) +{ + unsigned long flags; + local_irq_save(flags); + cpu_dcache_wb_range((unsigned long)addr, (unsigned long)addr + size); + local_irq_restore(flags); +} +EXPORT_SYMBOL(flush_kernel_vmap_range); + +void invalidate_kernel_vmap_range(void *addr, int size) +{ + unsigned long flags; + local_irq_save(flags); + cpu_dcache_inval_range((unsigned long)addr, (unsigned long)addr + size); + local_irq_restore(flags); +} +EXPORT_SYMBOL(invalidate_kernel_vmap_range); void flush_icache_range(unsigned long start, unsigned long end) { @@ -283,6 +328,7 @@ void flush_icache_range(unsigned long start, unsigned long end) cpu_cache_wbinval_range(start, end, 1); local_irq_restore(flags); } +EXPORT_SYMBOL(flush_icache_range); void flush_icache_page(struct vm_area_struct *vma, struct page *page) { diff --git a/arch/nds32/mm/fault.c b/arch/nds32/mm/fault.c index 3a246fb8098c..9bdb7c3ecbb6 100644 --- a/arch/nds32/mm/fault.c +++ b/arch/nds32/mm/fault.c @@ -72,7 +72,7 @@ void do_page_fault(unsigned long entry, unsigned long addr, struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; - siginfo_t info; + int si_code; int fault; unsigned int mask = VM_READ | VM_WRITE | VM_EXEC; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -80,7 +80,7 @@ void do_page_fault(unsigned long entry, unsigned long addr, error_code = error_code & (ITYPE_mskINST | ITYPE_mskETYPE); tsk = current; mm = tsk->mm; - info.si_code = SEGV_MAPERR; + si_code = SEGV_MAPERR; /* * We fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. @@ -161,7 +161,7 @@ retry: */ good_area: - info.si_code = SEGV_ACCERR; + si_code = SEGV_ACCERR; /* first do some preliminary protection checks */ if (entry == ENTRY_PTE_NOT_PRESENT) { @@ -266,11 +266,7 @@ bad_area_nosemaphore: tsk->thread.address = addr; tsk->thread.error_code = error_code; tsk->thread.trap_no = entry; - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* info.si_code has been set above */ - info.si_addr = (void *)addr; - force_sig_info(SIGSEGV, &info, tsk); + force_sig_fault(SIGSEGV, si_code, (void __user *)addr, tsk); return; } @@ -339,11 +335,7 @@ do_sigbus: tsk->thread.address = addr; tsk->thread.error_code = error_code; tsk->thread.trap_no = entry; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void *)addr; - force_sig_info(SIGBUS, &info, tsk); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)addr, tsk); return; diff --git a/arch/nds32/mm/init.c b/arch/nds32/mm/init.c index 93ee0160720b..c713d2ad55dc 100644 --- a/arch/nds32/mm/init.c +++ b/arch/nds32/mm/init.c @@ -30,6 +30,7 @@ extern unsigned long phys_initrd_size; * zero-initialized data and COW. */ struct page *empty_zero_page; +EXPORT_SYMBOL(empty_zero_page); static void __init zone_sizes_init(void) { diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index d232da2cbb38..64ed3d656956 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += bitops.h generic-y += bug.h generic-y += bugs.h generic-y += cmpxchg.h +generic-y += compat.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c index 8184e7d6b385..3bc3cd22b750 100644 --- a/arch/nios2/kernel/traps.c +++ b/arch/nios2/kernel/traps.c @@ -26,13 +26,7 @@ static DEFINE_SPINLOCK(die_lock); static void _send_sig(int signo, int code, unsigned long addr) { - siginfo_t info; - - info.si_signo = signo; - info.si_errno = 0; - info.si_code = code; - info.si_addr = (void __user *) addr; - force_sig_info(signo, &info, current); + force_sig_fault(signo, code, (void __user *) addr, current); } void die(const char *str, struct pt_regs *regs, long err) diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index f05c722a21f8..65964d390b10 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -2,6 +2,7 @@ generic-y += barrier.h generic-y += bug.h generic-y += bugs.h generic-y += checksum.h +generic-y += compat.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index a945f00011b4..ec7fd45704d2 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -247,14 +247,3 @@ const struct dma_map_ops or1k_dma_map_ops = { .sync_single_for_device = or1k_sync_single_for_device, }; EXPORT_SYMBOL(or1k_dma_map_ops); - -/* Number of entries preallocated for DMA-API debugging */ -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) - -static int __init dma_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - - return 0; -} -fs_initcall(dma_init); diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 113c175fe469..fac246e6f37a 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -250,27 +250,16 @@ void __init trap_init(void) asmlinkage void do_trap(struct pt_regs *regs, unsigned long address) { - siginfo_t info; - memset(&info, 0, sizeof(info)); - info.si_signo = SIGTRAP; - info.si_code = TRAP_TRACE; - info.si_addr = (void *)address; - force_sig_info(SIGTRAP, &info, current); + force_sig_fault(SIGTRAP, TRAP_TRACE, (void __user *)address, current); regs->pc += 4; } asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address) { - siginfo_t info; - if (user_mode(regs)) { /* Send a SIGBUS */ - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRALN; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)address, current); } else { printk("KERNEL: Unaligned Access 0x%.8lx\n", address); show_registers(regs); @@ -281,15 +270,9 @@ asmlinkage void do_unaligned_access(struct pt_regs *regs, unsigned long address) asmlinkage void do_bus_fault(struct pt_regs *regs, unsigned long address) { - siginfo_t info; - if (user_mode(regs)) { /* Send a SIGBUS */ - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void *)address; - force_sig_info(SIGBUS, &info, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, current); } else { /* Kernel mode */ printk("KERNEL: Bus error (SIGBUS) 0x%.8lx\n", address); show_registers(regs); @@ -464,7 +447,6 @@ static inline void simulate_swa(struct pt_regs *regs, unsigned long address, asmlinkage void do_illegal_instruction(struct pt_regs *regs, unsigned long address) { - siginfo_t info; unsigned int op; unsigned int insn = *((unsigned int *)address); @@ -485,11 +467,7 @@ asmlinkage void do_illegal_instruction(struct pt_regs *regs, if (user_mode(regs)) { /* Send a SIGILL */ - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_ILLOPC; - info.si_addr = (void *)address; - force_sig_info(SIGBUS, &info, current); + force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)address, current); } else { /* Kernel mode */ printk("KERNEL: Illegal instruction (SIGILL) 0x%.8lx\n", address); diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index d0021dfae20a..9f011d16cc46 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -52,7 +52,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address, struct task_struct *tsk; struct mm_struct *mm; struct vm_area_struct *vma; - siginfo_t info; + int si_code; int fault; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; @@ -97,7 +97,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address, } mm = tsk->mm; - info.si_code = SEGV_MAPERR; + si_code = SEGV_MAPERR; /* * If we're in an interrupt or have no user @@ -139,7 +139,7 @@ retry: */ good_area: - info.si_code = SEGV_ACCERR; + si_code = SEGV_ACCERR; /* first do some preliminary protection checks */ @@ -213,11 +213,7 @@ bad_area_nosemaphore: /* User mode accesses just cause a SIGSEGV */ if (user_mode(regs)) { - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* info.si_code has been set above */ - info.si_addr = (void *)address; - force_sig_info(SIGSEGV, &info, tsk); + force_sig_fault(SIGSEGV, si_code, (void __user *)address, tsk); return; } @@ -282,11 +278,7 @@ do_sigbus: * Send a sigbus, regardless of whether we were in kernel * or user mode. */ - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void *)address; - force_sig_info(SIGBUS, &info, tsk); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, tsk); /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index fc5a574c3482..4d8f64d48597 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -51,6 +51,8 @@ config PARISC select GENERIC_CLOCKEVENTS select ARCH_NO_COHERENT_DMA_MMAP select CPU_NO_EFFICIENT_FFS + select NEED_DMA_MAP_STATE + select NEED_SG_DMA_LENGTH help The PA-RISC microprocessor is designed by Hewlett-Packard and used @@ -111,12 +113,6 @@ config PM config STACKTRACE_SUPPORT def_bool y -config NEED_DMA_MAP_STATE - def_bool y - -config NEED_SG_DMA_LENGTH - def_bool y - config ISA_DMA_API bool diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index e2364ff59180..34ac503e28ad 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -123,6 +123,9 @@ INSTALL_TARGETS = zinstall install PHONY += bzImage $(BOOT_TARGETS) $(INSTALL_TARGETS) +# Default kernel to build +all: bzImage + zImage: vmlinuz Image: vmlinux diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h index 57b8b2a2fd4e..ab8a54771507 100644 --- a/arch/parisc/include/asm/compat.h +++ b/arch/parisc/include/asm/compat.h @@ -13,7 +13,6 @@ typedef u32 compat_size_t; typedef s32 compat_ssize_t; -typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; typedef u32 __compat_uid_t; @@ -40,16 +39,6 @@ typedef u32 compat_ulong_t; typedef u64 compat_u64; typedef u32 compat_uptr_t; -struct compat_timespec { - compat_time_t tv_sec; - s32 tv_nsec; -}; - -struct compat_timeval { - compat_time_t tv_sec; - s32 tv_usec; -}; - struct compat_stat { compat_dev_t st_dev; /* dev_t is 32 bits on parisc */ compat_ino_t st_ino; /* 32 bits */ @@ -149,10 +138,10 @@ struct compat_ipc64_perm { struct compat_semid64_ds { struct compat_ipc64_perm sem_perm; - unsigned int __unused1; - compat_time_t sem_otime; - unsigned int __unused2; - compat_time_t sem_ctime; + unsigned int sem_otime_high; + unsigned int sem_otime; + unsigned int sem_ctime_high; + unsigned int sem_ctime; compat_ulong_t sem_nsems; compat_ulong_t __unused3; compat_ulong_t __unused4; @@ -160,12 +149,12 @@ struct compat_semid64_ds { struct compat_msqid64_ds { struct compat_ipc64_perm msg_perm; - unsigned int __unused1; - compat_time_t msg_stime; - unsigned int __unused2; - compat_time_t msg_rtime; - unsigned int __unused3; - compat_time_t msg_ctime; + unsigned int msg_stime_high; + unsigned int msg_stime; + unsigned int msg_rtime_high; + unsigned int msg_rtime; + unsigned int msg_ctime_high; + unsigned int msg_ctime; compat_ulong_t msg_cbytes; compat_ulong_t msg_qnum; compat_ulong_t msg_qbytes; @@ -177,12 +166,12 @@ struct compat_msqid64_ds { struct compat_shmid64_ds { struct compat_ipc64_perm shm_perm; - unsigned int __unused1; - compat_time_t shm_atime; - unsigned int __unused2; - compat_time_t shm_dtime; - unsigned int __unused3; - compat_time_t shm_ctime; + unsigned int shm_atime_high; + unsigned int shm_atime; + unsigned int shm_dtime_high; + unsigned int shm_dtime; + unsigned int shm_ctime_high; + unsigned int shm_ctime; unsigned int __unused4; compat_size_t shm_segsz; compat_pid_t shm_cpid; diff --git a/arch/parisc/include/asm/hardirq.h b/arch/parisc/include/asm/hardirq.h index 077815169258..1a1235a9d533 100644 --- a/arch/parisc/include/asm/hardirq.h +++ b/arch/parisc/include/asm/hardirq.h @@ -34,14 +34,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); #define __IRQ_STAT(cpu, member) (irq_stat[cpu].member) #define inc_irq_stat(member) this_cpu_inc(irq_stat.member) #define __inc_irq_stat(member) __this_cpu_inc(irq_stat.member) -#define local_softirq_pending() this_cpu_read(irq_stat.__softirq_pending) - -#define __ARCH_SET_SOFTIRQ_PENDING - -#define set_softirq_pending(x) \ - this_cpu_write(irq_stat.__softirq_pending, (x)) -#define or_softirq_pending(x) this_cpu_or(irq_stat.__softirq_pending, (x)) - #define ack_bad_irq(irq) WARN(1, "unexpected IRQ trap at vector %02x\n", irq) #endif /* _PARISC_HARDIRQ_H */ diff --git a/arch/parisc/include/asm/pci.h b/arch/parisc/include/asm/pci.h index 96b7deec512d..3328fd17c19d 100644 --- a/arch/parisc/include/asm/pci.h +++ b/arch/parisc/include/asm/pci.h @@ -88,29 +88,6 @@ struct pci_hba_data { #endif /* !CONFIG_64BIT */ /* - * If the PCI device's view of memory is the same as the CPU's view of memory, - * PCI_DMA_BUS_IS_PHYS is true. The networking and block device layers use - * this boolean for bounce buffer decisions. - */ -#ifdef CONFIG_PA20 -/* All PA-2.0 machines have an IOMMU. */ -#define PCI_DMA_BUS_IS_PHYS 0 -#define parisc_has_iommu() do { } while (0) -#else - -#if defined(CONFIG_IOMMU_CCIO) || defined(CONFIG_IOMMU_SBA) -extern int parisc_bus_is_phys; /* in arch/parisc/kernel/setup.c */ -#define PCI_DMA_BUS_IS_PHYS parisc_bus_is_phys -#define parisc_has_iommu() do { parisc_bus_is_phys = 0; } while (0) -#else -#define PCI_DMA_BUS_IS_PHYS 1 -#define parisc_has_iommu() do { } while (0) -#endif - -#endif /* !CONFIG_PA20 */ - - -/* ** Most PCI devices (eg Tulip, NCR720) also export the same registers ** to both MMIO and I/O port space. Due to poor performance of I/O Port ** access under HP PCI bus adapters, strongly recommend the use of MMIO diff --git a/arch/parisc/include/uapi/asm/msgbuf.h b/arch/parisc/include/uapi/asm/msgbuf.h index b48b810e626b..6a2e9ab2ef8d 100644 --- a/arch/parisc/include/uapi/asm/msgbuf.h +++ b/arch/parisc/include/uapi/asm/msgbuf.h @@ -10,31 +10,30 @@ * between kernel and user space. * * Pad space is left for: - * - 64-bit time_t to solve y2038 problem * - 2 miscellaneous 32-bit values */ struct msqid64_ds { struct ipc64_perm msg_perm; -#if __BITS_PER_LONG != 64 - unsigned int __pad1; -#endif +#if __BITS_PER_LONG == 64 __kernel_time_t msg_stime; /* last msgsnd time */ -#if __BITS_PER_LONG != 64 - unsigned int __pad2; -#endif __kernel_time_t msg_rtime; /* last msgrcv time */ -#if __BITS_PER_LONG != 64 - unsigned int __pad3; -#endif __kernel_time_t msg_ctime; /* last change time */ - unsigned long msg_cbytes; /* current number of bytes on queue */ - unsigned long msg_qnum; /* number of messages in queue */ - unsigned long msg_qbytes; /* max number of bytes on queue */ - __kernel_pid_t msg_lspid; /* pid of last msgsnd */ - __kernel_pid_t msg_lrpid; /* last receive pid */ - unsigned long __unused1; - unsigned long __unused2; +#else + unsigned long msg_stime_high; + unsigned long msg_stime; /* last msgsnd time */ + unsigned long msg_rtime_high; + unsigned long msg_rtime; /* last msgrcv time */ + unsigned long msg_ctime_high; + unsigned long msg_ctime; /* last change time */ +#endif + unsigned long msg_cbytes; /* current number of bytes on queue */ + unsigned long msg_qnum; /* number of messages in queue */ + unsigned long msg_qbytes; /* max number of bytes on queue */ + __kernel_pid_t msg_lspid; /* pid of last msgsnd */ + __kernel_pid_t msg_lrpid; /* last receive pid */ + unsigned long __unused1; + unsigned long __unused2; }; #endif /* _PARISC_MSGBUF_H */ diff --git a/arch/parisc/include/uapi/asm/sembuf.h b/arch/parisc/include/uapi/asm/sembuf.h index 746c5d86a9b1..3c31163b1241 100644 --- a/arch/parisc/include/uapi/asm/sembuf.h +++ b/arch/parisc/include/uapi/asm/sembuf.h @@ -10,21 +10,21 @@ * between kernel and user space. * * Pad space is left for: - * - 64-bit time_t to solve y2038 problem * - 2 miscellaneous 32-bit values */ struct semid64_ds { struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ -#if __BITS_PER_LONG != 64 - unsigned int __pad1; -#endif +#if __BITS_PER_LONG == 64 __kernel_time_t sem_otime; /* last semop time */ -#if __BITS_PER_LONG != 64 - unsigned int __pad2; -#endif __kernel_time_t sem_ctime; /* last change time */ - unsigned long sem_nsems; /* no. of semaphores in array */ +#else + unsigned long sem_otime_high; + unsigned long sem_otime; /* last semop time */ + unsigned long sem_ctime_high; + unsigned long sem_ctime; /* last change time */ +#endif + unsigned long sem_nsems; /* no. of semaphores in array */ unsigned long __unused1; unsigned long __unused2; }; diff --git a/arch/parisc/include/uapi/asm/shmbuf.h b/arch/parisc/include/uapi/asm/shmbuf.h index cd4dbce55d0b..c89b3dd8db21 100644 --- a/arch/parisc/include/uapi/asm/shmbuf.h +++ b/arch/parisc/include/uapi/asm/shmbuf.h @@ -10,25 +10,22 @@ * between kernel and user space. * * Pad space is left for: - * - 64-bit time_t to solve y2038 problem * - 2 miscellaneous 32-bit values */ struct shmid64_ds { struct ipc64_perm shm_perm; /* operation perms */ -#if __BITS_PER_LONG != 64 - unsigned int __pad1; -#endif +#if __BITS_PER_LONG == 64 __kernel_time_t shm_atime; /* last attach time */ -#if __BITS_PER_LONG != 64 - unsigned int __pad2; -#endif __kernel_time_t shm_dtime; /* last detach time */ -#if __BITS_PER_LONG != 64 - unsigned int __pad3; -#endif __kernel_time_t shm_ctime; /* last change time */ -#if __BITS_PER_LONG != 64 +#else + unsigned long shm_atime_high; + unsigned long shm_atime; /* last attach time */ + unsigned long shm_dtime_high; + unsigned long shm_dtime; /* last detach time */ + unsigned long shm_ctime_high; + unsigned long shm_ctime; /* last change time */ unsigned int __pad4; #endif __kernel_size_t shm_segsz; /* size of segment (bytes) */ diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 3b8507f71050..e0e1c9775c32 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -268,7 +268,7 @@ static struct parisc_device *find_device_by_addr(unsigned long hpa) * Walks up the device tree looking for a device of the specified type. * If it finds it, it returns it. If not, it returns NULL. */ -const struct parisc_device * __init +const struct parisc_device * find_pa_parent_type(const struct parisc_device *padev, int type) { const struct device *dev = &padev->dev; @@ -448,7 +448,8 @@ static int match_by_id(struct device * dev, void * data) * Checks all the children of @parent for a matching @id. If none * found, it allocates a new device and returns it. */ -static struct parisc_device * alloc_tree_node(struct device *parent, char id) +static struct parisc_device * __init alloc_tree_node( + struct device *parent, char id) { struct match_id_data d = { .id = id, @@ -825,8 +826,8 @@ static void walk_lower_bus(struct parisc_device *dev) * devices which are not physically connected (such as extra serial & * keyboard ports). This problem is not yet solved. */ -static void walk_native_bus(unsigned long io_io_low, unsigned long io_io_high, - struct device *parent) +static void __init walk_native_bus(unsigned long io_io_low, + unsigned long io_io_high, struct device *parent) { int i, devices_found = 0; unsigned long hpa = io_io_low; diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index 91bc0cac03a1..6df07ce4f3c2 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -367,19 +367,6 @@ static int proc_pcxl_dma_show(struct seq_file *m, void *v) return 0; } -static int proc_pcxl_dma_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_pcxl_dma_show, NULL); -} - -static const struct file_operations proc_pcxl_dma_ops = { - .owner = THIS_MODULE, - .open = proc_pcxl_dma_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init pcxl_dma_init(void) { @@ -397,8 +384,8 @@ pcxl_dma_init(void) "pcxl_dma_init: Unable to create gsc /proc dir entry\n"); else { struct proc_dir_entry* ent; - ent = proc_create("pcxl_dma", 0, proc_gsc_root, - &proc_pcxl_dma_ops); + ent = proc_create_single("pcxl_dma", 0, proc_gsc_root, + proc_pcxl_dma_show); if (!ent) printk(KERN_WARNING "pci-dma.c: Unable to create pcxl_dma /proc entry.\n"); diff --git a/arch/parisc/kernel/pci.c b/arch/parisc/kernel/pci.c index 13ee3569959a..ae684ac6efb6 100644 --- a/arch/parisc/kernel/pci.c +++ b/arch/parisc/kernel/pci.c @@ -174,7 +174,7 @@ void pcibios_set_master(struct pci_dev *dev) * pcibios_init_bridge() initializes cache line and default latency * for pci controllers and pci-pci bridges */ -void __init pcibios_init_bridge(struct pci_dev *dev) +void __ref pcibios_init_bridge(struct pci_dev *dev) { unsigned short bridge_ctl, bridge_ctl_new; diff --git a/arch/parisc/kernel/pdc_chassis.c b/arch/parisc/kernel/pdc_chassis.c index 3e04242de5a7..28e07482b0f1 100644 --- a/arch/parisc/kernel/pdc_chassis.c +++ b/arch/parisc/kernel/pdc_chassis.c @@ -266,18 +266,6 @@ static int pdc_chassis_warn_show(struct seq_file *m, void *v) return 0; } -static int pdc_chassis_warn_open(struct inode *inode, struct file *file) -{ - return single_open(file, pdc_chassis_warn_show, NULL); -} - -static const struct file_operations pdc_chassis_warn_fops = { - .open = pdc_chassis_warn_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init pdc_chassis_create_procfs(void) { unsigned long test; @@ -292,7 +280,7 @@ static int __init pdc_chassis_create_procfs(void) printk(KERN_INFO "Enabling PDC chassis warnings support v%s\n", PDC_CHASSIS_VER); - proc_create("chassis", 0400, NULL, &pdc_chassis_warn_fops); + proc_create_single("chassis", 0400, NULL, pdc_chassis_warn_show); return 0; } diff --git a/arch/parisc/kernel/ptrace.c b/arch/parisc/kernel/ptrace.c index 1a2be6e639b5..7aa1d4d0d444 100644 --- a/arch/parisc/kernel/ptrace.c +++ b/arch/parisc/kernel/ptrace.c @@ -76,8 +76,6 @@ void user_enable_single_step(struct task_struct *task) set_tsk_thread_flag(task, TIF_SINGLESTEP); if (pa_psw(task)->n) { - struct siginfo si; - /* Nullified, just crank over the queue. */ task_regs(task)->iaoq[0] = task_regs(task)->iaoq[1]; task_regs(task)->iasq[0] = task_regs(task)->iasq[1]; @@ -90,11 +88,9 @@ void user_enable_single_step(struct task_struct *task) ptrace_disable(task); /* Don't wake up the task, but let the parent know something happened. */ - si.si_code = TRAP_TRACE; - si.si_addr = (void __user *) (task_regs(task)->iaoq[0] & ~3); - si.si_signo = SIGTRAP; - si.si_errno = 0; - force_sig_info(SIGTRAP, &si, task); + force_sig_fault(SIGTRAP, TRAP_TRACE, + (void __user *) (task_regs(task)->iaoq[0] & ~3), + task); /* notify_parent(task, SIGCHLD); */ return; } diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 0e9675f857a5..8d3a7b80ac42 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -58,11 +58,6 @@ struct proc_dir_entry * proc_runway_root __read_mostly = NULL; struct proc_dir_entry * proc_gsc_root __read_mostly = NULL; struct proc_dir_entry * proc_mckinley_root __read_mostly = NULL; -#if !defined(CONFIG_PA20) && (defined(CONFIG_IOMMU_CCIO) || defined(CONFIG_IOMMU_SBA)) -int parisc_bus_is_phys __read_mostly = 1; /* Assume no IOMMU is present */ -EXPORT_SYMBOL(parisc_bus_is_phys); -#endif - void __init setup_cmdline(char **cmdline_p) { extern unsigned int boot_args[]; diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 4065b5e48c9d..5e26dbede5fc 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -423,8 +423,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) } #ifdef CONFIG_PROC_FS -int __init -setup_profiling_timer(unsigned int multiplier) +int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index c3830400ca28..a1e772f909cb 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -205,7 +205,7 @@ static int __init rtc_init(void) device_initcall(rtc_init); #endif -void read_persistent_clock(struct timespec *ts) +void read_persistent_clock64(struct timespec64 *ts) { static struct pdc_tod tod_data; if (pdc_tod_read(&tod_data) == 0) { diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 68e671a11987..4309ad31a874 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -297,13 +297,8 @@ void die_if_kernel(char *str, struct pt_regs *regs, long err) #define GDB_BREAK_INSN 0x10004 static void handle_gdb_break(struct pt_regs *regs, int wot) { - struct siginfo si; - - si.si_signo = SIGTRAP; - si.si_errno = 0; - si.si_code = wot; - si.si_addr = (void __user *) (regs->iaoq[0] & ~3); - force_sig_info(SIGTRAP, &si, current); + force_sig_fault(SIGTRAP, wot, + (void __user *) (regs->iaoq[0] & ~3), current); } static void handle_break(struct pt_regs *regs) @@ -487,7 +482,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) { unsigned long fault_address = 0; unsigned long fault_space = 0; - struct siginfo si; + int si_code; if (code == 1) pdc_console_restart(); /* switch back to pdc if HPMC */ @@ -571,7 +566,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) case 8: /* Illegal instruction trap */ die_if_kernel("Illegal instruction", regs, code); - si.si_code = ILL_ILLOPC; + si_code = ILL_ILLOPC; goto give_sigill; case 9: @@ -582,7 +577,7 @@ void notrace handle_interruption(int code, struct pt_regs *regs) case 10: /* Privileged operation trap */ die_if_kernel("Privileged operation", regs, code); - si.si_code = ILL_PRVOPC; + si_code = ILL_PRVOPC; goto give_sigill; case 11: @@ -605,20 +600,16 @@ void notrace handle_interruption(int code, struct pt_regs *regs) } die_if_kernel("Privileged register usage", regs, code); - si.si_code = ILL_PRVREG; + si_code = ILL_PRVREG; give_sigill: - si.si_signo = SIGILL; - si.si_errno = 0; - si.si_addr = (void __user *) regs->iaoq[0]; - force_sig_info(SIGILL, &si, current); + force_sig_fault(SIGILL, si_code, + (void __user *) regs->iaoq[0], current); return; case 12: /* Overflow Trap, let the userland signal handler do the cleanup */ - si.si_signo = SIGFPE; - si.si_code = FPE_INTOVF; - si.si_addr = (void __user *) regs->iaoq[0]; - force_sig_info(SIGFPE, &si, current); + force_sig_fault(SIGFPE, FPE_INTOVF, + (void __user *) regs->iaoq[0], current); return; case 13: @@ -626,13 +617,11 @@ void notrace handle_interruption(int code, struct pt_regs *regs) The condition succeeds in an instruction which traps on condition */ if(user_mode(regs)){ - si.si_signo = SIGFPE; /* Let userspace app figure it out from the insn pointed * to by si_addr. */ - si.si_code = FPE_CONDTRAP; - si.si_addr = (void __user *) regs->iaoq[0]; - force_sig_info(SIGFPE, &si, current); + force_sig_fault(SIGFPE, FPE_CONDTRAP, + (void __user *) regs->iaoq[0], current); return; } /* The kernel doesn't want to handle condition codes */ @@ -741,14 +730,10 @@ void notrace handle_interruption(int code, struct pt_regs *regs) return; die_if_kernel("Protection id trap", regs, code); - si.si_code = SEGV_MAPERR; - si.si_signo = SIGSEGV; - si.si_errno = 0; - if (code == 7) - si.si_addr = (void __user *) regs->iaoq[0]; - else - si.si_addr = (void __user *) regs->ior; - force_sig_info(SIGSEGV, &si, current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, + (code == 7)? + ((void __user *) regs->iaoq[0]) : + ((void __user *) regs->ior), current); return; case 28: @@ -762,11 +747,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs) "handle_interruption() pid=%d command='%s'\n", task_pid_nr(current), current->comm); /* SIGBUS, for lack of a better one. */ - si.si_signo = SIGBUS; - si.si_code = BUS_OBJERR; - si.si_errno = 0; - si.si_addr = (void __user *) regs->ior; - force_sig_info(SIGBUS, &si, current); + force_sig_fault(SIGBUS, BUS_OBJERR, + (void __user *)regs->ior, current); return; } pdc_chassis_send_status(PDC_CHASSIS_DIRECT_PANIC); @@ -781,11 +763,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs) "User fault %d on space 0x%08lx, pid=%d command='%s'\n", code, fault_space, task_pid_nr(current), current->comm); - si.si_signo = SIGSEGV; - si.si_errno = 0; - si.si_code = SEGV_MAPERR; - si.si_addr = (void __user *) regs->ior; - force_sig_info(SIGSEGV, &si, current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, + (void __user *)regs->ior, current); return; } } @@ -837,6 +816,17 @@ void __init initialize_ivt(const void *iva) if (pdc_instr(&instr) == PDC_OK) ivap[0] = instr; + /* + * Rules for the checksum of the HPMC handler: + * 1. The IVA does not point to PDC/PDH space (ie: the OS has installed + * its own IVA). + * 2. The word at IVA + 32 is nonzero. + * 3. If Length (IVA + 60) is not zero, then Length (IVA + 60) and + * Address (IVA + 56) are word-aligned. + * 4. The checksum of the 8 words starting at IVA + 32 plus the sum of + * the Length/4 words starting at Address is zero. + */ + /* Compute Checksum for HPMC handler */ length = os_hpmc_size; ivap[7] = length; diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index e36f7b75ab07..932bfc0b7cd8 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -452,7 +452,6 @@ void handle_unaligned(struct pt_regs *regs) unsigned long newbase = R1(regs->iir)?regs->gr[R1(regs->iir)]:0; int modify = 0; int ret = ERR_NOTHANDLED; - struct siginfo si; register int flop=0; /* true if this is a flop */ __inc_irq_stat(irq_unaligned_count); @@ -690,21 +689,15 @@ void handle_unaligned(struct pt_regs *regs) if (ret == ERR_PAGEFAULT) { - si.si_signo = SIGSEGV; - si.si_errno = 0; - si.si_code = SEGV_MAPERR; - si.si_addr = (void __user *)regs->ior; - force_sig_info(SIGSEGV, &si, current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, + (void __user *)regs->ior, current); } else { force_sigbus: /* couldn't handle it ... */ - si.si_signo = SIGBUS; - si.si_errno = 0; - si.si_code = BUS_ADRALN; - si.si_addr = (void __user *)regs->ior; - force_sig_info(SIGBUS, &si, current); + force_sig_fault(SIGBUS, BUS_ADRALN, + (void __user *)regs->ior, current); } return; diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c index 2fb59d2e2b29..0590e05571d1 100644 --- a/arch/parisc/math-emu/driver.c +++ b/arch/parisc/math-emu/driver.c @@ -81,7 +81,6 @@ int handle_fpe(struct pt_regs *regs) { extern void printbinary(unsigned long x, int nbits); - struct siginfo si; unsigned int orig_sw, sw; int signalcode; /* need an intermediate copy of float regs because FPU emulation @@ -117,11 +116,8 @@ handle_fpe(struct pt_regs *regs) memcpy(regs->fr, frcopy, sizeof regs->fr); if (signalcode != 0) { - si.si_signo = signalcode >> 24; - si.si_errno = 0; - si.si_code = signalcode & 0xffffff; - si.si_addr = (void __user *) regs->iaoq[0]; - force_sig_info(si.si_signo, &si, current); + force_sig_fault(signalcode >> 24, signalcode & 0xffffff, + (void __user *) regs->iaoq[0], current); return -1; } diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index e247edbca68e..a80117980fc2 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -353,23 +353,22 @@ bad_area: up_read(&mm->mmap_sem); if (user_mode(regs)) { - struct siginfo si; - unsigned int lsb = 0; + int signo, si_code; switch (code) { case 15: /* Data TLB miss fault/Data page fault */ /* send SIGSEGV when outside of vma */ if (!vma || address < vma->vm_start || address >= vma->vm_end) { - si.si_signo = SIGSEGV; - si.si_code = SEGV_MAPERR; + signo = SIGSEGV; + si_code = SEGV_MAPERR; break; } /* send SIGSEGV for wrong permissions */ if ((vma->vm_flags & acc_type) != acc_type) { - si.si_signo = SIGSEGV; - si.si_code = SEGV_ACCERR; + signo = SIGSEGV; + si_code = SEGV_ACCERR; break; } @@ -377,43 +376,40 @@ bad_area: /* fall through */ case 17: /* NA data TLB miss / page fault */ case 18: /* Unaligned access - PCXS only */ - si.si_signo = SIGBUS; - si.si_code = (code == 18) ? BUS_ADRALN : BUS_ADRERR; + signo = SIGBUS; + si_code = (code == 18) ? BUS_ADRALN : BUS_ADRERR; break; case 16: /* Non-access instruction TLB miss fault */ case 26: /* PCXL: Data memory access rights trap */ default: - si.si_signo = SIGSEGV; - si.si_code = (code == 26) ? SEGV_ACCERR : SEGV_MAPERR; + signo = SIGSEGV; + si_code = (code == 26) ? SEGV_ACCERR : SEGV_MAPERR; break; } - #ifdef CONFIG_MEMORY_FAILURE if (fault & (VM_FAULT_HWPOISON|VM_FAULT_HWPOISON_LARGE)) { + unsigned int lsb = 0; printk(KERN_ERR "MCE: Killing %s:%d due to hardware memory corruption fault at %08lx\n", tsk->comm, tsk->pid, address); - si.si_signo = SIGBUS; - si.si_code = BUS_MCEERR_AR; + /* + * Either small page or large page may be poisoned. + * In other words, VM_FAULT_HWPOISON_LARGE and + * VM_FAULT_HWPOISON are mutually exclusive. + */ + if (fault & VM_FAULT_HWPOISON_LARGE) + lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); + else if (fault & VM_FAULT_HWPOISON) + lsb = PAGE_SHIFT; + + force_sig_mceerr(BUS_MCEERR_AR, (void __user *) address, + lsb, current); + return; } #endif + show_signal_msg(regs, code, address, tsk, vma); - /* - * Either small page or large page may be poisoned. - * In other words, VM_FAULT_HWPOISON_LARGE and - * VM_FAULT_HWPOISON are mutually exclusive. - */ - if (fault & VM_FAULT_HWPOISON_LARGE) - lsb = hstate_index_to_shift(VM_FAULT_GET_HINDEX(fault)); - else if (fault & VM_FAULT_HWPOISON) - lsb = PAGE_SHIFT; - else - show_signal_msg(regs, code, address, tsk, vma); - si.si_addr_lsb = lsb; - - si.si_errno = 0; - si.si_addr = (void __user *) address; - force_sig_info(si.si_signo, &si, current); + force_sig_fault(signo, si_code, (void __user *) address, current); return; } diff --git a/arch/parisc/mm/init.c b/arch/parisc/mm/init.c index cab32ee824d2..2607d2d33405 100644 --- a/arch/parisc/mm/init.c +++ b/arch/parisc/mm/init.c @@ -516,7 +516,7 @@ static void __init map_pages(unsigned long start_vaddr, } } -void free_initmem(void) +void __ref free_initmem(void) { unsigned long init_begin = (unsigned long)__init_begin; unsigned long init_end = (unsigned long)__init_end; diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c32a181a7cbb..f674006dea2f 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -13,12 +13,6 @@ config 64BIT bool default y if PPC64 -config ARCH_PHYS_ADDR_T_64BIT - def_bool PPC64 || PHYS_64BIT - -config ARCH_DMA_ADDR_T_64BIT - def_bool ARCH_PHYS_ADDR_T_64BIT - config MMU bool default y @@ -187,7 +181,6 @@ config PPC select HAVE_CONTEXT_TRACKING if PPC64 select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW - select HAVE_DMA_API_DEBUG select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL select HAVE_EBPF_JIT if PPC64 @@ -223,9 +216,11 @@ config PPC select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING select HAVE_IRQ_TIME_ACCOUNTING + select IOMMU_HELPER if PPC64 select IRQ_DOMAIN select IRQ_FORCED_THREADING select MODULES_USE_ELF_RELA + select NEED_SG_DMA_LENGTH select NO_BOOTMEM select OF select OF_EARLY_FLATTREE @@ -478,19 +473,6 @@ config MPROFILE_KERNEL depends on PPC64 && CPU_LITTLE_ENDIAN def_bool !DISABLE_MPROFILE_KERNEL -config IOMMU_HELPER - def_bool PPC64 - -config SWIOTLB - bool "SWIOTLB support" - default n - select IOMMU_HELPER - ---help--- - Support for IO bounce buffering for systems without an IOMMU. - This allows us to DMA to the full physical address space on - platforms where the size of a physical address is larger - than the bus address. Not all platforms support this. - config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" depends on SMP && (PPC_PSERIES || \ @@ -883,7 +865,7 @@ config PPC_MEM_KEYS page-based protections, but without requiring modification of the page tables when an application changes protection domains. - For details, see Documentation/vm/protection-keys.txt + For details, see Documentation/vm/protection-keys.rst If unsure, say y. @@ -913,9 +895,6 @@ config ZONE_DMA config NEED_DMA_MAP_STATE def_bool (PPC64 || NOT_COHERENT_CACHE) -config NEED_SG_DMA_LENGTH - def_bool y - config GENERIC_ISA_DMA bool depends on ISA_DMA_API diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index 62168e1158f1..85c8af2bb272 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -17,7 +17,6 @@ typedef u32 compat_size_t; typedef s32 compat_ssize_t; -typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; typedef u32 __compat_uid_t; @@ -45,16 +44,6 @@ typedef u32 compat_ulong_t; typedef u64 compat_u64; typedef u32 compat_uptr_t; -struct compat_timespec { - compat_time_t tv_sec; - s32 tv_nsec; -}; - -struct compat_timeval { - compat_time_t tv_sec; - s32 tv_usec; -}; - struct compat_stat { compat_dev_t st_dev; compat_ino_t st_ino; @@ -173,10 +162,10 @@ struct compat_ipc64_perm { struct compat_semid64_ds { struct compat_ipc64_perm sem_perm; - unsigned int __unused1; - compat_time_t sem_otime; - unsigned int __unused2; - compat_time_t sem_ctime; + unsigned int sem_otime_high; + unsigned int sem_otime; + unsigned int sem_ctime_high; + unsigned int sem_ctime; compat_ulong_t sem_nsems; compat_ulong_t __unused3; compat_ulong_t __unused4; @@ -184,12 +173,12 @@ struct compat_semid64_ds { struct compat_msqid64_ds { struct compat_ipc64_perm msg_perm; - unsigned int __unused1; - compat_time_t msg_stime; - unsigned int __unused2; - compat_time_t msg_rtime; - unsigned int __unused3; - compat_time_t msg_ctime; + unsigned int msg_stime_high; + unsigned int msg_stime; + unsigned int msg_rtime_high; + unsigned int msg_rtime; + unsigned int msg_ctime_high; + unsigned int msg_ctime; compat_ulong_t msg_cbytes; compat_ulong_t msg_qnum; compat_ulong_t msg_qbytes; @@ -201,12 +190,12 @@ struct compat_msqid64_ds { struct compat_shmid64_ds { struct compat_ipc64_perm shm_perm; - unsigned int __unused1; - compat_time_t shm_atime; - unsigned int __unused2; - compat_time_t shm_dtime; - unsigned int __unused3; - compat_time_t shm_ctime; + unsigned int shm_atime_high; + unsigned int shm_atime; + unsigned int shm_dtime_high; + unsigned int shm_dtime; + unsigned int shm_ctime_high; + unsigned int shm_ctime; unsigned int __unused4; compat_size_t shm_segsz; compat_pid_t shm_cpid; diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 471b2274fbeb..c40b4380951c 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -74,6 +74,27 @@ */ #define EX_R3 EX_DAR +#define STF_ENTRY_BARRIER_SLOT \ + STF_ENTRY_BARRIER_FIXUP_SECTION; \ + nop; \ + nop; \ + nop + +#define STF_EXIT_BARRIER_SLOT \ + STF_EXIT_BARRIER_FIXUP_SECTION; \ + nop; \ + nop; \ + nop; \ + nop; \ + nop; \ + nop + +/* + * r10 must be free to use, r13 must be paca + */ +#define INTERRUPT_TO_KERNEL \ + STF_ENTRY_BARRIER_SLOT + /* * Macros for annotating the expected destination of (h)rfid * @@ -90,16 +111,19 @@ rfid #define RFI_TO_USER \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ rfid; \ b rfi_flush_fallback #define RFI_TO_USER_OR_KERNEL \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ rfid; \ b rfi_flush_fallback #define RFI_TO_GUEST \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ rfid; \ b rfi_flush_fallback @@ -108,21 +132,25 @@ hrfid #define HRFI_TO_USER \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback #define HRFI_TO_USER_OR_KERNEL \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback #define HRFI_TO_GUEST \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback #define HRFI_TO_UNKNOWN \ + STF_EXIT_BARRIER_SLOT; \ RFI_FLUSH_SLOT; \ hrfid; \ b hrfi_flush_fallback @@ -254,6 +282,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define __EXCEPTION_PROLOG_1_PRE(area) \ OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \ OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \ + INTERRUPT_TO_KERNEL; \ SAVE_CTR(r10, area); \ mfcr r9; diff --git a/arch/powerpc/include/asm/feature-fixups.h b/arch/powerpc/include/asm/feature-fixups.h index 1e82eb3caabd..a9b64df34e2a 100644 --- a/arch/powerpc/include/asm/feature-fixups.h +++ b/arch/powerpc/include/asm/feature-fixups.h @@ -187,6 +187,22 @@ label##3: \ FTR_ENTRY_OFFSET label##1b-label##3b; \ .popsection; +#define STF_ENTRY_BARRIER_FIXUP_SECTION \ +953: \ + .pushsection __stf_entry_barrier_fixup,"a"; \ + .align 2; \ +954: \ + FTR_ENTRY_OFFSET 953b-954b; \ + .popsection; + +#define STF_EXIT_BARRIER_FIXUP_SECTION \ +955: \ + .pushsection __stf_exit_barrier_fixup,"a"; \ + .align 2; \ +956: \ + FTR_ENTRY_OFFSET 955b-956b; \ + .popsection; + #define RFI_FLUSH_FIXUP_SECTION \ 951: \ .pushsection __rfi_flush_fixup,"a"; \ @@ -199,6 +215,9 @@ label##3: \ #ifndef __ASSEMBLY__ #include <linux/types.h> +extern long stf_barrier_fallback; +extern long __start___stf_entry_barrier_fixup, __stop___stf_entry_barrier_fixup; +extern long __start___stf_exit_barrier_fixup, __stop___stf_exit_barrier_fixup; extern long __start___rfi_flush_fixup, __stop___rfi_flush_fixup; void apply_feature_fixups(void); diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 9abddde372ab..b2dabd06659d 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -69,17 +69,30 @@ struct dyn_arch_ftrace { #endif #if defined(CONFIG_FTRACE_SYSCALLS) && !defined(__ASSEMBLY__) -#ifdef PPC64_ELF_ABI_v1 +/* + * Some syscall entry functions on powerpc start with "ppc_" (fork and clone, + * for instance) or ppc32_/ppc64_. We should also match the sys_ variant with + * those. + */ #define ARCH_HAS_SYSCALL_MATCH_SYM_NAME +#ifdef PPC64_ELF_ABI_v1 +static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) +{ + /* We need to skip past the initial dot, and the __se_sys alias */ + return !strcmp(sym + 1, name) || + (!strncmp(sym, ".__se_sys", 9) && !strcmp(sym + 6, name)) || + (!strncmp(sym, ".ppc_", 5) && !strcmp(sym + 5, name + 4)) || + (!strncmp(sym, ".ppc32_", 7) && !strcmp(sym + 7, name + 4)) || + (!strncmp(sym, ".ppc64_", 7) && !strcmp(sym + 7, name + 4)); +} +#else static inline bool arch_syscall_match_sym_name(const char *sym, const char *name) { - /* - * Compare the symbol name with the system call name. Skip the .sys or .SyS - * prefix from the symbol name and the sys prefix from the system call name and - * just match the rest. This is only needed on ppc64 since symbol names on - * 32bit do not start with a period so the generic function will work. - */ - return !strcmp(sym + 4, name + 3); + return !strcmp(sym, name) || + (!strncmp(sym, "__se_sys", 8) && !strcmp(sym + 5, name)) || + (!strncmp(sym, "ppc_", 4) && !strcmp(sym + 4, name + 4)) || + (!strncmp(sym, "ppc32_", 6) && !strcmp(sym + 6, name + 4)) || + (!strncmp(sym, "ppc64_", 6) && !strcmp(sym + 6, name + 4)); } #endif #endif /* CONFIG_FTRACE_SYSCALLS && !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/hardirq.h b/arch/powerpc/include/asm/hardirq.h index 5986d473722b..383f628acbf8 100644 --- a/arch/powerpc/include/asm/hardirq.h +++ b/arch/powerpc/include/asm/hardirq.h @@ -25,15 +25,8 @@ typedef struct { DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); #define __ARCH_IRQ_STAT - -#define local_softirq_pending() __this_cpu_read(irq_stat.__softirq_pending) - -#define __ARCH_SET_SOFTIRQ_PENDING #define __ARCH_IRQ_EXIT_IRQS_DISABLED -#define set_softirq_pending(x) __this_cpu_write(irq_stat.__softirq_pending, (x)) -#define or_softirq_pending(x) __this_cpu_or(irq_stat.__softirq_pending, (x)) - static inline void ack_bad_irq(unsigned int irq) { printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq); diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 4c02a7378d06..e7377b73cfec 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -96,6 +96,7 @@ struct kvmppc_vcore { struct kvm_vcpu *runner; struct kvm *kvm; u64 tb_offset; /* guest timebase - host timebase */ + u64 tb_offset_applied; /* timebase offset currently in force */ ulong lpcr; u32 arch_compat; ulong pcr; diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 4185f1c96125..3f109a3e3edb 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -165,7 +165,6 @@ struct paca_struct { u64 saved_msr; /* MSR saved here by enter_rtas */ u16 trap_save; /* Used when bad stack is encountered */ u8 irq_soft_mask; /* mask for irq soft masking */ - u8 soft_enabled; /* irq soft-enable flag */ u8 irq_happened; /* irq happened while soft-disabled */ u8 io_sync; /* writel() needs spin_unlock sync */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index 401c62aad5e4..2af9ded80540 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -92,24 +92,6 @@ extern int pci_mmap_legacy_page_range(struct pci_bus *bus, #define HAVE_PCI_LEGACY 1 -#ifdef CONFIG_PPC64 - -/* The PCI address space does not equal the physical memory address - * space (we have an IOMMU). The IDE and SCSI device layers use - * this boolean for bounce buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (0) - -#else /* 32-bit */ - -/* The PCI address space does equal the physical memory - * address space (no IOMMU). The IDE and SCSI device layers use - * this boolean for bounce buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (1) - -#endif /* CONFIG_PPC64 */ - extern void pcibios_claim_one_bus(struct pci_bus *b); extern void pcibios_finish_adding_to_bus(struct pci_bus *bus); diff --git a/arch/powerpc/include/asm/security_features.h b/arch/powerpc/include/asm/security_features.h index fa4d2e1cf772..44989b22383c 100644 --- a/arch/powerpc/include/asm/security_features.h +++ b/arch/powerpc/include/asm/security_features.h @@ -12,6 +12,17 @@ extern unsigned long powerpc_security_features; extern bool rfi_flush; +/* These are bit flags */ +enum stf_barrier_type { + STF_BARRIER_NONE = 0x1, + STF_BARRIER_FALLBACK = 0x2, + STF_BARRIER_EIEIO = 0x4, + STF_BARRIER_SYNC_ORI = 0x8, +}; + +void setup_stf_barrier(void); +void do_stf_barrier_fixups(enum stf_barrier_type types); + static inline void security_ftr_set(unsigned long feature) { powerpc_security_features |= feature; diff --git a/arch/powerpc/include/asm/topology.h b/arch/powerpc/include/asm/topology.h index 9f421641a35c..16b077801a5f 100644 --- a/arch/powerpc/include/asm/topology.h +++ b/arch/powerpc/include/asm/topology.h @@ -91,6 +91,7 @@ extern int start_topology_update(void); extern int stop_topology_update(void); extern int prrn_is_enabled(void); extern int find_and_online_cpu_nid(int cpu); +extern int timed_topology_update(int nsecs); #else static inline int start_topology_update(void) { @@ -108,16 +109,12 @@ static inline int find_and_online_cpu_nid(int cpu) { return 0; } +static inline int timed_topology_update(int nsecs) +{ + return 0; +} #endif /* CONFIG_NUMA && CONFIG_PPC_SPLPAR */ -#if defined(CONFIG_HOTPLUG_CPU) || defined(CONFIG_NEED_MULTIPLE_NODES) -#if defined(CONFIG_PPC_SPLPAR) -extern int timed_topology_update(int nsecs); -#else -#define timed_topology_update(nsecs) -#endif /* CONFIG_PPC_SPLPAR */ -#endif /* CONFIG_HOTPLUG_CPU || CONFIG_NEED_MULTIPLE_NODES */ - #include <asm-generic/topology.h> #ifdef CONFIG_SMP diff --git a/arch/powerpc/include/uapi/asm/msgbuf.h b/arch/powerpc/include/uapi/asm/msgbuf.h index 65beb0942500..2b1b37797a47 100644 --- a/arch/powerpc/include/uapi/asm/msgbuf.h +++ b/arch/powerpc/include/uapi/asm/msgbuf.h @@ -10,18 +10,18 @@ struct msqid64_ds { struct ipc64_perm msg_perm; -#ifndef __powerpc64__ - unsigned int __unused1; -#endif +#ifdef __powerpc64__ __kernel_time_t msg_stime; /* last msgsnd time */ -#ifndef __powerpc64__ - unsigned int __unused2; -#endif __kernel_time_t msg_rtime; /* last msgrcv time */ -#ifndef __powerpc64__ - unsigned int __unused3; -#endif __kernel_time_t msg_ctime; /* last change time */ +#else + unsigned long msg_stime_high; + unsigned long msg_stime; /* last msgsnd time */ + unsigned long msg_rtime_high; + unsigned long msg_rtime; /* last msgrcv time */ + unsigned long msg_ctime_high; + unsigned long msg_ctime; /* last change time */ +#endif unsigned long msg_cbytes; /* current number of bytes on queue */ unsigned long msg_qnum; /* number of messages in queue */ unsigned long msg_qbytes; /* max number of bytes on queue */ diff --git a/arch/powerpc/include/uapi/asm/sembuf.h b/arch/powerpc/include/uapi/asm/sembuf.h index 8f393d60f02d..3f60946f77e3 100644 --- a/arch/powerpc/include/uapi/asm/sembuf.h +++ b/arch/powerpc/include/uapi/asm/sembuf.h @@ -15,20 +15,20 @@ * between kernel and user space. * * Pad space is left for: - * - 64-bit time_t to solve y2038 problem - * - 2 miscellaneous 32-bit values + * - 2 miscellaneous 32/64-bit values */ struct semid64_ds { struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ #ifndef __powerpc64__ - unsigned long __unused1; -#endif + unsigned long sem_otime_high; + unsigned long sem_otime; /* last semop time */ + unsigned long sem_ctime_high; + unsigned long sem_ctime; /* last change time */ +#else __kernel_time_t sem_otime; /* last semop time */ -#ifndef __powerpc64__ - unsigned long __unused2; -#endif __kernel_time_t sem_ctime; /* last change time */ +#endif unsigned long sem_nsems; /* no. of semaphores in array */ unsigned long __unused3; unsigned long __unused4; diff --git a/arch/powerpc/include/uapi/asm/shmbuf.h b/arch/powerpc/include/uapi/asm/shmbuf.h index deb1c3e503d3..b591c4d7e4c5 100644 --- a/arch/powerpc/include/uapi/asm/shmbuf.h +++ b/arch/powerpc/include/uapi/asm/shmbuf.h @@ -16,25 +16,22 @@ * between kernel and user space. * * Pad space is left for: - * - 64-bit time_t to solve y2038 problem * - 2 miscellaneous 32-bit values */ struct shmid64_ds { struct ipc64_perm shm_perm; /* operation perms */ -#ifndef __powerpc64__ - unsigned long __unused1; -#endif +#ifdef __powerpc64__ __kernel_time_t shm_atime; /* last attach time */ -#ifndef __powerpc64__ - unsigned long __unused2; -#endif __kernel_time_t shm_dtime; /* last detach time */ -#ifndef __powerpc64__ - unsigned long __unused3; -#endif __kernel_time_t shm_ctime; /* last change time */ -#ifndef __powerpc64__ +#else + unsigned long shm_atime_high; + unsigned long shm_atime; /* last attach time */ + unsigned long shm_dtime_high; + unsigned long shm_dtime; /* last detach time */ + unsigned long shm_ctime_high; + unsigned long shm_ctime; /* last change time */ unsigned long __unused4; #endif size_t shm_segsz; /* size of segment (bytes) */ diff --git a/arch/powerpc/include/uapi/asm/siginfo.h b/arch/powerpc/include/uapi/asm/siginfo.h index 9f142451a01f..1d51d9b88221 100644 --- a/arch/powerpc/include/uapi/asm/siginfo.h +++ b/arch/powerpc/include/uapi/asm/siginfo.h @@ -15,19 +15,4 @@ #include <asm-generic/siginfo.h> -/* - * SIGFPE si_codes - */ -#ifdef __KERNEL__ -#define FPE_FIXME 0 /* Broken dup of SI_USER */ -#endif /* __KERNEL__ */ - -/* - * SIGTRAP si_codes - */ -#ifdef __KERNEL__ -#define TRAP_FIXME 0 /* Broken dup of SI_USER */ -#endif /* __KERNEL__ */ - - #endif /* _ASM_POWERPC_SIGINFO_H */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 6bee65f3cfd3..8817c5a6bcc2 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -13,6 +13,7 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/compat.h> #include <linux/signal.h> #include <linux/sched.h> #include <linux/kernel.h> @@ -42,7 +43,6 @@ #include <asm/paca.h> #include <asm/lppaca.h> #include <asm/cache.h> -#include <asm/compat.h> #include <asm/mmu.h> #include <asm/hvcall.h> #include <asm/xics.h> @@ -562,6 +562,7 @@ int main(void) OFFSET(VCORE_NAPPING_THREADS, kvmppc_vcore, napping_threads); OFFSET(VCORE_KVM, kvmppc_vcore, kvm); OFFSET(VCORE_TB_OFFSET, kvmppc_vcore, tb_offset); + OFFSET(VCORE_TB_OFFSET_APPL, kvmppc_vcore, tb_offset_applied); OFFSET(VCORE_LPCR, kvmppc_vcore, lpcr); OFFSET(VCORE_PCR, kvmppc_vcore, pcr); OFFSET(VCORE_DPDES, kvmppc_vcore, dpdes); diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 3f30c994e931..458b928dbd84 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -28,6 +28,7 @@ _GLOBAL(__setup_cpu_power7) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR li r4,(LPCR_LPES1 >> LPCR_LPES_SH) bl __init_LPCR_ISA206 @@ -41,6 +42,7 @@ _GLOBAL(__restore_cpu_power7) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR li r4,(LPCR_LPES1 >> LPCR_LPES_SH) bl __init_LPCR_ISA206 @@ -57,6 +59,7 @@ _GLOBAL(__setup_cpu_power8) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH li r4,0 /* LPES = 0 */ @@ -78,6 +81,7 @@ _GLOBAL(__restore_cpu_power8) beqlr li r0,0 mtspr SPRN_LPID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR ori r3, r3, LPCR_PECEDH li r4,0 /* LPES = 0 */ @@ -99,6 +103,7 @@ _GLOBAL(__setup_cpu_power9) mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mtspr SPRN_PID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 @@ -123,6 +128,7 @@ _GLOBAL(__restore_cpu_power9) mtspr SPRN_PSSCR,r0 mtspr SPRN_LPID,r0 mtspr SPRN_PID,r0 + mtspr SPRN_PCR,r0 mfspr r3,SPRN_LPCR LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE | LPCR_HEIC) or r3, r3, r4 diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index da20569de9d4..138157deeadf 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -309,8 +309,6 @@ int dma_set_coherent_mask(struct device *dev, u64 mask) } EXPORT_SYMBOL(dma_set_coherent_mask); -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) - int dma_set_mask(struct device *dev, u64 dma_mask) { if (ppc_md.dma_set_mask) @@ -361,7 +359,6 @@ EXPORT_SYMBOL_GPL(dma_get_required_mask); static int __init dma_init(void) { - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); #ifdef CONFIG_PCI dma_debug_add_bus(&pci_bus_type); #endif diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 8ab51f6ca03a..c904477abaf3 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -101,6 +101,7 @@ static void __restore_cpu_cpufeatures(void) if (hv_mode) { mtspr(SPRN_LPID, 0); mtspr(SPRN_HFSCR, system_registers.hfscr); + mtspr(SPRN_PCR, 0); } mtspr(SPRN_FSCR, system_registers.fscr); diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index bc640e4c5ca5..90bb39b1a23c 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c @@ -1775,18 +1775,6 @@ static int proc_eeh_show(struct seq_file *m, void *v) return 0; } -static int proc_eeh_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_eeh_show, NULL); -} - -static const struct file_operations proc_eeh_operations = { - .open = proc_eeh_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - #ifdef CONFIG_DEBUG_FS static int eeh_enable_dbgfs_set(void *data, u64 val) { @@ -1828,7 +1816,7 @@ DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get, static int __init eeh_init_proc(void) { if (machine_is(pseries) || machine_is(powernv)) { - proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations); + proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show); #ifdef CONFIG_DEBUG_FS debugfs_create_file("eeh_enable", 0600, powerpc_debugfs_root, NULL, diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ae6a849db60b..f283958129f2 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -885,7 +885,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM) #endif -EXC_REAL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED) +EXC_REAL_OOL_MASKABLE(decrementer, 0x900, 0x80, IRQS_DISABLED) EXC_VIRT_MASKABLE(decrementer, 0x4900, 0x80, 0x900, IRQS_DISABLED) TRAMP_KVM(PACA_EXGEN, 0x900) EXC_COMMON_ASYNC(decrementer_common, 0x900, timer_interrupt) @@ -961,6 +961,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) mtctr r13; \ GET_PACA(r13); \ std r10,PACA_EXGEN+EX_R10(r13); \ + INTERRUPT_TO_KERNEL; \ KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \ HMT_MEDIUM; \ mfctr r9; @@ -969,7 +970,8 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) #define SYSCALL_KVMTEST \ HMT_MEDIUM; \ mr r9,r13; \ - GET_PACA(r13); + GET_PACA(r13); \ + INTERRUPT_TO_KERNEL; #endif #define LOAD_SYSCALL_HANDLER(reg) \ @@ -1507,6 +1509,19 @@ masked_##_H##interrupt: \ b .; \ MASKED_DEC_HANDLER(_H) +TRAMP_REAL_BEGIN(stf_barrier_fallback) + std r9,PACA_EXRFI+EX_R9(r13) + std r10,PACA_EXRFI+EX_R10(r13) + sync + ld r9,PACA_EXRFI+EX_R9(r13) + ld r10,PACA_EXRFI+EX_R10(r13) + ori 31,31,0 + .rept 14 + b 1f +1: + .endr + blr + TRAMP_REAL_BEGIN(rfi_flush_fallback) SET_SCRATCH0(r13); GET_PACA(r13); diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 1237f13fed51..26ea9793d290 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -632,6 +632,7 @@ void do_break (struct pt_regs *regs, unsigned long address, hw_breakpoint_disable(); /* Deliver the signal to userspace */ + clear_siginfo(&info); info.si_signo = SIGTRAP; info.si_errno = 0; info.si_code = TRAP_HWBKPT; diff --git a/arch/powerpc/kernel/rtas-proc.c b/arch/powerpc/kernel/rtas-proc.c index fb070d8cad07..d49063d0baa4 100644 --- a/arch/powerpc/kernel/rtas-proc.c +++ b/arch/powerpc/kernel/rtas-proc.c @@ -154,18 +154,6 @@ static ssize_t ppc_rtas_tone_volume_write(struct file *file, static int ppc_rtas_tone_volume_show(struct seq_file *m, void *v); static int ppc_rtas_rmo_buf_show(struct seq_file *m, void *v); -static int sensors_open(struct inode *inode, struct file *file) -{ - return single_open(file, ppc_rtas_sensors_show, NULL); -} - -static const struct file_operations ppc_rtas_sensors_operations = { - .open = sensors_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int poweron_open(struct inode *inode, struct file *file) { return single_open(file, ppc_rtas_poweron_show, NULL); @@ -231,18 +219,6 @@ static const struct file_operations ppc_rtas_tone_volume_operations = { .release = single_release, }; -static int rmo_buf_open(struct inode *inode, struct file *file) -{ - return single_open(file, ppc_rtas_rmo_buf_show, NULL); -} - -static const struct file_operations ppc_rtas_rmo_buf_ops = { - .open = rmo_buf_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int ppc_rtas_find_all_sensors(void); static void ppc_rtas_process_sensor(struct seq_file *m, struct individual_sensor *s, int state, int error, const char *loc); @@ -267,14 +243,14 @@ static int __init proc_rtas_init(void) &ppc_rtas_clock_operations); proc_create("powerpc/rtas/poweron", 0644, NULL, &ppc_rtas_poweron_operations); - proc_create("powerpc/rtas/sensors", 0444, NULL, - &ppc_rtas_sensors_operations); + proc_create_single("powerpc/rtas/sensors", 0444, NULL, + ppc_rtas_sensors_show); proc_create("powerpc/rtas/frequency", 0644, NULL, &ppc_rtas_tone_freq_operations); proc_create("powerpc/rtas/volume", 0644, NULL, &ppc_rtas_tone_volume_operations); - proc_create("powerpc/rtas/rmo_buffer", 0400, NULL, - &ppc_rtas_rmo_buf_ops); + proc_create_single("powerpc/rtas/rmo_buffer", 0400, NULL, + ppc_rtas_rmo_buf_show); return 0; } diff --git a/arch/powerpc/kernel/security.c b/arch/powerpc/kernel/security.c index bab5a27ea805..b98a722da915 100644 --- a/arch/powerpc/kernel/security.c +++ b/arch/powerpc/kernel/security.c @@ -8,6 +8,7 @@ #include <linux/device.h> #include <linux/seq_buf.h> +#include <asm/debugfs.h> #include <asm/security_features.h> @@ -86,3 +87,151 @@ ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, c return s.len; } + +/* + * Store-forwarding barrier support. + */ + +static enum stf_barrier_type stf_enabled_flush_types; +static bool no_stf_barrier; +bool stf_barrier; + +static int __init handle_no_stf_barrier(char *p) +{ + pr_info("stf-barrier: disabled on command line."); + no_stf_barrier = true; + return 0; +} + +early_param("no_stf_barrier", handle_no_stf_barrier); + +/* This is the generic flag used by other architectures */ +static int __init handle_ssbd(char *p) +{ + if (!p || strncmp(p, "auto", 5) == 0 || strncmp(p, "on", 2) == 0 ) { + /* Until firmware tells us, we have the barrier with auto */ + return 0; + } else if (strncmp(p, "off", 3) == 0) { + handle_no_stf_barrier(NULL); + return 0; + } else + return 1; + + return 0; +} +early_param("spec_store_bypass_disable", handle_ssbd); + +/* This is the generic flag used by other architectures */ +static int __init handle_no_ssbd(char *p) +{ + handle_no_stf_barrier(NULL); + return 0; +} +early_param("nospec_store_bypass_disable", handle_no_ssbd); + +static void stf_barrier_enable(bool enable) +{ + if (enable) + do_stf_barrier_fixups(stf_enabled_flush_types); + else + do_stf_barrier_fixups(STF_BARRIER_NONE); + + stf_barrier = enable; +} + +void setup_stf_barrier(void) +{ + enum stf_barrier_type type; + bool enable, hv; + + hv = cpu_has_feature(CPU_FTR_HVMODE); + + /* Default to fallback in case fw-features are not available */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) + type = STF_BARRIER_EIEIO; + else if (cpu_has_feature(CPU_FTR_ARCH_207S)) + type = STF_BARRIER_SYNC_ORI; + else if (cpu_has_feature(CPU_FTR_ARCH_206)) + type = STF_BARRIER_FALLBACK; + else + type = STF_BARRIER_NONE; + + enable = security_ftr_enabled(SEC_FTR_FAVOUR_SECURITY) && + (security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR) || + (security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && hv)); + + if (type == STF_BARRIER_FALLBACK) { + pr_info("stf-barrier: fallback barrier available\n"); + } else if (type == STF_BARRIER_SYNC_ORI) { + pr_info("stf-barrier: hwsync barrier available\n"); + } else if (type == STF_BARRIER_EIEIO) { + pr_info("stf-barrier: eieio barrier available\n"); + } + + stf_enabled_flush_types = type; + + if (!no_stf_barrier) + stf_barrier_enable(enable); +} + +ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf) +{ + if (stf_barrier && stf_enabled_flush_types != STF_BARRIER_NONE) { + const char *type; + switch (stf_enabled_flush_types) { + case STF_BARRIER_EIEIO: + type = "eieio"; + break; + case STF_BARRIER_SYNC_ORI: + type = "hwsync"; + break; + case STF_BARRIER_FALLBACK: + type = "fallback"; + break; + default: + type = "unknown"; + } + return sprintf(buf, "Mitigation: Kernel entry/exit barrier (%s)\n", type); + } + + if (!security_ftr_enabled(SEC_FTR_L1D_FLUSH_HV) && + !security_ftr_enabled(SEC_FTR_L1D_FLUSH_PR)) + return sprintf(buf, "Not affected\n"); + + return sprintf(buf, "Vulnerable\n"); +} + +#ifdef CONFIG_DEBUG_FS +static int stf_barrier_set(void *data, u64 val) +{ + bool enable; + + if (val == 1) + enable = true; + else if (val == 0) + enable = false; + else + return -EINVAL; + + /* Only do anything if we're changing state */ + if (enable != stf_barrier) + stf_barrier_enable(enable); + + return 0; +} + +static int stf_barrier_get(void *data, u64 *val) +{ + *val = stf_barrier ? 1 : 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(fops_stf_barrier, stf_barrier_get, stf_barrier_set, "%llu\n"); + +static __init int stf_barrier_debugfs_init(void) +{ + debugfs_create_file("stf_barrier", 0600, powerpc_debugfs_root, NULL, &fops_stf_barrier); + return 0; +} +device_initcall(stf_barrier_debugfs_init); +#endif /* CONFIG_DEBUG_FS */ diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 0904492e7032..0e17dcb48720 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -296,7 +296,6 @@ NOKPROBE_SYMBOL(die); void user_single_step_siginfo(struct task_struct *tsk, struct pt_regs *regs, siginfo_t *info) { - memset(info, 0, sizeof(*info)); info->si_signo = SIGTRAP; info->si_code = TRAP_TRACE; info->si_addr = (void __user *)regs->nip; @@ -334,7 +333,7 @@ void _exception_pkey(int signr, struct pt_regs *regs, int code, */ thread_pkey_regs_save(¤t->thread); - memset(&info, 0, sizeof(info)); + clear_siginfo(&info); info.si_signo = signr; info.si_code = code; info.si_addr = (void __user *) addr; @@ -970,7 +969,7 @@ void unknown_exception(struct pt_regs *regs) printk("Bad trap at PC: %lx, SR: %lx, vector=%lx\n", regs->nip, regs->msr, regs->trap); - _exception(SIGTRAP, regs, TRAP_FIXME, 0); + _exception(SIGTRAP, regs, TRAP_UNK, 0); exception_exit(prev_state); } @@ -992,7 +991,7 @@ bail: void RunModeException(struct pt_regs *regs) { - _exception(SIGTRAP, regs, TRAP_FIXME, 0); + _exception(SIGTRAP, regs, TRAP_UNK, 0); } void single_step_exception(struct pt_regs *regs) @@ -1032,7 +1031,7 @@ static void emulate_single_step(struct pt_regs *regs) static inline int __parse_fpscr(unsigned long fpscr) { - int ret = FPE_FIXME; + int ret = FPE_FLTUNK; /* Invalid operation */ if ((fpscr & FPSCR_VE) && (fpscr & FPSCR_VX)) @@ -1973,7 +1972,7 @@ void SPEFloatingPointException(struct pt_regs *regs) extern int do_spe_mathemu(struct pt_regs *regs); unsigned long spefscr; int fpexc_mode; - int code = FPE_FIXME; + int code = FPE_FLTUNK; int err; flush_spe_to_thread(current); @@ -2042,7 +2041,7 @@ void SPEFloatingPointRoundException(struct pt_regs *regs) printk(KERN_ERR "unrecognized spe instruction " "in %s at %lx\n", current->comm, regs->nip); } else { - _exception(SIGFPE, regs, FPE_FIXME, regs->nip); + _exception(SIGFPE, regs, FPE_FLTUNK, regs->nip); return; } } diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index c8af90ff49f0..b8d82678f8b4 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -134,6 +134,20 @@ SECTIONS #ifdef CONFIG_PPC64 . = ALIGN(8); + __stf_entry_barrier_fixup : AT(ADDR(__stf_entry_barrier_fixup) - LOAD_OFFSET) { + __start___stf_entry_barrier_fixup = .; + *(__stf_entry_barrier_fixup) + __stop___stf_entry_barrier_fixup = .; + } + + . = ALIGN(8); + __stf_exit_barrier_fixup : AT(ADDR(__stf_exit_barrier_fixup) - LOAD_OFFSET) { + __start___stf_exit_barrier_fixup = .; + *(__stf_exit_barrier_fixup) + __stop___stf_exit_barrier_fixup = .; + } + + . = ALIGN(8); __rfi_flush_fixup : AT(ADDR(__rfi_flush_fixup) - LOAD_OFFSET) { __start___rfi_flush_fixup = .; *(__rfi_flush_fixup) diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index a57eafec4dc2..361f42c8c73e 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -162,7 +162,7 @@ static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, if (cpu_has_feature(CPU_FTR_P9_TLBIE_BUG)) asm volatile(PPC_TLBIE_5(%0, %1, 0, 0, 1) : : "r" (addr), "r" (kvm->arch.lpid) : "memory"); - asm volatile("ptesync": : :"memory"); + asm volatile("eieio ; tlbsync ; ptesync": : :"memory"); } static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr) @@ -173,7 +173,7 @@ static void kvmppc_radix_flush_pwc(struct kvm *kvm, unsigned long addr) /* RIC=1 PRS=0 R=1 IS=2 */ asm volatile(PPC_TLBIE_5(%0, %1, 1, 0, 1) : : "r" (rb), "r" (kvm->arch.lpid) : "memory"); - asm volatile("ptesync": : :"memory"); + asm volatile("eieio ; tlbsync ; ptesync": : :"memory"); } unsigned long kvmppc_radix_update_pte(struct kvm *kvm, pte_t *ptep, @@ -584,7 +584,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep)) { - old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_PRESENT, 0, + old = kvmppc_radix_update_pte(kvm, ptep, ~0UL, 0, gpa, shift); kvmppc_radix_tlbie_page(kvm, gpa, shift); if ((old & _PAGE_DIRTY) && memslot->dirty_bitmap) { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 4d07fca5121c..9963f65c212b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2441,6 +2441,7 @@ static void init_vcore_to_run(struct kvmppc_vcore *vc) vc->in_guest = 0; vc->napping_threads = 0; vc->conferring_threads = 0; + vc->tb_offset_applied = 0; } static bool can_dynamic_split(struct kvmppc_vcore *vc, struct core_info *cip) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index bd63fa8a08b5..07ca1b2a7966 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -692,6 +692,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) 22: ld r8,VCORE_TB_OFFSET(r5) cmpdi r8,0 beq 37f + std r8, VCORE_TB_OFFSET_APPL(r5) mftb r6 /* current host timebase */ add r8,r8,r6 mtspr SPRN_TBU40,r8 /* update upper 40 bits */ @@ -940,18 +941,6 @@ FTR_SECTION_ELSE ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) 8: - /* - * Set the decrementer to the guest decrementer. - */ - ld r8,VCPU_DEC_EXPIRES(r4) - /* r8 is a host timebase value here, convert to guest TB */ - ld r5,HSTATE_KVM_VCORE(r13) - ld r6,VCORE_TB_OFFSET(r5) - add r8,r8,r6 - mftb r7 - subf r3,r7,r8 - mtspr SPRN_DEC,r3 - ld r5, VCPU_SPRG0(r4) ld r6, VCPU_SPRG1(r4) ld r7, VCPU_SPRG2(r4) @@ -1005,6 +994,18 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_LPCR,r8 isync + /* + * Set the decrementer to the guest decrementer. + */ + ld r8,VCPU_DEC_EXPIRES(r4) + /* r8 is a host timebase value here, convert to guest TB */ + ld r5,HSTATE_KVM_VCORE(r13) + ld r6,VCORE_TB_OFFSET_APPL(r5) + add r8,r8,r6 + mftb r7 + subf r3,r7,r8 + mtspr SPRN_DEC,r3 + /* Check if HDEC expires soon */ mfspr r3, SPRN_HDEC EXTEND_HDEC(r3) @@ -1597,8 +1598,27 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX) guest_bypass: stw r12, STACK_SLOT_TRAP(r1) - mr r3, r12 + + /* Save DEC */ + /* Do this before kvmhv_commence_exit so we know TB is guest TB */ + ld r3, HSTATE_KVM_VCORE(r13) + mfspr r5,SPRN_DEC + mftb r6 + /* On P9, if the guest has large decr enabled, don't sign extend */ +BEGIN_FTR_SECTION + ld r4, VCORE_LPCR(r3) + andis. r4, r4, LPCR_LD@h + bne 16f +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + extsw r5,r5 +16: add r5,r5,r6 + /* r5 is a guest timebase value here, convert to host TB */ + ld r4,VCORE_TB_OFFSET_APPL(r3) + subf r5,r4,r5 + std r5,VCPU_DEC_EXPIRES(r9) + /* Increment exit count, poke other threads to exit */ + mr r3, r12 bl kvmhv_commence_exit nop ld r9, HSTATE_KVM_VCPU(r13) @@ -1639,23 +1659,6 @@ guest_bypass: mtspr SPRN_PURR,r3 mtspr SPRN_SPURR,r4 - /* Save DEC */ - ld r3, HSTATE_KVM_VCORE(r13) - mfspr r5,SPRN_DEC - mftb r6 - /* On P9, if the guest has large decr enabled, don't sign extend */ -BEGIN_FTR_SECTION - ld r4, VCORE_LPCR(r3) - andis. r4, r4, LPCR_LD@h - bne 16f -END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) - extsw r5,r5 -16: add r5,r5,r6 - /* r5 is a guest timebase value here, convert to host TB */ - ld r4,VCORE_TB_OFFSET(r3) - subf r5,r4,r5 - std r5,VCPU_DEC_EXPIRES(r9) - BEGIN_FTR_SECTION b 8f END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) @@ -1905,6 +1908,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) cmpwi cr2, r0, 0 beq cr2, 4f + /* + * Radix: do eieio; tlbsync; ptesync sequence in case we + * interrupted the guest between a tlbie and a ptesync. + */ + eieio + tlbsync + ptesync + /* Radix: Handle the case where the guest used an illegal PID */ LOAD_REG_ADDR(r4, mmu_base_pid) lwz r3, VCPU_GUEST_PID(r9) @@ -2017,9 +2028,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 27: /* Subtract timebase offset from timebase */ - ld r8,VCORE_TB_OFFSET(r5) + ld r8, VCORE_TB_OFFSET_APPL(r5) cmpdi r8,0 beq 17f + li r0, 0 + std r0, VCORE_TB_OFFSET_APPL(r5) mftb r6 /* current guest timebase */ subf r8,r8,r6 mtspr SPRN_TBU40,r8 /* update upper 40 bits */ @@ -2700,7 +2713,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) add r3, r3, r5 ld r4, HSTATE_KVM_VCPU(r13) ld r5, HSTATE_KVM_VCORE(r13) - ld r6, VCORE_TB_OFFSET(r5) + ld r6, VCORE_TB_OFFSET_APPL(r5) subf r3, r6, r3 /* convert to host TB value */ std r3, VCPU_DEC_EXPIRES(r4) @@ -2799,7 +2812,7 @@ END_FTR_SECTION(CPU_FTR_TM | CPU_FTR_P9_TM_HV_ASSIST, 0) /* Restore guest decrementer */ ld r3, VCPU_DEC_EXPIRES(r4) ld r5, HSTATE_KVM_VCORE(r13) - ld r6, VCORE_TB_OFFSET(r5) + ld r6, VCORE_TB_OFFSET_APPL(r5) add r3, r3, r6 /* convert host TB to guest TB value */ mftb r7 subf r3, r7, r3 @@ -3606,12 +3619,9 @@ kvmppc_fix_pmao: */ kvmhv_start_timing: ld r5, HSTATE_KVM_VCORE(r13) - lbz r6, VCORE_IN_GUEST(r5) - cmpwi r6, 0 - beq 5f /* if in guest, need to */ - ld r6, VCORE_TB_OFFSET(r5) /* subtract timebase offset */ -5: mftb r5 - subf r5, r6, r5 + ld r6, VCORE_TB_OFFSET_APPL(r5) + mftb r5 + subf r5, r6, r5 /* subtract current timebase offset */ std r3, VCPU_CUR_ACTIVITY(r4) std r5, VCPU_ACTIVITY_START(r4) blr @@ -3622,15 +3632,12 @@ kvmhv_start_timing: */ kvmhv_accumulate_time: ld r5, HSTATE_KVM_VCORE(r13) - lbz r8, VCORE_IN_GUEST(r5) - cmpwi r8, 0 - beq 4f /* if in guest, need to */ - ld r8, VCORE_TB_OFFSET(r5) /* subtract timebase offset */ -4: ld r5, VCPU_CUR_ACTIVITY(r4) + ld r8, VCORE_TB_OFFSET_APPL(r5) + ld r5, VCPU_CUR_ACTIVITY(r4) ld r6, VCPU_ACTIVITY_START(r4) std r3, VCPU_CUR_ACTIVITY(r4) mftb r7 - subf r7, r8, r7 + subf r7, r8, r7 /* subtract current timebase offset */ std r7, VCPU_ACTIVITY_START(r4) cmpdi r5, 0 beqlr diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index c7a5deadd1cc..99c3620b40d9 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -11,6 +11,9 @@ #define XGLUE(a,b) a##b #define GLUE(a,b) XGLUE(a,b) +/* Dummy interrupt used when taking interrupts out of a queue in H_CPPR */ +#define XICS_DUMMY 1 + static void GLUE(X_PFX,ack_pending)(struct kvmppc_xive_vcpu *xc) { u8 cppr; @@ -205,6 +208,10 @@ skip_ipi: goto skip_ipi; } + /* If it's the dummy interrupt, continue searching */ + if (hirq == XICS_DUMMY) + goto skip_ipi; + /* If fetching, update queue pointers */ if (scan_type == scan_fetch) { q->idx = idx; @@ -385,9 +392,76 @@ static void GLUE(X_PFX,push_pending_to_hw)(struct kvmppc_xive_vcpu *xc) __x_writeb(prio, __x_tima + TM_SPC_SET_OS_PENDING); } +static void GLUE(X_PFX,scan_for_rerouted_irqs)(struct kvmppc_xive *xive, + struct kvmppc_xive_vcpu *xc) +{ + unsigned int prio; + + /* For each priority that is now masked */ + for (prio = xc->cppr; prio < KVMPPC_XIVE_Q_COUNT; prio++) { + struct xive_q *q = &xc->queues[prio]; + struct kvmppc_xive_irq_state *state; + struct kvmppc_xive_src_block *sb; + u32 idx, toggle, entry, irq, hw_num; + struct xive_irq_data *xd; + __be32 *qpage; + u16 src; + + idx = q->idx; + toggle = q->toggle; + qpage = READ_ONCE(q->qpage); + if (!qpage) + continue; + + /* For each interrupt in the queue */ + for (;;) { + entry = be32_to_cpup(qpage + idx); + + /* No more ? */ + if ((entry >> 31) == toggle) + break; + irq = entry & 0x7fffffff; + + /* Skip dummies and IPIs */ + if (irq == XICS_DUMMY || irq == XICS_IPI) + goto next; + sb = kvmppc_xive_find_source(xive, irq, &src); + if (!sb) + goto next; + state = &sb->irq_state[src]; + + /* Has it been rerouted ? */ + if (xc->server_num == state->act_server) + goto next; + + /* + * Allright, it *has* been re-routed, kill it from + * the queue. + */ + qpage[idx] = cpu_to_be32((entry & 0x80000000) | XICS_DUMMY); + + /* Find the HW interrupt */ + kvmppc_xive_select_irq(state, &hw_num, &xd); + + /* If it's not an LSI, set PQ to 11 the EOI will force a resend */ + if (!(xd->flags & XIVE_IRQ_FLAG_LSI)) + GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_11); + + /* EOI the source */ + GLUE(X_PFX,source_eoi)(hw_num, xd); + + next: + idx = (idx + 1) & q->msk; + if (idx == 0) + toggle ^= 1; + } + } +} + X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr) { struct kvmppc_xive_vcpu *xc = vcpu->arch.xive_vcpu; + struct kvmppc_xive *xive = vcpu->kvm->arch.xive; u8 old_cppr; pr_devel("H_CPPR(cppr=%ld)\n", cppr); @@ -407,14 +481,34 @@ X_STATIC int GLUE(X_PFX,h_cppr)(struct kvm_vcpu *vcpu, unsigned long cppr) */ smp_mb(); - /* - * We are masking less, we need to look for pending things - * to deliver and set VP pending bits accordingly to trigger - * a new interrupt otherwise we might miss MFRR changes for - * which we have optimized out sending an IPI signal. - */ - if (cppr > old_cppr) + if (cppr > old_cppr) { + /* + * We are masking less, we need to look for pending things + * to deliver and set VP pending bits accordingly to trigger + * a new interrupt otherwise we might miss MFRR changes for + * which we have optimized out sending an IPI signal. + */ GLUE(X_PFX,push_pending_to_hw)(xc); + } else { + /* + * We are masking more, we need to check the queue for any + * interrupt that has been routed to another CPU, take + * it out (replace it with the dummy) and retrigger it. + * + * This is necessary since those interrupts may otherwise + * never be processed, at least not until this CPU restores + * its CPPR. + * + * This is in theory racy vs. HW adding new interrupts to + * the queue. In practice this works because the interesting + * cases are when the guest has done a set_xive() to move the + * interrupt away, which flushes the xive, followed by the + * target CPU doing a H_CPPR. So any new interrupt coming into + * the queue must still be routed to us and isn't a source + * of concern. + */ + GLUE(X_PFX,scan_for_rerouted_irqs)(xive, xc); + } /* Apply new CPPR */ xc->hw_cppr = cppr; diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 288fe4f0db4e..e1bcdc32a851 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -23,6 +23,7 @@ #include <asm/page.h> #include <asm/sections.h> #include <asm/setup.h> +#include <asm/security_features.h> #include <asm/firmware.h> struct fixup_entry { @@ -117,6 +118,120 @@ void do_feature_fixups(unsigned long value, void *fixup_start, void *fixup_end) } #ifdef CONFIG_PPC_BOOK3S_64 +void do_stf_entry_barrier_fixups(enum stf_barrier_type types) +{ + unsigned int instrs[3], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___stf_entry_barrier_fixup), + end = PTRRELOC(&__stop___stf_entry_barrier_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + + i = 0; + if (types & STF_BARRIER_FALLBACK) { + instrs[i++] = 0x7d4802a6; /* mflr r10 */ + instrs[i++] = 0x60000000; /* branch patched below */ + instrs[i++] = 0x7d4803a6; /* mtlr r10 */ + } else if (types & STF_BARRIER_EIEIO) { + instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + } else if (types & STF_BARRIER_SYNC_ORI) { + instrs[i++] = 0x7c0004ac; /* hwsync */ + instrs[i++] = 0xe94d0000; /* ld r10,0(r13) */ + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + } + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + + if (types & STF_BARRIER_FALLBACK) + patch_branch(dest + 1, (unsigned long)&stf_barrier_fallback, + BRANCH_SET_LINK); + else + patch_instruction(dest + 1, instrs[1]); + + patch_instruction(dest + 2, instrs[2]); + } + + printk(KERN_DEBUG "stf-barrier: patched %d entry locations (%s barrier)\n", i, + (types == STF_BARRIER_NONE) ? "no" : + (types == STF_BARRIER_FALLBACK) ? "fallback" : + (types == STF_BARRIER_EIEIO) ? "eieio" : + (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync" + : "unknown"); +} + +void do_stf_exit_barrier_fixups(enum stf_barrier_type types) +{ + unsigned int instrs[6], *dest; + long *start, *end; + int i; + + start = PTRRELOC(&__start___stf_exit_barrier_fixup), + end = PTRRELOC(&__stop___stf_exit_barrier_fixup); + + instrs[0] = 0x60000000; /* nop */ + instrs[1] = 0x60000000; /* nop */ + instrs[2] = 0x60000000; /* nop */ + instrs[3] = 0x60000000; /* nop */ + instrs[4] = 0x60000000; /* nop */ + instrs[5] = 0x60000000; /* nop */ + + i = 0; + if (types & STF_BARRIER_FALLBACK || types & STF_BARRIER_SYNC_ORI) { + if (cpu_has_feature(CPU_FTR_HVMODE)) { + instrs[i++] = 0x7db14ba6; /* mtspr 0x131, r13 (HSPRG1) */ + instrs[i++] = 0x7db04aa6; /* mfspr r13, 0x130 (HSPRG0) */ + } else { + instrs[i++] = 0x7db243a6; /* mtsprg 2,r13 */ + instrs[i++] = 0x7db142a6; /* mfsprg r13,1 */ + } + instrs[i++] = 0x7c0004ac; /* hwsync */ + instrs[i++] = 0xe9ad0000; /* ld r13,0(r13) */ + instrs[i++] = 0x63ff0000; /* ori 31,31,0 speculation barrier */ + if (cpu_has_feature(CPU_FTR_HVMODE)) { + instrs[i++] = 0x7db14aa6; /* mfspr r13, 0x131 (HSPRG1) */ + } else { + instrs[i++] = 0x7db242a6; /* mfsprg r13,2 */ + } + } else if (types & STF_BARRIER_EIEIO) { + instrs[i++] = 0x7e0006ac; /* eieio + bit 6 hint */ + } + + for (i = 0; start < end; start++, i++) { + dest = (void *)start + *start; + + pr_devel("patching dest %lx\n", (unsigned long)dest); + + patch_instruction(dest, instrs[0]); + patch_instruction(dest + 1, instrs[1]); + patch_instruction(dest + 2, instrs[2]); + patch_instruction(dest + 3, instrs[3]); + patch_instruction(dest + 4, instrs[4]); + patch_instruction(dest + 5, instrs[5]); + } + printk(KERN_DEBUG "stf-barrier: patched %d exit locations (%s barrier)\n", i, + (types == STF_BARRIER_NONE) ? "no" : + (types == STF_BARRIER_FALLBACK) ? "fallback" : + (types == STF_BARRIER_EIEIO) ? "eieio" : + (types == (STF_BARRIER_SYNC_ORI)) ? "hwsync" + : "unknown"); +} + + +void do_stf_barrier_fixups(enum stf_barrier_type types) +{ + do_stf_entry_barrier_fixups(types); + do_stf_exit_barrier_fixups(types); +} + void do_rfi_flush_fixups(enum l1d_flush_type types) { unsigned int instrs[3], *dest; diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index c01d627e687a..ef268d5d9db7 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -168,6 +168,7 @@ static int do_sigbus(struct pt_regs *regs, unsigned long address, return SIGBUS; current->thread.trap_nr = BUS_ADRERR; + clear_siginfo(&info); info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_ADRERR; diff --git a/arch/powerpc/oprofile/backtrace.c b/arch/powerpc/oprofile/backtrace.c index ecc66d5f02c9..ad054dd0d666 100644 --- a/arch/powerpc/oprofile/backtrace.c +++ b/arch/powerpc/oprofile/backtrace.c @@ -7,6 +7,7 @@ * 2 of the License, or (at your option) any later version. **/ +#include <linux/compat_time.h> #include <linux/oprofile.h> #include <linux/sched.h> #include <asm/processor.h> diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype index 67d3125d0610..84b58abc08ee 100644 --- a/arch/powerpc/platforms/Kconfig.cputype +++ b/arch/powerpc/platforms/Kconfig.cputype @@ -222,6 +222,7 @@ config PTE_64BIT config PHYS_64BIT bool 'Large physical address support' if E500 || PPC_86xx depends on (44x || E500 || PPC_86xx) && !PPC_83xx && !PPC_82xx + select PHYS_ADDR_T_64BIT ---help--- This option enables kernel support for larger than 32-bit physical addresses. This feature may not be available on all cores. diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index 870c0a82d560..1e002e94d0f6 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -44,7 +44,7 @@ static void spufs_handle_event(struct spu_context *ctx, return; } - memset(&info, 0, sizeof(info)); + clear_siginfo(&info); switch (type) { case SPE_EVENT_INVALID_DMA: diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index ccc421503363..c9ef3c532169 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -1095,18 +1095,6 @@ static int show_spu_loadavg(struct seq_file *s, void *private) atomic_read(&nr_spu_contexts), idr_get_cursor(&task_active_pid_ns(current)->idr) - 1); return 0; -} - -static int spu_loadavg_open(struct inode *inode, struct file *file) -{ - return single_open(file, show_spu_loadavg, NULL); -} - -static const struct file_operations spu_loadavg_fops = { - .open = spu_loadavg_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, }; int __init spu_sched_init(void) @@ -1135,7 +1123,7 @@ int __init spu_sched_init(void) mod_timer(&spuloadavg_timer, 0); - entry = proc_create("spu_loadavg", 0, NULL, &spu_loadavg_fops); + entry = proc_create_single("spu_loadavg", 0, NULL, show_spu_loadavg); if (!entry) goto out_stop_kthread; diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c index 1bceb95f422d..5584247f5029 100644 --- a/arch/powerpc/platforms/powernv/opal-nvram.c +++ b/arch/powerpc/platforms/powernv/opal-nvram.c @@ -44,6 +44,10 @@ static ssize_t opal_nvram_read(char *buf, size_t count, loff_t *index) return count; } +/* + * This can be called in the panic path with interrupts off, so use + * mdelay in that case. + */ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index) { s64 rc = OPAL_BUSY; @@ -58,10 +62,16 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index) while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_write_nvram(__pa(buf), count, off); if (rc == OPAL_BUSY_EVENT) { - msleep(OPAL_BUSY_DELAY_MS); + if (in_interrupt() || irqs_disabled()) + mdelay(OPAL_BUSY_DELAY_MS); + else + msleep(OPAL_BUSY_DELAY_MS); opal_poll_events(NULL); } else if (rc == OPAL_BUSY) { - msleep(OPAL_BUSY_DELAY_MS); + if (in_interrupt() || irqs_disabled()) + mdelay(OPAL_BUSY_DELAY_MS); + else + msleep(OPAL_BUSY_DELAY_MS); } } diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index ef8c9ce53a61..a6648ec99ca7 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -131,6 +131,7 @@ static void __init pnv_setup_arch(void) set_arch_panic_timeout(10, ARCH_PANIC_TIMEOUT); pnv_setup_rfi_flush(); + setup_stf_barrier(); /* Initialize SMP */ pnv_smp_init(); diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index b55ad4286dc7..fdb32e056ef4 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -710,6 +710,7 @@ static void __init pSeries_setup_arch(void) fwnmi_init(); pseries_setup_rfi_flush(); + setup_stf_barrier(); /* By default, only probe PCI (can be overridden by rtas_pci) */ pci_add_flags(PCI_PROBE_ONLY); diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index cd4fd85fde84..274bc064c41f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -3,8 +3,16 @@ # see Documentation/kbuild/kconfig-language.txt. # +config 64BIT + bool + +config 32BIT + bool + config RISCV def_bool y + # even on 32-bit, physical (and DMA) addresses are > 32-bits + select PHYS_ADDR_T_64BIT select OF select OF_EARLY_FLATTREE select OF_IRQ @@ -22,7 +30,6 @@ config RISCV select GENERIC_ATOMIC64 if !64BIT || !RISCV_ISA_A select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP - select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS select HAVE_GENERIC_DMA_COHERENT select IRQ_DOMAIN @@ -39,16 +46,9 @@ config RISCV config MMU def_bool y -# even on 32-bit, physical (and DMA) addresses are > 32-bits -config ARCH_PHYS_ADDR_T_64BIT - def_bool y - config ZONE_DMA32 bool - default y - -config ARCH_DMA_ADDR_T_64BIT - def_bool y + default y if 64BIT config PAGE_OFFSET hex @@ -101,7 +101,6 @@ choice config ARCH_RV32I bool "RV32I" - select CPU_SUPPORTS_32BIT_KERNEL select 32BIT select GENERIC_ASHLDI3 select GENERIC_ASHRDI3 @@ -109,13 +108,13 @@ config ARCH_RV32I config ARCH_RV64I bool "RV64I" - select CPU_SUPPORTS_64BIT_KERNEL select 64BIT select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FTRACE_MCOUNT_RECORD select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS + select SWIOTLB endchoice @@ -171,11 +170,6 @@ config NR_CPUS depends on SMP default "8" -config CPU_SUPPORTS_32BIT_KERNEL - bool -config CPU_SUPPORTS_64BIT_KERNEL - bool - choice prompt "CPU Tuning" default TUNE_GENERIC @@ -202,24 +196,6 @@ endmenu menu "Kernel type" -choice - prompt "Kernel code model" - default 64BIT - -config 32BIT - bool "32-bit kernel" - depends on CPU_SUPPORTS_32BIT_KERNEL - help - Select this option to build a 32-bit kernel. - -config 64BIT - bool "64-bit kernel" - depends on CPU_SUPPORTS_64BIT_KERNEL - help - Select this option to build a 64-bit kernel. - -endchoice - source "mm/Kconfig" source "kernel/Kconfig.preempt" diff --git a/arch/riscv/include/asm/dma-mapping.h b/arch/riscv/include/asm/dma-mapping.h new file mode 100644 index 000000000000..8facc1c8fa05 --- /dev/null +++ b/arch/riscv/include/asm/dma-mapping.h @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#ifndef _RISCV_ASM_DMA_MAPPING_H +#define _RISCV_ASM_DMA_MAPPING_H 1 + +#ifdef CONFIG_SWIOTLB +#include <linux/swiotlb.h> +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) +{ + return &swiotlb_dma_ops; +} +#else +#include <asm-generic/dma-mapping.h> +#endif /* CONFIG_SWIOTLB */ + +#endif /* _RISCV_ASM_DMA_MAPPING_H */ diff --git a/arch/riscv/include/asm/pci.h b/arch/riscv/include/asm/pci.h index 0f2fc9ef20fc..b3638c505728 100644 --- a/arch/riscv/include/asm/pci.h +++ b/arch/riscv/include/asm/pci.h @@ -26,9 +26,6 @@ /* RISC-V shim does not initialize PCI bus */ #define pcibios_assign_all_busses() 1 -/* We do not have an IOMMU */ -#define PCI_DMA_BUS_IS_PHYS 1 - extern int isa_dma_bridge_buggy; #ifdef CONFIG_PCI diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index c11f40c1b2a8..ee44a48faf79 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -29,6 +29,7 @@ #include <linux/of_fdt.h> #include <linux/of_platform.h> #include <linux/sched/task.h> +#include <linux/swiotlb.h> #include <asm/setup.h> #include <asm/sections.h> @@ -206,6 +207,7 @@ void __init setup_arch(char **cmdline_p) setup_bootmem(); paging_init(); unflatten_device_tree(); + swiotlb_init(1); #ifdef CONFIG_SMP setup_smp(); diff --git a/arch/riscv/kernel/traps.c b/arch/riscv/kernel/traps.c index 93132cb59184..b99d9dd21fd0 100644 --- a/arch/riscv/kernel/traps.c +++ b/arch/riscv/kernel/traps.c @@ -63,18 +63,6 @@ void die(struct pt_regs *regs, const char *str) do_exit(SIGSEGV); } -static inline void do_trap_siginfo(int signo, int code, - unsigned long addr, struct task_struct *tsk) -{ - siginfo_t info; - - info.si_signo = signo; - info.si_errno = 0; - info.si_code = code; - info.si_addr = (void __user *)addr; - force_sig_info(signo, &info, tsk); -} - void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr, struct task_struct *tsk) { @@ -87,7 +75,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, show_regs(regs); } - do_trap_siginfo(signo, code, addr, tsk); + force_sig_fault(signo, code, (void __user *)addr, tsk); } static void do_trap_error(struct pt_regs *regs, int signo, int code, @@ -149,7 +137,7 @@ asmlinkage void do_trap_break(struct pt_regs *regs) } #endif /* CONFIG_GENERIC_BUG */ - do_trap_siginfo(SIGTRAP, TRAP_BRKPT, regs->sepc, current); + force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)(regs->sepc), current); regs->sepc += 0x4; } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index 199ac3e4da1d..b7deee7e738f 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -35,9 +35,6 @@ config GENERIC_BUG config GENERIC_BUG_RELATIVE_POINTERS def_bool y -config ARCH_DMA_ADDR_T_64BIT - def_bool y - config GENERIC_LOCKBREAK def_bool y if SMP && PREEMPT @@ -133,7 +130,6 @@ config S390 select HAVE_CMPXCHG_LOCAL select HAVE_COPY_THREAD_TLS select HAVE_DEBUG_KMEMLEAK - select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS select DMA_DIRECT_OPS select HAVE_DYNAMIC_FTRACE @@ -709,7 +705,11 @@ config QDIO menuconfig PCI bool "PCI support" select PCI_MSI + select IOMMU_HELPER select IOMMU_SUPPORT + select NEED_DMA_MAP_STATE + select NEED_SG_DMA_LENGTH + help Enable PCI support. @@ -733,15 +733,6 @@ config PCI_DOMAINS config HAS_IOMEM def_bool PCI -config IOMMU_HELPER - def_bool PCI - -config NEED_SG_DMA_LENGTH - def_bool PCI - -config NEED_DMA_MAP_STATE - def_bool PCI - config CHSC_SCH def_tristate m prompt "Support for CHSC subchannels" @@ -847,6 +838,10 @@ config CCW source "drivers/Kconfig" +config HAVE_PNETID + tristate + default (SMC || CCWGROUP) + source "fs/Kconfig" source "arch/s390/Kconfig.debug" diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 6176fe9795ca..941d8cc6c9f5 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -261,9 +261,9 @@ CONFIG_IP_VS_NQ=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_CHAIN_ROUTE_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m @@ -284,7 +284,7 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m +CONFIG_NF_TABLES_IPV6=y CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -305,7 +305,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_SECURITY=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m @@ -604,7 +604,6 @@ CONFIG_DETECT_HUNG_TASK=y CONFIG_WQ_WATCHDOG=y CONFIG_PANIC_ON_OOPS=y CONFIG_DEBUG_TIMEKEEPING=y -CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y CONFIG_DEBUG_LOCKDEP=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index c105bcc6d7a6..eb6f75f24208 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -259,9 +259,9 @@ CONFIG_IP_VS_NQ=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_CHAIN_ROUTE_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m @@ -282,7 +282,7 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m +CONFIG_NF_TABLES_IPV6=y CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -303,7 +303,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_SECURITY=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m diff --git a/arch/s390/crypto/arch_random.c b/arch/s390/crypto/arch_random.c index 8720e9203ecf..dd95cdbd22ce 100644 --- a/arch/s390/crypto/arch_random.c +++ b/arch/s390/crypto/arch_random.c @@ -2,14 +2,37 @@ /* * s390 arch random implementation. * - * Copyright IBM Corp. 2017 - * Author(s): Harald Freudenberger <freude@de.ibm.com> + * Copyright IBM Corp. 2017, 2018 + * Author(s): Harald Freudenberger + * + * The s390_arch_random_generate() function may be called from random.c + * in interrupt context. So this implementation does the best to be very + * fast. There is a buffer of random data which is asynchronously checked + * and filled by a workqueue thread. + * If there are enough bytes in the buffer the s390_arch_random_generate() + * just delivers these bytes. Otherwise false is returned until the + * worker thread refills the buffer. + * The worker fills the rng buffer by pulling fresh entropy from the + * high quality (but slow) true hardware random generator. This entropy + * is then spread over the buffer with an pseudo random generator PRNG. + * As the arch_get_random_seed_long() fetches 8 bytes and the calling + * function add_interrupt_randomness() counts this as 1 bit entropy the + * distribution needs to make sure there is in fact 1 bit entropy contained + * in 8 bytes of the buffer. The current values pull 32 byte entropy + * and scatter this into a 2048 byte buffer. So 8 byte in the buffer + * will contain 1 bit of entropy. + * The worker thread is rescheduled based on the charge level of the + * buffer but at least with 500 ms delay to avoid too much CPU consumption. + * So the max. amount of rng data delivered via arch_get_random_seed is + * limited to 4k bytes per second. */ #include <linux/kernel.h> #include <linux/atomic.h> #include <linux/random.h> +#include <linux/slab.h> #include <linux/static_key.h> +#include <linux/workqueue.h> #include <asm/cpacf.h> DEFINE_STATIC_KEY_FALSE(s390_arch_random_available); @@ -17,11 +40,83 @@ DEFINE_STATIC_KEY_FALSE(s390_arch_random_available); atomic64_t s390_arch_random_counter = ATOMIC64_INIT(0); EXPORT_SYMBOL(s390_arch_random_counter); +#define ARCH_REFILL_TICKS (HZ/2) +#define ARCH_PRNG_SEED_SIZE 32 +#define ARCH_RNG_BUF_SIZE 2048 + +static DEFINE_SPINLOCK(arch_rng_lock); +static u8 *arch_rng_buf; +static unsigned int arch_rng_buf_idx; + +static void arch_rng_refill_buffer(struct work_struct *); +static DECLARE_DELAYED_WORK(arch_rng_work, arch_rng_refill_buffer); + +bool s390_arch_random_generate(u8 *buf, unsigned int nbytes) +{ + /* lock rng buffer */ + if (!spin_trylock(&arch_rng_lock)) + return false; + + /* try to resolve the requested amount of bytes from the buffer */ + arch_rng_buf_idx -= nbytes; + if (arch_rng_buf_idx < ARCH_RNG_BUF_SIZE) { + memcpy(buf, arch_rng_buf + arch_rng_buf_idx, nbytes); + atomic64_add(nbytes, &s390_arch_random_counter); + spin_unlock(&arch_rng_lock); + return true; + } + + /* not enough bytes in rng buffer, refill is done asynchronously */ + spin_unlock(&arch_rng_lock); + + return false; +} +EXPORT_SYMBOL(s390_arch_random_generate); + +static void arch_rng_refill_buffer(struct work_struct *unused) +{ + unsigned int delay = ARCH_REFILL_TICKS; + + spin_lock(&arch_rng_lock); + if (arch_rng_buf_idx > ARCH_RNG_BUF_SIZE) { + /* buffer is exhausted and needs refill */ + u8 seed[ARCH_PRNG_SEED_SIZE]; + u8 prng_wa[240]; + /* fetch ARCH_PRNG_SEED_SIZE bytes of entropy */ + cpacf_trng(NULL, 0, seed, sizeof(seed)); + /* blow this entropy up to ARCH_RNG_BUF_SIZE with PRNG */ + memset(prng_wa, 0, sizeof(prng_wa)); + cpacf_prno(CPACF_PRNO_SHA512_DRNG_SEED, + &prng_wa, NULL, 0, seed, sizeof(seed)); + cpacf_prno(CPACF_PRNO_SHA512_DRNG_GEN, + &prng_wa, arch_rng_buf, ARCH_RNG_BUF_SIZE, NULL, 0); + arch_rng_buf_idx = ARCH_RNG_BUF_SIZE; + } + delay += (ARCH_REFILL_TICKS * arch_rng_buf_idx) / ARCH_RNG_BUF_SIZE; + spin_unlock(&arch_rng_lock); + + /* kick next check */ + queue_delayed_work(system_long_wq, &arch_rng_work, delay); +} + static int __init s390_arch_random_init(void) { - /* check if subfunction CPACF_PRNO_TRNG is available */ - if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG)) + /* all the needed PRNO subfunctions available ? */ + if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG) && + cpacf_query_func(CPACF_PRNO, CPACF_PRNO_SHA512_DRNG_GEN)) { + + /* alloc arch random working buffer */ + arch_rng_buf = kmalloc(ARCH_RNG_BUF_SIZE, GFP_KERNEL); + if (!arch_rng_buf) + return -ENOMEM; + + /* kick worker queue job to fill the random buffer */ + queue_delayed_work(system_long_wq, + &arch_rng_work, ARCH_REFILL_TICKS); + + /* enable arch random to the outside world */ static_branch_enable(&s390_arch_random_available); + } return 0; } diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S index e8077f0971f8..2bf01ba44107 100644 --- a/arch/s390/crypto/crc32be-vx.S +++ b/arch/s390/crypto/crc32be-vx.S @@ -13,6 +13,7 @@ */ #include <linux/linkage.h> +#include <asm/nospec-insn.h> #include <asm/vx-insn.h> /* Vector register range containing CRC-32 constants */ @@ -67,6 +68,8 @@ .previous + GEN_BR_THUNK %r14 + .text /* * The CRC-32 function(s) use these calling conventions: @@ -203,6 +206,6 @@ ENTRY(crc32_be_vgfm_16) .Ldone: VLGVF %r2,%v2,3 - br %r14 + BR_EX %r14 .previous diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S index d8c67a58c0c5..7d6f568bd3ad 100644 --- a/arch/s390/crypto/crc32le-vx.S +++ b/arch/s390/crypto/crc32le-vx.S @@ -14,6 +14,7 @@ */ #include <linux/linkage.h> +#include <asm/nospec-insn.h> #include <asm/vx-insn.h> /* Vector register range containing CRC-32 constants */ @@ -76,6 +77,7 @@ .previous + GEN_BR_THUNK %r14 .text @@ -264,6 +266,6 @@ crc32_le_vgfm_generic: .Ldone: VLGVF %r2,%v2,2 - br %r14 + BR_EX %r14 .previous diff --git a/arch/s390/hypfs/hypfs_sprp.c b/arch/s390/hypfs/hypfs_sprp.c index ae0ed8dd5f1b..5d85a039391c 100644 --- a/arch/s390/hypfs/hypfs_sprp.c +++ b/arch/s390/hypfs/hypfs_sprp.c @@ -13,7 +13,6 @@ #include <linux/string.h> #include <linux/types.h> #include <linux/uaccess.h> -#include <asm/compat.h> #include <asm/diag.h> #include <asm/sclp.h> #include "hypfs.h" diff --git a/arch/s390/include/asm/archrandom.h b/arch/s390/include/asm/archrandom.h index 09aed1095336..c67b82dfa558 100644 --- a/arch/s390/include/asm/archrandom.h +++ b/arch/s390/include/asm/archrandom.h @@ -15,16 +15,11 @@ #include <linux/static_key.h> #include <linux/atomic.h> -#include <asm/cpacf.h> DECLARE_STATIC_KEY_FALSE(s390_arch_random_available); extern atomic64_t s390_arch_random_counter; -static void s390_arch_random_generate(u8 *buf, unsigned int nbytes) -{ - cpacf_trng(NULL, 0, buf, nbytes); - atomic64_add(nbytes, &s390_arch_random_counter); -} +bool s390_arch_random_generate(u8 *buf, unsigned int nbytes); static inline bool arch_has_random(void) { @@ -51,8 +46,7 @@ static inline bool arch_get_random_int(unsigned int *v) static inline bool arch_get_random_seed_long(unsigned long *v) { if (static_branch_likely(&s390_arch_random_available)) { - s390_arch_random_generate((u8 *)v, sizeof(*v)); - return true; + return s390_arch_random_generate((u8 *)v, sizeof(*v)); } return false; } @@ -60,8 +54,7 @@ static inline bool arch_get_random_seed_long(unsigned long *v) static inline bool arch_get_random_seed_int(unsigned int *v) { if (static_branch_likely(&s390_arch_random_available)) { - s390_arch_random_generate((u8 *)v, sizeof(*v)); - return true; + return s390_arch_random_generate((u8 *)v, sizeof(*v)); } return false; } diff --git a/arch/s390/include/asm/ccwdev.h b/arch/s390/include/asm/ccwdev.h index 20bce136b2e5..a29dd430fb40 100644 --- a/arch/s390/include/asm/ccwdev.h +++ b/arch/s390/include/asm/ccwdev.h @@ -231,4 +231,5 @@ int ccw_device_siosl(struct ccw_device *); extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *); struct channel_path_desc_fmt0 *ccw_device_get_chp_desc(struct ccw_device *, int); +u8 *ccw_device_get_util_str(struct ccw_device *cdev, int chp_idx); #endif /* _S390_CCWDEV_H_ */ diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h index 99aa817dad32..860cab7479c3 100644 --- a/arch/s390/include/asm/ccwgroup.h +++ b/arch/s390/include/asm/ccwgroup.h @@ -73,4 +73,14 @@ extern void ccwgroup_remove_ccwdev(struct ccw_device *cdev); #define to_ccwgroupdev(x) container_of((x), struct ccwgroup_device, dev) #define to_ccwgroupdrv(x) container_of((x), struct ccwgroup_driver, driver) + +#if IS_ENABLED(CONFIG_CCWGROUP) +bool dev_is_ccwgroup(struct device *dev); +#else /* CONFIG_CCWGROUP */ +static inline bool dev_is_ccwgroup(struct device *dev) +{ + return false; +} +#endif /* CONFIG_CCWGROUP */ + #endif diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 9830fb6b076e..97db2fba546a 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -53,7 +53,6 @@ typedef u32 compat_size_t; typedef s32 compat_ssize_t; -typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; typedef u16 __compat_uid_t; @@ -97,16 +96,6 @@ typedef struct { u32 gprs_high[NUM_GPRS]; } s390_compat_regs_high; -struct compat_timespec { - compat_time_t tv_sec; - s32 tv_nsec; -}; - -struct compat_timeval { - compat_time_t tv_sec; - s32 tv_usec; -}; - struct compat_stat { compat_dev_t st_dev; u16 __pad1; @@ -243,10 +232,10 @@ struct compat_ipc64_perm { struct compat_semid64_ds { struct compat_ipc64_perm sem_perm; - compat_time_t sem_otime; - compat_ulong_t __pad1; - compat_time_t sem_ctime; - compat_ulong_t __pad2; + compat_ulong_t sem_otime; + compat_ulong_t sem_otime_high; + compat_ulong_t sem_ctime; + compat_ulong_t sem_ctime_high; compat_ulong_t sem_nsems; compat_ulong_t __unused1; compat_ulong_t __unused2; @@ -254,12 +243,12 @@ struct compat_semid64_ds { struct compat_msqid64_ds { struct compat_ipc64_perm msg_perm; - compat_time_t msg_stime; - compat_ulong_t __pad1; - compat_time_t msg_rtime; - compat_ulong_t __pad2; - compat_time_t msg_ctime; - compat_ulong_t __pad3; + compat_ulong_t msg_stime; + compat_ulong_t msg_stime_high; + compat_ulong_t msg_rtime; + compat_ulong_t msg_rtime_high; + compat_ulong_t msg_ctime; + compat_ulong_t msg_ctime_high; compat_ulong_t msg_cbytes; compat_ulong_t msg_qnum; compat_ulong_t msg_qbytes; @@ -272,12 +261,12 @@ struct compat_msqid64_ds { struct compat_shmid64_ds { struct compat_ipc64_perm shm_perm; compat_size_t shm_segsz; - compat_time_t shm_atime; - compat_ulong_t __pad1; - compat_time_t shm_dtime; - compat_ulong_t __pad2; - compat_time_t shm_ctime; - compat_ulong_t __pad3; + compat_ulong_t shm_atime; + compat_ulong_t shm_atime_high; + compat_ulong_t shm_dtime; + compat_ulong_t shm_dtime_high; + compat_ulong_t shm_ctime; + compat_ulong_t shm_ctime_high; compat_pid_t shm_cpid; compat_pid_t shm_lpid; compat_ulong_t shm_nattch; diff --git a/arch/s390/include/asm/cpu_mf.h b/arch/s390/include/asm/cpu_mf.h index f58d17e9dd65..de023a9a88ca 100644 --- a/arch/s390/include/asm/cpu_mf.h +++ b/arch/s390/include/asm/cpu_mf.h @@ -113,7 +113,7 @@ struct hws_basic_entry { struct hws_diag_entry { unsigned int def:16; /* 0-15 Data Entry Format */ - unsigned int R:14; /* 16-19 and 20-30 reserved */ + unsigned int R:15; /* 16-19 and 20-30 reserved */ unsigned int I:1; /* 31 entry valid or invalid */ u8 data[]; /* Machine-dependent sample data */ } __packed; @@ -129,7 +129,9 @@ struct hws_trailer_entry { unsigned int f:1; /* 0 - Block Full Indicator */ unsigned int a:1; /* 1 - Alert request control */ unsigned int t:1; /* 2 - Timestamp format */ - unsigned long long:61; /* 3 - 63: Reserved */ + unsigned int :29; /* 3 - 31: Reserved */ + unsigned int bsdes:16; /* 32-47: size of basic SDE */ + unsigned int dsdes:16; /* 48-63: size of diagnostic SDE */ }; unsigned long long flags; /* 0 - 63: All indicators */ }; diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 1a61b1b997f2..7d22a474a040 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -125,8 +125,9 @@ * ELF register definitions.. */ +#include <linux/compat.h> + #include <asm/ptrace.h> -#include <asm/compat.h> #include <asm/syscall.h> #include <asm/user.h> @@ -136,7 +137,6 @@ typedef s390_regs elf_gregset_t; typedef s390_fp_regs compat_elf_fpregset_t; typedef s390_compat_regs compat_elf_gregset_t; -#include <linux/compat.h> #include <linux/sched/mm.h> /* for task_struct */ #include <asm/mmu_context.h> diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h index a296c6acfd07..dfbc3c6c0674 100644 --- a/arch/s390/include/asm/hardirq.h +++ b/arch/s390/include/asm/hardirq.h @@ -14,6 +14,8 @@ #include <asm/lowcore.h> #define local_softirq_pending() (S390_lowcore.softirq_pending) +#define set_softirq_pending(x) (S390_lowcore.softirq_pending = (x)) +#define or_softirq_pending(x) (S390_lowcore.softirq_pending |= (x)) #define __ARCH_IRQ_STAT #define __ARCH_HAS_DO_SOFTIRQ diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h new file mode 100644 index 000000000000..a01f81186e86 --- /dev/null +++ b/arch/s390/include/asm/nospec-insn.h @@ -0,0 +1,196 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_NOSPEC_ASM_H +#define _ASM_S390_NOSPEC_ASM_H + +#include <asm/alternative-asm.h> +#include <asm/asm-offsets.h> +#include <asm/dwarf.h> + +#ifdef __ASSEMBLY__ + +#ifdef CONFIG_EXPOLINE + +_LC_BR_R1 = __LC_BR_R1 + +/* + * The expoline macros are used to create thunks in the same format + * as gcc generates them. The 'comdat' section flag makes sure that + * the various thunks are merged into a single copy. + */ + .macro __THUNK_PROLOG_NAME name + .pushsection .text.\name,"axG",@progbits,\name,comdat + .globl \name + .hidden \name + .type \name,@function +\name: + CFI_STARTPROC + .endm + + .macro __THUNK_EPILOG + CFI_ENDPROC + .popsection + .endm + + .macro __THUNK_PROLOG_BR r1,r2 + __THUNK_PROLOG_NAME __s390x_indirect_jump_r\r2\()use_r\r1 + .endm + + .macro __THUNK_PROLOG_BC d0,r1,r2 + __THUNK_PROLOG_NAME __s390x_indirect_branch_\d0\()_\r2\()use_\r1 + .endm + + .macro __THUNK_BR r1,r2 + jg __s390x_indirect_jump_r\r2\()use_r\r1 + .endm + + .macro __THUNK_BC d0,r1,r2 + jg __s390x_indirect_branch_\d0\()_\r2\()use_\r1 + .endm + + .macro __THUNK_BRASL r1,r2,r3 + brasl \r1,__s390x_indirect_jump_r\r3\()use_r\r2 + .endm + + .macro __DECODE_RR expand,reg,ruse + .set __decode_fail,1 + .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \reg,%r\r1 + .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \ruse,%r\r2 + \expand \r1,\r2 + .set __decode_fail,0 + .endif + .endr + .endif + .endr + .if __decode_fail == 1 + .error "__DECODE_RR failed" + .endif + .endm + + .macro __DECODE_RRR expand,rsave,rtarget,ruse + .set __decode_fail,1 + .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \rsave,%r\r1 + .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \rtarget,%r\r2 + .irp r3,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \ruse,%r\r3 + \expand \r1,\r2,\r3 + .set __decode_fail,0 + .endif + .endr + .endif + .endr + .endif + .endr + .if __decode_fail == 1 + .error "__DECODE_RRR failed" + .endif + .endm + + .macro __DECODE_DRR expand,disp,reg,ruse + .set __decode_fail,1 + .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \reg,%r\r1 + .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \ruse,%r\r2 + \expand \disp,\r1,\r2 + .set __decode_fail,0 + .endif + .endr + .endif + .endr + .if __decode_fail == 1 + .error "__DECODE_DRR failed" + .endif + .endm + + .macro __THUNK_EX_BR reg,ruse + # Be very careful when adding instructions to this macro! + # The ALTERNATIVE replacement code has a .+10 which targets + # the "br \reg" after the code has been patched. +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + exrl 0,555f + j . +#else + .ifc \reg,%r1 + ALTERNATIVE "ex %r0,_LC_BR_R1", ".insn ril,0xc60000000000,0,.+10", 35 + j . + .else + larl \ruse,555f + ex 0,0(\ruse) + j . + .endif +#endif +555: br \reg + .endm + + .macro __THUNK_EX_BC disp,reg,ruse +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + exrl 0,556f + j . +#else + larl \ruse,556f + ex 0,0(\ruse) + j . +#endif +556: b \disp(\reg) + .endm + + .macro GEN_BR_THUNK reg,ruse=%r1 + __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse + __THUNK_EX_BR \reg,\ruse + __THUNK_EPILOG + .endm + + .macro GEN_B_THUNK disp,reg,ruse=%r1 + __DECODE_DRR __THUNK_PROLOG_BC,\disp,\reg,\ruse + __THUNK_EX_BC \disp,\reg,\ruse + __THUNK_EPILOG + .endm + + .macro BR_EX reg,ruse=%r1 +557: __DECODE_RR __THUNK_BR,\reg,\ruse + .pushsection .s390_indirect_branches,"a",@progbits + .long 557b-. + .popsection + .endm + + .macro B_EX disp,reg,ruse=%r1 +558: __DECODE_DRR __THUNK_BC,\disp,\reg,\ruse + .pushsection .s390_indirect_branches,"a",@progbits + .long 558b-. + .popsection + .endm + + .macro BASR_EX rsave,rtarget,ruse=%r1 +559: __DECODE_RRR __THUNK_BRASL,\rsave,\rtarget,\ruse + .pushsection .s390_indirect_branches,"a",@progbits + .long 559b-. + .popsection + .endm + +#else + .macro GEN_BR_THUNK reg,ruse=%r1 + .endm + + .macro GEN_B_THUNK disp,reg,ruse=%r1 + .endm + + .macro BR_EX reg,ruse=%r1 + br \reg + .endm + + .macro B_EX disp,reg,ruse=%r1 + b \disp(\reg) + .endm + + .macro BASR_EX rsave,rtarget,ruse=%r1 + basr \rsave,\rtarget + .endm +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_S390_NOSPEC_ASM_H */ diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index 12fe3591034f..94f8db468c9b 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -2,8 +2,6 @@ #ifndef __ASM_S390_PCI_H #define __ASM_S390_PCI_H -/* must be set before including asm-generic/pci.h */ -#define PCI_DMA_BUS_IS_PHYS (0) /* must be set before including pci_clp.h */ #define PCI_BAR_COUNT 6 diff --git a/arch/s390/include/asm/pnet.h b/arch/s390/include/asm/pnet.h new file mode 100644 index 000000000000..6e278584f8f1 --- /dev/null +++ b/arch/s390/include/asm/pnet.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * IBM System z PNET ID Support + * + * Copyright IBM Corp. 2018 + */ + +#ifndef _ASM_S390_PNET_H +#define _ASM_S390_PNET_H + +#include <linux/device.h> +#include <linux/types.h> + +#define PNETIDS_LEN 64 /* Total utility string length in bytes + * to cover up to 4 PNETIDs of 16 bytes + * for up to 4 device ports + */ +#define MAX_PNETID_LEN 16 /* Max.length of a single port PNETID */ +#define MAX_PNETID_PORTS (PNETIDS_LEN / MAX_PNETID_LEN) + /* Max. # of ports with a PNETID */ + +int pnet_id_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid); +#endif /* _ASM_S390_PNET_H */ diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h index e297bcfc476f..6090670df51f 100644 --- a/arch/s390/include/asm/purgatory.h +++ b/arch/s390/include/asm/purgatory.h @@ -13,5 +13,11 @@ int verify_sha256_digest(void); +extern u64 kernel_entry; +extern u64 kernel_type; + +extern u64 crash_start; +extern u64 crash_size; + #endif /* __ASSEMBLY__ */ #endif /* _S390_PURGATORY_H_ */ diff --git a/arch/s390/include/uapi/asm/Kbuild b/arch/s390/include/uapi/asm/Kbuild index faef3f7e8353..e364873e0d10 100644 --- a/arch/s390/include/uapi/asm/Kbuild +++ b/arch/s390/include/uapi/asm/Kbuild @@ -9,9 +9,12 @@ generic-y += errno.h generic-y += fcntl.h generic-y += ioctl.h generic-y += mman.h +generic-y += msgbuf.h generic-y += param.h generic-y += poll.h generic-y += resource.h +generic-y += sembuf.h +generic-y += shmbuf.h generic-y += sockios.h generic-y += swab.h generic-y += termbits.h diff --git a/arch/s390/include/uapi/asm/msgbuf.h b/arch/s390/include/uapi/asm/msgbuf.h deleted file mode 100644 index 604f847cd68c..000000000000 --- a/arch/s390/include/uapi/asm/msgbuf.h +++ /dev/null @@ -1,38 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _S390_MSGBUF_H -#define _S390_MSGBUF_H - -/* - * The msqid64_ds structure for S/390 architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * Pad space is left for: - * - 64-bit time_t to solve y2038 problem - * - 2 miscellaneous 32-bit values - */ - -struct msqid64_ds { - struct ipc64_perm msg_perm; - __kernel_time_t msg_stime; /* last msgsnd time */ -#ifndef __s390x__ - unsigned long __unused1; -#endif /* ! __s390x__ */ - __kernel_time_t msg_rtime; /* last msgrcv time */ -#ifndef __s390x__ - unsigned long __unused2; -#endif /* ! __s390x__ */ - __kernel_time_t msg_ctime; /* last change time */ -#ifndef __s390x__ - unsigned long __unused3; -#endif /* ! __s390x__ */ - unsigned long msg_cbytes; /* current number of bytes on queue */ - unsigned long msg_qnum; /* number of messages in queue */ - unsigned long msg_qbytes; /* max number of bytes on queue */ - __kernel_pid_t msg_lspid; /* pid of last msgsnd */ - __kernel_pid_t msg_lrpid; /* last receive pid */ - unsigned long __unused4; - unsigned long __unused5; -}; - -#endif /* _S390_MSGBUF_H */ diff --git a/arch/s390/include/uapi/asm/sembuf.h b/arch/s390/include/uapi/asm/sembuf.h deleted file mode 100644 index 3e917697b668..000000000000 --- a/arch/s390/include/uapi/asm/sembuf.h +++ /dev/null @@ -1,30 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _S390_SEMBUF_H -#define _S390_SEMBUF_H - -/* - * The semid64_ds structure for S/390 architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * Pad space is left for: - * - 64-bit time_t to solve y2038 problem (for !__s390x__) - * - 2 miscellaneous 32-bit values - */ - -struct semid64_ds { - struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ - __kernel_time_t sem_otime; /* last semop time */ -#ifndef __s390x__ - unsigned long __unused1; -#endif /* ! __s390x__ */ - __kernel_time_t sem_ctime; /* last change time */ -#ifndef __s390x__ - unsigned long __unused2; -#endif /* ! __s390x__ */ - unsigned long sem_nsems; /* no. of semaphores in array */ - unsigned long __unused3; - unsigned long __unused4; -}; - -#endif /* _S390_SEMBUF_H */ diff --git a/arch/s390/include/uapi/asm/shmbuf.h b/arch/s390/include/uapi/asm/shmbuf.h deleted file mode 100644 index 9cdce8d7ce60..000000000000 --- a/arch/s390/include/uapi/asm/shmbuf.h +++ /dev/null @@ -1,49 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _S390_SHMBUF_H -#define _S390_SHMBUF_H - -/* - * The shmid64_ds structure for S/390 architecture. - * Note extra padding because this structure is passed back and forth - * between kernel and user space. - * - * Pad space is left for: - * - 64-bit time_t to solve y2038 problem (for !__s390x__) - * - 2 miscellaneous 32-bit values - */ - -struct shmid64_ds { - struct ipc64_perm shm_perm; /* operation perms */ - size_t shm_segsz; /* size of segment (bytes) */ - __kernel_time_t shm_atime; /* last attach time */ -#ifndef __s390x__ - unsigned long __unused1; -#endif /* ! __s390x__ */ - __kernel_time_t shm_dtime; /* last detach time */ -#ifndef __s390x__ - unsigned long __unused2; -#endif /* ! __s390x__ */ - __kernel_time_t shm_ctime; /* last change time */ -#ifndef __s390x__ - unsigned long __unused3; -#endif /* ! __s390x__ */ - __kernel_pid_t shm_cpid; /* pid of creator */ - __kernel_pid_t shm_lpid; /* pid of last operator */ - unsigned long shm_nattch; /* no. of current attaches */ - unsigned long __unused4; - unsigned long __unused5; -}; - -struct shminfo64 { - unsigned long shmmax; - unsigned long shmmin; - unsigned long shmmni; - unsigned long shmseg; - unsigned long shmall; - unsigned long __unused1; - unsigned long __unused2; - unsigned long __unused3; - unsigned long __unused4; -}; - -#endif /* _S390_SHMBUF_H */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 84ea6225efb4..2fed39b26b42 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -6,22 +6,26 @@ ifdef CONFIG_FUNCTION_TRACER # Do not trace tracer code -CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) # Do not trace early setup code -CFLAGS_REMOVE_als.o = $(CC_FLAGS_FTRACE) -CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_als.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_early.o = $(CC_FLAGS_FTRACE) +CFLAGS_REMOVE_early_nobss.o = $(CC_FLAGS_FTRACE) endif -GCOV_PROFILE_als.o := n -GCOV_PROFILE_early.o := n +GCOV_PROFILE_als.o := n +GCOV_PROFILE_early.o := n +GCOV_PROFILE_early_nobss.o := n -KCOV_INSTRUMENT_als.o := n -KCOV_INSTRUMENT_early.o := n +KCOV_INSTRUMENT_als.o := n +KCOV_INSTRUMENT_early.o := n +KCOV_INSTRUMENT_early_nobss.o := n -UBSAN_SANITIZE_als.o := n -UBSAN_SANITIZE_early.o := n +UBSAN_SANITIZE_als.o := n +UBSAN_SANITIZE_early.o := n +UBSAN_SANITIZE_early_nobss.o := n # # Use -march=z900 for als.c to be able to print an error @@ -57,7 +61,7 @@ CFLAGS_ptrace.o += -DUTS_MACHINE='"$(UTS_MACHINE)"' obj-y := traps.o time.o process.o base.o early.o setup.o idle.o vtime.o obj-y += processor.o sys_s390.o ptrace.o signal.o cpcmd.o ebcdic.o nmi.o -obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o +obj-y += debug.o irq.o ipl.o dis.o diag.o vdso.o als.o early_nobss.o obj-y += sysinfo.o jump_label.o lgr.o os_info.o machine_kexec.o pgm_check.o obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o @@ -65,6 +69,7 @@ obj-y += nospec-branch.o extra-y += head.o head64.o vmlinux.lds +obj-$(CONFIG_SYSFS) += nospec-sysfs.o CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) obj-$(CONFIG_MODULES) += module.o @@ -93,3 +98,6 @@ obj-$(CONFIG_TRACEPOINTS) += trace.o # vdso obj-y += vdso64/ obj-$(CONFIG_COMPAT) += vdso32/ + +chkbss := head.o head64.o als.o early_nobss.o +include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index eb2a5c0443cd..11aea745a2a6 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -181,6 +181,7 @@ int main(void) OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags); OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count); OFFSET(__LC_GMAP, lowcore, gmap); + OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline); /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); /* hardware defined lowcore locations 0x1000 - 0x18ff */ diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index f6c56009e822..b65874b0b412 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -9,18 +9,22 @@ #include <linux/linkage.h> #include <asm/asm-offsets.h> +#include <asm/nospec-insn.h> #include <asm/ptrace.h> #include <asm/sigp.h> + GEN_BR_THUNK %r9 + GEN_BR_THUNK %r14 + ENTRY(s390_base_mcck_handler) basr %r13,0 0: lg %r15,__LC_PANIC_STACK # load panic stack aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_mcck_handler_fn - lg %r1,0(%r1) - ltgr %r1,%r1 + lg %r9,0(%r1) + ltgr %r9,%r9 jz 1f - basr %r14,%r1 + BASR_EX %r14,%r9 1: la %r1,4095 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1) lpswe __LC_MCK_OLD_PSW @@ -37,10 +41,10 @@ ENTRY(s390_base_ext_handler) basr %r13,0 0: aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_ext_handler_fn - lg %r1,0(%r1) - ltgr %r1,%r1 + lg %r9,0(%r1) + ltgr %r9,%r9 jz 1f - basr %r14,%r1 + BASR_EX %r14,%r9 1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit lpswe __LC_EXT_OLD_PSW @@ -57,10 +61,10 @@ ENTRY(s390_base_pgm_handler) basr %r13,0 0: aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_pgm_handler_fn - lg %r1,0(%r1) - ltgr %r1,%r1 + lg %r9,0(%r1) + ltgr %r9,%r9 jz 1f - basr %r14,%r1 + BASR_EX %r14,%r9 lmg %r0,%r15,__LC_SAVE_AREA_SYNC lpswe __LC_PGM_OLD_PSW 1: lpswe disabled_wait_psw-0b(%r13) @@ -117,7 +121,7 @@ ENTRY(diag308_reset) larl %r4,.Lcontinue_psw # Restore PSW flags lpswe 0(%r4) .Lcontinue: - br %r14 + BR_EX %r14 .align 16 .Lrestart_psw: .long 0x00080000,0x80000000 + .Lrestart_part2 diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index 32daa0f84325..827699eb48fa 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -34,32 +34,6 @@ static void __init setup_boot_command_line(void); /* - * Get the TOD clock running. - */ -static void __init reset_tod_clock(void) -{ - u64 time; - - if (store_tod_clock(&time) == 0) - return; - /* TOD clock not running. Set the clock to Unix Epoch. */ - if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0) - disabled_wait(0); - - memset(tod_clock_base, 0, 16); - *(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH; - S390_lowcore.last_update_clock = TOD_UNIX_EPOCH; -} - -/* - * Clear bss memory - */ -static noinline __init void clear_bss_section(void) -{ - memset(__bss_start, 0, __bss_stop - __bss_start); -} - -/* * Initialize storage key for kernel pages */ static noinline __init void init_kernel_storage_key(void) @@ -310,57 +284,6 @@ static int __init cad_setup(char *str) } early_param("cad", cad_setup); -static __init void memmove_early(void *dst, const void *src, size_t n) -{ - unsigned long addr; - long incr; - psw_t old; - - if (!n) - return; - incr = 1; - if (dst > src) { - incr = -incr; - dst += n - 1; - src += n - 1; - } - old = S390_lowcore.program_new_psw; - S390_lowcore.program_new_psw.mask = __extract_psw(); - asm volatile( - " larl %[addr],1f\n" - " stg %[addr],%[psw_pgm_addr]\n" - "0: mvc 0(1,%[dst]),0(%[src])\n" - " agr %[dst],%[incr]\n" - " agr %[src],%[incr]\n" - " brctg %[n],0b\n" - "1:\n" - : [addr] "=&d" (addr), - [psw_pgm_addr] "=Q" (S390_lowcore.program_new_psw.addr), - [dst] "+&a" (dst), [src] "+&a" (src), [n] "+d" (n) - : [incr] "d" (incr) - : "cc", "memory"); - S390_lowcore.program_new_psw = old; -} - -static __init noinline void rescue_initrd(void) -{ -#ifdef CONFIG_BLK_DEV_INITRD - unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20); - /* - * Just like in case of IPL from VM reader we make sure there is a - * gap of 4MB between end of kernel and start of initrd. - * That way we can also be sure that saving an NSS will succeed, - * which however only requires different segments. - */ - if (!INITRD_START || !INITRD_SIZE) - return; - if (INITRD_START >= min_initrd_addr) - return; - memmove_early((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE); - INITRD_START = min_initrd_addr; -#endif -} - /* Set up boot command line */ static void __init append_to_cmdline(size_t (*ipl_data)(char *, size_t)) { @@ -410,9 +333,6 @@ static void __init setup_boot_command_line(void) void __init startup_init(void) { - reset_tod_clock(); - rescue_initrd(); - clear_bss_section(); time_early_init(); init_kernel_storage_key(); lockdep_off(); diff --git a/arch/s390/kernel/early_nobss.c b/arch/s390/kernel/early_nobss.c new file mode 100644 index 000000000000..2d84fc48df3a --- /dev/null +++ b/arch/s390/kernel/early_nobss.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright IBM Corp. 2007, 2018 + */ + +/* + * Early setup functions which may not rely on an initialized bss + * section. The last thing that is supposed to happen here is + * initialization of the bss section. + */ + +#include <linux/processor.h> +#include <linux/string.h> +#include <asm/sections.h> +#include <asm/lowcore.h> +#include <asm/setup.h> +#include <asm/timex.h> +#include "entry.h" + +static void __init reset_tod_clock(void) +{ + u64 time; + + if (store_tod_clock(&time) == 0) + return; + /* TOD clock not running. Set the clock to Unix Epoch. */ + if (set_tod_clock(TOD_UNIX_EPOCH) != 0 || store_tod_clock(&time) != 0) + disabled_wait(0); + + memset(tod_clock_base, 0, 16); + *(__u64 *) &tod_clock_base[1] = TOD_UNIX_EPOCH; + S390_lowcore.last_update_clock = TOD_UNIX_EPOCH; +} + +static void __init rescue_initrd(void) +{ + unsigned long min_initrd_addr = (unsigned long) _end + (4UL << 20); + + /* + * Just like in case of IPL from VM reader we make sure there is a + * gap of 4MB between end of kernel and start of initrd. + * That way we can also be sure that saving an NSS will succeed, + * which however only requires different segments. + */ + if (!IS_ENABLED(CONFIG_BLK_DEV_INITRD)) + return; + if (!INITRD_START || !INITRD_SIZE) + return; + if (INITRD_START >= min_initrd_addr) + return; + memmove((void *) min_initrd_addr, (void *) INITRD_START, INITRD_SIZE); + INITRD_START = min_initrd_addr; +} + +static void __init clear_bss_section(void) +{ + memset(__bss_start, 0, __bss_stop - __bss_start); +} + +void __init startup_init_nobss(void) +{ + reset_tod_clock(); + rescue_initrd(); + clear_bss_section(); +} diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3f22f139a041..f03402efab4b 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -28,6 +28,7 @@ #include <asm/setup.h> #include <asm/nmi.h> #include <asm/export.h> +#include <asm/nospec-insn.h> __PT_R0 = __PT_GPRS __PT_R1 = __PT_GPRS + 8 @@ -183,67 +184,9 @@ _LPP_OFFSET = __LC_LPP "jnz .+8; .long 0xb2e8d000", 82 .endm -#ifdef CONFIG_EXPOLINE - - .macro GEN_BR_THUNK name,reg,tmp - .section .text.\name,"axG",@progbits,\name,comdat - .globl \name - .hidden \name - .type \name,@function -\name: - CFI_STARTPROC -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES - exrl 0,0f -#else - larl \tmp,0f - ex 0,0(\tmp) -#endif - j . -0: br \reg - CFI_ENDPROC - .endm - - GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1 - GEN_BR_THUNK __s390x_indirect_jump_r1use_r14,%r14,%r1 - GEN_BR_THUNK __s390x_indirect_jump_r11use_r14,%r14,%r11 - - .macro BASR_R14_R9 -0: brasl %r14,__s390x_indirect_jump_r1use_r9 - .pushsection .s390_indirect_branches,"a",@progbits - .long 0b-. - .popsection - .endm - - .macro BR_R1USE_R14 -0: jg __s390x_indirect_jump_r1use_r14 - .pushsection .s390_indirect_branches,"a",@progbits - .long 0b-. - .popsection - .endm - - .macro BR_R11USE_R14 -0: jg __s390x_indirect_jump_r11use_r14 - .pushsection .s390_indirect_branches,"a",@progbits - .long 0b-. - .popsection - .endm - -#else /* CONFIG_EXPOLINE */ - - .macro BASR_R14_R9 - basr %r14,%r9 - .endm - - .macro BR_R1USE_R14 - br %r14 - .endm - - .macro BR_R11USE_R14 - br %r14 - .endm - -#endif /* CONFIG_EXPOLINE */ - + GEN_BR_THUNK %r9 + GEN_BR_THUNK %r14 + GEN_BR_THUNK %r14,%r11 .section .kprobes.text, "ax" .Ldummy: @@ -260,7 +203,7 @@ _LPP_OFFSET = __LC_LPP ENTRY(__bpon) .globl __bpon BPON - BR_R1USE_R14 + BR_EX %r14 /* * Scheduler resume function, called by switch_to @@ -284,7 +227,7 @@ ENTRY(__switch_to) mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 - BR_R1USE_R14 + BR_EX %r14 .L__critical_start: @@ -351,7 +294,7 @@ sie_exit: xgr %r5,%r5 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers lg %r2,__SF_SIE_REASON(%r15) # return exit reason code - BR_R1USE_R14 + BR_EX %r14 .Lsie_fault: lghi %r14,-EFAULT stg %r14,__SF_SIE_REASON(%r15) # set exit reason code @@ -410,7 +353,7 @@ ENTRY(system_call) lgf %r9,0(%r8,%r10) # get system call add. TSTMSK __TI_flags(%r12),_TIF_TRACE jnz .Lsysc_tracesys - BASR_R14_R9 # call sys_xxxx + BASR_EX %r14,%r9 # call sys_xxxx stg %r2,__PT_R2(%r11) # store return value .Lsysc_return: @@ -595,7 +538,7 @@ ENTRY(system_call) lmg %r3,%r7,__PT_R3(%r11) stg %r7,STACK_FRAME_OVERHEAD(%r15) lg %r2,__PT_ORIG_GPR2(%r11) - BASR_R14_R9 # call sys_xxx + BASR_EX %r14,%r9 # call sys_xxx stg %r2,__PT_R2(%r11) # store return value .Lsysc_tracenogo: TSTMSK __TI_flags(%r12),_TIF_TRACE @@ -619,7 +562,7 @@ ENTRY(ret_from_fork) lmg %r9,%r10,__PT_R9(%r11) # load gprs ENTRY(kernel_thread_starter) la %r2,0(%r10) - BASR_R14_R9 + BASR_EX %r14,%r9 j .Lsysc_tracenogo /* @@ -701,7 +644,7 @@ ENTRY(pgm_check_handler) je .Lpgm_return lgf %r9,0(%r10,%r1) # load address of handler routine lgr %r2,%r11 # pass pointer to pt_regs - BASR_R14_R9 # branch to interrupt-handler + BASR_EX %r14,%r9 # branch to interrupt-handler .Lpgm_return: LOCKDEP_SYS_EXIT tm __PT_PSW+1(%r11),0x01 # returning to user ? @@ -1019,7 +962,7 @@ ENTRY(psw_idle) stpt __TIMER_IDLE_ENTER(%r2) .Lpsw_idle_lpsw: lpswe __SF_EMPTY(%r15) - BR_R1USE_R14 + BR_EX %r14 .Lpsw_idle_end: /* @@ -1061,7 +1004,7 @@ ENTRY(save_fpu_regs) .Lsave_fpu_regs_done: oi __LC_CPU_FLAGS+7,_CIF_FPU .Lsave_fpu_regs_exit: - BR_R1USE_R14 + BR_EX %r14 .Lsave_fpu_regs_end: EXPORT_SYMBOL(save_fpu_regs) @@ -1107,7 +1050,7 @@ load_fpu_regs: .Lload_fpu_regs_done: ni __LC_CPU_FLAGS+7,255-_CIF_FPU .Lload_fpu_regs_exit: - BR_R1USE_R14 + BR_EX %r14 .Lload_fpu_regs_end: .L__critical_end: @@ -1322,7 +1265,7 @@ cleanup_critical: jl 0f clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end jl .Lcleanup_load_fpu_regs -0: BR_R11USE_R14 +0: BR_EX %r14 .align 8 .Lcleanup_table: @@ -1358,7 +1301,7 @@ cleanup_critical: ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit - BR_R11USE_R14 + BR_EX %r14 #endif .Lcleanup_system_call: @@ -1412,7 +1355,7 @@ cleanup_critical: stg %r15,56(%r11) # r15 stack pointer # set new psw address and exit larl %r9,.Lsysc_do_svc - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_system_call_insn: .quad system_call .quad .Lsysc_stmg @@ -1424,7 +1367,7 @@ cleanup_critical: .Lcleanup_sysc_tif: larl %r9,.Lsysc_tif - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_sysc_restore: # check if stpt has been executed @@ -1441,14 +1384,14 @@ cleanup_critical: mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) 1: lmg %r8,%r9,__LC_RETURN_PSW - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_sysc_restore_insn: .quad .Lsysc_exit_timer .quad .Lsysc_done - 4 .Lcleanup_io_tif: larl %r9,.Lio_tif - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_io_restore: # check if stpt has been executed @@ -1462,7 +1405,7 @@ cleanup_critical: mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) 1: lmg %r8,%r9,__LC_RETURN_PSW - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_io_restore_insn: .quad .Lio_exit_timer .quad .Lio_done - 4 @@ -1515,17 +1458,17 @@ cleanup_critical: # prepare return psw nihh %r8,0xfcfd # clear irq & wait state bits lg %r9,48(%r11) # return from psw_idle - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_idle_insn: .quad .Lpsw_idle_lpsw .Lcleanup_save_fpu_regs: larl %r9,save_fpu_regs - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_load_fpu_regs: larl %r9,load_fpu_regs - BR_R11USE_R14 + BR_EX %r14,%r11 /* * Integer constants diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index e87758f8fbdc..961abfac2c5f 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -58,6 +58,7 @@ void do_notify_resume(struct pt_regs *regs); void __init init_IRQ(void); void do_IRQ(struct pt_regs *regs, int irq); void do_restart(void); +void __init startup_init_nobss(void); void __init startup_init(void); void die(struct pt_regs *regs, const char *str); int setup_profiling_timer(unsigned int multiplier); diff --git a/arch/s390/kernel/head64.S b/arch/s390/kernel/head64.S index 38a973ccf501..791cb9000e86 100644 --- a/arch/s390/kernel/head64.S +++ b/arch/s390/kernel/head64.S @@ -40,8 +40,12 @@ ENTRY(startup_continue) stg %r15,__LC_KERNEL_STACK # set end of kernel stack aghi %r15,-160 # -# Save ipl parameters, clear bss memory, initialize storage key for kernel pages, -# and create a kernel NSS if the SAVESYS= parm is defined +# Early setup functions that may not rely on an initialized bss section, +# like moving the initrd. Returns with an initialized bss section. +# + brasl %r14,startup_init_nobss +# +# Early machine initialization and detection functions. # brasl %r14,startup_init lpswe .Lentry-.LPG1(13) # jump to _stext in primary-space, diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 94f2099bceb0..3d17c41074ca 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -176,10 +176,9 @@ void do_softirq_own_stack(void) new -= STACK_FRAME_OVERHEAD; ((struct stack_frame *) new)->back_chain = old; asm volatile(" la 15,0(%0)\n" - " basr 14,%2\n" + " brasl 14,__do_softirq\n" " la 15,0(%1)\n" - : : "a" (new), "a" (old), - "a" (__do_softirq) + : : "a" (new), "a" (old) : "0", "1", "2", "3", "4", "5", "14", "cc", "memory" ); } else { diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 82df7d80fab2..27110f3294ed 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -9,13 +9,17 @@ #include <linux/linkage.h> #include <asm/asm-offsets.h> #include <asm/ftrace.h> +#include <asm/nospec-insn.h> #include <asm/ptrace.h> #include <asm/export.h> + GEN_BR_THUNK %r1 + GEN_BR_THUNK %r14 + .section .kprobes.text, "ax" ENTRY(ftrace_stub) - br %r14 + BR_EX %r14 #define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) @@ -23,7 +27,7 @@ ENTRY(ftrace_stub) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) ENTRY(_mcount) - br %r14 + BR_EX %r14 EXPORT_SYMBOL(_mcount) @@ -53,7 +57,7 @@ ENTRY(ftrace_caller) #endif lgr %r3,%r14 la %r5,STACK_PTREGS(%r15) - basr %r14,%r1 + BASR_EX %r14,%r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER # The j instruction gets runtime patched to a nop instruction. # See ftrace_enable_ftrace_graph_caller. @@ -68,7 +72,7 @@ ftrace_graph_caller_end: #endif lg %r1,(STACK_PTREGS_PSW+8)(%r15) lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15) - br %r1 + BR_EX %r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -81,6 +85,6 @@ ENTRY(return_to_handler) aghi %r15,STACK_FRAME_OVERHEAD lgr %r14,%r2 lmg %r2,%r5,32(%r15) - br %r14 + BR_EX %r14 #endif diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 46d49a11663f..18ae7b9c71d6 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/module.h> #include <linux/device.h> -#include <linux/cpu.h> #include <asm/nospec-branch.h> static int __init nobp_setup_early(char *str) @@ -37,31 +36,13 @@ early_param("nospec", nospec_setup_early); static int __init nospec_report(void) { if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) - pr_info("Spectre V2 mitigation: execute trampolines.\n"); + pr_info("Spectre V2 mitigation: execute trampolines\n"); if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) - pr_info("Spectre V2 mitigation: limited branch prediction.\n"); + pr_info("Spectre V2 mitigation: limited branch prediction\n"); return 0; } arch_initcall(nospec_report); -#ifdef CONFIG_SYSFS -ssize_t cpu_show_spectre_v1(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); -} - -ssize_t cpu_show_spectre_v2(struct device *dev, - struct device_attribute *attr, char *buf) -{ - if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) - return sprintf(buf, "Mitigation: execute trampolines\n"); - if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) - return sprintf(buf, "Mitigation: limited branch prediction.\n"); - return sprintf(buf, "Vulnerable\n"); -} -#endif - #ifdef CONFIG_EXPOLINE int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF); @@ -112,7 +93,6 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) s32 *epo; /* Second part of the instruction replace is always a nop */ - memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x00, 0x00 }, 4); for (epo = start; epo < end; epo++) { instr = (u8 *) epo + *epo; if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04) @@ -133,18 +113,34 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) br = thunk + (*(int *)(thunk + 2)) * 2; else continue; - if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0) + /* Check for unconditional branch 0x07f? or 0x47f???? */ + if ((br[0] & 0xbf) != 0x07 || (br[1] & 0xf0) != 0xf0) continue; + + memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x07, 0x00 }, 4); switch (type) { case BRCL_EXPOLINE: - /* brcl to thunk, replace with br + nop */ insnbuf[0] = br[0]; insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); + if (br[0] == 0x47) { + /* brcl to b, replace with bc + nopr */ + insnbuf[2] = br[2]; + insnbuf[3] = br[3]; + } else { + /* brcl to br, replace with bcr + nop */ + } break; case BRASL_EXPOLINE: - /* brasl to thunk, replace with basr + nop */ - insnbuf[0] = 0x0d; insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); + if (br[0] == 0x47) { + /* brasl to b, replace with bas + nopr */ + insnbuf[0] = 0x4d; + insnbuf[2] = br[2]; + insnbuf[3] = br[3]; + } else { + /* brasl to br, replace with basr + nop */ + insnbuf[0] = 0x0d; + } break; } diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c new file mode 100644 index 000000000000..8affad5f18cb --- /dev/null +++ b/arch/s390/kernel/nospec-sysfs.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/device.h> +#include <linux/cpu.h> +#include <asm/facility.h> +#include <asm/nospec-branch.h> + +ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) + return sprintf(buf, "Mitigation: execute trampolines\n"); + if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) + return sprintf(buf, "Mitigation: limited branch prediction\n"); + return sprintf(buf, "Vulnerable\n"); +} diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 1c9ddd7aa5ec..0292d68e7dde 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -753,6 +753,10 @@ static int __hw_perf_event_init(struct perf_event *event) */ rate = 0; if (attr->freq) { + if (!attr->sample_freq) { + err = -EINVAL; + goto out; + } rate = freq_to_sample_rate(&si, attr->sample_freq); rate = hw_limit_rate(&si, rate); attr->freq = 0; diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 73cc3750f0d3..7f14adf512c6 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -7,8 +7,11 @@ #include <linux/linkage.h> #include <asm/asm-offsets.h> +#include <asm/nospec-insn.h> #include <asm/sigp.h> + GEN_BR_THUNK %r9 + # # Issue "store status" for the current CPU to its prefix page # and call passed function afterwards @@ -67,9 +70,9 @@ ENTRY(store_status) st %r4,0(%r1) st %r5,4(%r1) stg %r2,8(%r1) - lgr %r1,%r2 + lgr %r9,%r2 lgr %r2,%r3 - br %r1 + BR_EX %r9 .section .bss .align 8 diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index e99187149f17..a049a7b9d6e8 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -13,6 +13,7 @@ #include <asm/ptrace.h> #include <asm/thread_info.h> #include <asm/asm-offsets.h> +#include <asm/nospec-insn.h> #include <asm/sigp.h> /* @@ -24,6 +25,8 @@ * (see below) in the resume process. * This function runs with disabled interrupts. */ + GEN_BR_THUNK %r14 + .section .text ENTRY(swsusp_arch_suspend) stmg %r6,%r15,__SF_GPRS(%r15) @@ -103,7 +106,7 @@ ENTRY(swsusp_arch_suspend) spx 0x318(%r1) lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) lghi %r2,0 - br %r14 + BR_EX %r14 /* * Restore saved memory image to correct place and restore register context. @@ -197,11 +200,10 @@ pgm_check_entry: larl %r15,init_thread_union ahi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) larl %r2,.Lpanic_string - larl %r3,sclp_early_printk lghi %r1,0 sam31 sigp %r1,%r0,SIGP_SET_ARCHITECTURE - basr %r14,%r3 + brasl %r14,sclp_early_printk larl %r3,.Ldisabled_wait_31 lpsw 0(%r3) 4: @@ -267,7 +269,7 @@ restore_registers: /* Return 0 */ lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) lghi %r2,0 - br %r14 + BR_EX %r14 .section .data..nosave,"aw",@progbits .align 8 diff --git a/arch/s390/kernel/sysinfo.c b/arch/s390/kernel/sysinfo.c index fc7e04c2195b..54f5496913fa 100644 --- a/arch/s390/kernel/sysinfo.c +++ b/arch/s390/kernel/sysinfo.c @@ -294,21 +294,9 @@ static int sysinfo_show(struct seq_file *m, void *v) return 0; } -static int sysinfo_open(struct inode *inode, struct file *file) -{ - return single_open(file, sysinfo_show, NULL); -} - -static const struct file_operations sysinfo_fops = { - .open = sysinfo_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int __init sysinfo_create_proc(void) { - proc_create("sysinfo", 0444, NULL, &sysinfo_fops); + proc_create_single("sysinfo", 0444, NULL, sysinfo_show); return 0; } device_initcall(sysinfo_create_proc); @@ -386,18 +374,6 @@ static const struct seq_operations service_level_seq_ops = { .show = service_level_show }; -static int service_level_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &service_level_seq_ops); -} - -static const struct file_operations service_level_ops = { - .open = service_level_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release -}; - static void service_level_vm_print(struct seq_file *m, struct service_level *slr) { @@ -420,7 +396,7 @@ static struct service_level service_level_vm = { static __init int create_proc_service_level(void) { - proc_create("service_levels", 0, NULL, &service_level_ops); + proc_create_seq("service_levels", 0, NULL, &service_level_seq_ops); if (MACHINE_IS_VM) register_service_level(&service_level_vm); return 0; diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index a5297a22bc1e..8003b38c1688 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -44,14 +44,8 @@ int is_valid_bugaddr(unsigned long addr) void do_report_trap(struct pt_regs *regs, int si_signo, int si_code, char *str) { - siginfo_t info; - if (user_mode(regs)) { - info.si_signo = si_signo; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = get_trap_ip(regs); - force_sig_info(si_signo, &info, current); + force_sig_fault(si_signo, si_code, get_trap_ip(regs), current); report_user_fault(regs, si_signo, 0); } else { const struct exception_table_entry *fixup; @@ -80,18 +74,12 @@ NOKPROBE_SYMBOL(do_trap); void do_per_trap(struct pt_regs *regs) { - siginfo_t info; - if (notify_die(DIE_SSTEP, "sstep", regs, 0, 0, SIGTRAP) == NOTIFY_STOP) return; if (!current->ptrace) return; - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_HWBKPT; - info.si_addr = - (void __force __user *) current->thread.per_event.address; - force_sig_info(SIGTRAP, &info, current); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, + (void __force __user *) current->thread.per_event.address, current); } NOKPROBE_SYMBOL(do_per_trap); @@ -165,7 +153,6 @@ void translation_exception(struct pt_regs *regs) void illegal_op(struct pt_regs *regs) { - siginfo_t info; __u8 opcode[6]; __u16 __user *location; int is_uprobe_insn = 0; @@ -177,13 +164,9 @@ void illegal_op(struct pt_regs *regs) if (get_user(*((__u16 *) opcode), (__u16 __user *) location)) return; if (*((__u16 *) opcode) == S390_BREAKPOINT_U16) { - if (current->ptrace) { - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_BRKPT; - info.si_addr = location; - force_sig_info(SIGTRAP, &info, current); - } else + if (current->ptrace) + force_sig_fault(SIGTRAP, TRAP_BRKPT, location, current); + else signal = SIGILL; #ifdef CONFIG_UPROBES } else if (*((__u16 *) opcode) == UPROBE_SWBP_INSN) { diff --git a/arch/s390/kernel/vmlinux.lds.S b/arch/s390/kernel/vmlinux.lds.S index 08d12cfaf091..f0414f52817b 100644 --- a/arch/s390/kernel/vmlinux.lds.S +++ b/arch/s390/kernel/vmlinux.lds.S @@ -82,10 +82,10 @@ SECTIONS . = ALIGN(PAGE_SIZE); .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { - VMLINUX_SYMBOL(_sinittext) = . ; + _sinittext = .; INIT_TEXT . = ALIGN(PAGE_SIZE); - VMLINUX_SYMBOL(_einittext) = . ; + _einittext = .; } /* diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index ebfa0442e569..a3bce0e84346 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -26,7 +26,6 @@ #include <asm/gmap.h> #include <asm/io.h> #include <asm/ptrace.h> -#include <asm/compat.h> #include <asm/sclp.h> #include "gaccess.h" #include "kvm-s390.h" diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 8961e3970901..969882b54266 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -578,7 +578,7 @@ static int pin_blocks(struct kvm_vcpu *vcpu, struct vsie_page *vsie_page) gpa = READ_ONCE(scb_o->itdba) & ~0xffUL; if (gpa && (scb_s->ecb & ECB_TE)) { - if (!(gpa & ~0x1fffU)) { + if (!(gpa & ~0x1fffUL)) { rc = set_validity_icpt(scb_s, 0x0080U); goto unpin; } diff --git a/arch/s390/lib/Makefile b/arch/s390/lib/Makefile index 9bfe0802684b..57ab40188d4b 100644 --- a/arch/s390/lib/Makefile +++ b/arch/s390/lib/Makefile @@ -8,3 +8,6 @@ obj-y += mem.o xor.o lib-$(CONFIG_SMP) += spinlock.o lib-$(CONFIG_KPROBES) += probes.o lib-$(CONFIG_UPROBES) += probes.o + +chkbss := mem.o +include $(srctree)/arch/s390/scripts/Makefile.chkbss diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index 495c9c4bacc7..2311f15be9cf 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -7,6 +7,9 @@ #include <linux/linkage.h> #include <asm/export.h> +#include <asm/nospec-insn.h> + + GEN_BR_THUNK %r14 /* * void *memmove(void *dest, const void *src, size_t n) @@ -33,14 +36,14 @@ ENTRY(memmove) .Lmemmove_forward_remainder: larl %r5,.Lmemmove_mvc ex %r4,0(%r5) - br %r14 + BR_EX %r14 .Lmemmove_reverse: ic %r0,0(%r4,%r3) stc %r0,0(%r4,%r1) brctg %r4,.Lmemmove_reverse ic %r0,0(%r4,%r3) stc %r0,0(%r4,%r1) - br %r14 + BR_EX %r14 .Lmemmove_mvc: mvc 0(1,%r1),0(%r3) EXPORT_SYMBOL(memmove) @@ -77,7 +80,7 @@ ENTRY(memset) .Lmemset_clear_remainder: larl %r3,.Lmemset_xc ex %r4,0(%r3) - br %r14 + BR_EX %r14 .Lmemset_fill: cghi %r4,1 lgr %r1,%r2 @@ -95,10 +98,10 @@ ENTRY(memset) stc %r3,0(%r1) larl %r5,.Lmemset_mvc ex %r4,0(%r5) - br %r14 + BR_EX %r14 .Lmemset_fill_exit: stc %r3,0(%r1) - br %r14 + BR_EX %r14 .Lmemset_xc: xc 0(1,%r1),0(%r1) .Lmemset_mvc: @@ -121,7 +124,7 @@ ENTRY(memcpy) .Lmemcpy_remainder: larl %r5,.Lmemcpy_mvc ex %r4,0(%r5) - br %r14 + BR_EX %r14 .Lmemcpy_loop: mvc 0(256,%r1),0(%r3) la %r1,256(%r1) @@ -159,10 +162,10 @@ ENTRY(__memset\bits) \insn %r3,0(%r1) larl %r5,.L__memset_mvc\bits ex %r4,0(%r5) - br %r14 + BR_EX %r14 .L__memset_exit\bits: \insn %r3,0(%r2) - br %r14 + BR_EX %r14 .L__memset_mvc\bits: mvc \bytes(1,%r1),0(%r1) .endm diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 93faeca52284..e074480d3598 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -265,14 +265,10 @@ void report_user_fault(struct pt_regs *regs, long signr, int is_mm_fault) */ static noinline void do_sigsegv(struct pt_regs *regs, int si_code) { - struct siginfo si; - report_user_fault(regs, SIGSEGV, 1); - si.si_signo = SIGSEGV; - si.si_errno = 0; - si.si_code = si_code; - si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); - force_sig_info(SIGSEGV, &si, current); + force_sig_fault(SIGSEGV, si_code, + (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK), + current); } static noinline void do_no_context(struct pt_regs *regs) @@ -316,18 +312,13 @@ static noinline void do_low_address(struct pt_regs *regs) static noinline void do_sigbus(struct pt_regs *regs) { - struct task_struct *tsk = current; - struct siginfo si; - /* * Send a sigbus, regardless of whether we were in kernel * or user mode. */ - si.si_signo = SIGBUS; - si.si_errno = 0; - si.si_code = BUS_ADRERR; - si.si_addr = (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK); - force_sig_info(SIGBUS, &si, tsk); + force_sig_fault(SIGBUS, BUS_ADRERR, + (void __user *)(regs->int_parm_long & __FAIL_ADDR_MASK), + current); } static noinline int signal_return(struct pt_regs *regs) diff --git a/arch/s390/net/Makefile b/arch/s390/net/Makefile index e0d5f245e42b..e2b85ffdbb0c 100644 --- a/arch/s390/net/Makefile +++ b/arch/s390/net/Makefile @@ -3,3 +3,4 @@ # Arch-specific network modules # obj-$(CONFIG_BPF_JIT) += bpf_jit.o bpf_jit_comp.o +obj-$(CONFIG_HAVE_PNETID) += pnet.o diff --git a/arch/s390/net/bpf_jit.S b/arch/s390/net/bpf_jit.S index 25bb4643c4f4..9f794869c1b0 100644 --- a/arch/s390/net/bpf_jit.S +++ b/arch/s390/net/bpf_jit.S @@ -9,6 +9,7 @@ */ #include <linux/linkage.h> +#include <asm/nospec-insn.h> #include "bpf_jit.h" /* @@ -54,7 +55,7 @@ ENTRY(sk_load_##NAME##_pos); \ clg %r3,STK_OFF_HLEN(%r15); /* Offset + SIZE > hlen? */ \ jh sk_load_##NAME##_slow; \ LOAD %r14,-SIZE(%r3,%r12); /* Get data from skb */ \ - b OFF_OK(%r6); /* Return */ \ + B_EX OFF_OK,%r6; /* Return */ \ \ sk_load_##NAME##_slow:; \ lgr %r2,%r7; /* Arg1 = skb pointer */ \ @@ -64,11 +65,14 @@ sk_load_##NAME##_slow:; \ brasl %r14,skb_copy_bits; /* Get data from skb */ \ LOAD %r14,STK_OFF_TMP(%r15); /* Load from temp bufffer */ \ ltgr %r2,%r2; /* Set cc to (%r2 != 0) */ \ - br %r6; /* Return */ + BR_EX %r6; /* Return */ sk_load_common(word, 4, llgf) /* r14 = *(u32 *) (skb->data+offset) */ sk_load_common(half, 2, llgh) /* r14 = *(u16 *) (skb->data+offset) */ + GEN_BR_THUNK %r6 + GEN_B_THUNK OFF_OK,%r6 + /* * Load 1 byte from SKB (optimized version) */ @@ -80,7 +84,7 @@ ENTRY(sk_load_byte_pos) clg %r3,STK_OFF_HLEN(%r15) # Offset >= hlen? jnl sk_load_byte_slow llgc %r14,0(%r3,%r12) # Get byte from skb - b OFF_OK(%r6) # Return OK + B_EX OFF_OK,%r6 # Return OK sk_load_byte_slow: lgr %r2,%r7 # Arg1 = skb pointer @@ -90,7 +94,7 @@ sk_load_byte_slow: brasl %r14,skb_copy_bits # Get data from skb llgc %r14,STK_OFF_TMP(%r15) # Load result from temp buffer ltgr %r2,%r2 # Set cc to (%r2 != 0) - br %r6 # Return cc + BR_EX %r6 # Return cc #define sk_negative_common(NAME, SIZE, LOAD) \ sk_load_##NAME##_slow_neg:; \ @@ -104,7 +108,7 @@ sk_load_##NAME##_slow_neg:; \ jz bpf_error; \ LOAD %r14,0(%r2); /* Get data from pointer */ \ xr %r3,%r3; /* Set cc to zero */ \ - br %r6; /* Return cc */ + BR_EX %r6; /* Return cc */ sk_negative_common(word, 4, llgf) sk_negative_common(half, 2, llgh) @@ -113,4 +117,4 @@ sk_negative_common(byte, 1, llgc) bpf_error: # force a return 0 from jit handler ltgr %r15,%r15 # Set condition code - br %r6 + BR_EX %r6 diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 78a19c93b380..dd2bcf0e7d00 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -25,6 +25,8 @@ #include <linux/bpf.h> #include <asm/cacheflush.h> #include <asm/dis.h> +#include <asm/facility.h> +#include <asm/nospec-branch.h> #include <asm/set_memory.h> #include "bpf_jit.h" @@ -41,6 +43,8 @@ struct bpf_jit { int base_ip; /* Base address for literal pool */ int ret0_ip; /* Address of return 0 */ int exit_ip; /* Address of exit */ + int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */ + int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */ int tail_call_start; /* Tail call start offset */ int labels[1]; /* Labels for local jumps */ }; @@ -250,6 +254,19 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) REG_SET_SEEN(b2); \ }) +#define EMIT6_PCREL_RILB(op, b, target) \ +({ \ + int rel = (target - jit->prg) / 2; \ + _EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff); \ + REG_SET_SEEN(b); \ +}) + +#define EMIT6_PCREL_RIL(op, target) \ +({ \ + int rel = (target - jit->prg) / 2; \ + _EMIT6(op | rel >> 16, rel & 0xffff); \ +}) + #define _EMIT6_IMM(op, imm) \ ({ \ unsigned int __imm = (imm); \ @@ -469,8 +486,45 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) EMIT4(0xb9040000, REG_2, BPF_REG_0); /* Restore registers */ save_restore_regs(jit, REGS_RESTORE, stack_depth); + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) { + jit->r14_thunk_ip = jit->prg; + /* Generate __s390_indirect_jump_r14 thunk */ + if (test_facility(35)) { + /* exrl %r0,.+10 */ + EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); + } else { + /* larl %r1,.+14 */ + EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14); + /* ex 0,0(%r1) */ + EMIT4_DISP(0x44000000, REG_0, REG_1, 0); + } + /* j . */ + EMIT4_PCREL(0xa7f40000, 0); + } /* br %r14 */ _EMIT2(0x07fe); + + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable && + (jit->seen & SEEN_FUNC)) { + jit->r1_thunk_ip = jit->prg; + /* Generate __s390_indirect_jump_r1 thunk */ + if (test_facility(35)) { + /* exrl %r0,.+10 */ + EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); + /* j . */ + EMIT4_PCREL(0xa7f40000, 0); + /* br %r1 */ + _EMIT2(0x07f1); + } else { + /* larl %r1,.+14 */ + EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14); + /* ex 0,S390_lowcore.br_r1_tampoline */ + EMIT4_DISP(0x44000000, REG_0, REG_0, + offsetof(struct lowcore, br_r1_trampoline)); + /* j . */ + EMIT4_PCREL(0xa7f40000, 0); + } + } } /* @@ -966,8 +1020,13 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i /* lg %w1,<d(imm)>(%l) */ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L, EMIT_CONST_U64(func)); - /* basr %r14,%w1 */ - EMIT2(0x0d00, REG_14, REG_W1); + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) { + /* brasl %r14,__s390_indirect_jump_r1 */ + EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip); + } else { + /* basr %r14,%w1 */ + EMIT2(0x0d00, REG_14, REG_W1); + } /* lgr %b0,%r2: load return value into %b0 */ EMIT4(0xb9040000, BPF_REG_0, REG_2); if ((jit->seen & SEEN_SKB) && diff --git a/arch/s390/net/pnet.c b/arch/s390/net/pnet.c new file mode 100644 index 000000000000..e22f1b10a6c7 --- /dev/null +++ b/arch/s390/net/pnet.c @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * IBM System z PNET ID Support + * + * Copyright IBM Corp. 2018 + */ + +#include <linux/device.h> +#include <linux/module.h> +#include <linux/pci.h> +#include <linux/types.h> +#include <asm/ccwgroup.h> +#include <asm/ccwdev.h> +#include <asm/pnet.h> + +/* + * Get the PNETIDs from a device. + * s390 hardware supports the definition of a so-called Physical Network + * Identifier (short PNETID) per network device port. These PNETIDs can be + * used to identify network devices that are attached to the same physical + * network (broadcast domain). + * + * The device can be + * - a ccwgroup device with all bundled subchannels having the same PNETID + * - a PCI attached network device + * + * Returns: + * 0: PNETIDs extracted from device. + * -ENOMEM: No memory to extract utility string. + * -EOPNOTSUPP: Device type without utility string support + */ +static int pnet_ids_by_device(struct device *dev, u8 *pnetids) +{ + memset(pnetids, 0, PNETIDS_LEN); + if (dev_is_ccwgroup(dev)) { + struct ccwgroup_device *gdev = to_ccwgroupdev(dev); + u8 *util_str; + + util_str = ccw_device_get_util_str(gdev->cdev[0], 0); + if (!util_str) + return -ENOMEM; + memcpy(pnetids, util_str, PNETIDS_LEN); + kfree(util_str); + return 0; + } + if (dev_is_pci(dev)) { + struct zpci_dev *zdev = to_zpci(to_pci_dev(dev)); + + memcpy(pnetids, zdev->util_str, sizeof(zdev->util_str)); + return 0; + } + return -EOPNOTSUPP; +} + +/* + * Extract the pnetid for a device port. + * + * Return 0 if a pnetid is found and -ENOENT otherwise. + */ +int pnet_id_by_dev_port(struct device *dev, unsigned short port, u8 *pnetid) +{ + u8 pnetids[MAX_PNETID_PORTS][MAX_PNETID_LEN]; + static const u8 zero[MAX_PNETID_LEN] = { 0 }; + int rc = 0; + + if (!dev || port >= MAX_PNETID_PORTS) + return -ENOENT; + + if (!pnet_ids_by_device(dev, (u8 *)pnetids) && + memcmp(pnetids[port], zero, MAX_PNETID_LEN)) + memcpy(pnetid, pnetids[port], MAX_PNETID_LEN); + else + rc = -ENOENT; + + return rc; +} +EXPORT_SYMBOL_GPL(pnet_id_by_dev_port); + +MODULE_DESCRIPTION("pnetid determination from utility strings"); +MODULE_LICENSE("GPL"); diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 93cd0f1ca12b..19b2d2a9b43d 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -19,7 +19,6 @@ #include <linux/uaccess.h> #include <asm/pci_debug.h> #include <asm/pci_clp.h> -#include <asm/compat.h> #include <asm/clp.h> #include <uapi/asm/clp.h> diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 2d15d84c20ed..d387a0fbdd7e 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -668,15 +668,6 @@ void zpci_dma_exit(void) kmem_cache_destroy(dma_region_table_cache); } -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) - -static int __init dma_debug_do_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - return 0; -} -fs_initcall(dma_debug_do_init); - const struct dma_map_ops s390_pci_dma_ops = { .alloc = s390_dma_alloc, .free = s390_dma_free, @@ -685,8 +676,6 @@ const struct dma_map_ops s390_pci_dma_ops = { .map_page = s390_dma_map_pages, .unmap_page = s390_dma_unmap_pages, .mapping_error = s390_mapping_error, - /* if we support direct DMA this must be conditional */ - .is_phys = 0, /* dma_supported is unconditionally true without a callback */ }; EXPORT_SYMBOL_GPL(s390_pci_dma_ops); diff --git a/arch/s390/purgatory/Makefile b/arch/s390/purgatory/Makefile index e9525bc1b4a6..1ace023cbdce 100644 --- a/arch/s390/purgatory/Makefile +++ b/arch/s390/purgatory/Makefile @@ -21,7 +21,7 @@ LDFLAGS_purgatory.ro += -z nodefaultlib KBUILD_CFLAGS := -fno-strict-aliasing -Wall -Wstrict-prototypes KBUILD_CFLAGS += -Wno-pointer-sign -Wno-sign-compare KBUILD_CFLAGS += -fno-zero-initialized-in-bss -fno-builtin -ffreestanding -KBUILD_CFLAGS += -c -MD -Os -m64 +KBUILD_CFLAGS += -c -MD -Os -m64 -msoft-float KBUILD_CFLAGS += $(call cc-option,-fno-PIE) $(obj)/purgatory.ro: $(PURGATORY_OBJS) FORCE diff --git a/arch/s390/scripts/Makefile.chkbss b/arch/s390/scripts/Makefile.chkbss new file mode 100644 index 000000000000..d92f2d94a5d9 --- /dev/null +++ b/arch/s390/scripts/Makefile.chkbss @@ -0,0 +1,14 @@ +# SPDX-License-Identifier: GPL-2.0 + +quiet_cmd_chkbss = CHKBSS $< +define cmd_chkbss + if ! $(OBJDUMP) -j .bss -w -h $< | awk 'END { if ($$3) exit 1 }'; then \ + echo "error: $< .bss section is not empty" >&2; exit 1; \ + fi; \ + touch $@; +endef + +$(obj)/built-in.a: $(patsubst %, $(obj)/%.chkbss, $(chkbss)) + +%.o.chkbss: %.o + $(call cmd,chkbss) diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 97fe29316476..a97538b607a4 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -9,11 +9,11 @@ config SUPERH select HAVE_IDE if HAS_IOPORT_MAP select HAVE_MEMBLOCK select HAVE_MEMBLOCK_NODE_MAP + select NO_BOOTMEM select ARCH_DISCARD_MEMBLOCK select HAVE_OPROFILE select HAVE_GENERIC_DMA_COHERENT select HAVE_ARCH_TRACEHOOK - select HAVE_DMA_API_DEBUG select HAVE_PERF_EVENTS select HAVE_DEBUG_BUGVERBOSE select ARCH_HAVE_CUSTOM_GPIO_H @@ -50,6 +50,9 @@ config SUPERH select HAVE_ARCH_AUDITSYSCALL select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_NMI + select NEED_DMA_MAP_STATE + select NEED_SG_DMA_LENGTH + help The SuperH is a RISC processor targeted for use in embedded systems and consumer electronics; it was also used in the Sega Dreamcast @@ -160,12 +163,6 @@ config DMA_COHERENT config DMA_NONCOHERENT def_bool !DMA_COHERENT -config NEED_DMA_MAP_STATE - def_bool DMA_NONCOHERENT - -config NEED_SG_DMA_LENGTH - def_bool y - config PGTABLE_LEVELS default 3 if X2TLB default 2 diff --git a/arch/sh/drivers/dma/dma-api.c b/arch/sh/drivers/dma/dma-api.c index c0eec08d8f95..b05be597b19f 100644 --- a/arch/sh/drivers/dma/dma-api.c +++ b/arch/sh/drivers/dma/dma-api.c @@ -339,18 +339,6 @@ static int dma_proc_show(struct seq_file *m, void *v) return 0; } -static int dma_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, dma_proc_show, NULL); -} - -static const struct file_operations dma_proc_fops = { - .open = dma_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - int register_dmac(struct dma_info *info) { unsigned int total_channels, i; @@ -423,7 +411,7 @@ EXPORT_SYMBOL(unregister_dmac); static int __init dma_api_init(void) { printk(KERN_NOTICE "DMA: Registering DMA API.\n"); - return proc_create("dma", 0, NULL, &dma_proc_fops) ? 0 : -ENOMEM; + return proc_create_single("dma", 0, NULL, dma_proc_show) ? 0 : -ENOMEM; } subsys_initcall(dma_api_init); diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 1efcce74997b..46dd82ab2c29 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -1,3 +1,4 @@ +generic-y += compat.h generic-y += current.h generic-y += delay.h generic-y += div64.h diff --git a/arch/sh/include/asm/pci.h b/arch/sh/include/asm/pci.h index 0033f0df2b3b..10a36b1cf2ea 100644 --- a/arch/sh/include/asm/pci.h +++ b/arch/sh/include/asm/pci.h @@ -71,12 +71,6 @@ extern unsigned long PCIBIOS_MIN_IO, PCIBIOS_MIN_MEM; * SuperH has everything mapped statically like x86. */ -/* The PCI address space does equal the physical memory - * address space. The networking and block device layers use - * this boolean for bounce buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) - #ifdef CONFIG_PCI /* * None of the SH PCI controllers support MWI, it is always treated as a diff --git a/arch/sh/kernel/cpu/sh2/probe.c b/arch/sh/kernel/cpu/sh2/probe.c index 4205f6d42b69..a5bd03642678 100644 --- a/arch/sh/kernel/cpu/sh2/probe.c +++ b/arch/sh/kernel/cpu/sh2/probe.c @@ -43,7 +43,11 @@ void __ref cpu_probe(void) #endif #if defined(CONFIG_CPU_J2) +#if defined(CONFIG_SMP) unsigned cpu = hard_smp_processor_id(); +#else + unsigned cpu = 0; +#endif if (cpu == 0) of_scan_flat_dt(scan_cache, NULL); if (j2_ccr_base) __raw_writel(0x80000303, j2_ccr_base + 4*cpu); if (cpu != 0) return; diff --git a/arch/sh/kernel/dma-nommu.c b/arch/sh/kernel/dma-nommu.c index 178457d7620c..3e3a32fc676e 100644 --- a/arch/sh/kernel/dma-nommu.c +++ b/arch/sh/kernel/dma-nommu.c @@ -78,7 +78,6 @@ const struct dma_map_ops nommu_dma_ops = { .sync_single_for_device = nommu_sync_single_for_device, .sync_sg_for_device = nommu_sync_sg_for_device, #endif - .is_phys = 1, }; void __init no_iommu_init(void) diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c index afe965712a69..8648ed05ccf0 100644 --- a/arch/sh/kernel/hw_breakpoint.c +++ b/arch/sh/kernel/hw_breakpoint.c @@ -347,13 +347,8 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) /* Deliver the signal to userspace */ if (!arch_check_bp_in_kernelspace(bp)) { - siginfo_t info; - - info.si_signo = args->signr; - info.si_errno = notifier_to_errno(rc); - info.si_code = TRAP_HWBKPT; - - force_sig_info(args->signr, &info, current); + force_sig_fault(SIGTRAP, TRAP_HWBKPT, + (void __user *)NULL, current); } rcu_read_unlock(); diff --git a/arch/sh/kernel/irq.c b/arch/sh/kernel/irq.c index 245dbeb20afe..5717c7cbdd97 100644 --- a/arch/sh/kernel/irq.c +++ b/arch/sh/kernel/irq.c @@ -44,7 +44,7 @@ int arch_show_interrupts(struct seq_file *p, int prec) seq_printf(p, "%*s: ", prec, "NMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", irq_stat[j].__nmi_count); + seq_printf(p, "%10u ", nmi_count(j)); seq_printf(p, " Non-maskable interrupts\n"); seq_printf(p, "%*s: %10u\n", prec, "ERR", atomic_read(&irq_err_count)); diff --git a/arch/sh/kernel/setup.c b/arch/sh/kernel/setup.c index d34e998b809f..c286cf5da6e7 100644 --- a/arch/sh/kernel/setup.c +++ b/arch/sh/kernel/setup.c @@ -11,7 +11,6 @@ #include <linux/ioport.h> #include <linux/init.h> #include <linux/initrd.h> -#include <linux/bootmem.h> #include <linux/console.h> #include <linux/root_dev.h> #include <linux/utsname.h> diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index b3770bb26211..60709ad17fc7 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -477,7 +477,6 @@ asmlinkage void do_address_error(struct pt_regs *regs, { unsigned long error_code = 0; mm_segment_t oldfs; - siginfo_t info; insn_size_t instruction; int tmp; @@ -537,11 +536,7 @@ uspace_segv: "access (PC %lx PR %lx)\n", current->comm, regs->pc, regs->pr); - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = (void __user *)address; - force_sig_info(SIGBUS, &info, current); + force_sig_fault(SIGBUS, si_code, (void __user *)address, current); } else { inc_unaligned_kernel_access(); @@ -598,19 +593,20 @@ int is_dsp_inst(struct pt_regs *regs) #ifdef CONFIG_CPU_SH2A asmlinkage void do_divide_error(unsigned long r4) { - siginfo_t info; + int code; switch (r4) { case TRAP_DIVZERO_ERROR: - info.si_code = FPE_INTDIV; + code = FPE_INTDIV; break; case TRAP_DIVOVF_ERROR: - info.si_code = FPE_INTOVF; + code = FPE_INTOVF; break; + default: + /* Let gcc know unhandled cases don't make it past here */ + return; } - - info.si_signo = SIGFPE; - force_sig_info(info.si_signo, &info, current); + force_sig_fault(SIGFPE, code, NULL, current); } #endif diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index c86f4360c6ce..a0fa8fc88739 100644 --- a/arch/sh/math-emu/math.c +++ b/arch/sh/math-emu/math.c @@ -507,7 +507,6 @@ static int ieee_fpe_handler(struct pt_regs *regs) unsigned short insn = *(unsigned short *)regs->pc; unsigned short finsn; unsigned long nextpc; - siginfo_t info; int nib[4] = { (insn >> 12) & 0xf, (insn >> 8) & 0xf, @@ -560,11 +559,8 @@ static int ieee_fpe_handler(struct pt_regs *regs) ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK); task_thread_info(tsk)->status |= TS_USEDFPU; } else { - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = FPE_FLTINV; - info.si_addr = (void __user *)regs->pc; - force_sig_info(SIGFPE, &info, tsk); + force_sig_fault(SIGFPE, FPE_FLTINV, + (void __user *)regs->pc, tsk); } regs->pc = nextpc; diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index 8ce98691d822..fceb2adfcac7 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -20,18 +20,9 @@ #include <asm/cacheflush.h> #include <asm/addrspace.h> -#define PREALLOC_DMA_DEBUG_ENTRIES 4096 - const struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); -static int __init dma_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - return 0; -} -fs_initcall(dma_init); - void *dma_generic_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs) @@ -59,7 +50,9 @@ void *dma_generic_alloc_coherent(struct device *dev, size_t size, split_page(pfn_to_page(virt_to_phys(ret) >> PAGE_SHIFT), order); - *dma_handle = virt_to_phys(ret) - PFN_PHYS(dev->dma_pfn_offset); + *dma_handle = virt_to_phys(ret); + if (!WARN_ON(!dev)) + *dma_handle -= PFN_PHYS(dev->dma_pfn_offset); return ret_nocache; } @@ -69,9 +62,12 @@ void dma_generic_free_coherent(struct device *dev, size_t size, unsigned long attrs) { int order = get_order(size); - unsigned long pfn = (dma_handle >> PAGE_SHIFT) + dev->dma_pfn_offset; + unsigned long pfn = dma_handle >> PAGE_SHIFT; int k; + if (!WARN_ON(!dev)) + pfn += dev->dma_pfn_offset; + for (k = 0; k < (1 << order); k++) __free_pages(pfn_to_page(pfn + k), 0); @@ -143,7 +139,7 @@ int __init platform_resource_setup_memory(struct platform_device *pdev, if (!memsize) return 0; - buf = dma_alloc_coherent(NULL, memsize, &dma_handle, GFP_KERNEL); + buf = dma_alloc_coherent(&pdev->dev, memsize, &dma_handle, GFP_KERNEL); if (!buf) { pr_warning("%s: unable to allocate memory\n", name); return -ENOMEM; diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 6fd1bf7481c7..b8e7bb84b6b1 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -42,14 +42,7 @@ static void force_sig_info_fault(int si_signo, int si_code, unsigned long address, struct task_struct *tsk) { - siginfo_t info; - - info.si_signo = si_signo; - info.si_errno = 0; - info.si_code = si_code; - info.si_addr = (void __user *)address; - - force_sig_info(si_signo, &info, tsk); + force_sig_fault(si_signo, si_code, (void __user *)address, tsk); } /* diff --git a/arch/sh/mm/init.c b/arch/sh/mm/init.c index ce0bbaa7e404..4034035fbede 100644 --- a/arch/sh/mm/init.c +++ b/arch/sh/mm/init.c @@ -211,59 +211,15 @@ void __init allocate_pgdat(unsigned int nid) NODE_DATA(nid) = __va(phys); memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); - - NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; #endif NODE_DATA(nid)->node_start_pfn = start_pfn; NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; } -static void __init bootmem_init_one_node(unsigned int nid) -{ - unsigned long total_pages, paddr; - unsigned long end_pfn; - struct pglist_data *p; - - p = NODE_DATA(nid); - - /* Nothing to do.. */ - if (!p->node_spanned_pages) - return; - - end_pfn = pgdat_end_pfn(p); - - total_pages = bootmem_bootmap_pages(p->node_spanned_pages); - - paddr = memblock_alloc(total_pages << PAGE_SHIFT, PAGE_SIZE); - if (!paddr) - panic("Can't allocate bootmap for nid[%d]\n", nid); - - init_bootmem_node(p, paddr >> PAGE_SHIFT, p->node_start_pfn, end_pfn); - - free_bootmem_with_active_regions(nid, end_pfn); - - /* - * XXX Handle initial reservations for the system memory node - * only for the moment, we'll refactor this later for handling - * reservations in other nodes. - */ - if (nid == 0) { - struct memblock_region *reg; - - /* Reserve the sections we're already using. */ - for_each_memblock(reserved, reg) { - reserve_bootmem(reg->base, reg->size, BOOTMEM_DEFAULT); - } - } - - sparse_memory_present_with_active_regions(nid); -} - static void __init do_init_bootmem(void) { struct memblock_region *reg; - int i; /* Add active regions with valid PFNs. */ for_each_memblock(memory, reg) { @@ -279,9 +235,12 @@ static void __init do_init_bootmem(void) plat_mem_setup(); - for_each_online_node(i) - bootmem_init_one_node(i); + for_each_memblock(memory, reg) { + int nid = memblock_get_region_node(reg); + memory_present(nid, memblock_region_memory_base_pfn(reg), + memblock_region_memory_end_pfn(reg)); + } sparse_init(); } @@ -322,7 +281,6 @@ void __init paging_init(void) { unsigned long max_zone_pfns[MAX_NR_ZONES]; unsigned long vaddr, end; - int nid; sh_mv.mv_mem_init(); @@ -377,21 +335,7 @@ void __init paging_init(void) kmap_coherent_init(); memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); - - for_each_online_node(nid) { - pg_data_t *pgdat = NODE_DATA(nid); - unsigned long low, start_pfn; - - start_pfn = pgdat->bdata->node_min_pfn; - low = pgdat->bdata->node_low_pfn; - - if (max_zone_pfns[ZONE_NORMAL] < low) - max_zone_pfns[ZONE_NORMAL] = low; - - printk("Node %u: start_pfn = 0x%lx, low = 0x%lx\n", - nid, start_pfn, low); - } - + max_zone_pfns[ZONE_NORMAL] = max_low_pfn; free_area_init_nodes(max_zone_pfns); } diff --git a/arch/sh/mm/numa.c b/arch/sh/mm/numa.c index 05713d190247..830e8b3684e4 100644 --- a/arch/sh/mm/numa.c +++ b/arch/sh/mm/numa.c @@ -8,7 +8,6 @@ * for more details. */ #include <linux/module.h> -#include <linux/bootmem.h> #include <linux/memblock.h> #include <linux/mm.h> #include <linux/numa.h> @@ -26,9 +25,7 @@ EXPORT_SYMBOL_GPL(node_data); */ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end) { - unsigned long bootmap_pages; unsigned long start_pfn, end_pfn; - unsigned long bootmem_paddr; /* Don't allow bogus node assignment */ BUG_ON(nid >= MAX_NUMNODES || nid <= 0); @@ -48,25 +45,9 @@ void __init setup_bootmem_node(int nid, unsigned long start, unsigned long end) SMP_CACHE_BYTES, end)); memset(NODE_DATA(nid), 0, sizeof(struct pglist_data)); - NODE_DATA(nid)->bdata = &bootmem_node_data[nid]; NODE_DATA(nid)->node_start_pfn = start_pfn; NODE_DATA(nid)->node_spanned_pages = end_pfn - start_pfn; - /* Node-local bootmap */ - bootmap_pages = bootmem_bootmap_pages(end_pfn - start_pfn); - bootmem_paddr = memblock_alloc_base(bootmap_pages << PAGE_SHIFT, - PAGE_SIZE, end); - init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT, - start_pfn, end_pfn); - - free_bootmem_with_active_regions(nid, end_pfn); - - /* Reserve the pgdat and bootmap space with the bootmem allocator */ - reserve_bootmem_node(NODE_DATA(nid), start_pfn << PAGE_SHIFT, - sizeof(struct pglist_data), BOOTMEM_DEFAULT); - reserve_bootmem_node(NODE_DATA(nid), bootmem_paddr, - bootmap_pages << PAGE_SHIFT, BOOTMEM_DEFAULT); - /* It's up */ node_set_online(nid); diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig index 8767e45f1b2b..435dbc033afe 100644 --- a/arch/sparc/Kconfig +++ b/arch/sparc/Kconfig @@ -25,7 +25,6 @@ config SPARC select RTC_CLASS select RTC_DRV_M48T59 select RTC_SYSTOHC - select HAVE_DMA_API_DEBUG select HAVE_ARCH_JUMP_LABEL if SPARC64 select GENERIC_IRQ_SHOW select ARCH_WANT_IPC_PARSE_VERSION @@ -44,6 +43,8 @@ config SPARC select ARCH_HAS_SG_CHAIN select CPU_NO_EFFICIENT_FFS select LOCKDEP_SMALL if LOCKDEP + select NEED_DMA_MAP_STATE + select NEED_SG_DMA_LENGTH config SPARC32 def_bool !64BIT @@ -67,6 +68,7 @@ config SPARC64 select HAVE_SYSCALL_TRACEPOINTS select HAVE_CONTEXT_TRACKING select HAVE_DEBUG_KMEMLEAK + select IOMMU_HELPER select SPARSE_IRQ select RTC_DRV_CMOS select RTC_DRV_BQ4802 @@ -102,14 +104,6 @@ config ARCH_ATU bool default y if SPARC64 -config ARCH_DMA_ADDR_T_64BIT - bool - default y if ARCH_ATU - -config IOMMU_HELPER - bool - default y if SPARC64 - config STACKTRACE_SUPPORT bool default y if SPARC64 @@ -146,12 +140,6 @@ config ZONE_DMA bool default y if SPARC32 -config NEED_DMA_MAP_STATE - def_bool y - -config NEED_SG_DMA_LENGTH - def_bool y - config GENERIC_ISA_DMA bool default y if SPARC32 diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h index 615283e16f22..4eb51d2dae98 100644 --- a/arch/sparc/include/asm/compat.h +++ b/arch/sparc/include/asm/compat.h @@ -11,7 +11,6 @@ typedef u32 compat_size_t; typedef s32 compat_ssize_t; -typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; typedef u16 __compat_uid_t; @@ -39,16 +38,6 @@ typedef u32 compat_ulong_t; typedef u64 compat_u64; typedef u32 compat_uptr_t; -struct compat_timespec { - compat_time_t tv_sec; - s32 tv_nsec; -}; - -struct compat_timeval { - compat_time_t tv_sec; - s32 tv_usec; -}; - struct compat_stat { compat_dev_t st_dev; compat_ino_t st_ino; @@ -168,6 +157,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr) return (u32)(unsigned long)uptr; } +#ifdef CONFIG_COMPAT static inline void __user *arch_compat_alloc_user_space(long len) { struct pt_regs *regs = current_thread_info()->kregs; @@ -184,6 +174,7 @@ static inline void __user *arch_compat_alloc_user_space(long len) return (void __user *) usp; } +#endif struct compat_ipc64_perm { compat_key_t key; @@ -201,10 +192,10 @@ struct compat_ipc64_perm { struct compat_semid64_ds { struct compat_ipc64_perm sem_perm; - unsigned int __pad1; - compat_time_t sem_otime; - unsigned int __pad2; - compat_time_t sem_ctime; + unsigned int sem_otime_high; + unsigned int sem_otime; + unsigned int sem_ctime_high; + unsigned int sem_ctime; u32 sem_nsems; u32 __unused1; u32 __unused2; @@ -212,12 +203,12 @@ struct compat_semid64_ds { struct compat_msqid64_ds { struct compat_ipc64_perm msg_perm; - unsigned int __pad1; - compat_time_t msg_stime; - unsigned int __pad2; - compat_time_t msg_rtime; - unsigned int __pad3; - compat_time_t msg_ctime; + unsigned int msg_stime_high; + unsigned int msg_stime; + unsigned int msg_rtime_high; + unsigned int msg_rtime; + unsigned int msg_ctime_high; + unsigned int msg_ctime; unsigned int msg_cbytes; unsigned int msg_qnum; unsigned int msg_qbytes; @@ -229,12 +220,12 @@ struct compat_msqid64_ds { struct compat_shmid64_ds { struct compat_ipc64_perm shm_perm; - unsigned int __pad1; - compat_time_t shm_atime; - unsigned int __pad2; - compat_time_t shm_dtime; - unsigned int __pad3; - compat_time_t shm_ctime; + unsigned int shm_atime_high; + unsigned int shm_atime; + unsigned int shm_dtime_high; + unsigned int shm_dtime; + unsigned int shm_ctime_high; + unsigned int shm_ctime; compat_size_t shm_segsz; compat_pid_t shm_cpid; compat_pid_t shm_lpid; @@ -243,6 +234,7 @@ struct compat_shmid64_ds { unsigned int __unused2; }; +#ifdef CONFIG_COMPAT static inline int is_compat_task(void) { return test_thread_flag(TIF_32BIT); @@ -254,5 +246,6 @@ static inline bool in_compat_syscall(void) return pt_regs_trap_type(current_pt_regs()) == 0x110; } #define in_compat_syscall in_compat_syscall +#endif #endif /* _ASM_SPARC64_COMPAT_H */ diff --git a/arch/sparc/include/asm/hardirq_64.h b/arch/sparc/include/asm/hardirq_64.h index f56540271993..75b92bfe04b5 100644 --- a/arch/sparc/include/asm/hardirq_64.h +++ b/arch/sparc/include/asm/hardirq_64.h @@ -10,8 +10,9 @@ #include <asm/cpudata.h> #define __ARCH_IRQ_STAT -#define local_softirq_pending() \ - (local_cpu_data().__softirq_pending) + +#define local_softirq_pending_ref \ + __cpu_data.__softirq_pending void ack_bad_irq(unsigned int irq); diff --git a/arch/sparc/include/asm/iommu-common.h b/arch/sparc/include/asm/iommu-common.h new file mode 100644 index 000000000000..802c90c79d1f --- /dev/null +++ b/arch/sparc/include/asm/iommu-common.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_IOMMU_COMMON_H +#define _LINUX_IOMMU_COMMON_H + +#include <linux/spinlock_types.h> +#include <linux/device.h> +#include <asm/page.h> + +#define IOMMU_POOL_HASHBITS 4 +#define IOMMU_NR_POOLS (1 << IOMMU_POOL_HASHBITS) +#define IOMMU_ERROR_CODE (~(unsigned long) 0) + +struct iommu_pool { + unsigned long start; + unsigned long end; + unsigned long hint; + spinlock_t lock; +}; + +struct iommu_map_table { + unsigned long table_map_base; + unsigned long table_shift; + unsigned long nr_pools; + void (*lazy_flush)(struct iommu_map_table *); + unsigned long poolsize; + struct iommu_pool pools[IOMMU_NR_POOLS]; + u32 flags; +#define IOMMU_HAS_LARGE_POOL 0x00000001 +#define IOMMU_NO_SPAN_BOUND 0x00000002 +#define IOMMU_NEED_FLUSH 0x00000004 + struct iommu_pool large_pool; + unsigned long *map; +}; + +extern void iommu_tbl_pool_init(struct iommu_map_table *iommu, + unsigned long num_entries, + u32 table_shift, + void (*lazy_flush)(struct iommu_map_table *), + bool large_pool, u32 npools, + bool skip_span_boundary_check); + +extern unsigned long iommu_tbl_range_alloc(struct device *dev, + struct iommu_map_table *iommu, + unsigned long npages, + unsigned long *handle, + unsigned long mask, + unsigned int align_order); + +extern void iommu_tbl_range_free(struct iommu_map_table *iommu, + u64 dma_addr, unsigned long npages, + unsigned long entry); + +#endif diff --git a/arch/sparc/include/asm/iommu_64.h b/arch/sparc/include/asm/iommu_64.h index 9ed6b54caa4b..0ef6dedf747e 100644 --- a/arch/sparc/include/asm/iommu_64.h +++ b/arch/sparc/include/asm/iommu_64.h @@ -17,7 +17,7 @@ #define IOPTE_WRITE 0x0000000000000002UL #define IOMMU_NUM_CTXS 4096 -#include <linux/iommu-common.h> +#include <asm/iommu-common.h> struct iommu_arena { unsigned long *map; diff --git a/arch/sparc/include/asm/pci_32.h b/arch/sparc/include/asm/pci_32.h index 98917e48727d..cfc0ee9476c6 100644 --- a/arch/sparc/include/asm/pci_32.h +++ b/arch/sparc/include/asm/pci_32.h @@ -17,10 +17,6 @@ #define PCI_IRQ_NONE 0xffffffff -/* Dynamic DMA mapping stuff. - */ -#define PCI_DMA_BUS_IS_PHYS (0) - #endif /* __KERNEL__ */ #ifndef CONFIG_LEON_PCI diff --git a/arch/sparc/include/asm/pci_64.h b/arch/sparc/include/asm/pci_64.h index 671274e36cfa..fac77813402c 100644 --- a/arch/sparc/include/asm/pci_64.h +++ b/arch/sparc/include/asm/pci_64.h @@ -17,12 +17,6 @@ #define PCI_IRQ_NONE 0xffffffff -/* The PCI address space does not equal the physical memory - * address space. The networking and block device layers use - * this boolean for bounce buffer decisions. - */ -#define PCI_DMA_BUS_IS_PHYS (0) - /* PCI IOMMU mapping bypass support. */ /* PCI 64-bit addressing works for all slots on all controller diff --git a/arch/sparc/include/uapi/asm/jsflash.h b/arch/sparc/include/uapi/asm/jsflash.h deleted file mode 100644 index 68c98a54281a..000000000000 --- a/arch/sparc/include/uapi/asm/jsflash.h +++ /dev/null @@ -1,40 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * jsflash.h: OS Flash SIMM support for JavaStations. - * - * Copyright (C) 1999 Pete Zaitcev - */ - -#ifndef _SPARC_JSFLASH_H -#define _SPARC_JSFLASH_H - -#ifndef _SPARC_TYPES_H -#include <linux/types.h> -#endif - -/* - * Semantics of the offset is a full address. - * Hardcode it or get it from probe ioctl. - * - * We use full bus address, so that we would be - * automatically compatible with possible future systems. - */ - -#define JSFLASH_IDENT (('F'<<8)|54) -struct jsflash_ident_arg { - __u64 off; /* 0x20000000 is included */ - __u32 size; - char name[32]; /* With trailing zero */ -}; - -#define JSFLASH_ERASE (('F'<<8)|55) -/* Put 0 as argument, may be flags or sector number... */ - -#define JSFLASH_PROGRAM (('F'<<8)|56) -struct jsflash_program_arg { - __u64 data; /* char* for sparc and sparc64 */ - __u64 off; - __u32 size; -}; - -#endif /* _SPARC_JSFLASH_H */ diff --git a/arch/sparc/include/uapi/asm/msgbuf.h b/arch/sparc/include/uapi/asm/msgbuf.h index b601c4f4d956..ffc46c211d6d 100644 --- a/arch/sparc/include/uapi/asm/msgbuf.h +++ b/arch/sparc/include/uapi/asm/msgbuf.h @@ -8,25 +8,22 @@ * between kernel and user space. * * Pad space is left for: - * - 64-bit time_t to solve y2038 problem * - 2 miscellaneous 32-bit values */ - -#if defined(__sparc__) && defined(__arch64__) -# define PADDING(x) -#else -# define PADDING(x) unsigned int x; -#endif - - struct msqid64_ds { struct ipc64_perm msg_perm; - PADDING(__pad1) +#if defined(__sparc__) && defined(__arch64__) __kernel_time_t msg_stime; /* last msgsnd time */ - PADDING(__pad2) __kernel_time_t msg_rtime; /* last msgrcv time */ - PADDING(__pad3) __kernel_time_t msg_ctime; /* last change time */ +#else + unsigned long msg_stime_high; + unsigned long msg_stime; /* last msgsnd time */ + unsigned long msg_rtime_high; + unsigned long msg_rtime; /* last msgrcv time */ + unsigned long msg_ctime_high; + unsigned long msg_ctime; /* last change time */ +#endif unsigned long msg_cbytes; /* current number of bytes on queue */ unsigned long msg_qnum; /* number of messages in queue */ unsigned long msg_qbytes; /* max number of bytes on queue */ @@ -35,5 +32,4 @@ struct msqid64_ds { unsigned long __unused1; unsigned long __unused2; }; -#undef PADDING #endif /* _SPARC_MSGBUF_H */ diff --git a/arch/sparc/include/uapi/asm/oradax.h b/arch/sparc/include/uapi/asm/oradax.h index 722951908b0a..4f6676fe4bcc 100644 --- a/arch/sparc/include/uapi/asm/oradax.h +++ b/arch/sparc/include/uapi/asm/oradax.h @@ -3,7 +3,7 @@ * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by - * the Free Software Foundation, either version 3 of the License, or + * the Free Software Foundation, either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, diff --git a/arch/sparc/include/uapi/asm/sembuf.h b/arch/sparc/include/uapi/asm/sembuf.h index f49b0ffa0ab8..f3d309c2e1cd 100644 --- a/arch/sparc/include/uapi/asm/sembuf.h +++ b/arch/sparc/include/uapi/asm/sembuf.h @@ -8,25 +8,23 @@ * between kernel and user space. * * Pad space is left for: - * - 64-bit time_t to solve y2038 problem * - 2 miscellaneous 32-bit values */ -#if defined(__sparc__) && defined(__arch64__) -# define PADDING(x) -#else -# define PADDING(x) unsigned int x; -#endif struct semid64_ds { struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ - PADDING(__pad1) +#if defined(__sparc__) && defined(__arch64__) __kernel_time_t sem_otime; /* last semop time */ - PADDING(__pad2) __kernel_time_t sem_ctime; /* last change time */ +#else + unsigned long sem_otime_high; + unsigned long sem_otime; /* last semop time */ + unsigned long sem_ctime_high; + unsigned long sem_ctime; /* last change time */ +#endif unsigned long sem_nsems; /* no. of semaphores in array */ unsigned long __unused1; unsigned long __unused2; }; -#undef PADDING #endif /* _SPARC64_SEMBUF_H */ diff --git a/arch/sparc/include/uapi/asm/shmbuf.h b/arch/sparc/include/uapi/asm/shmbuf.h index 286631db705c..06618b84822d 100644 --- a/arch/sparc/include/uapi/asm/shmbuf.h +++ b/arch/sparc/include/uapi/asm/shmbuf.h @@ -8,24 +8,23 @@ * between kernel and user space. * * Pad space is left for: - * - 64-bit time_t to solve y2038 problem * - 2 miscellaneous 32-bit values */ -#if defined(__sparc__) && defined(__arch64__) -# define PADDING(x) -#else -# define PADDING(x) unsigned int x; -#endif - struct shmid64_ds { struct ipc64_perm shm_perm; /* operation perms */ - PADDING(__pad1) +#if defined(__sparc__) && defined(__arch64__) __kernel_time_t shm_atime; /* last attach time */ - PADDING(__pad2) __kernel_time_t shm_dtime; /* last detach time */ - PADDING(__pad3) __kernel_time_t shm_ctime; /* last change time */ +#else + unsigned long shm_atime_high; + unsigned long shm_atime; /* last attach time */ + unsigned long shm_dtime_high; + unsigned long shm_dtime; /* last detach time */ + unsigned long shm_ctime_high; + unsigned long shm_ctime; /* last change time */ +#endif size_t shm_segsz; /* size of segment (bytes) */ __kernel_pid_t shm_cpid; /* pid of creator */ __kernel_pid_t shm_lpid; /* pid of last operator */ @@ -46,6 +45,4 @@ struct shminfo64 { unsigned long __unused4; }; -#undef PADDING - #endif /* _SPARC_SHMBUF_H */ diff --git a/arch/sparc/include/uapi/asm/siginfo.h b/arch/sparc/include/uapi/asm/siginfo.h index 896ce447d16a..e7049550ac82 100644 --- a/arch/sparc/include/uapi/asm/siginfo.h +++ b/arch/sparc/include/uapi/asm/siginfo.h @@ -18,13 +18,6 @@ #define SI_NOINFO 32767 /* no information in siginfo_t */ /* - * SIGFPE si_codes - */ -#ifdef __KERNEL__ -#define FPE_FIXME 0 /* Broken dup of SI_USER */ -#endif /* __KERNEL__ */ - -/* * SIGEMT si_codes */ #define EMT_TAGOVF 1 /* tag overflow */ diff --git a/arch/sparc/kernel/Makefile b/arch/sparc/kernel/Makefile index 76cb57750dda..cf8640841b7a 100644 --- a/arch/sparc/kernel/Makefile +++ b/arch/sparc/kernel/Makefile @@ -59,7 +59,7 @@ obj-$(CONFIG_SPARC32) += leon_pmc.o obj-$(CONFIG_SPARC64) += reboot.o obj-$(CONFIG_SPARC64) += sysfs.o -obj-$(CONFIG_SPARC64) += iommu.o +obj-$(CONFIG_SPARC64) += iommu.o iommu-common.o obj-$(CONFIG_SPARC64) += central.o obj-$(CONFIG_SPARC64) += starfire.o obj-$(CONFIG_SPARC64) += power.o @@ -74,8 +74,6 @@ obj-$(CONFIG_SPARC64) += pcr.o obj-$(CONFIG_SPARC64) += nmi.o obj-$(CONFIG_SPARC64_SMP) += cpumap.o -obj-y += dma.o - obj-$(CONFIG_PCIC_PCI) += pcic.o obj-$(CONFIG_LEON_PCI) += leon_pci.o obj-$(CONFIG_SPARC_GRPCI2)+= leon_pci_grpci2.o diff --git a/arch/sparc/kernel/dma.c b/arch/sparc/kernel/dma.c deleted file mode 100644 index f73e7597c971..000000000000 --- a/arch/sparc/kernel/dma.c +++ /dev/null @@ -1,13 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#include <linux/kernel.h> -#include <linux/dma-mapping.h> -#include <linux/dma-debug.h> - -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 15) - -static int __init dma_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - return 0; -} -fs_initcall(dma_init); diff --git a/arch/sparc/kernel/iommu-common.c b/arch/sparc/kernel/iommu-common.c new file mode 100644 index 000000000000..59cb16691322 --- /dev/null +++ b/arch/sparc/kernel/iommu-common.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * IOMMU mmap management and range allocation functions. + * Based almost entirely upon the powerpc iommu allocator. + */ + +#include <linux/export.h> +#include <linux/bitmap.h> +#include <linux/bug.h> +#include <linux/iommu-helper.h> +#include <linux/dma-mapping.h> +#include <linux/hash.h> +#include <asm/iommu-common.h> + +static unsigned long iommu_large_alloc = 15; + +static DEFINE_PER_CPU(unsigned int, iommu_hash_common); + +static inline bool need_flush(struct iommu_map_table *iommu) +{ + return ((iommu->flags & IOMMU_NEED_FLUSH) != 0); +} + +static inline void set_flush(struct iommu_map_table *iommu) +{ + iommu->flags |= IOMMU_NEED_FLUSH; +} + +static inline void clear_flush(struct iommu_map_table *iommu) +{ + iommu->flags &= ~IOMMU_NEED_FLUSH; +} + +static void setup_iommu_pool_hash(void) +{ + unsigned int i; + static bool do_once; + + if (do_once) + return; + do_once = true; + for_each_possible_cpu(i) + per_cpu(iommu_hash_common, i) = hash_32(i, IOMMU_POOL_HASHBITS); +} + +/* + * Initialize iommu_pool entries for the iommu_map_table. `num_entries' + * is the number of table entries. If `large_pool' is set to true, + * the top 1/4 of the table will be set aside for pool allocations + * of more than iommu_large_alloc pages. + */ +void iommu_tbl_pool_init(struct iommu_map_table *iommu, + unsigned long num_entries, + u32 table_shift, + void (*lazy_flush)(struct iommu_map_table *), + bool large_pool, u32 npools, + bool skip_span_boundary_check) +{ + unsigned int start, i; + struct iommu_pool *p = &(iommu->large_pool); + + setup_iommu_pool_hash(); + if (npools == 0) + iommu->nr_pools = IOMMU_NR_POOLS; + else + iommu->nr_pools = npools; + BUG_ON(npools > IOMMU_NR_POOLS); + + iommu->table_shift = table_shift; + iommu->lazy_flush = lazy_flush; + start = 0; + if (skip_span_boundary_check) + iommu->flags |= IOMMU_NO_SPAN_BOUND; + if (large_pool) + iommu->flags |= IOMMU_HAS_LARGE_POOL; + + if (!large_pool) + iommu->poolsize = num_entries/iommu->nr_pools; + else + iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools; + for (i = 0; i < iommu->nr_pools; i++) { + spin_lock_init(&(iommu->pools[i].lock)); + iommu->pools[i].start = start; + iommu->pools[i].hint = start; + start += iommu->poolsize; /* start for next pool */ + iommu->pools[i].end = start - 1; + } + if (!large_pool) + return; + /* initialize large_pool */ + spin_lock_init(&(p->lock)); + p->start = start; + p->hint = p->start; + p->end = num_entries; +} + +unsigned long iommu_tbl_range_alloc(struct device *dev, + struct iommu_map_table *iommu, + unsigned long npages, + unsigned long *handle, + unsigned long mask, + unsigned int align_order) +{ + unsigned int pool_hash = __this_cpu_read(iommu_hash_common); + unsigned long n, end, start, limit, boundary_size; + struct iommu_pool *pool; + int pass = 0; + unsigned int pool_nr; + unsigned int npools = iommu->nr_pools; + unsigned long flags; + bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0); + bool largealloc = (large_pool && npages > iommu_large_alloc); + unsigned long shift; + unsigned long align_mask = 0; + + if (align_order > 0) + align_mask = ~0ul >> (BITS_PER_LONG - align_order); + + /* Sanity check */ + if (unlikely(npages == 0)) { + WARN_ON_ONCE(1); + return IOMMU_ERROR_CODE; + } + + if (largealloc) { + pool = &(iommu->large_pool); + pool_nr = 0; /* to keep compiler happy */ + } else { + /* pick out pool_nr */ + pool_nr = pool_hash & (npools - 1); + pool = &(iommu->pools[pool_nr]); + } + spin_lock_irqsave(&pool->lock, flags); + + again: + if (pass == 0 && handle && *handle && + (*handle >= pool->start) && (*handle < pool->end)) + start = *handle; + else + start = pool->hint; + + limit = pool->end; + + /* The case below can happen if we have a small segment appended + * to a large, or when the previous alloc was at the very end of + * the available space. If so, go back to the beginning. If a + * flush is needed, it will get done based on the return value + * from iommu_area_alloc() below. + */ + if (start >= limit) + start = pool->start; + shift = iommu->table_map_base >> iommu->table_shift; + if (limit + shift > mask) { + limit = mask - shift + 1; + /* If we're constrained on address range, first try + * at the masked hint to avoid O(n) search complexity, + * but on second pass, start at 0 in pool 0. + */ + if ((start & mask) >= limit || pass > 0) { + spin_unlock(&(pool->lock)); + pool = &(iommu->pools[0]); + spin_lock(&(pool->lock)); + start = pool->start; + } else { + start &= mask; + } + } + + if (dev) + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + 1 << iommu->table_shift); + else + boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift); + + boundary_size = boundary_size >> iommu->table_shift; + /* + * if the skip_span_boundary_check had been set during init, we set + * things up so that iommu_is_span_boundary() merely checks if the + * (index + npages) < num_tsb_entries + */ + if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) { + shift = 0; + boundary_size = iommu->poolsize * iommu->nr_pools; + } + n = iommu_area_alloc(iommu->map, limit, start, npages, shift, + boundary_size, align_mask); + if (n == -1) { + if (likely(pass == 0)) { + /* First failure, rescan from the beginning. */ + pool->hint = pool->start; + set_flush(iommu); + pass++; + goto again; + } else if (!largealloc && pass <= iommu->nr_pools) { + spin_unlock(&(pool->lock)); + pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1); + pool = &(iommu->pools[pool_nr]); + spin_lock(&(pool->lock)); + pool->hint = pool->start; + set_flush(iommu); + pass++; + goto again; + } else { + /* give up */ + n = IOMMU_ERROR_CODE; + goto bail; + } + } + if (iommu->lazy_flush && + (n < pool->hint || need_flush(iommu))) { + clear_flush(iommu); + iommu->lazy_flush(iommu); + } + + end = n + npages; + pool->hint = end; + + /* Update handle for SG allocations */ + if (handle) + *handle = end; +bail: + spin_unlock_irqrestore(&(pool->lock), flags); + + return n; +} + +static struct iommu_pool *get_pool(struct iommu_map_table *tbl, + unsigned long entry) +{ + struct iommu_pool *p; + unsigned long largepool_start = tbl->large_pool.start; + bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0); + + /* The large pool is the last pool at the top of the table */ + if (large_pool && entry >= largepool_start) { + p = &tbl->large_pool; + } else { + unsigned int pool_nr = entry / tbl->poolsize; + + BUG_ON(pool_nr >= tbl->nr_pools); + p = &tbl->pools[pool_nr]; + } + return p; +} + +/* Caller supplies the index of the entry into the iommu map table + * itself when the mapping from dma_addr to the entry is not the + * default addr->entry mapping below. + */ +void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr, + unsigned long npages, unsigned long entry) +{ + struct iommu_pool *pool; + unsigned long flags; + unsigned long shift = iommu->table_shift; + + if (entry == IOMMU_ERROR_CODE) /* use default addr->entry mapping */ + entry = (dma_addr - iommu->table_map_base) >> shift; + pool = get_pool(iommu, entry); + + spin_lock_irqsave(&(pool->lock), flags); + bitmap_clear(iommu->map, entry, npages); + spin_unlock_irqrestore(&(pool->lock), flags); +} diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index b08dc3416f06..40d008b0bd3e 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -14,7 +14,7 @@ #include <linux/errno.h> #include <linux/iommu-helper.h> #include <linux/bitmap.h> -#include <linux/iommu-common.h> +#include <asm/iommu-common.h> #ifdef CONFIG_PCI #include <linux/pci.h> diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 3bcef9ce74df..cca9134cfa7d 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -678,25 +678,14 @@ static int sparc_io_proc_show(struct seq_file *m, void *v) return 0; } - -static int sparc_io_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, sparc_io_proc_show, PDE_DATA(inode)); -} - -static const struct file_operations sparc_io_proc_fops = { - .owner = THIS_MODULE, - .open = sparc_io_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; #endif /* CONFIG_PROC_FS */ static void register_proc_sparc_ioport(void) { #ifdef CONFIG_PROC_FS - proc_create_data("io_map", 0, NULL, &sparc_io_proc_fops, &sparc_iomap); - proc_create_data("dvma_map", 0, NULL, &sparc_io_proc_fops, &_sparc_dvma); + proc_create_single_data("io_map", 0, NULL, sparc_io_proc_show, + &sparc_iomap); + proc_create_single_data("dvma_map", 0, NULL, sparc_io_proc_show, + &_sparc_dvma); #endif } diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 86b625f9d8dc..c0fa3ef6cf01 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -16,7 +16,7 @@ #include <linux/list.h> #include <linux/init.h> #include <linux/bitmap.h> -#include <linux/iommu-common.h> +#include <asm/iommu-common.h> #include <asm/hypervisor.h> #include <asm/iommu.h> diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index 249367228c33..565d9ac883d0 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -16,7 +16,7 @@ #include <linux/export.h> #include <linux/log2.h> #include <linux/of_device.h> -#include <linux/iommu-common.h> +#include <asm/iommu-common.h> #include <asm/iommu.h> #include <asm/irq.h> diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 454a8af28f13..6c086086ca8f 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -518,14 +518,7 @@ void synchronize_user_stack(void) static void stack_unaligned(unsigned long sp) { - siginfo_t info; - - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRALN; - info.si_addr = (void __user *) sp; - info.si_trapno = 0; - force_sig_info(SIGBUS, &info, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) sp, 0, current); } void fault_in_user_windows(void) diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index e8c3cb6b6d08..7f3d9c59719a 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -147,17 +147,11 @@ SYSCALL_DEFINE0(nis_syscall) asmlinkage void sparc_breakpoint (struct pt_regs *regs) { - siginfo_t info; #ifdef DEBUG_SPARC_BREAKPOINT printk ("TRAP: Entering kernel PC=%x, nPC=%x\n", regs->pc, regs->npc); #endif - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_BRKPT; - info.si_addr = (void __user *)regs->pc; - info.si_trapno = 0; - force_sig_info(SIGTRAP, &info, current); + force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->pc, 0, current); #ifdef DEBUG_SPARC_BREAKPOINT printk ("TRAP: Returning to space: PC=%x nPC=%x\n", regs->pc, regs->npc); diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 9ef8de63f28b..7e49bbc925a5 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -502,7 +502,6 @@ SYSCALL_DEFINE0(nis_syscall) asmlinkage void sparc_breakpoint(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); - siginfo_t info; if (test_thread_flag(TIF_32BIT)) { regs->tpc &= 0xffffffff; @@ -511,12 +510,7 @@ asmlinkage void sparc_breakpoint(struct pt_regs *regs) #ifdef DEBUG_SPARC_BREAKPOINT printk ("TRAP: Entering kernel PC=%lx, nPC=%lx\n", regs->tpc, regs->tnpc); #endif - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_BRKPT; - info.si_addr = (void __user *)regs->tpc; - info.si_trapno = 0; - force_sig_info(SIGTRAP, &info, current); + force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->tpc, 0, current); #ifdef DEBUG_SPARC_BREAKPOINT printk ("TRAP: Returning to space: PC=%lx nPC=%lx\n", regs->tpc, regs->tnpc); #endif diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index b1ed763e4787..bcdfc6168dd5 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -93,8 +93,6 @@ void __noreturn die_if_kernel(char *str, struct pt_regs *regs) void do_hw_interrupt(struct pt_regs *regs, unsigned long type) { - siginfo_t info; - if(type < 0x80) { /* Sun OS's puke from bad traps, Linux survives! */ printk("Unimplemented Sparc TRAP, type = %02lx\n", type); @@ -104,19 +102,13 @@ void do_hw_interrupt(struct pt_regs *regs, unsigned long type) if(regs->psr & PSR_PS) die_if_kernel("Kernel bad trap", regs); - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_ILLTRP; - info.si_addr = (void __user *)regs->pc; - info.si_trapno = type - 0x80; - force_sig_info(SIGILL, &info, current); + force_sig_fault(SIGILL, ILL_ILLTRP, + (void __user *)regs->pc, type - 0x80, current); } void do_illegal_instruction(struct pt_regs *regs, unsigned long pc, unsigned long npc, unsigned long psr) { - siginfo_t info; - if(psr & PSR_PS) die_if_kernel("Kernel illegal instruction", regs); #ifdef TRAP_DEBUG @@ -124,27 +116,15 @@ void do_illegal_instruction(struct pt_regs *regs, unsigned long pc, unsigned lon regs->pc, *(unsigned long *)regs->pc); #endif - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_ILLOPC; - info.si_addr = (void __user *)pc; - info.si_trapno = 0; - send_sig_info(SIGILL, &info, current); + send_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)pc, 0, current); } void do_priv_instruction(struct pt_regs *regs, unsigned long pc, unsigned long npc, unsigned long psr) { - siginfo_t info; - if(psr & PSR_PS) die_if_kernel("Penguin instruction from Penguin mode??!?!", regs); - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_PRVOPC; - info.si_addr = (void __user *)pc; - info.si_trapno = 0; - send_sig_info(SIGILL, &info, current); + send_sig_fault(SIGILL, ILL_PRVOPC, (void __user *)pc, 0, current); } /* XXX User may want to be allowed to do this. XXX */ @@ -152,8 +132,6 @@ void do_priv_instruction(struct pt_regs *regs, unsigned long pc, unsigned long n void do_memaccess_unaligned(struct pt_regs *regs, unsigned long pc, unsigned long npc, unsigned long psr) { - siginfo_t info; - if(regs->psr & PSR_PS) { printk("KERNEL MNA at pc %08lx npc %08lx called by %08lx\n", pc, npc, regs->u_regs[UREG_RETPC]); @@ -165,12 +143,9 @@ void do_memaccess_unaligned(struct pt_regs *regs, unsigned long pc, unsigned lon instruction_dump ((unsigned long *) regs->pc); printk ("do_MNA!\n"); #endif - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRALN; - info.si_addr = /* FIXME: Should dig out mna address */ (void *)0; - info.si_trapno = 0; - send_sig_info(SIGBUS, &info, current); + send_sig_fault(SIGBUS, BUS_ADRALN, + /* FIXME: Should dig out mna address */ (void *)0, + 0, current); } static unsigned long init_fsr = 0x0UL; @@ -226,9 +201,9 @@ void do_fpe_trap(struct pt_regs *regs, unsigned long pc, unsigned long npc, unsigned long psr) { static int calls; - siginfo_t info; unsigned long fsr; int ret = 0; + int code; #ifndef CONFIG_SMP struct task_struct *fpt = last_task_used_math; #else @@ -303,24 +278,20 @@ void do_fpe_trap(struct pt_regs *regs, unsigned long pc, unsigned long npc, } fsr = fpt->thread.fsr; - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_addr = (void __user *)pc; - info.si_trapno = 0; - info.si_code = FPE_FIXME; + code = FPE_FLTUNK; if ((fsr & 0x1c000) == (1 << 14)) { if (fsr & 0x10) - info.si_code = FPE_FLTINV; + code = FPE_FLTINV; else if (fsr & 0x08) - info.si_code = FPE_FLTOVF; + code = FPE_FLTOVF; else if (fsr & 0x04) - info.si_code = FPE_FLTUND; + code = FPE_FLTUND; else if (fsr & 0x02) - info.si_code = FPE_FLTDIV; + code = FPE_FLTDIV; else if (fsr & 0x01) - info.si_code = FPE_FLTRES; + code = FPE_FLTRES; } - send_sig_info(SIGFPE, &info, fpt); + send_sig_fault(SIGFPE, code, (void __user *)pc, 0, fpt); #ifndef CONFIG_SMP last_task_used_math = NULL; #endif @@ -332,16 +303,9 @@ void do_fpe_trap(struct pt_regs *regs, unsigned long pc, unsigned long npc, void handle_tag_overflow(struct pt_regs *regs, unsigned long pc, unsigned long npc, unsigned long psr) { - siginfo_t info; - if(psr & PSR_PS) die_if_kernel("Penguin overflow trap from kernel mode", regs); - info.si_signo = SIGEMT; - info.si_errno = 0; - info.si_code = EMT_TAGOVF; - info.si_addr = (void __user *)pc; - info.si_trapno = 0; - send_sig_info(SIGEMT, &info, current); + send_sig_fault(SIGEMT, EMT_TAGOVF, (void __user *)pc, 0, current); } void handle_watchpoint(struct pt_regs *regs, unsigned long pc, unsigned long npc, @@ -359,61 +323,33 @@ void handle_watchpoint(struct pt_regs *regs, unsigned long pc, unsigned long npc void handle_reg_access(struct pt_regs *regs, unsigned long pc, unsigned long npc, unsigned long psr) { - siginfo_t info; - #ifdef TRAP_DEBUG printk("Register Access Exception at PC %08lx NPC %08lx PSR %08lx\n", pc, npc, psr); #endif - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_OBJERR; - info.si_addr = (void __user *)pc; - info.si_trapno = 0; - force_sig_info(SIGBUS, &info, current); + force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)pc, 0, current); } void handle_cp_disabled(struct pt_regs *regs, unsigned long pc, unsigned long npc, unsigned long psr) { - siginfo_t info; - - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_COPROC; - info.si_addr = (void __user *)pc; - info.si_trapno = 0; - send_sig_info(SIGILL, &info, current); + send_sig_fault(SIGILL, ILL_COPROC, (void __user *)pc, 0, current); } void handle_cp_exception(struct pt_regs *regs, unsigned long pc, unsigned long npc, unsigned long psr) { - siginfo_t info; - #ifdef TRAP_DEBUG printk("Co-Processor Exception at PC %08lx NPC %08lx PSR %08lx\n", pc, npc, psr); #endif - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_COPROC; - info.si_addr = (void __user *)pc; - info.si_trapno = 0; - send_sig_info(SIGILL, &info, current); + send_sig_fault(SIGILL, ILL_COPROC, (void __user *)pc, 0, current); } void handle_hw_divzero(struct pt_regs *regs, unsigned long pc, unsigned long npc, unsigned long psr) { - siginfo_t info; - - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = FPE_INTDIV; - info.si_addr = (void __user *)pc; - info.si_trapno = 0; - send_sig_info(SIGFPE, &info, current); + send_sig_fault(SIGFPE, FPE_INTDIV, (void __user *)pc, 0, current); } #ifdef CONFIG_DEBUG_BUGVERBOSE diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 462a21abd105..aa624ed79db1 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -87,7 +87,6 @@ static void dump_tl1_traplog(struct tl1_traplog *p) void bad_trap(struct pt_regs *regs, long lvl) { char buffer[36]; - siginfo_t info; if (notify_die(DIE_TRAP, "bad trap", regs, 0, lvl, SIGTRAP) == NOTIFY_STOP) @@ -107,12 +106,8 @@ void bad_trap(struct pt_regs *regs, long lvl) regs->tpc &= 0xffffffff; regs->tnpc &= 0xffffffff; } - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_ILLTRP; - info.si_addr = (void __user *)regs->tpc; - info.si_trapno = lvl; - force_sig_info(SIGILL, &info, current); + force_sig_fault(SIGILL, ILL_ILLTRP, + (void __user *)regs->tpc, lvl, current); } void bad_trap_tl1(struct pt_regs *regs, long lvl) @@ -191,7 +186,6 @@ EXPORT_SYMBOL_GPL(unregister_dimm_printer); void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) { enum ctx_state prev_state = exception_enter(); - siginfo_t info; if (notify_die(DIE_TRAP, "instruction access exception", regs, 0, 0x8, SIGTRAP) == NOTIFY_STOP) @@ -206,12 +200,8 @@ void spitfire_insn_access_exception(struct pt_regs *regs, unsigned long sfsr, un regs->tpc &= 0xffffffff; regs->tnpc &= 0xffffffff; } - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = (void __user *)regs->tpc; - info.si_trapno = 0; - force_sig_info(SIGSEGV, &info, current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, + (void __user *)regs->tpc, 0, current); out: exception_exit(prev_state); } @@ -230,7 +220,6 @@ void sun4v_insn_access_exception(struct pt_regs *regs, unsigned long addr, unsig { unsigned short type = (type_ctx >> 16); unsigned short ctx = (type_ctx & 0xffff); - siginfo_t info; if (notify_die(DIE_TRAP, "instruction access exception", regs, 0, 0x8, SIGTRAP) == NOTIFY_STOP) @@ -247,12 +236,7 @@ void sun4v_insn_access_exception(struct pt_regs *regs, unsigned long addr, unsig regs->tpc &= 0xffffffff; regs->tnpc &= 0xffffffff; } - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = (void __user *) addr; - info.si_trapno = 0; - force_sig_info(SIGSEGV, &info, current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *) addr, 0, current); } void sun4v_insn_access_exception_tl1(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) @@ -307,7 +291,6 @@ bool is_no_fault_exception(struct pt_regs *regs) void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, unsigned long sfar) { enum ctx_state prev_state = exception_enter(); - siginfo_t info; if (notify_die(DIE_TRAP, "data access exception", regs, 0, 0x30, SIGTRAP) == NOTIFY_STOP) @@ -338,12 +321,7 @@ void spitfire_data_access_exception(struct pt_regs *regs, unsigned long sfsr, un if (is_no_fault_exception(regs)) return; - info.si_signo = SIGSEGV; - info.si_errno = 0; - info.si_code = SEGV_MAPERR; - info.si_addr = (void __user *)sfar; - info.si_trapno = 0; - force_sig_info(SIGSEGV, &info, current); + force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)sfar, 0, current); out: exception_exit(prev_state); } @@ -559,8 +537,6 @@ static void spitfire_cee_log(unsigned long afsr, unsigned long afar, unsigned lo static void spitfire_ue_log(unsigned long afsr, unsigned long afar, unsigned long udbh, unsigned long udbl, unsigned long tt, int tl1, struct pt_regs *regs) { - siginfo_t info; - printk(KERN_WARNING "CPU[%d]: Uncorrectable Error AFSR[%lx] " "AFAR[%lx] UDBL[%lx] UDBH[%ld] TT[%lx] TL>1[%d]\n", smp_processor_id(), afsr, afar, udbl, udbh, tt, tl1); @@ -595,12 +571,7 @@ static void spitfire_ue_log(unsigned long afsr, unsigned long afar, unsigned lon regs->tpc &= 0xffffffff; regs->tnpc &= 0xffffffff; } - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_OBJERR; - info.si_addr = (void *)0; - info.si_trapno = 0; - force_sig_info(SIGBUS, &info, current); + force_sig_fault(SIGBUS, BUS_OBJERR, (void *)0, 0, current); } void spitfire_access_error(struct pt_regs *regs, unsigned long status_encoded, unsigned long afar) @@ -2190,7 +2161,6 @@ bool sun4v_nonresum_error_user_handled(struct pt_regs *regs, if (attrs & SUN4V_ERR_ATTRS_MEMORY) { unsigned long addr = ent->err_raddr; - siginfo_t info; if (addr == ~(u64)0) { /* This seems highly unlikely to ever occur */ @@ -2211,21 +2181,13 @@ bool sun4v_nonresum_error_user_handled(struct pt_regs *regs, addr += PAGE_SIZE; } } - info.si_signo = SIGKILL; - info.si_errno = 0; - info.si_trapno = 0; - force_sig_info(info.si_signo, &info, current); + force_sig(SIGKILL, current); return true; } if (attrs & SUN4V_ERR_ATTRS_PIO) { - siginfo_t info; - - info.si_signo = SIGBUS; - info.si_code = BUS_ADRERR; - info.si_addr = (void __user *)sun4v_get_vaddr(regs); - force_sig_info(info.si_signo, &info, current); - + force_sig_fault(SIGBUS, BUS_ADRERR, + (void __user *)sun4v_get_vaddr(regs), 0, current); return true; } @@ -2362,30 +2324,27 @@ static void do_fpe_common(struct pt_regs *regs) regs->tnpc += 4; } else { unsigned long fsr = current_thread_info()->xfsr[0]; - siginfo_t info; + int code; if (test_thread_flag(TIF_32BIT)) { regs->tpc &= 0xffffffff; regs->tnpc &= 0xffffffff; } - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_addr = (void __user *)regs->tpc; - info.si_trapno = 0; - info.si_code = FPE_FIXME; + code = FPE_FLTUNK; if ((fsr & 0x1c000) == (1 << 14)) { if (fsr & 0x10) - info.si_code = FPE_FLTINV; + code = FPE_FLTINV; else if (fsr & 0x08) - info.si_code = FPE_FLTOVF; + code = FPE_FLTOVF; else if (fsr & 0x04) - info.si_code = FPE_FLTUND; + code = FPE_FLTUND; else if (fsr & 0x02) - info.si_code = FPE_FLTDIV; + code = FPE_FLTDIV; else if (fsr & 0x01) - info.si_code = FPE_FLTRES; + code = FPE_FLTRES; } - force_sig_info(SIGFPE, &info, current); + force_sig_fault(SIGFPE, code, + (void __user *)regs->tpc, 0, current); } } @@ -2428,7 +2387,6 @@ out: void do_tof(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); - siginfo_t info; if (notify_die(DIE_TRAP, "tagged arithmetic overflow", regs, 0, 0x26, SIGEMT) == NOTIFY_STOP) @@ -2440,12 +2398,8 @@ void do_tof(struct pt_regs *regs) regs->tpc &= 0xffffffff; regs->tnpc &= 0xffffffff; } - info.si_signo = SIGEMT; - info.si_errno = 0; - info.si_code = EMT_TAGOVF; - info.si_addr = (void __user *)regs->tpc; - info.si_trapno = 0; - force_sig_info(SIGEMT, &info, current); + force_sig_fault(SIGEMT, EMT_TAGOVF, + (void __user *)regs->tpc, 0, current); out: exception_exit(prev_state); } @@ -2453,7 +2407,6 @@ out: void do_div0(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); - siginfo_t info; if (notify_die(DIE_TRAP, "integer division by zero", regs, 0, 0x28, SIGFPE) == NOTIFY_STOP) @@ -2465,12 +2418,8 @@ void do_div0(struct pt_regs *regs) regs->tpc &= 0xffffffff; regs->tnpc &= 0xffffffff; } - info.si_signo = SIGFPE; - info.si_errno = 0; - info.si_code = FPE_INTDIV; - info.si_addr = (void __user *)regs->tpc; - info.si_trapno = 0; - force_sig_info(SIGFPE, &info, current); + force_sig_fault(SIGFPE, FPE_INTDIV, + (void __user *)regs->tpc, 0, current); out: exception_exit(prev_state); } @@ -2632,7 +2581,6 @@ void do_illegal_instruction(struct pt_regs *regs) unsigned long pc = regs->tpc; unsigned long tstate = regs->tstate; u32 insn; - siginfo_t info; if (notify_die(DIE_TRAP, "illegal instruction", regs, 0, 0x10, SIGILL) == NOTIFY_STOP) @@ -2666,12 +2614,7 @@ void do_illegal_instruction(struct pt_regs *regs) } } } - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_ILLOPC; - info.si_addr = (void __user *)pc; - info.si_trapno = 0; - force_sig_info(SIGILL, &info, current); + force_sig_fault(SIGILL, ILL_ILLOPC, (void __user *)pc, 0, current); out: exception_exit(prev_state); } @@ -2679,7 +2622,6 @@ out: void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned long sfsr) { enum ctx_state prev_state = exception_enter(); - siginfo_t info; if (notify_die(DIE_TRAP, "memory address unaligned", regs, 0, 0x34, SIGSEGV) == NOTIFY_STOP) @@ -2692,20 +2634,13 @@ void mem_address_unaligned(struct pt_regs *regs, unsigned long sfar, unsigned lo if (is_no_fault_exception(regs)) return; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRALN; - info.si_addr = (void __user *)sfar; - info.si_trapno = 0; - force_sig_info(SIGBUS, &info, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *)sfar, 0, current); out: exception_exit(prev_state); } void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_ctx) { - siginfo_t info; - if (notify_die(DIE_TRAP, "memory address unaligned", regs, 0, 0x34, SIGSEGV) == NOTIFY_STOP) return; @@ -2717,12 +2652,7 @@ void sun4v_do_mna(struct pt_regs *regs, unsigned long addr, unsigned long type_c if (is_no_fault_exception(regs)) return; - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRALN; - info.si_addr = (void __user *) addr; - info.si_trapno = 0; - force_sig_info(SIGBUS, &info, current); + force_sig_fault(SIGBUS, BUS_ADRALN, (void __user *) addr, 0, current); } /* sun4v_mem_corrupt_detect_precise() - Handle precise exception on an ADI @@ -2775,7 +2705,6 @@ void sun4v_mem_corrupt_detect_precise(struct pt_regs *regs, unsigned long addr, void do_privop(struct pt_regs *regs) { enum ctx_state prev_state = exception_enter(); - siginfo_t info; if (notify_die(DIE_TRAP, "privileged operation", regs, 0, 0x11, SIGILL) == NOTIFY_STOP) @@ -2785,12 +2714,8 @@ void do_privop(struct pt_regs *regs) regs->tpc &= 0xffffffff; regs->tnpc &= 0xffffffff; } - info.si_signo = SIGILL; - info.si_errno = 0; - info.si_code = ILL_PRVOPC; - info.si_addr = (void __user *)regs->tpc; - info.si_trapno = 0; - force_sig_info(SIGILL, &info, current); + force_sig_fault(SIGILL, ILL_PRVOPC, + (void __user *)regs->tpc, 0, current); out: exception_exit(prev_state); } diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c index 7642d7e4f0d9..64ac8c0c1429 100644 --- a/arch/sparc/kernel/unaligned_32.c +++ b/arch/sparc/kernel/unaligned_32.c @@ -311,14 +311,9 @@ static inline int ok_for_user(struct pt_regs *regs, unsigned int insn, static void user_mna_trap_fault(struct pt_regs *regs, unsigned int insn) { - siginfo_t info; - - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRALN; - info.si_addr = (void __user *)safe_compute_effective_address(regs, insn); - info.si_trapno = 0; - send_sig_info(SIGBUS, &info, current); + send_sig_fault(SIGBUS, BUS_ADRALN, + (void __user *)safe_compute_effective_address(regs, insn), + 0, current); } asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn) diff --git a/arch/sparc/kernel/vio.c b/arch/sparc/kernel/vio.c index 1a0fa10cb6b7..32bae68e34c1 100644 --- a/arch/sparc/kernel/vio.c +++ b/arch/sparc/kernel/vio.c @@ -403,7 +403,7 @@ static struct vio_dev *vio_create_one(struct mdesc_handle *hp, u64 mp, if (err) { printk(KERN_ERR "VIO: Could not register device %s, err=%d\n", dev_name(&vdev->dev), err); - kfree(vdev); + put_device(&vdev->dev); return NULL; } if (vdev->dp) diff --git a/arch/sparc/mm/fault_32.c b/arch/sparc/mm/fault_32.c index a8103a84b4ac..9f75b6444bf1 100644 --- a/arch/sparc/mm/fault_32.c +++ b/arch/sparc/mm/fault_32.c @@ -127,19 +127,11 @@ show_signal_msg(struct pt_regs *regs, int sig, int code, static void __do_fault_siginfo(int code, int sig, struct pt_regs *regs, unsigned long addr) { - siginfo_t info; - - info.si_signo = sig; - info.si_code = code; - info.si_errno = 0; - info.si_addr = (void __user *) addr; - info.si_trapno = 0; - if (unlikely(show_unhandled_signals)) - show_signal_msg(regs, sig, info.si_code, + show_signal_msg(regs, sig, code, addr, current); - force_sig_info (sig, &info, current); + force_sig_fault(sig, code, (void __user *) addr, 0, current); } static unsigned long compute_si_addr(struct pt_regs *regs, int text_fault) diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 41363f46797b..63166fcf9e25 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -170,11 +170,7 @@ static void do_fault_siginfo(int code, int sig, struct pt_regs *regs, int fault_code) { unsigned long addr; - siginfo_t info; - info.si_code = code; - info.si_signo = sig; - info.si_errno = 0; if (fault_code & FAULT_CODE_ITLB) { addr = regs->tpc; } else { @@ -187,13 +183,11 @@ static void do_fault_siginfo(int code, int sig, struct pt_regs *regs, else addr = fault_addr; } - info.si_addr = (void __user *) addr; - info.si_trapno = 0; if (unlikely(show_unhandled_signals)) show_signal_msg(regs, sig, code, addr, current); - force_sig_info(sig, &info, current); + force_sig_fault(sig, code, (void __user *) addr, 0, current); } static unsigned int get_fault_insn(struct pt_regs *regs, unsigned int insn) diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c index d4e8c497ae86..dcf5ea28a281 100644 --- a/arch/um/drivers/ubd_kern.c +++ b/arch/um/drivers/ubd_kern.c @@ -208,19 +208,6 @@ static int fake_ide_media_proc_show(struct seq_file *m, void *v) return 0; } -static int fake_ide_media_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, fake_ide_media_proc_show, NULL); -} - -static const struct file_operations fake_ide_media_proc_fops = { - .owner = THIS_MODULE, - .open = fake_ide_media_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static void make_ide_entries(const char *dev_name) { struct proc_dir_entry *dir, *ent; @@ -231,7 +218,8 @@ static void make_ide_entries(const char *dev_name) dir = proc_mkdir(dev_name, proc_ide); if(!dir) return; - ent = proc_create("media", S_IRUGO, dir, &fake_ide_media_proc_fops); + ent = proc_create_single("media", S_IRUGO, dir, + fake_ide_media_proc_show); if(!ent) return; snprintf(name, sizeof(name), "ide0/%s", dev_name); proc_symlink(dev_name, proc_ide_root, name); diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index bb5a196c3061..b10dde6cb793 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += barrier.h generic-y += bpf_perf_event.h generic-y += bug.h +generic-y += compat.h generic-y += current.h generic-y += delay.h generic-y += device.h diff --git a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c index bc2a516c190f..1a1d88a4d940 100644 --- a/arch/um/kernel/ptrace.c +++ b/arch/um/kernel/ptrace.c @@ -115,17 +115,10 @@ long arch_ptrace(struct task_struct *child, long request, static void send_sigtrap(struct task_struct *tsk, struct uml_pt_regs *regs, int error_code) { - struct siginfo info; - - memset(&info, 0, sizeof(info)); - info.si_signo = SIGTRAP; - info.si_code = TRAP_BRKPT; - - /* User-mode eip? */ - info.si_addr = UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL; - /* Send us the fake SIGTRAP */ - force_sig_info(SIGTRAP, &info, tsk); + force_sig_fault(SIGTRAP, TRAP_BRKPT, + /* User-mode eip? */ + UPT_IS_USER(regs) ? (void __user *) UPT_IP(regs) : NULL, tsk); } /* diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index b2b02df9896e..ec9a42c14c56 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -162,13 +162,9 @@ static void show_segv_info(struct uml_pt_regs *regs) static void bad_segv(struct faultinfo fi, unsigned long ip) { - struct siginfo si; - - si.si_signo = SIGSEGV; - si.si_code = SEGV_ACCERR; - si.si_addr = (void __user *) FAULT_ADDRESS(fi); current->thread.arch.faultinfo = fi; - force_sig_info(SIGSEGV, &si, current); + force_sig_fault(SIGSEGV, SEGV_ACCERR, (void __user *) FAULT_ADDRESS(fi), + current); } void fatal_sigsegv(void) @@ -214,8 +210,8 @@ void segv_handler(int sig, struct siginfo *unused_si, struct uml_pt_regs *regs) unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, struct uml_pt_regs *regs) { - struct siginfo si; jmp_buf *catcher; + int si_code; int err; int is_write = FAULT_WRITE(fi); unsigned long address = FAULT_ADDRESS(fi); @@ -239,7 +235,7 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, if (SEGV_IS_FIXABLE(&fi)) err = handle_page_fault(address, ip, is_write, is_user, - &si.si_code); + &si_code); else { err = -EFAULT; /* @@ -271,18 +267,14 @@ unsigned long segv(struct faultinfo fi, unsigned long ip, int is_user, show_segv_info(regs); if (err == -EACCES) { - si.si_signo = SIGBUS; - si.si_errno = 0; - si.si_code = BUS_ADRERR; - si.si_addr = (void __user *)address; current->thread.arch.faultinfo = fi; - force_sig_info(SIGBUS, &si, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address, + current); } else { BUG_ON(err != -EFAULT); - si.si_signo = SIGSEGV; - si.si_addr = (void __user *) address; current->thread.arch.faultinfo = fi; - force_sig_info(SIGSEGV, &si, current); + force_sig_fault(SIGSEGV, si_code, (void __user *) address, + current); } out: @@ -294,9 +286,7 @@ out: void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs) { - struct faultinfo *fi; - struct siginfo clean_si; - + int code, err; if (!UPT_IS_USER(regs)) { if (sig == SIGBUS) printk(KERN_ERR "Bus error - the host /dev/shm or /tmp " @@ -306,29 +296,21 @@ void relay_signal(int sig, struct siginfo *si, struct uml_pt_regs *regs) arch_examine_signal(sig, regs); - clear_siginfo(&clean_si); - clean_si.si_signo = si->si_signo; - clean_si.si_errno = si->si_errno; - clean_si.si_code = si->si_code; - switch (sig) { - case SIGILL: - case SIGFPE: - case SIGSEGV: - case SIGBUS: - case SIGTRAP: - fi = UPT_FAULTINFO(regs); - clean_si.si_addr = (void __user *) FAULT_ADDRESS(*fi); + /* Is the signal layout for the signal known? + * Signal data must be scrubbed to prevent information leaks. + */ + code = si->si_code; + err = si->si_errno; + if ((err == 0) && (siginfo_layout(sig, code) == SIL_FAULT)) { + struct faultinfo *fi = UPT_FAULTINFO(regs); current->thread.arch.faultinfo = *fi; -#ifdef __ARCH_SI_TRAPNO - clean_si.si_trapno = si->si_trapno; -#endif - break; - default: - printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d)\n", - sig, si->si_code); + force_sig_fault(sig, code, (void __user *)FAULT_ADDRESS(*fi), + current); + } else { + printk(KERN_ERR "Attempted to relay unknown signal %d (si_code = %d) with errno %d\n", + sig, code, err); + force_sig(sig, current); } - - force_sig_info(sig, &clean_si, current); } void bus_handler(int sig, struct siginfo *si, struct uml_pt_regs *regs) diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig index 462e59a7ae78..03f991e44288 100644 --- a/arch/unicore32/Kconfig +++ b/arch/unicore32/Kconfig @@ -19,6 +19,8 @@ config UNICORE32 select ARCH_WANT_FRAME_POINTERS select GENERIC_IOMAP select MODULES_USE_ELF_REL + select NEED_DMA_MAP_STATE + select SWIOTLB help UniCore-32 is 32-bit Instruction Set Architecture, including a series of low-power-consumption RISC chip @@ -61,9 +63,6 @@ config ARCH_MAY_HAVE_PC_FDC config ZONE_DMA def_bool y -config NEED_DMA_MAP_STATE - def_bool y - source "init/Kconfig" source "kernel/Kconfig.freezer" diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 6f70c76c81fc..bfc7abe77905 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -1,5 +1,6 @@ generic-y += atomic.h generic-y += bugs.h +generic-y += compat.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/unicore32/kernel/fpu-ucf64.c b/arch/unicore32/kernel/fpu-ucf64.c index 12c8c9527b8e..8594b168f25e 100644 --- a/arch/unicore32/kernel/fpu-ucf64.c +++ b/arch/unicore32/kernel/fpu-ucf64.c @@ -52,14 +52,14 @@ * Raise a SIGFPE for the current process. * sicode describes the signal being raised. */ -void ucf64_raise_sigfpe(unsigned int sicode, struct pt_regs *regs) +void ucf64_raise_sigfpe(struct pt_regs *regs) { siginfo_t info; - memset(&info, 0, sizeof(info)); + clear_siginfo(&info); info.si_signo = SIGFPE; - info.si_code = sicode; + info.si_code = FPE_FLTUNK; info.si_addr = (void __user *)(instruction_pointer(regs) - 4); /* @@ -94,7 +94,7 @@ void ucf64_exchandler(u32 inst, u32 fpexc, struct pt_regs *regs) pr_debug("UniCore-F64 FPSCR 0x%08x INST 0x%08x\n", cff(FPSCR), inst); - ucf64_raise_sigfpe(0, regs); + ucf64_raise_sigfpe(regs); return; } diff --git a/arch/unicore32/mm/Kconfig b/arch/unicore32/mm/Kconfig index e9154a59d561..82759b6aba67 100644 --- a/arch/unicore32/mm/Kconfig +++ b/arch/unicore32/mm/Kconfig @@ -39,14 +39,3 @@ config CPU_TLB_SINGLE_ENTRY_DISABLE default y help Say Y here to disable the TLB single entry operations. - -config SWIOTLB - def_bool y - select DMA_DIRECT_OPS - -config IOMMU_HELPER - def_bool SWIOTLB - -config NEED_SG_DMA_LENGTH - def_bool SWIOTLB - diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index bbefcc46a45e..381473412937 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -125,6 +125,7 @@ static void __do_user_fault(struct task_struct *tsk, unsigned long addr, tsk->thread.address = addr; tsk->thread.error_code = fsr; tsk->thread.trap_no = 14; + clear_siginfo(&si); si.si_signo = sig; si.si_errno = 0; si.si_code = code; @@ -472,6 +473,7 @@ asmlinkage void do_DataAbort(unsigned long addr, unsigned int fsr, printk(KERN_ALERT "Unhandled fault: %s (0x%03x) at 0x%08lx\n", inf->name, fsr, addr); + clear_siginfo(&info); info.si_signo = inf->sig; info.si_errno = 0; info.si_code = inf->code; @@ -491,6 +493,7 @@ asmlinkage void do_PrefetchAbort(unsigned long addr, printk(KERN_ALERT "Unhandled prefetch abort: %s (0x%03x) at 0x%08lx\n", inf->name, ifsr, addr); + clear_siginfo(&info); info.si_signo = inf->sig; info.si_errno = 0; info.si_code = inf->code; diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index c07f492b871a..1fe24b624d44 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -28,6 +28,8 @@ config X86_64 select ARCH_USE_CMPXCHG_LOCKREF select HAVE_ARCH_SOFT_DIRTY select MODULES_USE_ELF_RELA + select NEED_DMA_MAP_STATE + select SWIOTLB select X86_DEV_DMA_OPS select ARCH_HAS_SYSCALL_WRAPPER @@ -60,6 +62,7 @@ config X86 select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_REFCOUNT select ARCH_HAS_UACCESS_FLUSHCACHE if X86_64 + select ARCH_HAS_UACCESS_MCSAFE if X86_64 select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX @@ -134,7 +137,6 @@ config X86 select HAVE_C_RECORDMCOUNT select HAVE_DEBUG_KMEMLEAK select HAVE_DEBUG_STACKOVERFLOW - select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_REGS @@ -184,6 +186,7 @@ config X86 select HAVE_UNSTABLE_SCHED_CLOCK select HAVE_USER_RETURN_NOTIFIER select IRQ_FORCED_THREADING + select NEED_SG_DMA_LENGTH select PCI_LOCKLESS_CONFIG select PERF_EVENTS select RTC_LIB @@ -236,13 +239,6 @@ config ARCH_MMAP_RND_COMPAT_BITS_MAX config SBUS bool -config NEED_DMA_MAP_STATE - def_bool y - depends on X86_64 || INTEL_IOMMU || DMA_API_DEBUG || SWIOTLB - -config NEED_SG_DMA_LENGTH - def_bool y - config GENERIC_ISA_DMA def_bool y depends on ISA_DMA_API @@ -875,6 +871,7 @@ config DMI config GART_IOMMU bool "Old AMD GART IOMMU support" + select IOMMU_HELPER select SWIOTLB depends on X86_64 && PCI && AMD_NB ---help--- @@ -896,6 +893,7 @@ config GART_IOMMU config CALGARY_IOMMU bool "IBM Calgary IOMMU support" + select IOMMU_HELPER select SWIOTLB depends on X86_64 && PCI ---help--- @@ -923,20 +921,6 @@ config CALGARY_IOMMU_ENABLED_BY_DEFAULT Calgary anyway, pass 'iommu=calgary' on the kernel command line. If unsure, say Y. -# need this always selected by IOMMU for the VIA workaround -config SWIOTLB - def_bool y if X86_64 - ---help--- - Support for software bounce buffers used on x86-64 systems - which don't have a hardware IOMMU. Using this PCI devices - which can only access 32-bits of memory can be used on systems - with more than 3 GB of memory. - If unsure, say Y. - -config IOMMU_HELPER - def_bool y - depends on CALGARY_IOMMU || GART_IOMMU || SWIOTLB || AMD_IOMMU - config MAXSMP bool "Enable Maximum number of SMP Processors and NUMA Nodes" depends on X86_64 && SMP && DEBUG_KERNEL @@ -1458,6 +1442,7 @@ config HIGHMEM config X86_PAE bool "PAE (Physical Address Extension) Support" depends on X86_32 && !HIGHMEM4G + select PHYS_ADDR_T_64BIT select SWIOTLB ---help--- PAE is required for NX support, and furthermore enables @@ -1485,14 +1470,6 @@ config X86_5LEVEL Say N if unsure. -config ARCH_PHYS_ADDR_T_64BIT - def_bool y - depends on X86_64 || X86_PAE - -config ARCH_DMA_ADDR_T_64BIT - def_bool y - depends on X86_64 || HIGHMEM64G - config X86_DIRECT_GBPAGES def_bool y depends on X86_64 && !DEBUG_PAGEALLOC diff --git a/arch/x86/boot/compressed/cmdline.c b/arch/x86/boot/compressed/cmdline.c index 0cb325734cfb..af6cda0b7900 100644 --- a/arch/x86/boot/compressed/cmdline.c +++ b/arch/x86/boot/compressed/cmdline.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "misc.h" -#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE +#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE || CONFIG_X86_5LEVEL static unsigned long fs; static inline void set_fs(unsigned long seg) diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 47d3efff6805..a8a8642d2b0b 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -109,23 +109,34 @@ void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) } static efi_status_t -__setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom) +__setup_efi_pci(efi_pci_io_protocol_t *pci, struct pci_setup_rom **__rom) { struct pci_setup_rom *rom = NULL; efi_status_t status; unsigned long size; - uint64_t attributes; + uint64_t attributes, romsize; + void *romimage; - status = efi_early->call(pci->attributes, pci, - EfiPciIoAttributeOperationGet, 0, 0, - &attributes); + status = efi_call_proto(efi_pci_io_protocol, attributes, pci, + EfiPciIoAttributeOperationGet, 0, 0, + &attributes); if (status != EFI_SUCCESS) return status; - if (!pci->romimage || !pci->romsize) + /* + * Some firmware images contain EFI function pointers at the place where the + * romimage and romsize fields are supposed to be. Typically the EFI + * code is mapped at high addresses, translating to an unrealistically + * large romsize. The UEFI spec limits the size of option ROMs to 16 + * MiB so we reject any ROMs over 16 MiB in size to catch this. + */ + romimage = (void *)(unsigned long)efi_table_attr(efi_pci_io_protocol, + romimage, pci); + romsize = efi_table_attr(efi_pci_io_protocol, romsize, pci); + if (!romimage || !romsize || romsize > SZ_16M) return EFI_INVALID_PARAMETER; - size = pci->romsize + sizeof(*rom); + size = romsize + sizeof(*rom); status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom); if (status != EFI_SUCCESS) { @@ -141,29 +152,32 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom) rom->pcilen = pci->romsize; *__rom = rom; - status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, - PCI_VENDOR_ID, 1, &(rom->vendor)); + status = efi_call_proto(efi_pci_io_protocol, pci.read, pci, + EfiPciIoWidthUint16, PCI_VENDOR_ID, 1, + &rom->vendor); if (status != EFI_SUCCESS) { efi_printk(sys_table, "Failed to read rom->vendor\n"); goto free_struct; } - status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, - PCI_DEVICE_ID, 1, &(rom->devid)); + status = efi_call_proto(efi_pci_io_protocol, pci.read, pci, + EfiPciIoWidthUint16, PCI_DEVICE_ID, 1, + &rom->devid); if (status != EFI_SUCCESS) { efi_printk(sys_table, "Failed to read rom->devid\n"); goto free_struct; } - status = efi_early->call(pci->get_location, pci, &(rom->segment), - &(rom->bus), &(rom->device), &(rom->function)); + status = efi_call_proto(efi_pci_io_protocol, get_location, pci, + &rom->segment, &rom->bus, &rom->device, + &rom->function); if (status != EFI_SUCCESS) goto free_struct; - memcpy(rom->romdata, pci->romimage, pci->romsize); + memcpy(rom->romdata, romimage, romsize); return status; free_struct: @@ -175,7 +189,7 @@ static void setup_efi_pci32(struct boot_params *params, void **pci_handle, unsigned long size) { - efi_pci_io_protocol_32 *pci = NULL; + efi_pci_io_protocol_t *pci = NULL; efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; u32 *handles = (u32 *)(unsigned long)pci_handle; efi_status_t status; @@ -202,7 +216,7 @@ setup_efi_pci32(struct boot_params *params, void **pci_handle, if (!pci) continue; - status = __setup_efi_pci32(pci, &rom); + status = __setup_efi_pci(pci, &rom); if (status != EFI_SUCCESS) continue; @@ -216,73 +230,11 @@ setup_efi_pci32(struct boot_params *params, void **pci_handle, } } -static efi_status_t -__setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom) -{ - struct pci_setup_rom *rom; - efi_status_t status; - unsigned long size; - uint64_t attributes; - - status = efi_early->call(pci->attributes, pci, - EfiPciIoAttributeOperationGet, 0, - &attributes); - if (status != EFI_SUCCESS) - return status; - - if (!pci->romimage || !pci->romsize) - return EFI_INVALID_PARAMETER; - - size = pci->romsize + sizeof(*rom); - - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, size, &rom); - if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to alloc mem for rom\n"); - return status; - } - - rom->data.type = SETUP_PCI; - rom->data.len = size - sizeof(struct setup_data); - rom->data.next = 0; - rom->pcilen = pci->romsize; - *__rom = rom; - - status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, - PCI_VENDOR_ID, 1, &(rom->vendor)); - - if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to read rom->vendor\n"); - goto free_struct; - } - - status = efi_early->call(pci->pci.read, pci, EfiPciIoWidthUint16, - PCI_DEVICE_ID, 1, &(rom->devid)); - - if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to read rom->devid\n"); - goto free_struct; - } - - status = efi_early->call(pci->get_location, pci, &(rom->segment), - &(rom->bus), &(rom->device), &(rom->function)); - - if (status != EFI_SUCCESS) - goto free_struct; - - memcpy(rom->romdata, pci->romimage, pci->romsize); - return status; - -free_struct: - efi_call_early(free_pool, rom); - return status; - -} - static void setup_efi_pci64(struct boot_params *params, void **pci_handle, unsigned long size) { - efi_pci_io_protocol_64 *pci = NULL; + efi_pci_io_protocol_t *pci = NULL; efi_guid_t pci_proto = EFI_PCI_IO_PROTOCOL_GUID; u64 *handles = (u64 *)(unsigned long)pci_handle; efi_status_t status; @@ -309,7 +261,7 @@ setup_efi_pci64(struct boot_params *params, void **pci_handle, if (!pci) continue; - status = __setup_efi_pci64(pci, &rom); + status = __setup_efi_pci(pci, &rom); if (status != EFI_SUCCESS) continue; diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fca012baba19..64037895b085 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -306,6 +306,25 @@ ENTRY(startup_64) leaq boot_stack_end(%rbx), %rsp /* + * paging_prepare() and cleanup_trampoline() below can have GOT + * references. Adjust the table with address we are running at. + * + * Zero RAX for adjust_got: the GOT was not adjusted before; + * there's no adjustment to undo. + */ + xorq %rax, %rax + + /* + * Calculate the address the binary is loaded at and use it as + * a GOT adjustment. + */ + call 1f +1: popq %rdi + subq $1b, %rdi + + call adjust_got + + /* * At this point we are in long mode with 4-level paging enabled, * but we might want to enable 5-level paging or vice versa. * @@ -346,6 +365,7 @@ ENTRY(startup_64) * this function call. */ pushq %rsi + movq %rsi, %rdi /* real mode address */ call paging_prepare popq %rsi @@ -370,10 +390,14 @@ trampoline_return: /* * cleanup_trampoline() would restore trampoline memory. * + * RDI is address of the page table to use instead of page table + * in trampoline memory (if required). + * * RSI holds real mode data and needs to be preserved across * this function call. */ pushq %rsi + leaq top_pgtable(%rbx), %rdi call cleanup_trampoline popq %rsi @@ -381,6 +405,21 @@ trampoline_return: pushq $0 popfq + /* + * Previously we've adjusted the GOT with address the binary was + * loaded at. Now we need to re-adjust for relocation address. + * + * Calculate the address the binary is loaded at, so that we can + * undo the previous GOT adjustment. + */ + call 1f +1: popq %rax + subq $1b, %rax + + /* The new adjustment is the relocation address */ + movq %rbx, %rdi + call adjust_got + /* * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. @@ -482,19 +521,6 @@ relocated: rep stosq /* - * Adjust our own GOT - */ - leaq _got(%rip), %rdx - leaq _egot(%rip), %rcx -1: - cmpq %rcx, %rdx - jae 2f - addq %rbx, (%rdx) - addq $8, %rdx - jmp 1b -2: - -/* * Do the extraction, and jump to the new kernel.. */ pushq %rsi /* Save the real mode argument */ @@ -512,6 +538,27 @@ relocated: */ jmp *%rax +/* + * Adjust the global offset table + * + * RAX is the previous adjustment of the table to undo (use 0 if it's the + * first time we touch GOT). + * RDI is the new adjustment to apply. + */ +adjust_got: + /* Walk through the GOT adding the address to the entries */ + leaq _got(%rip), %rdx + leaq _egot(%rip), %rcx +1: + cmpq %rcx, %rdx + jae 2f + subq %rax, (%rdx) /* Undo previous adjustment */ + addq %rdi, (%rdx) /* Apply the new adjustment */ + addq $8, %rdx + jmp 1b +2: + ret + .code32 /* * This is the 32-bit trampoline that will be copied over to low memory. @@ -649,3 +696,10 @@ boot_stack_end: .balign 4096 pgtable: .fill BOOT_PGT_SIZE, 1, 0 + +/* + * The page table is going to be used instead of page table in the trampoline + * memory. + */ +top_pgtable: + .fill PAGE_SIZE, 1, 0 diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index a0a50b91ecef..b87a7582853d 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -47,7 +47,7 @@ #include <linux/decompress/mm.h> #ifdef CONFIG_X86_5LEVEL -unsigned int pgtable_l5_enabled __ro_after_init; +unsigned int __pgtable_l5_enabled; unsigned int pgdir_shift __ro_after_init = 39; unsigned int ptrs_per_p4d __ro_after_init = 1; #endif @@ -734,7 +734,7 @@ void choose_random_location(unsigned long input, #ifdef CONFIG_X86_5LEVEL if (__read_cr4() & X86_CR4_LA57) { - pgtable_l5_enabled = 1; + __pgtable_l5_enabled = 1; pgdir_shift = 48; ptrs_per_p4d = 512; } diff --git a/arch/x86/boot/compressed/misc.h b/arch/x86/boot/compressed/misc.h index 9e11be4cae19..a423bdb42686 100644 --- a/arch/x86/boot/compressed/misc.h +++ b/arch/x86/boot/compressed/misc.h @@ -12,10 +12,8 @@ #undef CONFIG_PARAVIRT_SPINLOCKS #undef CONFIG_KASAN -#ifdef CONFIG_X86_5LEVEL -/* cpu_feature_enabled() cannot be used that early */ -#define pgtable_l5_enabled __pgtable_l5_enabled -#endif +/* cpu_feature_enabled() cannot be used this early */ +#define USE_EARLY_PGTABLE_L5 #include <linux/linkage.h> #include <linux/screen_info.h> diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 32af1cbcd903..8c5107545251 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -23,14 +23,6 @@ struct paging_config { static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; /* - * The page table is going to be used instead of page table in the trampoline - * memory. - * - * It must not be in BSS as BSS is cleared after cleanup_trampoline(). - */ -static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data); - -/* * Trampoline address will be printed by extract_kernel() for debugging * purposes. * @@ -39,16 +31,23 @@ static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data); */ unsigned long *trampoline_32bit __section(.data); -struct paging_config paging_prepare(void) +extern struct boot_params *boot_params; +int cmdline_find_option_bool(const char *option); + +struct paging_config paging_prepare(void *rmode) { struct paging_config paging_config = {}; unsigned long bios_start, ebda_start; + /* Initialize boot_params. Required for cmdline_find_option_bool(). */ + boot_params = rmode; + /* * Check if LA57 is desired and supported. * - * There are two parts to the check: + * There are several parts to the check: * - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y + * - if user asked to disable 5-level paging: no5lvl in cmdline * - if the machine supports 5-level paging: * + CPUID leaf 7 is supported * + the leaf has the feature bit set @@ -56,6 +55,7 @@ struct paging_config paging_prepare(void) * That's substitute for boot_cpu_has() in early boot code. */ if (IS_ENABLED(CONFIG_X86_5LEVEL) && + !cmdline_find_option_bool("no5lvl") && native_cpuid_eax(0) >= 7 && (native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) { paging_config.l5_required = 1; @@ -134,19 +134,19 @@ out: return paging_config; } -void cleanup_trampoline(void) +void cleanup_trampoline(void *pgtable) { void *trampoline_pgtable; - trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET; + trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long); /* * Move the top level page table out of trampoline memory, * if it's there. */ if ((void *)__native_read_cr3() == trampoline_pgtable) { - memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE); - native_write_cr3((unsigned long)top_pgtable); + memcpy(pgtable, trampoline_pgtable, PAGE_SIZE); + native_write_cr3((unsigned long)pgtable); } /* Restore trampoline memory */ diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 5f07333bb224..a450ad573dcb 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -15,7 +15,6 @@ obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o obj-$(CONFIG_CRYPTO_AES_586) += aes-i586.o obj-$(CONFIG_CRYPTO_TWOFISH_586) += twofish-i586.o -obj-$(CONFIG_CRYPTO_SALSA20_586) += salsa20-i586.o obj-$(CONFIG_CRYPTO_SERPENT_SSE2_586) += serpent-sse2-i586.o obj-$(CONFIG_CRYPTO_AES_X86_64) += aes-x86_64.o @@ -24,7 +23,6 @@ obj-$(CONFIG_CRYPTO_CAMELLIA_X86_64) += camellia-x86_64.o obj-$(CONFIG_CRYPTO_BLOWFISH_X86_64) += blowfish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64) += twofish-x86_64.o obj-$(CONFIG_CRYPTO_TWOFISH_X86_64_3WAY) += twofish-x86_64-3way.o -obj-$(CONFIG_CRYPTO_SALSA20_X86_64) += salsa20-x86_64.o obj-$(CONFIG_CRYPTO_CHACHA20_X86_64) += chacha20-x86_64.o obj-$(CONFIG_CRYPTO_SERPENT_SSE2_X86_64) += serpent-sse2-x86_64.o obj-$(CONFIG_CRYPTO_AES_NI_INTEL) += aesni-intel.o @@ -38,6 +36,16 @@ obj-$(CONFIG_CRYPTO_SHA512_SSSE3) += sha512-ssse3.o obj-$(CONFIG_CRYPTO_CRCT10DIF_PCLMUL) += crct10dif-pclmul.o obj-$(CONFIG_CRYPTO_POLY1305_X86_64) += poly1305-x86_64.o +obj-$(CONFIG_CRYPTO_AEGIS128_AESNI_SSE2) += aegis128-aesni.o +obj-$(CONFIG_CRYPTO_AEGIS128L_AESNI_SSE2) += aegis128l-aesni.o +obj-$(CONFIG_CRYPTO_AEGIS256_AESNI_SSE2) += aegis256-aesni.o + +obj-$(CONFIG_CRYPTO_MORUS640_GLUE) += morus640_glue.o +obj-$(CONFIG_CRYPTO_MORUS1280_GLUE) += morus1280_glue.o + +obj-$(CONFIG_CRYPTO_MORUS640_SSE2) += morus640-sse2.o +obj-$(CONFIG_CRYPTO_MORUS1280_SSE2) += morus1280-sse2.o + # These modules require assembler to support AVX. ifeq ($(avx_supported),yes) obj-$(CONFIG_CRYPTO_CAMELLIA_AESNI_AVX_X86_64) += \ @@ -55,11 +63,12 @@ ifeq ($(avx2_supported),yes) obj-$(CONFIG_CRYPTO_SHA1_MB) += sha1-mb/ obj-$(CONFIG_CRYPTO_SHA256_MB) += sha256-mb/ obj-$(CONFIG_CRYPTO_SHA512_MB) += sha512-mb/ + + obj-$(CONFIG_CRYPTO_MORUS1280_AVX2) += morus1280-avx2.o endif aes-i586-y := aes-i586-asm_32.o aes_glue.o twofish-i586-y := twofish-i586-asm_32.o twofish_glue.o -salsa20-i586-y := salsa20-i586-asm_32.o salsa20_glue.o serpent-sse2-i586-y := serpent-sse2-i586-asm_32.o serpent_sse2_glue.o aes-x86_64-y := aes-x86_64-asm_64.o aes_glue.o @@ -68,10 +77,16 @@ camellia-x86_64-y := camellia-x86_64-asm_64.o camellia_glue.o blowfish-x86_64-y := blowfish-x86_64-asm_64.o blowfish_glue.o twofish-x86_64-y := twofish-x86_64-asm_64.o twofish_glue.o twofish-x86_64-3way-y := twofish-x86_64-asm_64-3way.o twofish_glue_3way.o -salsa20-x86_64-y := salsa20-x86_64-asm_64.o salsa20_glue.o chacha20-x86_64-y := chacha20-ssse3-x86_64.o chacha20_glue.o serpent-sse2-x86_64-y := serpent-sse2-x86_64-asm_64.o serpent_sse2_glue.o +aegis128-aesni-y := aegis128-aesni-asm.o aegis128-aesni-glue.o +aegis128l-aesni-y := aegis128l-aesni-asm.o aegis128l-aesni-glue.o +aegis256-aesni-y := aegis256-aesni-asm.o aegis256-aesni-glue.o + +morus640-sse2-y := morus640-sse2-asm.o morus640-sse2-glue.o +morus1280-sse2-y := morus1280-sse2-asm.o morus1280-sse2-glue.o + ifeq ($(avx_supported),yes) camellia-aesni-avx-x86_64-y := camellia-aesni-avx-asm_64.o \ camellia_aesni_avx_glue.o @@ -87,6 +102,8 @@ ifeq ($(avx2_supported),yes) camellia-aesni-avx2-y := camellia-aesni-avx2-asm_64.o camellia_aesni_avx2_glue.o chacha20-x86_64-y += chacha20-avx2-x86_64.o serpent-avx2-y := serpent-avx2-asm_64.o serpent_avx2_glue.o + + morus1280-avx2-y := morus1280-avx2-asm.o morus1280-avx2-glue.o endif aesni-intel-y := aesni-intel_asm.o aesni-intel_glue.o fpu.o diff --git a/arch/x86/crypto/aegis128-aesni-asm.S b/arch/x86/crypto/aegis128-aesni-asm.S new file mode 100644 index 000000000000..9254e0b6cc06 --- /dev/null +++ b/arch/x86/crypto/aegis128-aesni-asm.S @@ -0,0 +1,749 @@ +/* + * AES-NI + SSE2 implementation of AEGIS-128 + * + * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> + * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/frame.h> + +#define STATE0 %xmm0 +#define STATE1 %xmm1 +#define STATE2 %xmm2 +#define STATE3 %xmm3 +#define STATE4 %xmm4 +#define KEY %xmm5 +#define MSG %xmm5 +#define T0 %xmm6 +#define T1 %xmm7 + +#define STATEP %rdi +#define LEN %rsi +#define SRC %rdx +#define DST %rcx + +.section .rodata.cst16.aegis128_const, "aM", @progbits, 32 +.align 16 +.Laegis128_const_0: + .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d + .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 +.Laegis128_const_1: + .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 + .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd + +.section .rodata.cst16.aegis128_counter, "aM", @progbits, 16 +.align 16 +.Laegis128_counter: + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + +.text + +/* + * aegis128_update + * input: + * STATE[0-4] - input state + * output: + * STATE[0-4] - output state (shifted positions) + * changed: + * T0 + */ +.macro aegis128_update + movdqa STATE4, T0 + aesenc STATE0, STATE4 + aesenc STATE1, STATE0 + aesenc STATE2, STATE1 + aesenc STATE3, STATE2 + aesenc T0, STATE3 +.endm + +/* + * __load_partial: internal ABI + * input: + * LEN - bytes + * SRC - src + * output: + * MSG - message block + * changed: + * T0 + * %r8 + * %r9 + */ +__load_partial: + xor %r9, %r9 + pxor MSG, MSG + + mov LEN, %r8 + and $0x1, %r8 + jz .Lld_partial_1 + + mov LEN, %r8 + and $0x1E, %r8 + add SRC, %r8 + mov (%r8), %r9b + +.Lld_partial_1: + mov LEN, %r8 + and $0x2, %r8 + jz .Lld_partial_2 + + mov LEN, %r8 + and $0x1C, %r8 + add SRC, %r8 + shl $0x10, %r9 + mov (%r8), %r9w + +.Lld_partial_2: + mov LEN, %r8 + and $0x4, %r8 + jz .Lld_partial_4 + + mov LEN, %r8 + and $0x18, %r8 + add SRC, %r8 + shl $32, %r9 + mov (%r8), %r8d + xor %r8, %r9 + +.Lld_partial_4: + movq %r9, MSG + + mov LEN, %r8 + and $0x8, %r8 + jz .Lld_partial_8 + + mov LEN, %r8 + and $0x10, %r8 + add SRC, %r8 + pslldq $8, MSG + movq (%r8), T0 + pxor T0, MSG + +.Lld_partial_8: + ret +ENDPROC(__load_partial) + +/* + * __store_partial: internal ABI + * input: + * LEN - bytes + * DST - dst + * output: + * T0 - message block + * changed: + * %r8 + * %r9 + * %r10 + */ +__store_partial: + mov LEN, %r8 + mov DST, %r9 + + movq T0, %r10 + + cmp $8, %r8 + jl .Lst_partial_8 + + mov %r10, (%r9) + psrldq $8, T0 + movq T0, %r10 + + sub $8, %r8 + add $8, %r9 + +.Lst_partial_8: + cmp $4, %r8 + jl .Lst_partial_4 + + mov %r10d, (%r9) + shr $32, %r10 + + sub $4, %r8 + add $4, %r9 + +.Lst_partial_4: + cmp $2, %r8 + jl .Lst_partial_2 + + mov %r10w, (%r9) + shr $0x10, %r10 + + sub $2, %r8 + add $2, %r9 + +.Lst_partial_2: + cmp $1, %r8 + jl .Lst_partial_1 + + mov %r10b, (%r9) + +.Lst_partial_1: + ret +ENDPROC(__store_partial) + +/* + * void crypto_aegis128_aesni_init(void *state, const void *key, const void *iv); + */ +ENTRY(crypto_aegis128_aesni_init) + FRAME_BEGIN + + /* load IV: */ + movdqu (%rdx), T1 + + /* load key: */ + movdqa (%rsi), KEY + pxor KEY, T1 + movdqa T1, STATE0 + movdqa KEY, STATE3 + movdqa KEY, STATE4 + + /* load the constants: */ + movdqa .Laegis128_const_0, STATE2 + movdqa .Laegis128_const_1, STATE1 + pxor STATE2, STATE3 + pxor STATE1, STATE4 + + /* update 10 times with KEY / KEY xor IV: */ + aegis128_update; pxor KEY, STATE4 + aegis128_update; pxor T1, STATE3 + aegis128_update; pxor KEY, STATE2 + aegis128_update; pxor T1, STATE1 + aegis128_update; pxor KEY, STATE0 + aegis128_update; pxor T1, STATE4 + aegis128_update; pxor KEY, STATE3 + aegis128_update; pxor T1, STATE2 + aegis128_update; pxor KEY, STATE1 + aegis128_update; pxor T1, STATE0 + + /* store the state: */ + movdqu STATE0, 0x00(STATEP) + movdqu STATE1, 0x10(STATEP) + movdqu STATE2, 0x20(STATEP) + movdqu STATE3, 0x30(STATEP) + movdqu STATE4, 0x40(STATEP) + + FRAME_END + ret +ENDPROC(crypto_aegis128_aesni_init) + +/* + * void crypto_aegis128_aesni_ad(void *state, unsigned int length, + * const void *data); + */ +ENTRY(crypto_aegis128_aesni_ad) + FRAME_BEGIN + + cmp $0x10, LEN + jb .Lad_out + + /* load the state: */ + movdqu 0x00(STATEP), STATE0 + movdqu 0x10(STATEP), STATE1 + movdqu 0x20(STATEP), STATE2 + movdqu 0x30(STATEP), STATE3 + movdqu 0x40(STATEP), STATE4 + + mov SRC, %r8 + and $0xF, %r8 + jnz .Lad_u_loop + +.align 8 +.Lad_a_loop: + movdqa 0x00(SRC), MSG + aegis128_update + pxor MSG, STATE4 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_1 + + movdqa 0x10(SRC), MSG + aegis128_update + pxor MSG, STATE3 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_2 + + movdqa 0x20(SRC), MSG + aegis128_update + pxor MSG, STATE2 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_3 + + movdqa 0x30(SRC), MSG + aegis128_update + pxor MSG, STATE1 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_4 + + movdqa 0x40(SRC), MSG + aegis128_update + pxor MSG, STATE0 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_0 + + add $0x50, SRC + jmp .Lad_a_loop + +.align 8 +.Lad_u_loop: + movdqu 0x00(SRC), MSG + aegis128_update + pxor MSG, STATE4 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_1 + + movdqu 0x10(SRC), MSG + aegis128_update + pxor MSG, STATE3 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_2 + + movdqu 0x20(SRC), MSG + aegis128_update + pxor MSG, STATE2 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_3 + + movdqu 0x30(SRC), MSG + aegis128_update + pxor MSG, STATE1 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_4 + + movdqu 0x40(SRC), MSG + aegis128_update + pxor MSG, STATE0 + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_0 + + add $0x50, SRC + jmp .Lad_u_loop + + /* store the state: */ +.Lad_out_0: + movdqu STATE0, 0x00(STATEP) + movdqu STATE1, 0x10(STATEP) + movdqu STATE2, 0x20(STATEP) + movdqu STATE3, 0x30(STATEP) + movdqu STATE4, 0x40(STATEP) + FRAME_END + ret + +.Lad_out_1: + movdqu STATE4, 0x00(STATEP) + movdqu STATE0, 0x10(STATEP) + movdqu STATE1, 0x20(STATEP) + movdqu STATE2, 0x30(STATEP) + movdqu STATE3, 0x40(STATEP) + FRAME_END + ret + +.Lad_out_2: + movdqu STATE3, 0x00(STATEP) + movdqu STATE4, 0x10(STATEP) + movdqu STATE0, 0x20(STATEP) + movdqu STATE1, 0x30(STATEP) + movdqu STATE2, 0x40(STATEP) + FRAME_END + ret + +.Lad_out_3: + movdqu STATE2, 0x00(STATEP) + movdqu STATE3, 0x10(STATEP) + movdqu STATE4, 0x20(STATEP) + movdqu STATE0, 0x30(STATEP) + movdqu STATE1, 0x40(STATEP) + FRAME_END + ret + +.Lad_out_4: + movdqu STATE1, 0x00(STATEP) + movdqu STATE2, 0x10(STATEP) + movdqu STATE3, 0x20(STATEP) + movdqu STATE4, 0x30(STATEP) + movdqu STATE0, 0x40(STATEP) + FRAME_END + ret + +.Lad_out: + FRAME_END + ret +ENDPROC(crypto_aegis128_aesni_ad) + +.macro encrypt_block a s0 s1 s2 s3 s4 i + movdq\a (\i * 0x10)(SRC), MSG + movdqa MSG, T0 + pxor \s1, T0 + pxor \s4, T0 + movdqa \s2, T1 + pand \s3, T1 + pxor T1, T0 + movdq\a T0, (\i * 0x10)(DST) + + aegis128_update + pxor MSG, \s4 + + sub $0x10, LEN + cmp $0x10, LEN + jl .Lenc_out_\i +.endm + +/* + * void crypto_aegis128_aesni_enc(void *state, unsigned int length, + * const void *src, void *dst); + */ +ENTRY(crypto_aegis128_aesni_enc) + FRAME_BEGIN + + cmp $0x10, LEN + jb .Lenc_out + + /* load the state: */ + movdqu 0x00(STATEP), STATE0 + movdqu 0x10(STATEP), STATE1 + movdqu 0x20(STATEP), STATE2 + movdqu 0x30(STATEP), STATE3 + movdqu 0x40(STATEP), STATE4 + + mov SRC, %r8 + or DST, %r8 + and $0xF, %r8 + jnz .Lenc_u_loop + +.align 8 +.Lenc_a_loop: + encrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0 + encrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1 + encrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2 + encrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3 + encrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4 + + add $0x50, SRC + add $0x50, DST + jmp .Lenc_a_loop + +.align 8 +.Lenc_u_loop: + encrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0 + encrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1 + encrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2 + encrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3 + encrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4 + + add $0x50, SRC + add $0x50, DST + jmp .Lenc_u_loop + + /* store the state: */ +.Lenc_out_0: + movdqu STATE4, 0x00(STATEP) + movdqu STATE0, 0x10(STATEP) + movdqu STATE1, 0x20(STATEP) + movdqu STATE2, 0x30(STATEP) + movdqu STATE3, 0x40(STATEP) + FRAME_END + ret + +.Lenc_out_1: + movdqu STATE3, 0x00(STATEP) + movdqu STATE4, 0x10(STATEP) + movdqu STATE0, 0x20(STATEP) + movdqu STATE1, 0x30(STATEP) + movdqu STATE2, 0x40(STATEP) + FRAME_END + ret + +.Lenc_out_2: + movdqu STATE2, 0x00(STATEP) + movdqu STATE3, 0x10(STATEP) + movdqu STATE4, 0x20(STATEP) + movdqu STATE0, 0x30(STATEP) + movdqu STATE1, 0x40(STATEP) + FRAME_END + ret + +.Lenc_out_3: + movdqu STATE1, 0x00(STATEP) + movdqu STATE2, 0x10(STATEP) + movdqu STATE3, 0x20(STATEP) + movdqu STATE4, 0x30(STATEP) + movdqu STATE0, 0x40(STATEP) + FRAME_END + ret + +.Lenc_out_4: + movdqu STATE0, 0x00(STATEP) + movdqu STATE1, 0x10(STATEP) + movdqu STATE2, 0x20(STATEP) + movdqu STATE3, 0x30(STATEP) + movdqu STATE4, 0x40(STATEP) + FRAME_END + ret + +.Lenc_out: + FRAME_END + ret +ENDPROC(crypto_aegis128_aesni_enc) + +/* + * void crypto_aegis128_aesni_enc_tail(void *state, unsigned int length, + * const void *src, void *dst); + */ +ENTRY(crypto_aegis128_aesni_enc_tail) + FRAME_BEGIN + + /* load the state: */ + movdqu 0x00(STATEP), STATE0 + movdqu 0x10(STATEP), STATE1 + movdqu 0x20(STATEP), STATE2 + movdqu 0x30(STATEP), STATE3 + movdqu 0x40(STATEP), STATE4 + + /* encrypt message: */ + call __load_partial + + movdqa MSG, T0 + pxor STATE1, T0 + pxor STATE4, T0 + movdqa STATE2, T1 + pand STATE3, T1 + pxor T1, T0 + + call __store_partial + + aegis128_update + pxor MSG, STATE4 + + /* store the state: */ + movdqu STATE4, 0x00(STATEP) + movdqu STATE0, 0x10(STATEP) + movdqu STATE1, 0x20(STATEP) + movdqu STATE2, 0x30(STATEP) + movdqu STATE3, 0x40(STATEP) + + FRAME_END +ENDPROC(crypto_aegis128_aesni_enc_tail) + +.macro decrypt_block a s0 s1 s2 s3 s4 i + movdq\a (\i * 0x10)(SRC), MSG + pxor \s1, MSG + pxor \s4, MSG + movdqa \s2, T1 + pand \s3, T1 + pxor T1, MSG + movdq\a MSG, (\i * 0x10)(DST) + + aegis128_update + pxor MSG, \s4 + + sub $0x10, LEN + cmp $0x10, LEN + jl .Ldec_out_\i +.endm + +/* + * void crypto_aegis128_aesni_dec(void *state, unsigned int length, + * const void *src, void *dst); + */ +ENTRY(crypto_aegis128_aesni_dec) + FRAME_BEGIN + + cmp $0x10, LEN + jb .Ldec_out + + /* load the state: */ + movdqu 0x00(STATEP), STATE0 + movdqu 0x10(STATEP), STATE1 + movdqu 0x20(STATEP), STATE2 + movdqu 0x30(STATEP), STATE3 + movdqu 0x40(STATEP), STATE4 + + mov SRC, %r8 + or DST, %r8 + and $0xF, %r8 + jnz .Ldec_u_loop + +.align 8 +.Ldec_a_loop: + decrypt_block a STATE0 STATE1 STATE2 STATE3 STATE4 0 + decrypt_block a STATE4 STATE0 STATE1 STATE2 STATE3 1 + decrypt_block a STATE3 STATE4 STATE0 STATE1 STATE2 2 + decrypt_block a STATE2 STATE3 STATE4 STATE0 STATE1 3 + decrypt_block a STATE1 STATE2 STATE3 STATE4 STATE0 4 + + add $0x50, SRC + add $0x50, DST + jmp .Ldec_a_loop + +.align 8 +.Ldec_u_loop: + decrypt_block u STATE0 STATE1 STATE2 STATE3 STATE4 0 + decrypt_block u STATE4 STATE0 STATE1 STATE2 STATE3 1 + decrypt_block u STATE3 STATE4 STATE0 STATE1 STATE2 2 + decrypt_block u STATE2 STATE3 STATE4 STATE0 STATE1 3 + decrypt_block u STATE1 STATE2 STATE3 STATE4 STATE0 4 + + add $0x50, SRC + add $0x50, DST + jmp .Ldec_u_loop + + /* store the state: */ +.Ldec_out_0: + movdqu STATE4, 0x00(STATEP) + movdqu STATE0, 0x10(STATEP) + movdqu STATE1, 0x20(STATEP) + movdqu STATE2, 0x30(STATEP) + movdqu STATE3, 0x40(STATEP) + FRAME_END + ret + +.Ldec_out_1: + movdqu STATE3, 0x00(STATEP) + movdqu STATE4, 0x10(STATEP) + movdqu STATE0, 0x20(STATEP) + movdqu STATE1, 0x30(STATEP) + movdqu STATE2, 0x40(STATEP) + FRAME_END + ret + +.Ldec_out_2: + movdqu STATE2, 0x00(STATEP) + movdqu STATE3, 0x10(STATEP) + movdqu STATE4, 0x20(STATEP) + movdqu STATE0, 0x30(STATEP) + movdqu STATE1, 0x40(STATEP) + FRAME_END + ret + +.Ldec_out_3: + movdqu STATE1, 0x00(STATEP) + movdqu STATE2, 0x10(STATEP) + movdqu STATE3, 0x20(STATEP) + movdqu STATE4, 0x30(STATEP) + movdqu STATE0, 0x40(STATEP) + FRAME_END + ret + +.Ldec_out_4: + movdqu STATE0, 0x00(STATEP) + movdqu STATE1, 0x10(STATEP) + movdqu STATE2, 0x20(STATEP) + movdqu STATE3, 0x30(STATEP) + movdqu STATE4, 0x40(STATEP) + FRAME_END + ret + +.Ldec_out: + FRAME_END + ret +ENDPROC(crypto_aegis128_aesni_dec) + +/* + * void crypto_aegis128_aesni_dec_tail(void *state, unsigned int length, + * const void *src, void *dst); + */ +ENTRY(crypto_aegis128_aesni_dec_tail) + FRAME_BEGIN + + /* load the state: */ + movdqu 0x00(STATEP), STATE0 + movdqu 0x10(STATEP), STATE1 + movdqu 0x20(STATEP), STATE2 + movdqu 0x30(STATEP), STATE3 + movdqu 0x40(STATEP), STATE4 + + /* decrypt message: */ + call __load_partial + + pxor STATE1, MSG + pxor STATE4, MSG + movdqa STATE2, T1 + pand STATE3, T1 + pxor T1, MSG + + movdqa MSG, T0 + call __store_partial + + /* mask with byte count: */ + movq LEN, T0 + punpcklbw T0, T0 + punpcklbw T0, T0 + punpcklbw T0, T0 + punpcklbw T0, T0 + movdqa .Laegis128_counter, T1 + pcmpgtb T1, T0 + pand T0, MSG + + aegis128_update + pxor MSG, STATE4 + + /* store the state: */ + movdqu STATE4, 0x00(STATEP) + movdqu STATE0, 0x10(STATEP) + movdqu STATE1, 0x20(STATEP) + movdqu STATE2, 0x30(STATEP) + movdqu STATE3, 0x40(STATEP) + + FRAME_END + ret +ENDPROC(crypto_aegis128_aesni_dec_tail) + +/* + * void crypto_aegis128_aesni_final(void *state, void *tag_xor, + * u64 assoclen, u64 cryptlen); + */ +ENTRY(crypto_aegis128_aesni_final) + FRAME_BEGIN + + /* load the state: */ + movdqu 0x00(STATEP), STATE0 + movdqu 0x10(STATEP), STATE1 + movdqu 0x20(STATEP), STATE2 + movdqu 0x30(STATEP), STATE3 + movdqu 0x40(STATEP), STATE4 + + /* prepare length block: */ + movq %rdx, MSG + movq %rcx, T0 + pslldq $8, T0 + pxor T0, MSG + psllq $3, MSG /* multiply by 8 (to get bit count) */ + + pxor STATE3, MSG + + /* update state: */ + aegis128_update; pxor MSG, STATE4 + aegis128_update; pxor MSG, STATE3 + aegis128_update; pxor MSG, STATE2 + aegis128_update; pxor MSG, STATE1 + aegis128_update; pxor MSG, STATE0 + aegis128_update; pxor MSG, STATE4 + aegis128_update; pxor MSG, STATE3 + + /* xor tag: */ + movdqu (%rsi), MSG + + pxor STATE0, MSG + pxor STATE1, MSG + pxor STATE2, MSG + pxor STATE3, MSG + pxor STATE4, MSG + + movdqu MSG, (%rsi) + + FRAME_END + ret +ENDPROC(crypto_aegis128_aesni_final) diff --git a/arch/x86/crypto/aegis128-aesni-glue.c b/arch/x86/crypto/aegis128-aesni-glue.c new file mode 100644 index 000000000000..5de7c0d46edf --- /dev/null +++ b/arch/x86/crypto/aegis128-aesni-glue.c @@ -0,0 +1,407 @@ +/* + * The AEGIS-128 Authenticated-Encryption Algorithm + * Glue for AES-NI + SSE2 implementation + * + * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> + * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <crypto/cryptd.h> +#include <crypto/internal/aead.h> +#include <crypto/internal/skcipher.h> +#include <crypto/scatterwalk.h> +#include <linux/module.h> +#include <asm/fpu/api.h> +#include <asm/cpu_device_id.h> + +#define AEGIS128_BLOCK_ALIGN 16 +#define AEGIS128_BLOCK_SIZE 16 +#define AEGIS128_NONCE_SIZE 16 +#define AEGIS128_STATE_BLOCKS 5 +#define AEGIS128_KEY_SIZE 16 +#define AEGIS128_MIN_AUTH_SIZE 8 +#define AEGIS128_MAX_AUTH_SIZE 16 + +asmlinkage void crypto_aegis128_aesni_init(void *state, void *key, void *iv); + +asmlinkage void crypto_aegis128_aesni_ad( + void *state, unsigned int length, const void *data); + +asmlinkage void crypto_aegis128_aesni_enc( + void *state, unsigned int length, const void *src, void *dst); + +asmlinkage void crypto_aegis128_aesni_dec( + void *state, unsigned int length, const void *src, void *dst); + +asmlinkage void crypto_aegis128_aesni_enc_tail( + void *state, unsigned int length, const void *src, void *dst); + +asmlinkage void crypto_aegis128_aesni_dec_tail( + void *state, unsigned int length, const void *src, void *dst); + +asmlinkage void crypto_aegis128_aesni_final( + void *state, void *tag_xor, unsigned int cryptlen, + unsigned int assoclen); + +struct aegis_block { + u8 bytes[AEGIS128_BLOCK_SIZE] __aligned(AEGIS128_BLOCK_ALIGN); +}; + +struct aegis_state { + struct aegis_block blocks[AEGIS128_STATE_BLOCKS]; +}; + +struct aegis_ctx { + struct aegis_block key; +}; + +struct aegis_crypt_ops { + int (*skcipher_walk_init)(struct skcipher_walk *walk, + struct aead_request *req, bool atomic); + + void (*crypt_blocks)(void *state, unsigned int length, const void *src, + void *dst); + void (*crypt_tail)(void *state, unsigned int length, const void *src, + void *dst); +}; + +static void crypto_aegis128_aesni_process_ad( + struct aegis_state *state, struct scatterlist *sg_src, + unsigned int assoclen) +{ + struct scatter_walk walk; + struct aegis_block buf; + unsigned int pos = 0; + + scatterwalk_start(&walk, sg_src); + while (assoclen != 0) { + unsigned int size = scatterwalk_clamp(&walk, assoclen); + unsigned int left = size; + void *mapped = scatterwalk_map(&walk); + const u8 *src = (const u8 *)mapped; + + if (pos + size >= AEGIS128_BLOCK_SIZE) { + if (pos > 0) { + unsigned int fill = AEGIS128_BLOCK_SIZE - pos; + memcpy(buf.bytes + pos, src, fill); + crypto_aegis128_aesni_ad(state, + AEGIS128_BLOCK_SIZE, + buf.bytes); + pos = 0; + left -= fill; + src += fill; + } + + crypto_aegis128_aesni_ad(state, left, src); + + src += left & ~(AEGIS128_BLOCK_SIZE - 1); + left &= AEGIS128_BLOCK_SIZE - 1; + } + + memcpy(buf.bytes + pos, src, left); + pos += left; + assoclen -= size; + + scatterwalk_unmap(mapped); + scatterwalk_advance(&walk, size); + scatterwalk_done(&walk, 0, assoclen); + } + + if (pos > 0) { + memset(buf.bytes + pos, 0, AEGIS128_BLOCK_SIZE - pos); + crypto_aegis128_aesni_ad(state, AEGIS128_BLOCK_SIZE, buf.bytes); + } +} + +static void crypto_aegis128_aesni_process_crypt( + struct aegis_state *state, struct aead_request *req, + const struct aegis_crypt_ops *ops) +{ + struct skcipher_walk walk; + u8 *src, *dst; + unsigned int chunksize, base; + + ops->skcipher_walk_init(&walk, req, false); + + while (walk.nbytes) { + src = walk.src.virt.addr; + dst = walk.dst.virt.addr; + chunksize = walk.nbytes; + + ops->crypt_blocks(state, chunksize, src, dst); + + base = chunksize & ~(AEGIS128_BLOCK_SIZE - 1); + src += base; + dst += base; + chunksize &= AEGIS128_BLOCK_SIZE - 1; + + if (chunksize > 0) + ops->crypt_tail(state, chunksize, src, dst); + + skcipher_walk_done(&walk, 0); + } +} + +static struct aegis_ctx *crypto_aegis128_aesni_ctx(struct crypto_aead *aead) +{ + u8 *ctx = crypto_aead_ctx(aead); + ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx)); + return (void *)ctx; +} + +static int crypto_aegis128_aesni_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int keylen) +{ + struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(aead); + + if (keylen != AEGIS128_KEY_SIZE) { + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(ctx->key.bytes, key, AEGIS128_KEY_SIZE); + + return 0; +} + +static int crypto_aegis128_aesni_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + if (authsize > AEGIS128_MAX_AUTH_SIZE) + return -EINVAL; + if (authsize < AEGIS128_MIN_AUTH_SIZE) + return -EINVAL; + return 0; +} + +static void crypto_aegis128_aesni_crypt(struct aead_request *req, + struct aegis_block *tag_xor, + unsigned int cryptlen, + const struct aegis_crypt_ops *ops) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aegis_ctx *ctx = crypto_aegis128_aesni_ctx(tfm); + struct aegis_state state; + + kernel_fpu_begin(); + + crypto_aegis128_aesni_init(&state, ctx->key.bytes, req->iv); + crypto_aegis128_aesni_process_ad(&state, req->src, req->assoclen); + crypto_aegis128_aesni_process_crypt(&state, req, ops); + crypto_aegis128_aesni_final(&state, tag_xor, req->assoclen, cryptlen); + + kernel_fpu_end(); +} + +static int crypto_aegis128_aesni_encrypt(struct aead_request *req) +{ + static const struct aegis_crypt_ops OPS = { + .skcipher_walk_init = skcipher_walk_aead_encrypt, + .crypt_blocks = crypto_aegis128_aesni_enc, + .crypt_tail = crypto_aegis128_aesni_enc_tail, + }; + + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aegis_block tag = {}; + unsigned int authsize = crypto_aead_authsize(tfm); + unsigned int cryptlen = req->cryptlen; + + crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS); + + scatterwalk_map_and_copy(tag.bytes, req->dst, + req->assoclen + cryptlen, authsize, 1); + return 0; +} + +static int crypto_aegis128_aesni_decrypt(struct aead_request *req) +{ + static const struct aegis_block zeros = {}; + + static const struct aegis_crypt_ops OPS = { + .skcipher_walk_init = skcipher_walk_aead_decrypt, + .crypt_blocks = crypto_aegis128_aesni_dec, + .crypt_tail = crypto_aegis128_aesni_dec_tail, + }; + + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aegis_block tag; + unsigned int authsize = crypto_aead_authsize(tfm); + unsigned int cryptlen = req->cryptlen - authsize; + + scatterwalk_map_and_copy(tag.bytes, req->src, + req->assoclen + cryptlen, authsize, 0); + + crypto_aegis128_aesni_crypt(req, &tag, cryptlen, &OPS); + + return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0; +} + +static int crypto_aegis128_aesni_init_tfm(struct crypto_aead *aead) +{ + return 0; +} + +static void crypto_aegis128_aesni_exit_tfm(struct crypto_aead *aead) +{ +} + +static int cryptd_aegis128_aesni_setkey(struct crypto_aead *aead, + const u8 *key, unsigned int keylen) +{ + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); +} + +static int cryptd_aegis128_aesni_setauthsize(struct crypto_aead *aead, + unsigned int authsize) +{ + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); +} + +static int cryptd_aegis128_aesni_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + aead = &cryptd_tfm->base; + if (irq_fpu_usable() && (!in_atomic() || + !cryptd_aead_queued(cryptd_tfm))) + aead = cryptd_aead_child(cryptd_tfm); + + aead_request_set_tfm(req, aead); + + return crypto_aead_encrypt(req); +} + +static int cryptd_aegis128_aesni_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + aead = &cryptd_tfm->base; + if (irq_fpu_usable() && (!in_atomic() || + !cryptd_aead_queued(cryptd_tfm))) + aead = cryptd_aead_child(cryptd_tfm); + + aead_request_set_tfm(req, aead); + + return crypto_aead_decrypt(req); +} + +static int cryptd_aegis128_aesni_init_tfm(struct crypto_aead *aead) +{ + struct cryptd_aead *cryptd_tfm; + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + + cryptd_tfm = cryptd_alloc_aead("__aegis128-aesni", CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + + *ctx = cryptd_tfm; + crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); + return 0; +} + +static void cryptd_aegis128_aesni_exit_tfm(struct crypto_aead *aead) +{ + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + + cryptd_free_aead(*ctx); +} + +static struct aead_alg crypto_aegis128_aesni_alg[] = { + { + .setkey = crypto_aegis128_aesni_setkey, + .setauthsize = crypto_aegis128_aesni_setauthsize, + .encrypt = crypto_aegis128_aesni_encrypt, + .decrypt = crypto_aegis128_aesni_decrypt, + .init = crypto_aegis128_aesni_init_tfm, + .exit = crypto_aegis128_aesni_exit_tfm, + + .ivsize = AEGIS128_NONCE_SIZE, + .maxauthsize = AEGIS128_MAX_AUTH_SIZE, + .chunksize = AEGIS128_BLOCK_SIZE, + + .base = { + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aegis_ctx) + + __alignof__(struct aegis_ctx), + .cra_alignmask = 0, + + .cra_name = "__aegis128", + .cra_driver_name = "__aegis128-aesni", + + .cra_module = THIS_MODULE, + } + }, { + .setkey = cryptd_aegis128_aesni_setkey, + .setauthsize = cryptd_aegis128_aesni_setauthsize, + .encrypt = cryptd_aegis128_aesni_encrypt, + .decrypt = cryptd_aegis128_aesni_decrypt, + .init = cryptd_aegis128_aesni_init_tfm, + .exit = cryptd_aegis128_aesni_exit_tfm, + + .ivsize = AEGIS128_NONCE_SIZE, + .maxauthsize = AEGIS128_MAX_AUTH_SIZE, + .chunksize = AEGIS128_BLOCK_SIZE, + + .base = { + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct cryptd_aead *), + .cra_alignmask = 0, + + .cra_priority = 400, + + .cra_name = "aegis128", + .cra_driver_name = "aegis128-aesni", + + .cra_module = THIS_MODULE, + } + } +}; + +static const struct x86_cpu_id aesni_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_AES), + X86_FEATURE_MATCH(X86_FEATURE_XMM2), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); + +static int __init crypto_aegis128_aesni_module_init(void) +{ + if (!x86_match_cpu(aesni_cpu_id)) + return -ENODEV; + + return crypto_register_aeads(crypto_aegis128_aesni_alg, + ARRAY_SIZE(crypto_aegis128_aesni_alg)); +} + +static void __exit crypto_aegis128_aesni_module_exit(void) +{ + crypto_unregister_aeads(crypto_aegis128_aesni_alg, + ARRAY_SIZE(crypto_aegis128_aesni_alg)); +} + +module_init(crypto_aegis128_aesni_module_init); +module_exit(crypto_aegis128_aesni_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); +MODULE_DESCRIPTION("AEGIS-128 AEAD algorithm -- AESNI+SSE2 implementation"); +MODULE_ALIAS_CRYPTO("aegis128"); +MODULE_ALIAS_CRYPTO("aegis128-aesni"); diff --git a/arch/x86/crypto/aegis128l-aesni-asm.S b/arch/x86/crypto/aegis128l-aesni-asm.S new file mode 100644 index 000000000000..9263c344f2c7 --- /dev/null +++ b/arch/x86/crypto/aegis128l-aesni-asm.S @@ -0,0 +1,825 @@ +/* + * AES-NI + SSE2 implementation of AEGIS-128L + * + * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> + * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/frame.h> + +#define STATE0 %xmm0 +#define STATE1 %xmm1 +#define STATE2 %xmm2 +#define STATE3 %xmm3 +#define STATE4 %xmm4 +#define STATE5 %xmm5 +#define STATE6 %xmm6 +#define STATE7 %xmm7 +#define MSG0 %xmm8 +#define MSG1 %xmm9 +#define T0 %xmm10 +#define T1 %xmm11 +#define T2 %xmm12 +#define T3 %xmm13 + +#define STATEP %rdi +#define LEN %rsi +#define SRC %rdx +#define DST %rcx + +.section .rodata.cst16.aegis128l_const, "aM", @progbits, 32 +.align 16 +.Laegis128l_const_0: + .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d + .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 +.Laegis128l_const_1: + .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 + .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd + +.section .rodata.cst16.aegis128l_counter, "aM", @progbits, 16 +.align 16 +.Laegis128l_counter0: + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f +.Laegis128l_counter1: + .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 + .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f + +.text + +/* + * __load_partial: internal ABI + * input: + * LEN - bytes + * SRC - src + * output: + * MSG0 - first message block + * MSG1 - second message block + * changed: + * T0 + * %r8 + * %r9 + */ +__load_partial: + xor %r9, %r9 + pxor MSG0, MSG0 + pxor MSG1, MSG1 + + mov LEN, %r8 + and $0x1, %r8 + jz .Lld_partial_1 + + mov LEN, %r8 + and $0x1E, %r8 + add SRC, %r8 + mov (%r8), %r9b + +.Lld_partial_1: + mov LEN, %r8 + and $0x2, %r8 + jz .Lld_partial_2 + + mov LEN, %r8 + and $0x1C, %r8 + add SRC, %r8 + shl $0x10, %r9 + mov (%r8), %r9w + +.Lld_partial_2: + mov LEN, %r8 + and $0x4, %r8 + jz .Lld_partial_4 + + mov LEN, %r8 + and $0x18, %r8 + add SRC, %r8 + shl $32, %r9 + mov (%r8), %r8d + xor %r8, %r9 + +.Lld_partial_4: + movq %r9, MSG0 + + mov LEN, %r8 + and $0x8, %r8 + jz .Lld_partial_8 + + mov LEN, %r8 + and $0x10, %r8 + add SRC, %r8 + pslldq $8, MSG0 + movq (%r8), T0 + pxor T0, MSG0 + +.Lld_partial_8: + mov LEN, %r8 + and $0x10, %r8 + jz .Lld_partial_16 + + movdqa MSG0, MSG1 + movdqu (SRC), MSG0 + +.Lld_partial_16: + ret +ENDPROC(__load_partial) + +/* + * __store_partial: internal ABI + * input: + * LEN - bytes + * DST - dst + * output: + * T0 - first message block + * T1 - second message block + * changed: + * %r8 + * %r9 + * %r10 + */ +__store_partial: + mov LEN, %r8 + mov DST, %r9 + + cmp $16, %r8 + jl .Lst_partial_16 + + movdqu T0, (%r9) + movdqa T1, T0 + + sub $16, %r8 + add $16, %r9 + +.Lst_partial_16: + movq T0, %r10 + + cmp $8, %r8 + jl .Lst_partial_8 + + mov %r10, (%r9) + psrldq $8, T0 + movq T0, %r10 + + sub $8, %r8 + add $8, %r9 + +.Lst_partial_8: + cmp $4, %r8 + jl .Lst_partial_4 + + mov %r10d, (%r9) + shr $32, %r10 + + sub $4, %r8 + add $4, %r9 + +.Lst_partial_4: + cmp $2, %r8 + jl .Lst_partial_2 + + mov %r10w, (%r9) + shr $0x10, %r10 + + sub $2, %r8 + add $2, %r9 + +.Lst_partial_2: + cmp $1, %r8 + jl .Lst_partial_1 + + mov %r10b, (%r9) + +.Lst_partial_1: + ret +ENDPROC(__store_partial) + +.macro update + movdqa STATE7, T0 + aesenc STATE0, STATE7 + aesenc STATE1, STATE0 + aesenc STATE2, STATE1 + aesenc STATE3, STATE2 + aesenc STATE4, STATE3 + aesenc STATE5, STATE4 + aesenc STATE6, STATE5 + aesenc T0, STATE6 +.endm + +.macro update0 + update + pxor MSG0, STATE7 + pxor MSG1, STATE3 +.endm + +.macro update1 + update + pxor MSG0, STATE6 + pxor MSG1, STATE2 +.endm + +.macro update2 + update + pxor MSG0, STATE5 + pxor MSG1, STATE1 +.endm + +.macro update3 + update + pxor MSG0, STATE4 + pxor MSG1, STATE0 +.endm + +.macro update4 + update + pxor MSG0, STATE3 + pxor MSG1, STATE7 +.endm + +.macro update5 + update + pxor MSG0, STATE2 + pxor MSG1, STATE6 +.endm + +.macro update6 + update + pxor MSG0, STATE1 + pxor MSG1, STATE5 +.endm + +.macro update7 + update + pxor MSG0, STATE0 + pxor MSG1, STATE4 +.endm + +.macro state_load + movdqu 0x00(STATEP), STATE0 + movdqu 0x10(STATEP), STATE1 + movdqu 0x20(STATEP), STATE2 + movdqu 0x30(STATEP), STATE3 + movdqu 0x40(STATEP), STATE4 + movdqu 0x50(STATEP), STATE5 + movdqu 0x60(STATEP), STATE6 + movdqu 0x70(STATEP), STATE7 +.endm + +.macro state_store s0 s1 s2 s3 s4 s5 s6 s7 + movdqu \s7, 0x00(STATEP) + movdqu \s0, 0x10(STATEP) + movdqu \s1, 0x20(STATEP) + movdqu \s2, 0x30(STATEP) + movdqu \s3, 0x40(STATEP) + movdqu \s4, 0x50(STATEP) + movdqu \s5, 0x60(STATEP) + movdqu \s6, 0x70(STATEP) +.endm + +.macro state_store0 + state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 +.endm + +.macro state_store1 + state_store STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 +.endm + +.macro state_store2 + state_store STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 +.endm + +.macro state_store3 + state_store STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 +.endm + +.macro state_store4 + state_store STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 +.endm + +.macro state_store5 + state_store STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 +.endm + +.macro state_store6 + state_store STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 +.endm + +.macro state_store7 + state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 +.endm + +/* + * void crypto_aegis128l_aesni_init(void *state, const void *key, const void *iv); + */ +ENTRY(crypto_aegis128l_aesni_init) + FRAME_BEGIN + + /* load key: */ + movdqa (%rsi), MSG1 + movdqa MSG1, STATE0 + movdqa MSG1, STATE4 + movdqa MSG1, STATE5 + movdqa MSG1, STATE6 + movdqa MSG1, STATE7 + + /* load IV: */ + movdqu (%rdx), MSG0 + pxor MSG0, STATE0 + pxor MSG0, STATE4 + + /* load the constants: */ + movdqa .Laegis128l_const_0, STATE2 + movdqa .Laegis128l_const_1, STATE1 + movdqa STATE1, STATE3 + pxor STATE2, STATE5 + pxor STATE1, STATE6 + pxor STATE2, STATE7 + + /* update 10 times with IV and KEY: */ + update0 + update1 + update2 + update3 + update4 + update5 + update6 + update7 + update0 + update1 + + state_store1 + + FRAME_END + ret +ENDPROC(crypto_aegis128l_aesni_init) + +.macro ad_block a i + movdq\a (\i * 0x20 + 0x00)(SRC), MSG0 + movdq\a (\i * 0x20 + 0x10)(SRC), MSG1 + update\i + sub $0x20, LEN + cmp $0x20, LEN + jl .Lad_out_\i +.endm + +/* + * void crypto_aegis128l_aesni_ad(void *state, unsigned int length, + * const void *data); + */ +ENTRY(crypto_aegis128l_aesni_ad) + FRAME_BEGIN + + cmp $0x20, LEN + jb .Lad_out + + state_load + + mov SRC, %r8 + and $0xf, %r8 + jnz .Lad_u_loop + +.align 8 +.Lad_a_loop: + ad_block a 0 + ad_block a 1 + ad_block a 2 + ad_block a 3 + ad_block a 4 + ad_block a 5 + ad_block a 6 + ad_block a 7 + + add $0x100, SRC + jmp .Lad_a_loop + +.align 8 +.Lad_u_loop: + ad_block u 0 + ad_block u 1 + ad_block u 2 + ad_block u 3 + ad_block u 4 + ad_block u 5 + ad_block u 6 + ad_block u 7 + + add $0x100, SRC + jmp .Lad_u_loop + +.Lad_out_0: + state_store0 + FRAME_END + ret + +.Lad_out_1: + state_store1 + FRAME_END + ret + +.Lad_out_2: + state_store2 + FRAME_END + ret + +.Lad_out_3: + state_store3 + FRAME_END + ret + +.Lad_out_4: + state_store4 + FRAME_END + ret + +.Lad_out_5: + state_store5 + FRAME_END + ret + +.Lad_out_6: + state_store6 + FRAME_END + ret + +.Lad_out_7: + state_store7 + FRAME_END + ret + +.Lad_out: + FRAME_END + ret +ENDPROC(crypto_aegis128l_aesni_ad) + +.macro crypt m0 m1 s0 s1 s2 s3 s4 s5 s6 s7 + pxor \s1, \m0 + pxor \s6, \m0 + movdqa \s2, T3 + pand \s3, T3 + pxor T3, \m0 + + pxor \s2, \m1 + pxor \s5, \m1 + movdqa \s6, T3 + pand \s7, T3 + pxor T3, \m1 +.endm + +.macro crypt0 m0 m1 + crypt \m0 \m1 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 +.endm + +.macro crypt1 m0 m1 + crypt \m0 \m1 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 +.endm + +.macro crypt2 m0 m1 + crypt \m0 \m1 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 +.endm + +.macro crypt3 m0 m1 + crypt \m0 \m1 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 STATE4 +.endm + +.macro crypt4 m0 m1 + crypt \m0 \m1 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 STATE3 +.endm + +.macro crypt5 m0 m1 + crypt \m0 \m1 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 STATE2 +.endm + +.macro crypt6 m0 m1 + crypt \m0 \m1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 STATE1 +.endm + +.macro crypt7 m0 m1 + crypt \m0 \m1 STATE1 STATE2 STATE3 STATE4 STATE5 STATE6 STATE7 STATE0 +.endm + +.macro encrypt_block a i + movdq\a (\i * 0x20 + 0x00)(SRC), MSG0 + movdq\a (\i * 0x20 + 0x10)(SRC), MSG1 + movdqa MSG0, T0 + movdqa MSG1, T1 + crypt\i T0, T1 + movdq\a T0, (\i * 0x20 + 0x00)(DST) + movdq\a T1, (\i * 0x20 + 0x10)(DST) + + update\i + + sub $0x20, LEN + cmp $0x20, LEN + jl .Lenc_out_\i +.endm + +.macro decrypt_block a i + movdq\a (\i * 0x20 + 0x00)(SRC), MSG0 + movdq\a (\i * 0x20 + 0x10)(SRC), MSG1 + crypt\i MSG0, MSG1 + movdq\a MSG0, (\i * 0x20 + 0x00)(DST) + movdq\a MSG1, (\i * 0x20 + 0x10)(DST) + + update\i + + sub $0x20, LEN + cmp $0x20, LEN + jl .Ldec_out_\i +.endm + +/* + * void crypto_aegis128l_aesni_enc(void *state, unsigned int length, + * const void *src, void *dst); + */ +ENTRY(crypto_aegis128l_aesni_enc) + FRAME_BEGIN + + cmp $0x20, LEN + jb .Lenc_out + + state_load + + mov SRC, %r8 + or DST, %r8 + and $0xf, %r8 + jnz .Lenc_u_loop + +.align 8 +.Lenc_a_loop: + encrypt_block a 0 + encrypt_block a 1 + encrypt_block a 2 + encrypt_block a 3 + encrypt_block a 4 + encrypt_block a 5 + encrypt_block a 6 + encrypt_block a 7 + + add $0x100, SRC + add $0x100, DST + jmp .Lenc_a_loop + +.align 8 +.Lenc_u_loop: + encrypt_block u 0 + encrypt_block u 1 + encrypt_block u 2 + encrypt_block u 3 + encrypt_block u 4 + encrypt_block u 5 + encrypt_block u 6 + encrypt_block u 7 + + add $0x100, SRC + add $0x100, DST + jmp .Lenc_u_loop + +.Lenc_out_0: + state_store0 + FRAME_END + ret + +.Lenc_out_1: + state_store1 + FRAME_END + ret + +.Lenc_out_2: + state_store2 + FRAME_END + ret + +.Lenc_out_3: + state_store3 + FRAME_END + ret + +.Lenc_out_4: + state_store4 + FRAME_END + ret + +.Lenc_out_5: + state_store5 + FRAME_END + ret + +.Lenc_out_6: + state_store6 + FRAME_END + ret + +.Lenc_out_7: + state_store7 + FRAME_END + ret + +.Lenc_out: + FRAME_END + ret +ENDPROC(crypto_aegis128l_aesni_enc) + +/* + * void crypto_aegis128l_aesni_enc_tail(void *state, unsigned int length, + * const void *src, void *dst); + */ +ENTRY(crypto_aegis128l_aesni_enc_tail) + FRAME_BEGIN + + state_load + + /* encrypt message: */ + call __load_partial + + movdqa MSG0, T0 + movdqa MSG1, T1 + crypt0 T0, T1 + + call __store_partial + + update0 + + state_store0 + + FRAME_END +ENDPROC(crypto_aegis128l_aesni_enc_tail) + +/* + * void crypto_aegis128l_aesni_dec(void *state, unsigned int length, + * const void *src, void *dst); + */ +ENTRY(crypto_aegis128l_aesni_dec) + FRAME_BEGIN + + cmp $0x20, LEN + jb .Ldec_out + + state_load + + mov SRC, %r8 + or DST, %r8 + and $0xF, %r8 + jnz .Ldec_u_loop + +.align 8 +.Ldec_a_loop: + decrypt_block a 0 + decrypt_block a 1 + decrypt_block a 2 + decrypt_block a 3 + decrypt_block a 4 + decrypt_block a 5 + decrypt_block a 6 + decrypt_block a 7 + + add $0x100, SRC + add $0x100, DST + jmp .Ldec_a_loop + +.align 8 +.Ldec_u_loop: + decrypt_block u 0 + decrypt_block u 1 + decrypt_block u 2 + decrypt_block u 3 + decrypt_block u 4 + decrypt_block u 5 + decrypt_block u 6 + decrypt_block u 7 + + add $0x100, SRC + add $0x100, DST + jmp .Ldec_u_loop + +.Ldec_out_0: + state_store0 + FRAME_END + ret + +.Ldec_out_1: + state_store1 + FRAME_END + ret + +.Ldec_out_2: + state_store2 + FRAME_END + ret + +.Ldec_out_3: + state_store3 + FRAME_END + ret + +.Ldec_out_4: + state_store4 + FRAME_END + ret + +.Ldec_out_5: + state_store5 + FRAME_END + ret + +.Ldec_out_6: + state_store6 + FRAME_END + ret + +.Ldec_out_7: + state_store7 + FRAME_END + ret + +.Ldec_out: + FRAME_END + ret +ENDPROC(crypto_aegis128l_aesni_dec) + +/* + * void crypto_aegis128l_aesni_dec_tail(void *state, unsigned int length, + * const void *src, void *dst); + */ +ENTRY(crypto_aegis128l_aesni_dec_tail) + FRAME_BEGIN + + state_load + + /* decrypt message: */ + call __load_partial + + crypt0 MSG0, MSG1 + + movdqa MSG0, T0 + movdqa MSG1, T1 + call __store_partial + + /* mask with byte count: */ + movq LEN, T0 + punpcklbw T0, T0 + punpcklbw T0, T0 + punpcklbw T0, T0 + punpcklbw T0, T0 + movdqa T0, T1 + movdqa .Laegis128l_counter0, T2 + movdqa .Laegis128l_counter1, T3 + pcmpgtb T2, T0 + pcmpgtb T3, T1 + pand T0, MSG0 + pand T1, MSG1 + + update0 + + state_store0 + + FRAME_END + ret +ENDPROC(crypto_aegis128l_aesni_dec_tail) + +/* + * void crypto_aegis128l_aesni_final(void *state, void *tag_xor, + * u64 assoclen, u64 cryptlen); + */ +ENTRY(crypto_aegis128l_aesni_final) + FRAME_BEGIN + + state_load + + /* prepare length block: */ + movq %rdx, MSG0 + movq %rcx, T0 + pslldq $8, T0 + pxor T0, MSG0 + psllq $3, MSG0 /* multiply by 8 (to get bit count) */ + + pxor STATE2, MSG0 + movdqa MSG0, MSG1 + + /* update state: */ + update0 + update1 + update2 + update3 + update4 + update5 + update6 + + /* xor tag: */ + movdqu (%rsi), T0 + + pxor STATE1, T0 + pxor STATE2, T0 + pxor STATE3, T0 + pxor STATE4, T0 + pxor STATE5, T0 + pxor STATE6, T0 + pxor STATE7, T0 + + movdqu T0, (%rsi) + + FRAME_END + ret +ENDPROC(crypto_aegis128l_aesni_final) diff --git a/arch/x86/crypto/aegis128l-aesni-glue.c b/arch/x86/crypto/aegis128l-aesni-glue.c new file mode 100644 index 000000000000..876e4866e633 --- /dev/null +++ b/arch/x86/crypto/aegis128l-aesni-glue.c @@ -0,0 +1,407 @@ +/* + * The AEGIS-128L Authenticated-Encryption Algorithm + * Glue for AES-NI + SSE2 implementation + * + * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> + * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <crypto/cryptd.h> +#include <crypto/internal/aead.h> +#include <crypto/internal/skcipher.h> +#include <crypto/scatterwalk.h> +#include <linux/module.h> +#include <asm/fpu/api.h> +#include <asm/cpu_device_id.h> + +#define AEGIS128L_BLOCK_ALIGN 16 +#define AEGIS128L_BLOCK_SIZE 32 +#define AEGIS128L_NONCE_SIZE 16 +#define AEGIS128L_STATE_BLOCKS 8 +#define AEGIS128L_KEY_SIZE 16 +#define AEGIS128L_MIN_AUTH_SIZE 8 +#define AEGIS128L_MAX_AUTH_SIZE 16 + +asmlinkage void crypto_aegis128l_aesni_init(void *state, void *key, void *iv); + +asmlinkage void crypto_aegis128l_aesni_ad( + void *state, unsigned int length, const void *data); + +asmlinkage void crypto_aegis128l_aesni_enc( + void *state, unsigned int length, const void *src, void *dst); + +asmlinkage void crypto_aegis128l_aesni_dec( + void *state, unsigned int length, const void *src, void *dst); + +asmlinkage void crypto_aegis128l_aesni_enc_tail( + void *state, unsigned int length, const void *src, void *dst); + +asmlinkage void crypto_aegis128l_aesni_dec_tail( + void *state, unsigned int length, const void *src, void *dst); + +asmlinkage void crypto_aegis128l_aesni_final( + void *state, void *tag_xor, unsigned int cryptlen, + unsigned int assoclen); + +struct aegis_block { + u8 bytes[AEGIS128L_BLOCK_SIZE] __aligned(AEGIS128L_BLOCK_ALIGN); +}; + +struct aegis_state { + struct aegis_block blocks[AEGIS128L_STATE_BLOCKS]; +}; + +struct aegis_ctx { + struct aegis_block key; +}; + +struct aegis_crypt_ops { + int (*skcipher_walk_init)(struct skcipher_walk *walk, + struct aead_request *req, bool atomic); + + void (*crypt_blocks)(void *state, unsigned int length, const void *src, + void *dst); + void (*crypt_tail)(void *state, unsigned int length, const void *src, + void *dst); +}; + +static void crypto_aegis128l_aesni_process_ad( + struct aegis_state *state, struct scatterlist *sg_src, + unsigned int assoclen) +{ + struct scatter_walk walk; + struct aegis_block buf; + unsigned int pos = 0; + + scatterwalk_start(&walk, sg_src); + while (assoclen != 0) { + unsigned int size = scatterwalk_clamp(&walk, assoclen); + unsigned int left = size; + void *mapped = scatterwalk_map(&walk); + const u8 *src = (const u8 *)mapped; + + if (pos + size >= AEGIS128L_BLOCK_SIZE) { + if (pos > 0) { + unsigned int fill = AEGIS128L_BLOCK_SIZE - pos; + memcpy(buf.bytes + pos, src, fill); + crypto_aegis128l_aesni_ad(state, + AEGIS128L_BLOCK_SIZE, + buf.bytes); + pos = 0; + left -= fill; + src += fill; + } + + crypto_aegis128l_aesni_ad(state, left, src); + + src += left & ~(AEGIS128L_BLOCK_SIZE - 1); + left &= AEGIS128L_BLOCK_SIZE - 1; + } + + memcpy(buf.bytes + pos, src, left); + pos += left; + assoclen -= size; + + scatterwalk_unmap(mapped); + scatterwalk_advance(&walk, size); + scatterwalk_done(&walk, 0, assoclen); + } + + if (pos > 0) { + memset(buf.bytes + pos, 0, AEGIS128L_BLOCK_SIZE - pos); + crypto_aegis128l_aesni_ad(state, AEGIS128L_BLOCK_SIZE, buf.bytes); + } +} + +static void crypto_aegis128l_aesni_process_crypt( + struct aegis_state *state, struct aead_request *req, + const struct aegis_crypt_ops *ops) +{ + struct skcipher_walk walk; + u8 *src, *dst; + unsigned int chunksize, base; + + ops->skcipher_walk_init(&walk, req, false); + + while (walk.nbytes) { + src = walk.src.virt.addr; + dst = walk.dst.virt.addr; + chunksize = walk.nbytes; + + ops->crypt_blocks(state, chunksize, src, dst); + + base = chunksize & ~(AEGIS128L_BLOCK_SIZE - 1); + src += base; + dst += base; + chunksize &= AEGIS128L_BLOCK_SIZE - 1; + + if (chunksize > 0) + ops->crypt_tail(state, chunksize, src, dst); + + skcipher_walk_done(&walk, 0); + } +} + +static struct aegis_ctx *crypto_aegis128l_aesni_ctx(struct crypto_aead *aead) +{ + u8 *ctx = crypto_aead_ctx(aead); + ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx)); + return (void *)ctx; +} + +static int crypto_aegis128l_aesni_setkey(struct crypto_aead *aead, + const u8 *key, unsigned int keylen) +{ + struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(aead); + + if (keylen != AEGIS128L_KEY_SIZE) { + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(ctx->key.bytes, key, AEGIS128L_KEY_SIZE); + + return 0; +} + +static int crypto_aegis128l_aesni_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + if (authsize > AEGIS128L_MAX_AUTH_SIZE) + return -EINVAL; + if (authsize < AEGIS128L_MIN_AUTH_SIZE) + return -EINVAL; + return 0; +} + +static void crypto_aegis128l_aesni_crypt(struct aead_request *req, + struct aegis_block *tag_xor, + unsigned int cryptlen, + const struct aegis_crypt_ops *ops) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aegis_ctx *ctx = crypto_aegis128l_aesni_ctx(tfm); + struct aegis_state state; + + kernel_fpu_begin(); + + crypto_aegis128l_aesni_init(&state, ctx->key.bytes, req->iv); + crypto_aegis128l_aesni_process_ad(&state, req->src, req->assoclen); + crypto_aegis128l_aesni_process_crypt(&state, req, ops); + crypto_aegis128l_aesni_final(&state, tag_xor, req->assoclen, cryptlen); + + kernel_fpu_end(); +} + +static int crypto_aegis128l_aesni_encrypt(struct aead_request *req) +{ + static const struct aegis_crypt_ops OPS = { + .skcipher_walk_init = skcipher_walk_aead_encrypt, + .crypt_blocks = crypto_aegis128l_aesni_enc, + .crypt_tail = crypto_aegis128l_aesni_enc_tail, + }; + + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aegis_block tag = {}; + unsigned int authsize = crypto_aead_authsize(tfm); + unsigned int cryptlen = req->cryptlen; + + crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS); + + scatterwalk_map_and_copy(tag.bytes, req->dst, + req->assoclen + cryptlen, authsize, 1); + return 0; +} + +static int crypto_aegis128l_aesni_decrypt(struct aead_request *req) +{ + static const struct aegis_block zeros = {}; + + static const struct aegis_crypt_ops OPS = { + .skcipher_walk_init = skcipher_walk_aead_decrypt, + .crypt_blocks = crypto_aegis128l_aesni_dec, + .crypt_tail = crypto_aegis128l_aesni_dec_tail, + }; + + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aegis_block tag; + unsigned int authsize = crypto_aead_authsize(tfm); + unsigned int cryptlen = req->cryptlen - authsize; + + scatterwalk_map_and_copy(tag.bytes, req->src, + req->assoclen + cryptlen, authsize, 0); + + crypto_aegis128l_aesni_crypt(req, &tag, cryptlen, &OPS); + + return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0; +} + +static int crypto_aegis128l_aesni_init_tfm(struct crypto_aead *aead) +{ + return 0; +} + +static void crypto_aegis128l_aesni_exit_tfm(struct crypto_aead *aead) +{ +} + +static int cryptd_aegis128l_aesni_setkey(struct crypto_aead *aead, + const u8 *key, unsigned int keylen) +{ + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); +} + +static int cryptd_aegis128l_aesni_setauthsize(struct crypto_aead *aead, + unsigned int authsize) +{ + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); +} + +static int cryptd_aegis128l_aesni_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + aead = &cryptd_tfm->base; + if (irq_fpu_usable() && (!in_atomic() || + !cryptd_aead_queued(cryptd_tfm))) + aead = cryptd_aead_child(cryptd_tfm); + + aead_request_set_tfm(req, aead); + + return crypto_aead_encrypt(req); +} + +static int cryptd_aegis128l_aesni_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + aead = &cryptd_tfm->base; + if (irq_fpu_usable() && (!in_atomic() || + !cryptd_aead_queued(cryptd_tfm))) + aead = cryptd_aead_child(cryptd_tfm); + + aead_request_set_tfm(req, aead); + + return crypto_aead_decrypt(req); +} + +static int cryptd_aegis128l_aesni_init_tfm(struct crypto_aead *aead) +{ + struct cryptd_aead *cryptd_tfm; + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + + cryptd_tfm = cryptd_alloc_aead("__aegis128l-aesni", CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + + *ctx = cryptd_tfm; + crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); + return 0; +} + +static void cryptd_aegis128l_aesni_exit_tfm(struct crypto_aead *aead) +{ + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + + cryptd_free_aead(*ctx); +} + +static struct aead_alg crypto_aegis128l_aesni_alg[] = { + { + .setkey = crypto_aegis128l_aesni_setkey, + .setauthsize = crypto_aegis128l_aesni_setauthsize, + .encrypt = crypto_aegis128l_aesni_encrypt, + .decrypt = crypto_aegis128l_aesni_decrypt, + .init = crypto_aegis128l_aesni_init_tfm, + .exit = crypto_aegis128l_aesni_exit_tfm, + + .ivsize = AEGIS128L_NONCE_SIZE, + .maxauthsize = AEGIS128L_MAX_AUTH_SIZE, + .chunksize = AEGIS128L_BLOCK_SIZE, + + .base = { + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aegis_ctx) + + __alignof__(struct aegis_ctx), + .cra_alignmask = 0, + + .cra_name = "__aegis128l", + .cra_driver_name = "__aegis128l-aesni", + + .cra_module = THIS_MODULE, + } + }, { + .setkey = cryptd_aegis128l_aesni_setkey, + .setauthsize = cryptd_aegis128l_aesni_setauthsize, + .encrypt = cryptd_aegis128l_aesni_encrypt, + .decrypt = cryptd_aegis128l_aesni_decrypt, + .init = cryptd_aegis128l_aesni_init_tfm, + .exit = cryptd_aegis128l_aesni_exit_tfm, + + .ivsize = AEGIS128L_NONCE_SIZE, + .maxauthsize = AEGIS128L_MAX_AUTH_SIZE, + .chunksize = AEGIS128L_BLOCK_SIZE, + + .base = { + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct cryptd_aead *), + .cra_alignmask = 0, + + .cra_priority = 400, + + .cra_name = "aegis128l", + .cra_driver_name = "aegis128l-aesni", + + .cra_module = THIS_MODULE, + } + } +}; + +static const struct x86_cpu_id aesni_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_AES), + X86_FEATURE_MATCH(X86_FEATURE_XMM2), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); + +static int __init crypto_aegis128l_aesni_module_init(void) +{ + if (!x86_match_cpu(aesni_cpu_id)) + return -ENODEV; + + return crypto_register_aeads(crypto_aegis128l_aesni_alg, + ARRAY_SIZE(crypto_aegis128l_aesni_alg)); +} + +static void __exit crypto_aegis128l_aesni_module_exit(void) +{ + crypto_unregister_aeads(crypto_aegis128l_aesni_alg, + ARRAY_SIZE(crypto_aegis128l_aesni_alg)); +} + +module_init(crypto_aegis128l_aesni_module_init); +module_exit(crypto_aegis128l_aesni_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); +MODULE_DESCRIPTION("AEGIS-128L AEAD algorithm -- AESNI+SSE2 implementation"); +MODULE_ALIAS_CRYPTO("aegis128l"); +MODULE_ALIAS_CRYPTO("aegis128l-aesni"); diff --git a/arch/x86/crypto/aegis256-aesni-asm.S b/arch/x86/crypto/aegis256-aesni-asm.S new file mode 100644 index 000000000000..1d977d515bf9 --- /dev/null +++ b/arch/x86/crypto/aegis256-aesni-asm.S @@ -0,0 +1,702 @@ +/* + * AES-NI + SSE2 implementation of AEGIS-128L + * + * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> + * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/frame.h> + +#define STATE0 %xmm0 +#define STATE1 %xmm1 +#define STATE2 %xmm2 +#define STATE3 %xmm3 +#define STATE4 %xmm4 +#define STATE5 %xmm5 +#define MSG %xmm6 +#define T0 %xmm7 +#define T1 %xmm8 +#define T2 %xmm9 +#define T3 %xmm10 + +#define STATEP %rdi +#define LEN %rsi +#define SRC %rdx +#define DST %rcx + +.section .rodata.cst16.aegis256_const, "aM", @progbits, 32 +.align 16 +.Laegis256_const_0: + .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d + .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 +.Laegis256_const_1: + .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 + .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd + +.section .rodata.cst16.aegis256_counter, "aM", @progbits, 16 +.align 16 +.Laegis256_counter: + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + +.text + +/* + * __load_partial: internal ABI + * input: + * LEN - bytes + * SRC - src + * output: + * MSG - message block + * changed: + * T0 + * %r8 + * %r9 + */ +__load_partial: + xor %r9, %r9 + pxor MSG, MSG + + mov LEN, %r8 + and $0x1, %r8 + jz .Lld_partial_1 + + mov LEN, %r8 + and $0x1E, %r8 + add SRC, %r8 + mov (%r8), %r9b + +.Lld_partial_1: + mov LEN, %r8 + and $0x2, %r8 + jz .Lld_partial_2 + + mov LEN, %r8 + and $0x1C, %r8 + add SRC, %r8 + shl $0x10, %r9 + mov (%r8), %r9w + +.Lld_partial_2: + mov LEN, %r8 + and $0x4, %r8 + jz .Lld_partial_4 + + mov LEN, %r8 + and $0x18, %r8 + add SRC, %r8 + shl $32, %r9 + mov (%r8), %r8d + xor %r8, %r9 + +.Lld_partial_4: + movq %r9, MSG + + mov LEN, %r8 + and $0x8, %r8 + jz .Lld_partial_8 + + mov LEN, %r8 + and $0x10, %r8 + add SRC, %r8 + pslldq $8, MSG + movq (%r8), T0 + pxor T0, MSG + +.Lld_partial_8: + ret +ENDPROC(__load_partial) + +/* + * __store_partial: internal ABI + * input: + * LEN - bytes + * DST - dst + * output: + * T0 - message block + * changed: + * %r8 + * %r9 + * %r10 + */ +__store_partial: + mov LEN, %r8 + mov DST, %r9 + + movq T0, %r10 + + cmp $8, %r8 + jl .Lst_partial_8 + + mov %r10, (%r9) + psrldq $8, T0 + movq T0, %r10 + + sub $8, %r8 + add $8, %r9 + +.Lst_partial_8: + cmp $4, %r8 + jl .Lst_partial_4 + + mov %r10d, (%r9) + shr $32, %r10 + + sub $4, %r8 + add $4, %r9 + +.Lst_partial_4: + cmp $2, %r8 + jl .Lst_partial_2 + + mov %r10w, (%r9) + shr $0x10, %r10 + + sub $2, %r8 + add $2, %r9 + +.Lst_partial_2: + cmp $1, %r8 + jl .Lst_partial_1 + + mov %r10b, (%r9) + +.Lst_partial_1: + ret +ENDPROC(__store_partial) + +.macro update + movdqa STATE5, T0 + aesenc STATE0, STATE5 + aesenc STATE1, STATE0 + aesenc STATE2, STATE1 + aesenc STATE3, STATE2 + aesenc STATE4, STATE3 + aesenc T0, STATE4 +.endm + +.macro update0 m + update + pxor \m, STATE5 +.endm + +.macro update1 m + update + pxor \m, STATE4 +.endm + +.macro update2 m + update + pxor \m, STATE3 +.endm + +.macro update3 m + update + pxor \m, STATE2 +.endm + +.macro update4 m + update + pxor \m, STATE1 +.endm + +.macro update5 m + update + pxor \m, STATE0 +.endm + +.macro state_load + movdqu 0x00(STATEP), STATE0 + movdqu 0x10(STATEP), STATE1 + movdqu 0x20(STATEP), STATE2 + movdqu 0x30(STATEP), STATE3 + movdqu 0x40(STATEP), STATE4 + movdqu 0x50(STATEP), STATE5 +.endm + +.macro state_store s0 s1 s2 s3 s4 s5 + movdqu \s5, 0x00(STATEP) + movdqu \s0, 0x10(STATEP) + movdqu \s1, 0x20(STATEP) + movdqu \s2, 0x30(STATEP) + movdqu \s3, 0x40(STATEP) + movdqu \s4, 0x50(STATEP) +.endm + +.macro state_store0 + state_store STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 +.endm + +.macro state_store1 + state_store STATE5 STATE0 STATE1 STATE2 STATE3 STATE4 +.endm + +.macro state_store2 + state_store STATE4 STATE5 STATE0 STATE1 STATE2 STATE3 +.endm + +.macro state_store3 + state_store STATE3 STATE4 STATE5 STATE0 STATE1 STATE2 +.endm + +.macro state_store4 + state_store STATE2 STATE3 STATE4 STATE5 STATE0 STATE1 +.endm + +.macro state_store5 + state_store STATE1 STATE2 STATE3 STATE4 STATE5 STATE0 +.endm + +/* + * void crypto_aegis256_aesni_init(void *state, const void *key, const void *iv); + */ +ENTRY(crypto_aegis256_aesni_init) + FRAME_BEGIN + + /* load key: */ + movdqa 0x00(%rsi), MSG + movdqa 0x10(%rsi), T1 + movdqa MSG, STATE4 + movdqa T1, STATE5 + + /* load IV: */ + movdqu 0x00(%rdx), T2 + movdqu 0x10(%rdx), T3 + pxor MSG, T2 + pxor T1, T3 + movdqa T2, STATE0 + movdqa T3, STATE1 + + /* load the constants: */ + movdqa .Laegis256_const_0, STATE3 + movdqa .Laegis256_const_1, STATE2 + pxor STATE3, STATE4 + pxor STATE2, STATE5 + + /* update 10 times with IV and KEY: */ + update0 MSG + update1 T1 + update2 T2 + update3 T3 + update4 MSG + update5 T1 + update0 T2 + update1 T3 + update2 MSG + update3 T1 + update4 T2 + update5 T3 + update0 MSG + update1 T1 + update2 T2 + update3 T3 + + state_store3 + + FRAME_END + ret +ENDPROC(crypto_aegis256_aesni_init) + +.macro ad_block a i + movdq\a (\i * 0x10)(SRC), MSG + update\i MSG + sub $0x10, LEN + cmp $0x10, LEN + jl .Lad_out_\i +.endm + +/* + * void crypto_aegis256_aesni_ad(void *state, unsigned int length, + * const void *data); + */ +ENTRY(crypto_aegis256_aesni_ad) + FRAME_BEGIN + + cmp $0x10, LEN + jb .Lad_out + + state_load + + mov SRC, %r8 + and $0xf, %r8 + jnz .Lad_u_loop + +.align 8 +.Lad_a_loop: + ad_block a 0 + ad_block a 1 + ad_block a 2 + ad_block a 3 + ad_block a 4 + ad_block a 5 + + add $0x60, SRC + jmp .Lad_a_loop + +.align 8 +.Lad_u_loop: + ad_block u 0 + ad_block u 1 + ad_block u 2 + ad_block u 3 + ad_block u 4 + ad_block u 5 + + add $0x60, SRC + jmp .Lad_u_loop + +.Lad_out_0: + state_store0 + FRAME_END + ret + +.Lad_out_1: + state_store1 + FRAME_END + ret + +.Lad_out_2: + state_store2 + FRAME_END + ret + +.Lad_out_3: + state_store3 + FRAME_END + ret + +.Lad_out_4: + state_store4 + FRAME_END + ret + +.Lad_out_5: + state_store5 + FRAME_END + ret + +.Lad_out: + FRAME_END + ret +ENDPROC(crypto_aegis256_aesni_ad) + +.macro crypt m s0 s1 s2 s3 s4 s5 + pxor \s1, \m + pxor \s4, \m + pxor \s5, \m + movdqa \s2, T3 + pand \s3, T3 + pxor T3, \m +.endm + +.macro crypt0 m + crypt \m STATE0 STATE1 STATE2 STATE3 STATE4 STATE5 +.endm + +.macro crypt1 m + crypt \m STATE5 STATE0 STATE1 STATE2 STATE3 STATE4 +.endm + +.macro crypt2 m + crypt \m STATE4 STATE5 STATE0 STATE1 STATE2 STATE3 +.endm + +.macro crypt3 m + crypt \m STATE3 STATE4 STATE5 STATE0 STATE1 STATE2 +.endm + +.macro crypt4 m + crypt \m STATE2 STATE3 STATE4 STATE5 STATE0 STATE1 +.endm + +.macro crypt5 m + crypt \m STATE1 STATE2 STATE3 STATE4 STATE5 STATE0 +.endm + +.macro encrypt_block a i + movdq\a (\i * 0x10)(SRC), MSG + movdqa MSG, T0 + crypt\i T0 + movdq\a T0, (\i * 0x10)(DST) + + update\i MSG + + sub $0x10, LEN + cmp $0x10, LEN + jl .Lenc_out_\i +.endm + +.macro decrypt_block a i + movdq\a (\i * 0x10)(SRC), MSG + crypt\i MSG + movdq\a MSG, (\i * 0x10)(DST) + + update\i MSG + + sub $0x10, LEN + cmp $0x10, LEN + jl .Ldec_out_\i +.endm + +/* + * void crypto_aegis256_aesni_enc(void *state, unsigned int length, + * const void *src, void *dst); + */ +ENTRY(crypto_aegis256_aesni_enc) + FRAME_BEGIN + + cmp $0x10, LEN + jb .Lenc_out + + state_load + + mov SRC, %r8 + or DST, %r8 + and $0xf, %r8 + jnz .Lenc_u_loop + +.align 8 +.Lenc_a_loop: + encrypt_block a 0 + encrypt_block a 1 + encrypt_block a 2 + encrypt_block a 3 + encrypt_block a 4 + encrypt_block a 5 + + add $0x60, SRC + add $0x60, DST + jmp .Lenc_a_loop + +.align 8 +.Lenc_u_loop: + encrypt_block u 0 + encrypt_block u 1 + encrypt_block u 2 + encrypt_block u 3 + encrypt_block u 4 + encrypt_block u 5 + + add $0x60, SRC + add $0x60, DST + jmp .Lenc_u_loop + +.Lenc_out_0: + state_store0 + FRAME_END + ret + +.Lenc_out_1: + state_store1 + FRAME_END + ret + +.Lenc_out_2: + state_store2 + FRAME_END + ret + +.Lenc_out_3: + state_store3 + FRAME_END + ret + +.Lenc_out_4: + state_store4 + FRAME_END + ret + +.Lenc_out_5: + state_store5 + FRAME_END + ret + +.Lenc_out: + FRAME_END + ret +ENDPROC(crypto_aegis256_aesni_enc) + +/* + * void crypto_aegis256_aesni_enc_tail(void *state, unsigned int length, + * const void *src, void *dst); + */ +ENTRY(crypto_aegis256_aesni_enc_tail) + FRAME_BEGIN + + state_load + + /* encrypt message: */ + call __load_partial + + movdqa MSG, T0 + crypt0 T0 + + call __store_partial + + update0 MSG + + state_store0 + + FRAME_END +ENDPROC(crypto_aegis256_aesni_enc_tail) + +/* + * void crypto_aegis256_aesni_dec(void *state, unsigned int length, + * const void *src, void *dst); + */ +ENTRY(crypto_aegis256_aesni_dec) + FRAME_BEGIN + + cmp $0x10, LEN + jb .Ldec_out + + state_load + + mov SRC, %r8 + or DST, %r8 + and $0xF, %r8 + jnz .Ldec_u_loop + +.align 8 +.Ldec_a_loop: + decrypt_block a 0 + decrypt_block a 1 + decrypt_block a 2 + decrypt_block a 3 + decrypt_block a 4 + decrypt_block a 5 + + add $0x60, SRC + add $0x60, DST + jmp .Ldec_a_loop + +.align 8 +.Ldec_u_loop: + decrypt_block u 0 + decrypt_block u 1 + decrypt_block u 2 + decrypt_block u 3 + decrypt_block u 4 + decrypt_block u 5 + + add $0x60, SRC + add $0x60, DST + jmp .Ldec_u_loop + +.Ldec_out_0: + state_store0 + FRAME_END + ret + +.Ldec_out_1: + state_store1 + FRAME_END + ret + +.Ldec_out_2: + state_store2 + FRAME_END + ret + +.Ldec_out_3: + state_store3 + FRAME_END + ret + +.Ldec_out_4: + state_store4 + FRAME_END + ret + +.Ldec_out_5: + state_store5 + FRAME_END + ret + +.Ldec_out: + FRAME_END + ret +ENDPROC(crypto_aegis256_aesni_dec) + +/* + * void crypto_aegis256_aesni_dec_tail(void *state, unsigned int length, + * const void *src, void *dst); + */ +ENTRY(crypto_aegis256_aesni_dec_tail) + FRAME_BEGIN + + state_load + + /* decrypt message: */ + call __load_partial + + crypt0 MSG + + movdqa MSG, T0 + call __store_partial + + /* mask with byte count: */ + movq LEN, T0 + punpcklbw T0, T0 + punpcklbw T0, T0 + punpcklbw T0, T0 + punpcklbw T0, T0 + movdqa .Laegis256_counter, T1 + pcmpgtb T1, T0 + pand T0, MSG + + update0 MSG + + state_store0 + + FRAME_END + ret +ENDPROC(crypto_aegis256_aesni_dec_tail) + +/* + * void crypto_aegis256_aesni_final(void *state, void *tag_xor, + * u64 assoclen, u64 cryptlen); + */ +ENTRY(crypto_aegis256_aesni_final) + FRAME_BEGIN + + state_load + + /* prepare length block: */ + movq %rdx, MSG + movq %rcx, T0 + pslldq $8, T0 + pxor T0, MSG + psllq $3, MSG /* multiply by 8 (to get bit count) */ + + pxor STATE3, MSG + + /* update state: */ + update0 MSG + update1 MSG + update2 MSG + update3 MSG + update4 MSG + update5 MSG + update0 MSG + + /* xor tag: */ + movdqu (%rsi), MSG + + pxor STATE0, MSG + pxor STATE1, MSG + pxor STATE2, MSG + pxor STATE3, MSG + pxor STATE4, MSG + pxor STATE5, MSG + + movdqu MSG, (%rsi) + + FRAME_END + ret +ENDPROC(crypto_aegis256_aesni_final) diff --git a/arch/x86/crypto/aegis256-aesni-glue.c b/arch/x86/crypto/aegis256-aesni-glue.c new file mode 100644 index 000000000000..2b5dd3af8f4d --- /dev/null +++ b/arch/x86/crypto/aegis256-aesni-glue.c @@ -0,0 +1,407 @@ +/* + * The AEGIS-256 Authenticated-Encryption Algorithm + * Glue for AES-NI + SSE2 implementation + * + * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> + * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <crypto/cryptd.h> +#include <crypto/internal/aead.h> +#include <crypto/internal/skcipher.h> +#include <crypto/scatterwalk.h> +#include <linux/module.h> +#include <asm/fpu/api.h> +#include <asm/cpu_device_id.h> + +#define AEGIS256_BLOCK_ALIGN 16 +#define AEGIS256_BLOCK_SIZE 16 +#define AEGIS256_NONCE_SIZE 32 +#define AEGIS256_STATE_BLOCKS 6 +#define AEGIS256_KEY_SIZE 32 +#define AEGIS256_MIN_AUTH_SIZE 8 +#define AEGIS256_MAX_AUTH_SIZE 16 + +asmlinkage void crypto_aegis256_aesni_init(void *state, void *key, void *iv); + +asmlinkage void crypto_aegis256_aesni_ad( + void *state, unsigned int length, const void *data); + +asmlinkage void crypto_aegis256_aesni_enc( + void *state, unsigned int length, const void *src, void *dst); + +asmlinkage void crypto_aegis256_aesni_dec( + void *state, unsigned int length, const void *src, void *dst); + +asmlinkage void crypto_aegis256_aesni_enc_tail( + void *state, unsigned int length, const void *src, void *dst); + +asmlinkage void crypto_aegis256_aesni_dec_tail( + void *state, unsigned int length, const void *src, void *dst); + +asmlinkage void crypto_aegis256_aesni_final( + void *state, void *tag_xor, unsigned int cryptlen, + unsigned int assoclen); + +struct aegis_block { + u8 bytes[AEGIS256_BLOCK_SIZE] __aligned(AEGIS256_BLOCK_ALIGN); +}; + +struct aegis_state { + struct aegis_block blocks[AEGIS256_STATE_BLOCKS]; +}; + +struct aegis_ctx { + struct aegis_block key[AEGIS256_KEY_SIZE / AEGIS256_BLOCK_SIZE]; +}; + +struct aegis_crypt_ops { + int (*skcipher_walk_init)(struct skcipher_walk *walk, + struct aead_request *req, bool atomic); + + void (*crypt_blocks)(void *state, unsigned int length, const void *src, + void *dst); + void (*crypt_tail)(void *state, unsigned int length, const void *src, + void *dst); +}; + +static void crypto_aegis256_aesni_process_ad( + struct aegis_state *state, struct scatterlist *sg_src, + unsigned int assoclen) +{ + struct scatter_walk walk; + struct aegis_block buf; + unsigned int pos = 0; + + scatterwalk_start(&walk, sg_src); + while (assoclen != 0) { + unsigned int size = scatterwalk_clamp(&walk, assoclen); + unsigned int left = size; + void *mapped = scatterwalk_map(&walk); + const u8 *src = (const u8 *)mapped; + + if (pos + size >= AEGIS256_BLOCK_SIZE) { + if (pos > 0) { + unsigned int fill = AEGIS256_BLOCK_SIZE - pos; + memcpy(buf.bytes + pos, src, fill); + crypto_aegis256_aesni_ad(state, + AEGIS256_BLOCK_SIZE, + buf.bytes); + pos = 0; + left -= fill; + src += fill; + } + + crypto_aegis256_aesni_ad(state, left, src); + + src += left & ~(AEGIS256_BLOCK_SIZE - 1); + left &= AEGIS256_BLOCK_SIZE - 1; + } + + memcpy(buf.bytes + pos, src, left); + pos += left; + assoclen -= size; + + scatterwalk_unmap(mapped); + scatterwalk_advance(&walk, size); + scatterwalk_done(&walk, 0, assoclen); + } + + if (pos > 0) { + memset(buf.bytes + pos, 0, AEGIS256_BLOCK_SIZE - pos); + crypto_aegis256_aesni_ad(state, AEGIS256_BLOCK_SIZE, buf.bytes); + } +} + +static void crypto_aegis256_aesni_process_crypt( + struct aegis_state *state, struct aead_request *req, + const struct aegis_crypt_ops *ops) +{ + struct skcipher_walk walk; + u8 *src, *dst; + unsigned int chunksize, base; + + ops->skcipher_walk_init(&walk, req, false); + + while (walk.nbytes) { + src = walk.src.virt.addr; + dst = walk.dst.virt.addr; + chunksize = walk.nbytes; + + ops->crypt_blocks(state, chunksize, src, dst); + + base = chunksize & ~(AEGIS256_BLOCK_SIZE - 1); + src += base; + dst += base; + chunksize &= AEGIS256_BLOCK_SIZE - 1; + + if (chunksize > 0) + ops->crypt_tail(state, chunksize, src, dst); + + skcipher_walk_done(&walk, 0); + } +} + +static struct aegis_ctx *crypto_aegis256_aesni_ctx(struct crypto_aead *aead) +{ + u8 *ctx = crypto_aead_ctx(aead); + ctx = PTR_ALIGN(ctx, __alignof__(struct aegis_ctx)); + return (void *)ctx; +} + +static int crypto_aegis256_aesni_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int keylen) +{ + struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(aead); + + if (keylen != AEGIS256_KEY_SIZE) { + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(ctx->key, key, AEGIS256_KEY_SIZE); + + return 0; +} + +static int crypto_aegis256_aesni_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + if (authsize > AEGIS256_MAX_AUTH_SIZE) + return -EINVAL; + if (authsize < AEGIS256_MIN_AUTH_SIZE) + return -EINVAL; + return 0; +} + +static void crypto_aegis256_aesni_crypt(struct aead_request *req, + struct aegis_block *tag_xor, + unsigned int cryptlen, + const struct aegis_crypt_ops *ops) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aegis_ctx *ctx = crypto_aegis256_aesni_ctx(tfm); + struct aegis_state state; + + kernel_fpu_begin(); + + crypto_aegis256_aesni_init(&state, ctx->key, req->iv); + crypto_aegis256_aesni_process_ad(&state, req->src, req->assoclen); + crypto_aegis256_aesni_process_crypt(&state, req, ops); + crypto_aegis256_aesni_final(&state, tag_xor, req->assoclen, cryptlen); + + kernel_fpu_end(); +} + +static int crypto_aegis256_aesni_encrypt(struct aead_request *req) +{ + static const struct aegis_crypt_ops OPS = { + .skcipher_walk_init = skcipher_walk_aead_encrypt, + .crypt_blocks = crypto_aegis256_aesni_enc, + .crypt_tail = crypto_aegis256_aesni_enc_tail, + }; + + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aegis_block tag = {}; + unsigned int authsize = crypto_aead_authsize(tfm); + unsigned int cryptlen = req->cryptlen; + + crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS); + + scatterwalk_map_and_copy(tag.bytes, req->dst, + req->assoclen + cryptlen, authsize, 1); + return 0; +} + +static int crypto_aegis256_aesni_decrypt(struct aead_request *req) +{ + static const struct aegis_block zeros = {}; + + static const struct aegis_crypt_ops OPS = { + .skcipher_walk_init = skcipher_walk_aead_decrypt, + .crypt_blocks = crypto_aegis256_aesni_dec, + .crypt_tail = crypto_aegis256_aesni_dec_tail, + }; + + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct aegis_block tag; + unsigned int authsize = crypto_aead_authsize(tfm); + unsigned int cryptlen = req->cryptlen - authsize; + + scatterwalk_map_and_copy(tag.bytes, req->src, + req->assoclen + cryptlen, authsize, 0); + + crypto_aegis256_aesni_crypt(req, &tag, cryptlen, &OPS); + + return crypto_memneq(tag.bytes, zeros.bytes, authsize) ? -EBADMSG : 0; +} + +static int crypto_aegis256_aesni_init_tfm(struct crypto_aead *aead) +{ + return 0; +} + +static void crypto_aegis256_aesni_exit_tfm(struct crypto_aead *aead) +{ +} + +static int cryptd_aegis256_aesni_setkey(struct crypto_aead *aead, + const u8 *key, unsigned int keylen) +{ + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); +} + +static int cryptd_aegis256_aesni_setauthsize(struct crypto_aead *aead, + unsigned int authsize) +{ + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); +} + +static int cryptd_aegis256_aesni_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + aead = &cryptd_tfm->base; + if (irq_fpu_usable() && (!in_atomic() || + !cryptd_aead_queued(cryptd_tfm))) + aead = cryptd_aead_child(cryptd_tfm); + + aead_request_set_tfm(req, aead); + + return crypto_aead_encrypt(req); +} + +static int cryptd_aegis256_aesni_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + aead = &cryptd_tfm->base; + if (irq_fpu_usable() && (!in_atomic() || + !cryptd_aead_queued(cryptd_tfm))) + aead = cryptd_aead_child(cryptd_tfm); + + aead_request_set_tfm(req, aead); + + return crypto_aead_decrypt(req); +} + +static int cryptd_aegis256_aesni_init_tfm(struct crypto_aead *aead) +{ + struct cryptd_aead *cryptd_tfm; + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + + cryptd_tfm = cryptd_alloc_aead("__aegis256-aesni", CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + + *ctx = cryptd_tfm; + crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); + return 0; +} + +static void cryptd_aegis256_aesni_exit_tfm(struct crypto_aead *aead) +{ + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + + cryptd_free_aead(*ctx); +} + +static struct aead_alg crypto_aegis256_aesni_alg[] = { + { + .setkey = crypto_aegis256_aesni_setkey, + .setauthsize = crypto_aegis256_aesni_setauthsize, + .encrypt = crypto_aegis256_aesni_encrypt, + .decrypt = crypto_aegis256_aesni_decrypt, + .init = crypto_aegis256_aesni_init_tfm, + .exit = crypto_aegis256_aesni_exit_tfm, + + .ivsize = AEGIS256_NONCE_SIZE, + .maxauthsize = AEGIS256_MAX_AUTH_SIZE, + .chunksize = AEGIS256_BLOCK_SIZE, + + .base = { + .cra_flags = CRYPTO_ALG_INTERNAL, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct aegis_ctx) + + __alignof__(struct aegis_ctx), + .cra_alignmask = 0, + + .cra_name = "__aegis256", + .cra_driver_name = "__aegis256-aesni", + + .cra_module = THIS_MODULE, + } + }, { + .setkey = cryptd_aegis256_aesni_setkey, + .setauthsize = cryptd_aegis256_aesni_setauthsize, + .encrypt = cryptd_aegis256_aesni_encrypt, + .decrypt = cryptd_aegis256_aesni_decrypt, + .init = cryptd_aegis256_aesni_init_tfm, + .exit = cryptd_aegis256_aesni_exit_tfm, + + .ivsize = AEGIS256_NONCE_SIZE, + .maxauthsize = AEGIS256_MAX_AUTH_SIZE, + .chunksize = AEGIS256_BLOCK_SIZE, + + .base = { + .cra_flags = CRYPTO_ALG_ASYNC, + .cra_blocksize = 1, + .cra_ctxsize = sizeof(struct cryptd_aead *), + .cra_alignmask = 0, + + .cra_priority = 400, + + .cra_name = "aegis256", + .cra_driver_name = "aegis256-aesni", + + .cra_module = THIS_MODULE, + } + } +}; + +static const struct x86_cpu_id aesni_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_AES), + X86_FEATURE_MATCH(X86_FEATURE_XMM2), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, aesni_cpu_id); + +static int __init crypto_aegis256_aesni_module_init(void) +{ + if (!x86_match_cpu(aesni_cpu_id)) + return -ENODEV; + + return crypto_register_aeads(crypto_aegis256_aesni_alg, + ARRAY_SIZE(crypto_aegis256_aesni_alg)); +} + +static void __exit crypto_aegis256_aesni_module_exit(void) +{ + crypto_unregister_aeads(crypto_aegis256_aesni_alg, + ARRAY_SIZE(crypto_aegis256_aesni_alg)); +} + +module_init(crypto_aegis256_aesni_module_init); +module_exit(crypto_aegis256_aesni_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); +MODULE_DESCRIPTION("AEGIS-256 AEAD algorithm -- AESNI+SSE2 implementation"); +MODULE_ALIAS_CRYPTO("aegis256"); +MODULE_ALIAS_CRYPTO("aegis256-aesni"); diff --git a/arch/x86/crypto/ghash-clmulni-intel_glue.c b/arch/x86/crypto/ghash-clmulni-intel_glue.c index 0420bab19efb..2ddbe3a1868b 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_glue.c +++ b/arch/x86/crypto/ghash-clmulni-intel_glue.c @@ -364,5 +364,5 @@ module_exit(ghash_pclmulqdqni_mod_exit); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("GHASH Message Digest Algorithm, " - "acclerated by PCLMULQDQ-NI"); + "accelerated by PCLMULQDQ-NI"); MODULE_ALIAS_CRYPTO("ghash"); diff --git a/arch/x86/crypto/morus1280-avx2-asm.S b/arch/x86/crypto/morus1280-avx2-asm.S new file mode 100644 index 000000000000..37d422e77931 --- /dev/null +++ b/arch/x86/crypto/morus1280-avx2-asm.S @@ -0,0 +1,621 @@ +/* + * AVX2 implementation of MORUS-1280 + * + * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> + * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/frame.h> + +#define SHUFFLE_MASK(i0, i1, i2, i3) \ + (i0 | (i1 << 2) | (i2 << 4) | (i3 << 6)) + +#define MASK1 SHUFFLE_MASK(3, 0, 1, 2) +#define MASK2 SHUFFLE_MASK(2, 3, 0, 1) +#define MASK3 SHUFFLE_MASK(1, 2, 3, 0) + +#define STATE0 %ymm0 +#define STATE0_LOW %xmm0 +#define STATE1 %ymm1 +#define STATE2 %ymm2 +#define STATE3 %ymm3 +#define STATE4 %ymm4 +#define KEY %ymm5 +#define MSG %ymm5 +#define MSG_LOW %xmm5 +#define T0 %ymm6 +#define T0_LOW %xmm6 +#define T1 %ymm7 + +.section .rodata.cst32.morus1280_const, "aM", @progbits, 32 +.align 32 +.Lmorus1280_const: + .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d + .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 + .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 + .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd + +.section .rodata.cst32.morus1280_counter, "aM", @progbits, 32 +.align 32 +.Lmorus1280_counter: + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 + .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f + +.text + +.macro morus1280_round s0, s1, s2, s3, s4, b, w + vpand \s1, \s2, T0 + vpxor T0, \s0, \s0 + vpxor \s3, \s0, \s0 + vpsllq $\b, \s0, T0 + vpsrlq $(64 - \b), \s0, \s0 + vpxor T0, \s0, \s0 + vpermq $\w, \s3, \s3 +.endm + +/* + * __morus1280_update: internal ABI + * input: + * STATE[0-4] - input state + * MSG - message block + * output: + * STATE[0-4] - output state + * changed: + * T0 + */ +__morus1280_update: + morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1 + vpxor MSG, STATE1, STATE1 + morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2 + vpxor MSG, STATE2, STATE2 + morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3 + vpxor MSG, STATE3, STATE3 + morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2 + vpxor MSG, STATE4, STATE4 + morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1 + ret +ENDPROC(__morus1280_update) + +/* + * __morus1280_update_zero: internal ABI + * input: + * STATE[0-4] - input state + * output: + * STATE[0-4] - output state + * changed: + * T0 + */ +__morus1280_update_zero: + morus1280_round STATE0, STATE1, STATE2, STATE3, STATE4, 13, MASK1 + morus1280_round STATE1, STATE2, STATE3, STATE4, STATE0, 46, MASK2 + morus1280_round STATE2, STATE3, STATE4, STATE0, STATE1, 38, MASK3 + morus1280_round STATE3, STATE4, STATE0, STATE1, STATE2, 7, MASK2 + morus1280_round STATE4, STATE0, STATE1, STATE2, STATE3, 4, MASK1 + ret +ENDPROC(__morus1280_update_zero) + +/* + * __load_partial: internal ABI + * input: + * %rsi - src + * %rcx - bytes + * output: + * MSG - message block + * changed: + * %r8 + * %r9 + */ +__load_partial: + xor %r9, %r9 + vpxor MSG, MSG, MSG + + mov %rcx, %r8 + and $0x1, %r8 + jz .Lld_partial_1 + + mov %rcx, %r8 + and $0x1E, %r8 + add %rsi, %r8 + mov (%r8), %r9b + +.Lld_partial_1: + mov %rcx, %r8 + and $0x2, %r8 + jz .Lld_partial_2 + + mov %rcx, %r8 + and $0x1C, %r8 + add %rsi, %r8 + shl $16, %r9 + mov (%r8), %r9w + +.Lld_partial_2: + mov %rcx, %r8 + and $0x4, %r8 + jz .Lld_partial_4 + + mov %rcx, %r8 + and $0x18, %r8 + add %rsi, %r8 + shl $32, %r9 + mov (%r8), %r8d + xor %r8, %r9 + +.Lld_partial_4: + movq %r9, MSG_LOW + + mov %rcx, %r8 + and $0x8, %r8 + jz .Lld_partial_8 + + mov %rcx, %r8 + and $0x10, %r8 + add %rsi, %r8 + pshufd $MASK2, MSG_LOW, MSG_LOW + pinsrq $0, (%r8), MSG_LOW + +.Lld_partial_8: + mov %rcx, %r8 + and $0x10, %r8 + jz .Lld_partial_16 + + vpermq $MASK2, MSG, MSG + movdqu (%rsi), MSG_LOW + +.Lld_partial_16: + ret +ENDPROC(__load_partial) + +/* + * __store_partial: internal ABI + * input: + * %rdx - dst + * %rcx - bytes + * output: + * T0 - message block + * changed: + * %r8 + * %r9 + * %r10 + */ +__store_partial: + mov %rcx, %r8 + mov %rdx, %r9 + + cmp $16, %r8 + jl .Lst_partial_16 + + movdqu T0_LOW, (%r9) + vpermq $MASK2, T0, T0 + + sub $16, %r8 + add $16, %r9 + +.Lst_partial_16: + movq T0_LOW, %r10 + + cmp $8, %r8 + jl .Lst_partial_8 + + mov %r10, (%r9) + pextrq $1, T0_LOW, %r10 + + sub $8, %r8 + add $8, %r9 + +.Lst_partial_8: + cmp $4, %r8 + jl .Lst_partial_4 + + mov %r10d, (%r9) + shr $32, %r10 + + sub $4, %r8 + add $4, %r9 + +.Lst_partial_4: + cmp $2, %r8 + jl .Lst_partial_2 + + mov %r10w, (%r9) + shr $16, %r10 + + sub $2, %r8 + add $2, %r9 + +.Lst_partial_2: + cmp $1, %r8 + jl .Lst_partial_1 + + mov %r10b, (%r9) + +.Lst_partial_1: + ret +ENDPROC(__store_partial) + +/* + * void crypto_morus1280_avx2_init(void *state, const void *key, + * const void *iv); + */ +ENTRY(crypto_morus1280_avx2_init) + FRAME_BEGIN + + /* load IV: */ + vpxor STATE0, STATE0, STATE0 + movdqu (%rdx), STATE0_LOW + /* load key: */ + vmovdqu (%rsi), KEY + vmovdqa KEY, STATE1 + /* load all ones: */ + vpcmpeqd STATE2, STATE2, STATE2 + /* load all zeros: */ + vpxor STATE3, STATE3, STATE3 + /* load the constant: */ + vmovdqa .Lmorus1280_const, STATE4 + + /* update 16 times with zero: */ + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + + /* xor-in the key again after updates: */ + vpxor KEY, STATE1, STATE1 + + /* store the state: */ + vmovdqu STATE0, (0 * 32)(%rdi) + vmovdqu STATE1, (1 * 32)(%rdi) + vmovdqu STATE2, (2 * 32)(%rdi) + vmovdqu STATE3, (3 * 32)(%rdi) + vmovdqu STATE4, (4 * 32)(%rdi) + + FRAME_END + ret +ENDPROC(crypto_morus1280_avx2_init) + +/* + * void crypto_morus1280_avx2_ad(void *state, const void *data, + * unsigned int length); + */ +ENTRY(crypto_morus1280_avx2_ad) + FRAME_BEGIN + + cmp $32, %rdx + jb .Lad_out + + /* load the state: */ + vmovdqu (0 * 32)(%rdi), STATE0 + vmovdqu (1 * 32)(%rdi), STATE1 + vmovdqu (2 * 32)(%rdi), STATE2 + vmovdqu (3 * 32)(%rdi), STATE3 + vmovdqu (4 * 32)(%rdi), STATE4 + + mov %rsi, %r8 + and $0x1F, %r8 + jnz .Lad_u_loop + +.align 4 +.Lad_a_loop: + vmovdqa (%rsi), MSG + call __morus1280_update + sub $32, %rdx + add $32, %rsi + cmp $32, %rdx + jge .Lad_a_loop + + jmp .Lad_cont +.align 4 +.Lad_u_loop: + vmovdqu (%rsi), MSG + call __morus1280_update + sub $32, %rdx + add $32, %rsi + cmp $32, %rdx + jge .Lad_u_loop + +.Lad_cont: + /* store the state: */ + vmovdqu STATE0, (0 * 32)(%rdi) + vmovdqu STATE1, (1 * 32)(%rdi) + vmovdqu STATE2, (2 * 32)(%rdi) + vmovdqu STATE3, (3 * 32)(%rdi) + vmovdqu STATE4, (4 * 32)(%rdi) + +.Lad_out: + FRAME_END + ret +ENDPROC(crypto_morus1280_avx2_ad) + +/* + * void crypto_morus1280_avx2_enc(void *state, const void *src, void *dst, + * unsigned int length); + */ +ENTRY(crypto_morus1280_avx2_enc) + FRAME_BEGIN + + cmp $32, %rcx + jb .Lenc_out + + /* load the state: */ + vmovdqu (0 * 32)(%rdi), STATE0 + vmovdqu (1 * 32)(%rdi), STATE1 + vmovdqu (2 * 32)(%rdi), STATE2 + vmovdqu (3 * 32)(%rdi), STATE3 + vmovdqu (4 * 32)(%rdi), STATE4 + + mov %rsi, %r8 + or %rdx, %r8 + and $0x1F, %r8 + jnz .Lenc_u_loop + +.align 4 +.Lenc_a_loop: + vmovdqa (%rsi), MSG + vmovdqa MSG, T0 + vpxor STATE0, T0, T0 + vpermq $MASK3, STATE1, T1 + vpxor T1, T0, T0 + vpand STATE2, STATE3, T1 + vpxor T1, T0, T0 + vmovdqa T0, (%rdx) + + call __morus1280_update + sub $32, %rcx + add $32, %rsi + add $32, %rdx + cmp $32, %rcx + jge .Lenc_a_loop + + jmp .Lenc_cont +.align 4 +.Lenc_u_loop: + vmovdqu (%rsi), MSG + vmovdqa MSG, T0 + vpxor STATE0, T0, T0 + vpermq $MASK3, STATE1, T1 + vpxor T1, T0, T0 + vpand STATE2, STATE3, T1 + vpxor T1, T0, T0 + vmovdqu T0, (%rdx) + + call __morus1280_update + sub $32, %rcx + add $32, %rsi + add $32, %rdx + cmp $32, %rcx + jge .Lenc_u_loop + +.Lenc_cont: + /* store the state: */ + vmovdqu STATE0, (0 * 32)(%rdi) + vmovdqu STATE1, (1 * 32)(%rdi) + vmovdqu STATE2, (2 * 32)(%rdi) + vmovdqu STATE3, (3 * 32)(%rdi) + vmovdqu STATE4, (4 * 32)(%rdi) + +.Lenc_out: + FRAME_END + ret +ENDPROC(crypto_morus1280_avx2_enc) + +/* + * void crypto_morus1280_avx2_enc_tail(void *state, const void *src, void *dst, + * unsigned int length); + */ +ENTRY(crypto_morus1280_avx2_enc_tail) + FRAME_BEGIN + + /* load the state: */ + vmovdqu (0 * 32)(%rdi), STATE0 + vmovdqu (1 * 32)(%rdi), STATE1 + vmovdqu (2 * 32)(%rdi), STATE2 + vmovdqu (3 * 32)(%rdi), STATE3 + vmovdqu (4 * 32)(%rdi), STATE4 + + /* encrypt message: */ + call __load_partial + + vmovdqa MSG, T0 + vpxor STATE0, T0, T0 + vpermq $MASK3, STATE1, T1 + vpxor T1, T0, T0 + vpand STATE2, STATE3, T1 + vpxor T1, T0, T0 + + call __store_partial + + call __morus1280_update + + /* store the state: */ + vmovdqu STATE0, (0 * 32)(%rdi) + vmovdqu STATE1, (1 * 32)(%rdi) + vmovdqu STATE2, (2 * 32)(%rdi) + vmovdqu STATE3, (3 * 32)(%rdi) + vmovdqu STATE4, (4 * 32)(%rdi) + + FRAME_END +ENDPROC(crypto_morus1280_avx2_enc_tail) + +/* + * void crypto_morus1280_avx2_dec(void *state, const void *src, void *dst, + * unsigned int length); + */ +ENTRY(crypto_morus1280_avx2_dec) + FRAME_BEGIN + + cmp $32, %rcx + jb .Ldec_out + + /* load the state: */ + vmovdqu (0 * 32)(%rdi), STATE0 + vmovdqu (1 * 32)(%rdi), STATE1 + vmovdqu (2 * 32)(%rdi), STATE2 + vmovdqu (3 * 32)(%rdi), STATE3 + vmovdqu (4 * 32)(%rdi), STATE4 + + mov %rsi, %r8 + or %rdx, %r8 + and $0x1F, %r8 + jnz .Ldec_u_loop + +.align 4 +.Ldec_a_loop: + vmovdqa (%rsi), MSG + vpxor STATE0, MSG, MSG + vpermq $MASK3, STATE1, T0 + vpxor T0, MSG, MSG + vpand STATE2, STATE3, T0 + vpxor T0, MSG, MSG + vmovdqa MSG, (%rdx) + + call __morus1280_update + sub $32, %rcx + add $32, %rsi + add $32, %rdx + cmp $32, %rcx + jge .Ldec_a_loop + + jmp .Ldec_cont +.align 4 +.Ldec_u_loop: + vmovdqu (%rsi), MSG + vpxor STATE0, MSG, MSG + vpermq $MASK3, STATE1, T0 + vpxor T0, MSG, MSG + vpand STATE2, STATE3, T0 + vpxor T0, MSG, MSG + vmovdqu MSG, (%rdx) + + call __morus1280_update + sub $32, %rcx + add $32, %rsi + add $32, %rdx + cmp $32, %rcx + jge .Ldec_u_loop + +.Ldec_cont: + /* store the state: */ + vmovdqu STATE0, (0 * 32)(%rdi) + vmovdqu STATE1, (1 * 32)(%rdi) + vmovdqu STATE2, (2 * 32)(%rdi) + vmovdqu STATE3, (3 * 32)(%rdi) + vmovdqu STATE4, (4 * 32)(%rdi) + +.Ldec_out: + FRAME_END + ret +ENDPROC(crypto_morus1280_avx2_dec) + +/* + * void crypto_morus1280_avx2_dec_tail(void *state, const void *src, void *dst, + * unsigned int length); + */ +ENTRY(crypto_morus1280_avx2_dec_tail) + FRAME_BEGIN + + /* load the state: */ + vmovdqu (0 * 32)(%rdi), STATE0 + vmovdqu (1 * 32)(%rdi), STATE1 + vmovdqu (2 * 32)(%rdi), STATE2 + vmovdqu (3 * 32)(%rdi), STATE3 + vmovdqu (4 * 32)(%rdi), STATE4 + + /* decrypt message: */ + call __load_partial + + vpxor STATE0, MSG, MSG + vpermq $MASK3, STATE1, T0 + vpxor T0, MSG, MSG + vpand STATE2, STATE3, T0 + vpxor T0, MSG, MSG + vmovdqa MSG, T0 + + call __store_partial + + /* mask with byte count: */ + movq %rcx, T0_LOW + vpbroadcastb T0_LOW, T0 + vmovdqa .Lmorus1280_counter, T1 + vpcmpgtb T1, T0, T0 + vpand T0, MSG, MSG + + call __morus1280_update + + /* store the state: */ + vmovdqu STATE0, (0 * 32)(%rdi) + vmovdqu STATE1, (1 * 32)(%rdi) + vmovdqu STATE2, (2 * 32)(%rdi) + vmovdqu STATE3, (3 * 32)(%rdi) + vmovdqu STATE4, (4 * 32)(%rdi) + + FRAME_END + ret +ENDPROC(crypto_morus1280_avx2_dec_tail) + +/* + * void crypto_morus1280_avx2_final(void *state, void *tag_xor, + * u64 assoclen, u64 cryptlen); + */ +ENTRY(crypto_morus1280_avx2_final) + FRAME_BEGIN + + /* load the state: */ + vmovdqu (0 * 32)(%rdi), STATE0 + vmovdqu (1 * 32)(%rdi), STATE1 + vmovdqu (2 * 32)(%rdi), STATE2 + vmovdqu (3 * 32)(%rdi), STATE3 + vmovdqu (4 * 32)(%rdi), STATE4 + + /* xor state[0] into state[4]: */ + vpxor STATE0, STATE4, STATE4 + + /* prepare length block: */ + vpxor MSG, MSG, MSG + vpinsrq $0, %rdx, MSG_LOW, MSG_LOW + vpinsrq $1, %rcx, MSG_LOW, MSG_LOW + vpsllq $3, MSG, MSG /* multiply by 8 (to get bit count) */ + + /* update state: */ + call __morus1280_update + call __morus1280_update + call __morus1280_update + call __morus1280_update + call __morus1280_update + call __morus1280_update + call __morus1280_update + call __morus1280_update + call __morus1280_update + call __morus1280_update + + /* xor tag: */ + vmovdqu (%rsi), MSG + + vpxor STATE0, MSG, MSG + vpermq $MASK3, STATE1, T0 + vpxor T0, MSG, MSG + vpand STATE2, STATE3, T0 + vpxor T0, MSG, MSG + vmovdqu MSG, (%rsi) + + FRAME_END + ret +ENDPROC(crypto_morus1280_avx2_final) diff --git a/arch/x86/crypto/morus1280-avx2-glue.c b/arch/x86/crypto/morus1280-avx2-glue.c new file mode 100644 index 000000000000..f111f36d26dc --- /dev/null +++ b/arch/x86/crypto/morus1280-avx2-glue.c @@ -0,0 +1,68 @@ +/* + * The MORUS-1280 Authenticated-Encryption Algorithm + * Glue for AVX2 implementation + * + * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com> + * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <crypto/internal/aead.h> +#include <crypto/morus1280_glue.h> +#include <linux/module.h> +#include <asm/fpu/api.h> +#include <asm/cpu_device_id.h> + +asmlinkage void crypto_morus1280_avx2_init(void *state, const void *key, + const void *iv); +asmlinkage void crypto_morus1280_avx2_ad(void *state, const void *data, + unsigned int length); + +asmlinkage void crypto_morus1280_avx2_enc(void *state, const void *src, + void *dst, unsigned int length); +asmlinkage void crypto_morus1280_avx2_dec(void *state, const void *src, + void *dst, unsigned int length); + +asmlinkage void crypto_morus1280_avx2_enc_tail(void *state, const void *src, + void *dst, unsigned int length); +asmlinkage void crypto_morus1280_avx2_dec_tail(void *state, const void *src, + void *dst, unsigned int length); + +asmlinkage void crypto_morus1280_avx2_final(void *state, void *tag_xor, + u64 assoclen, u64 cryptlen); + +MORUS1280_DECLARE_ALGS(avx2, "morus1280-avx2", 400); + +static const struct x86_cpu_id avx2_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_AVX2), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, avx2_cpu_id); + +static int __init crypto_morus1280_avx2_module_init(void) +{ + if (!x86_match_cpu(avx2_cpu_id)) + return -ENODEV; + + return crypto_register_aeads(crypto_morus1280_avx2_algs, + ARRAY_SIZE(crypto_morus1280_avx2_algs)); +} + +static void __exit crypto_morus1280_avx2_module_exit(void) +{ + crypto_unregister_aeads(crypto_morus1280_avx2_algs, + ARRAY_SIZE(crypto_morus1280_avx2_algs)); +} + +module_init(crypto_morus1280_avx2_module_init); +module_exit(crypto_morus1280_avx2_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); +MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- AVX2 implementation"); +MODULE_ALIAS_CRYPTO("morus1280"); +MODULE_ALIAS_CRYPTO("morus1280-avx2"); diff --git a/arch/x86/crypto/morus1280-sse2-asm.S b/arch/x86/crypto/morus1280-sse2-asm.S new file mode 100644 index 000000000000..1fe637c7be9d --- /dev/null +++ b/arch/x86/crypto/morus1280-sse2-asm.S @@ -0,0 +1,895 @@ +/* + * SSE2 implementation of MORUS-1280 + * + * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> + * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/frame.h> + +#define SHUFFLE_MASK(i0, i1, i2, i3) \ + (i0 | (i1 << 2) | (i2 << 4) | (i3 << 6)) + +#define MASK2 SHUFFLE_MASK(2, 3, 0, 1) + +#define STATE0_LO %xmm0 +#define STATE0_HI %xmm1 +#define STATE1_LO %xmm2 +#define STATE1_HI %xmm3 +#define STATE2_LO %xmm4 +#define STATE2_HI %xmm5 +#define STATE3_LO %xmm6 +#define STATE3_HI %xmm7 +#define STATE4_LO %xmm8 +#define STATE4_HI %xmm9 +#define KEY_LO %xmm10 +#define KEY_HI %xmm11 +#define MSG_LO %xmm10 +#define MSG_HI %xmm11 +#define T0_LO %xmm12 +#define T0_HI %xmm13 +#define T1_LO %xmm14 +#define T1_HI %xmm15 + +.section .rodata.cst16.morus640_const, "aM", @progbits, 16 +.align 16 +.Lmorus640_const_0: + .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d + .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 +.Lmorus640_const_1: + .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 + .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd + +.section .rodata.cst16.morus640_counter, "aM", @progbits, 16 +.align 16 +.Lmorus640_counter_0: + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f +.Lmorus640_counter_1: + .byte 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 + .byte 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f + +.text + +.macro rol1 hi, lo + /* + * HI_1 | HI_0 || LO_1 | LO_0 + * ==> + * HI_0 | HI_1 || LO_1 | LO_0 + * ==> + * HI_0 | LO_1 || LO_0 | HI_1 + */ + pshufd $MASK2, \hi, \hi + movdqa \hi, T0_LO + punpcklqdq \lo, T0_LO + punpckhqdq \hi, \lo + movdqa \lo, \hi + movdqa T0_LO, \lo +.endm + +.macro rol2 hi, lo + movdqa \lo, T0_LO + movdqa \hi, \lo + movdqa T0_LO, \hi +.endm + +.macro rol3 hi, lo + /* + * HI_1 | HI_0 || LO_1 | LO_0 + * ==> + * HI_0 | HI_1 || LO_1 | LO_0 + * ==> + * LO_0 | HI_1 || HI_0 | LO_1 + */ + pshufd $MASK2, \hi, \hi + movdqa \lo, T0_LO + punpckhqdq \hi, T0_LO + punpcklqdq \lo, \hi + movdqa T0_LO, \lo +.endm + +.macro morus1280_round s0_l, s0_h, s1_l, s1_h, s2_l, s2_h, s3_l, s3_h, s4_l, s4_h, b, w + movdqa \s1_l, T0_LO + pand \s2_l, T0_LO + pxor T0_LO, \s0_l + + movdqa \s1_h, T0_LO + pand \s2_h, T0_LO + pxor T0_LO, \s0_h + + pxor \s3_l, \s0_l + pxor \s3_h, \s0_h + + movdqa \s0_l, T0_LO + psllq $\b, T0_LO + psrlq $(64 - \b), \s0_l + pxor T0_LO, \s0_l + + movdqa \s0_h, T0_LO + psllq $\b, T0_LO + psrlq $(64 - \b), \s0_h + pxor T0_LO, \s0_h + + \w \s3_h, \s3_l +.endm + +/* + * __morus1280_update: internal ABI + * input: + * STATE[0-4] - input state + * MSG - message block + * output: + * STATE[0-4] - output state + * changed: + * T0 + */ +__morus1280_update: + morus1280_round \ + STATE0_LO, STATE0_HI, \ + STATE1_LO, STATE1_HI, \ + STATE2_LO, STATE2_HI, \ + STATE3_LO, STATE3_HI, \ + STATE4_LO, STATE4_HI, \ + 13, rol1 + pxor MSG_LO, STATE1_LO + pxor MSG_HI, STATE1_HI + morus1280_round \ + STATE1_LO, STATE1_HI, \ + STATE2_LO, STATE2_HI, \ + STATE3_LO, STATE3_HI, \ + STATE4_LO, STATE4_HI, \ + STATE0_LO, STATE0_HI, \ + 46, rol2 + pxor MSG_LO, STATE2_LO + pxor MSG_HI, STATE2_HI + morus1280_round \ + STATE2_LO, STATE2_HI, \ + STATE3_LO, STATE3_HI, \ + STATE4_LO, STATE4_HI, \ + STATE0_LO, STATE0_HI, \ + STATE1_LO, STATE1_HI, \ + 38, rol3 + pxor MSG_LO, STATE3_LO + pxor MSG_HI, STATE3_HI + morus1280_round \ + STATE3_LO, STATE3_HI, \ + STATE4_LO, STATE4_HI, \ + STATE0_LO, STATE0_HI, \ + STATE1_LO, STATE1_HI, \ + STATE2_LO, STATE2_HI, \ + 7, rol2 + pxor MSG_LO, STATE4_LO + pxor MSG_HI, STATE4_HI + morus1280_round \ + STATE4_LO, STATE4_HI, \ + STATE0_LO, STATE0_HI, \ + STATE1_LO, STATE1_HI, \ + STATE2_LO, STATE2_HI, \ + STATE3_LO, STATE3_HI, \ + 4, rol1 + ret +ENDPROC(__morus1280_update) + +/* + * __morus1280_update_zero: internal ABI + * input: + * STATE[0-4] - input state + * output: + * STATE[0-4] - output state + * changed: + * T0 + */ +__morus1280_update_zero: + morus1280_round \ + STATE0_LO, STATE0_HI, \ + STATE1_LO, STATE1_HI, \ + STATE2_LO, STATE2_HI, \ + STATE3_LO, STATE3_HI, \ + STATE4_LO, STATE4_HI, \ + 13, rol1 + morus1280_round \ + STATE1_LO, STATE1_HI, \ + STATE2_LO, STATE2_HI, \ + STATE3_LO, STATE3_HI, \ + STATE4_LO, STATE4_HI, \ + STATE0_LO, STATE0_HI, \ + 46, rol2 + morus1280_round \ + STATE2_LO, STATE2_HI, \ + STATE3_LO, STATE3_HI, \ + STATE4_LO, STATE4_HI, \ + STATE0_LO, STATE0_HI, \ + STATE1_LO, STATE1_HI, \ + 38, rol3 + morus1280_round \ + STATE3_LO, STATE3_HI, \ + STATE4_LO, STATE4_HI, \ + STATE0_LO, STATE0_HI, \ + STATE1_LO, STATE1_HI, \ + STATE2_LO, STATE2_HI, \ + 7, rol2 + morus1280_round \ + STATE4_LO, STATE4_HI, \ + STATE0_LO, STATE0_HI, \ + STATE1_LO, STATE1_HI, \ + STATE2_LO, STATE2_HI, \ + STATE3_LO, STATE3_HI, \ + 4, rol1 + ret +ENDPROC(__morus1280_update_zero) + +/* + * __load_partial: internal ABI + * input: + * %rsi - src + * %rcx - bytes + * output: + * MSG - message block + * changed: + * %r8 + * %r9 + */ +__load_partial: + xor %r9, %r9 + pxor MSG_LO, MSG_LO + pxor MSG_HI, MSG_HI + + mov %rcx, %r8 + and $0x1, %r8 + jz .Lld_partial_1 + + mov %rcx, %r8 + and $0x1E, %r8 + add %rsi, %r8 + mov (%r8), %r9b + +.Lld_partial_1: + mov %rcx, %r8 + and $0x2, %r8 + jz .Lld_partial_2 + + mov %rcx, %r8 + and $0x1C, %r8 + add %rsi, %r8 + shl $16, %r9 + mov (%r8), %r9w + +.Lld_partial_2: + mov %rcx, %r8 + and $0x4, %r8 + jz .Lld_partial_4 + + mov %rcx, %r8 + and $0x18, %r8 + add %rsi, %r8 + shl $32, %r9 + mov (%r8), %r8d + xor %r8, %r9 + +.Lld_partial_4: + movq %r9, MSG_LO + + mov %rcx, %r8 + and $0x8, %r8 + jz .Lld_partial_8 + + mov %rcx, %r8 + and $0x10, %r8 + add %rsi, %r8 + pslldq $8, MSG_LO + movq (%r8), T0_LO + pxor T0_LO, MSG_LO + +.Lld_partial_8: + mov %rcx, %r8 + and $0x10, %r8 + jz .Lld_partial_16 + + movdqa MSG_LO, MSG_HI + movdqu (%rsi), MSG_LO + +.Lld_partial_16: + ret +ENDPROC(__load_partial) + +/* + * __store_partial: internal ABI + * input: + * %rdx - dst + * %rcx - bytes + * output: + * T0 - message block + * changed: + * %r8 + * %r9 + * %r10 + */ +__store_partial: + mov %rcx, %r8 + mov %rdx, %r9 + + cmp $16, %r8 + jl .Lst_partial_16 + + movdqu T0_LO, (%r9) + movdqa T0_HI, T0_LO + + sub $16, %r8 + add $16, %r9 + +.Lst_partial_16: + movq T0_LO, %r10 + + cmp $8, %r8 + jl .Lst_partial_8 + + mov %r10, (%r9) + psrldq $8, T0_LO + movq T0_LO, %r10 + + sub $8, %r8 + add $8, %r9 + +.Lst_partial_8: + cmp $4, %r8 + jl .Lst_partial_4 + + mov %r10d, (%r9) + shr $32, %r10 + + sub $4, %r8 + add $4, %r9 + +.Lst_partial_4: + cmp $2, %r8 + jl .Lst_partial_2 + + mov %r10w, (%r9) + shr $16, %r10 + + sub $2, %r8 + add $2, %r9 + +.Lst_partial_2: + cmp $1, %r8 + jl .Lst_partial_1 + + mov %r10b, (%r9) + +.Lst_partial_1: + ret +ENDPROC(__store_partial) + +/* + * void crypto_morus1280_sse2_init(void *state, const void *key, + * const void *iv); + */ +ENTRY(crypto_morus1280_sse2_init) + FRAME_BEGIN + + /* load IV: */ + pxor STATE0_HI, STATE0_HI + movdqu (%rdx), STATE0_LO + /* load key: */ + movdqu 0(%rsi), KEY_LO + movdqu 16(%rsi), KEY_HI + movdqa KEY_LO, STATE1_LO + movdqa KEY_HI, STATE1_HI + /* load all ones: */ + pcmpeqd STATE2_LO, STATE2_LO + pcmpeqd STATE2_HI, STATE2_HI + /* load all zeros: */ + pxor STATE3_LO, STATE3_LO + pxor STATE3_HI, STATE3_HI + /* load the constant: */ + movdqa .Lmorus640_const_0, STATE4_LO + movdqa .Lmorus640_const_1, STATE4_HI + + /* update 16 times with zero: */ + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + call __morus1280_update_zero + + /* xor-in the key again after updates: */ + pxor KEY_LO, STATE1_LO + pxor KEY_HI, STATE1_HI + + /* store the state: */ + movdqu STATE0_LO, (0 * 16)(%rdi) + movdqu STATE0_HI, (1 * 16)(%rdi) + movdqu STATE1_LO, (2 * 16)(%rdi) + movdqu STATE1_HI, (3 * 16)(%rdi) + movdqu STATE2_LO, (4 * 16)(%rdi) + movdqu STATE2_HI, (5 * 16)(%rdi) + movdqu STATE3_LO, (6 * 16)(%rdi) + movdqu STATE3_HI, (7 * 16)(%rdi) + movdqu STATE4_LO, (8 * 16)(%rdi) + movdqu STATE4_HI, (9 * 16)(%rdi) + + FRAME_END + ret +ENDPROC(crypto_morus1280_sse2_init) + +/* + * void crypto_morus1280_sse2_ad(void *state, const void *data, + * unsigned int length); + */ +ENTRY(crypto_morus1280_sse2_ad) + FRAME_BEGIN + + cmp $32, %rdx + jb .Lad_out + + /* load the state: */ + movdqu (0 * 16)(%rdi), STATE0_LO + movdqu (1 * 16)(%rdi), STATE0_HI + movdqu (2 * 16)(%rdi), STATE1_LO + movdqu (3 * 16)(%rdi), STATE1_HI + movdqu (4 * 16)(%rdi), STATE2_LO + movdqu (5 * 16)(%rdi), STATE2_HI + movdqu (6 * 16)(%rdi), STATE3_LO + movdqu (7 * 16)(%rdi), STATE3_HI + movdqu (8 * 16)(%rdi), STATE4_LO + movdqu (9 * 16)(%rdi), STATE4_HI + + mov %rsi, %r8 + and $0xF, %r8 + jnz .Lad_u_loop + +.align 4 +.Lad_a_loop: + movdqa 0(%rsi), MSG_LO + movdqa 16(%rsi), MSG_HI + call __morus1280_update + sub $32, %rdx + add $32, %rsi + cmp $32, %rdx + jge .Lad_a_loop + + jmp .Lad_cont +.align 4 +.Lad_u_loop: + movdqu 0(%rsi), MSG_LO + movdqu 16(%rsi), MSG_HI + call __morus1280_update + sub $32, %rdx + add $32, %rsi + cmp $32, %rdx + jge .Lad_u_loop + +.Lad_cont: + /* store the state: */ + movdqu STATE0_LO, (0 * 16)(%rdi) + movdqu STATE0_HI, (1 * 16)(%rdi) + movdqu STATE1_LO, (2 * 16)(%rdi) + movdqu STATE1_HI, (3 * 16)(%rdi) + movdqu STATE2_LO, (4 * 16)(%rdi) + movdqu STATE2_HI, (5 * 16)(%rdi) + movdqu STATE3_LO, (6 * 16)(%rdi) + movdqu STATE3_HI, (7 * 16)(%rdi) + movdqu STATE4_LO, (8 * 16)(%rdi) + movdqu STATE4_HI, (9 * 16)(%rdi) + +.Lad_out: + FRAME_END + ret +ENDPROC(crypto_morus1280_sse2_ad) + +/* + * void crypto_morus1280_sse2_enc(void *state, const void *src, void *dst, + * unsigned int length); + */ +ENTRY(crypto_morus1280_sse2_enc) + FRAME_BEGIN + + cmp $32, %rcx + jb .Lenc_out + + /* load the state: */ + movdqu (0 * 16)(%rdi), STATE0_LO + movdqu (1 * 16)(%rdi), STATE0_HI + movdqu (2 * 16)(%rdi), STATE1_LO + movdqu (3 * 16)(%rdi), STATE1_HI + movdqu (4 * 16)(%rdi), STATE2_LO + movdqu (5 * 16)(%rdi), STATE2_HI + movdqu (6 * 16)(%rdi), STATE3_LO + movdqu (7 * 16)(%rdi), STATE3_HI + movdqu (8 * 16)(%rdi), STATE4_LO + movdqu (9 * 16)(%rdi), STATE4_HI + + mov %rsi, %r8 + or %rdx, %r8 + and $0xF, %r8 + jnz .Lenc_u_loop + +.align 4 +.Lenc_a_loop: + movdqa 0(%rsi), MSG_LO + movdqa 16(%rsi), MSG_HI + movdqa STATE1_LO, T1_LO + movdqa STATE1_HI, T1_HI + rol3 T1_HI, T1_LO + movdqa MSG_LO, T0_LO + movdqa MSG_HI, T0_HI + pxor T1_LO, T0_LO + pxor T1_HI, T0_HI + pxor STATE0_LO, T0_LO + pxor STATE0_HI, T0_HI + movdqa STATE2_LO, T1_LO + movdqa STATE2_HI, T1_HI + pand STATE3_LO, T1_LO + pand STATE3_HI, T1_HI + pxor T1_LO, T0_LO + pxor T1_HI, T0_HI + movdqa T0_LO, 0(%rdx) + movdqa T0_HI, 16(%rdx) + + call __morus1280_update + sub $32, %rcx + add $32, %rsi + add $32, %rdx + cmp $32, %rcx + jge .Lenc_a_loop + + jmp .Lenc_cont +.align 4 +.Lenc_u_loop: + movdqu 0(%rsi), MSG_LO + movdqu 16(%rsi), MSG_HI + movdqa STATE1_LO, T1_LO + movdqa STATE1_HI, T1_HI + rol3 T1_HI, T1_LO + movdqa MSG_LO, T0_LO + movdqa MSG_HI, T0_HI + pxor T1_LO, T0_LO + pxor T1_HI, T0_HI + pxor STATE0_LO, T0_LO + pxor STATE0_HI, T0_HI + movdqa STATE2_LO, T1_LO + movdqa STATE2_HI, T1_HI + pand STATE3_LO, T1_LO + pand STATE3_HI, T1_HI + pxor T1_LO, T0_LO + pxor T1_HI, T0_HI + movdqu T0_LO, 0(%rdx) + movdqu T0_HI, 16(%rdx) + + call __morus1280_update + sub $32, %rcx + add $32, %rsi + add $32, %rdx + cmp $32, %rcx + jge .Lenc_u_loop + +.Lenc_cont: + /* store the state: */ + movdqu STATE0_LO, (0 * 16)(%rdi) + movdqu STATE0_HI, (1 * 16)(%rdi) + movdqu STATE1_LO, (2 * 16)(%rdi) + movdqu STATE1_HI, (3 * 16)(%rdi) + movdqu STATE2_LO, (4 * 16)(%rdi) + movdqu STATE2_HI, (5 * 16)(%rdi) + movdqu STATE3_LO, (6 * 16)(%rdi) + movdqu STATE3_HI, (7 * 16)(%rdi) + movdqu STATE4_LO, (8 * 16)(%rdi) + movdqu STATE4_HI, (9 * 16)(%rdi) + +.Lenc_out: + FRAME_END + ret +ENDPROC(crypto_morus1280_sse2_enc) + +/* + * void crypto_morus1280_sse2_enc_tail(void *state, const void *src, void *dst, + * unsigned int length); + */ +ENTRY(crypto_morus1280_sse2_enc_tail) + FRAME_BEGIN + + /* load the state: */ + movdqu (0 * 16)(%rdi), STATE0_LO + movdqu (1 * 16)(%rdi), STATE0_HI + movdqu (2 * 16)(%rdi), STATE1_LO + movdqu (3 * 16)(%rdi), STATE1_HI + movdqu (4 * 16)(%rdi), STATE2_LO + movdqu (5 * 16)(%rdi), STATE2_HI + movdqu (6 * 16)(%rdi), STATE3_LO + movdqu (7 * 16)(%rdi), STATE3_HI + movdqu (8 * 16)(%rdi), STATE4_LO + movdqu (9 * 16)(%rdi), STATE4_HI + + /* encrypt message: */ + call __load_partial + + movdqa STATE1_LO, T1_LO + movdqa STATE1_HI, T1_HI + rol3 T1_HI, T1_LO + movdqa MSG_LO, T0_LO + movdqa MSG_HI, T0_HI + pxor T1_LO, T0_LO + pxor T1_HI, T0_HI + pxor STATE0_LO, T0_LO + pxor STATE0_HI, T0_HI + movdqa STATE2_LO, T1_LO + movdqa STATE2_HI, T1_HI + pand STATE3_LO, T1_LO + pand STATE3_HI, T1_HI + pxor T1_LO, T0_LO + pxor T1_HI, T0_HI + + call __store_partial + + call __morus1280_update + + /* store the state: */ + movdqu STATE0_LO, (0 * 16)(%rdi) + movdqu STATE0_HI, (1 * 16)(%rdi) + movdqu STATE1_LO, (2 * 16)(%rdi) + movdqu STATE1_HI, (3 * 16)(%rdi) + movdqu STATE2_LO, (4 * 16)(%rdi) + movdqu STATE2_HI, (5 * 16)(%rdi) + movdqu STATE3_LO, (6 * 16)(%rdi) + movdqu STATE3_HI, (7 * 16)(%rdi) + movdqu STATE4_LO, (8 * 16)(%rdi) + movdqu STATE4_HI, (9 * 16)(%rdi) + + FRAME_END +ENDPROC(crypto_morus1280_sse2_enc_tail) + +/* + * void crypto_morus1280_sse2_dec(void *state, const void *src, void *dst, + * unsigned int length); + */ +ENTRY(crypto_morus1280_sse2_dec) + FRAME_BEGIN + + cmp $32, %rcx + jb .Ldec_out + + /* load the state: */ + movdqu (0 * 16)(%rdi), STATE0_LO + movdqu (1 * 16)(%rdi), STATE0_HI + movdqu (2 * 16)(%rdi), STATE1_LO + movdqu (3 * 16)(%rdi), STATE1_HI + movdqu (4 * 16)(%rdi), STATE2_LO + movdqu (5 * 16)(%rdi), STATE2_HI + movdqu (6 * 16)(%rdi), STATE3_LO + movdqu (7 * 16)(%rdi), STATE3_HI + movdqu (8 * 16)(%rdi), STATE4_LO + movdqu (9 * 16)(%rdi), STATE4_HI + + mov %rsi, %r8 + or %rdx, %r8 + and $0xF, %r8 + jnz .Ldec_u_loop + +.align 4 +.Ldec_a_loop: + movdqa 0(%rsi), MSG_LO + movdqa 16(%rsi), MSG_HI + pxor STATE0_LO, MSG_LO + pxor STATE0_HI, MSG_HI + movdqa STATE1_LO, T1_LO + movdqa STATE1_HI, T1_HI + rol3 T1_HI, T1_LO + pxor T1_LO, MSG_LO + pxor T1_HI, MSG_HI + movdqa STATE2_LO, T1_LO + movdqa STATE2_HI, T1_HI + pand STATE3_LO, T1_LO + pand STATE3_HI, T1_HI + pxor T1_LO, MSG_LO + pxor T1_HI, MSG_HI + movdqa MSG_LO, 0(%rdx) + movdqa MSG_HI, 16(%rdx) + + call __morus1280_update + sub $32, %rcx + add $32, %rsi + add $32, %rdx + cmp $32, %rcx + jge .Ldec_a_loop + + jmp .Ldec_cont +.align 4 +.Ldec_u_loop: + movdqu 0(%rsi), MSG_LO + movdqu 16(%rsi), MSG_HI + pxor STATE0_LO, MSG_LO + pxor STATE0_HI, MSG_HI + movdqa STATE1_LO, T1_LO + movdqa STATE1_HI, T1_HI + rol3 T1_HI, T1_LO + pxor T1_LO, MSG_LO + pxor T1_HI, MSG_HI + movdqa STATE2_LO, T1_LO + movdqa STATE2_HI, T1_HI + pand STATE3_LO, T1_LO + pand STATE3_HI, T1_HI + pxor T1_LO, MSG_LO + pxor T1_HI, MSG_HI + movdqu MSG_LO, 0(%rdx) + movdqu MSG_HI, 16(%rdx) + + call __morus1280_update + sub $32, %rcx + add $32, %rsi + add $32, %rdx + cmp $32, %rcx + jge .Ldec_u_loop + +.Ldec_cont: + /* store the state: */ + movdqu STATE0_LO, (0 * 16)(%rdi) + movdqu STATE0_HI, (1 * 16)(%rdi) + movdqu STATE1_LO, (2 * 16)(%rdi) + movdqu STATE1_HI, (3 * 16)(%rdi) + movdqu STATE2_LO, (4 * 16)(%rdi) + movdqu STATE2_HI, (5 * 16)(%rdi) + movdqu STATE3_LO, (6 * 16)(%rdi) + movdqu STATE3_HI, (7 * 16)(%rdi) + movdqu STATE4_LO, (8 * 16)(%rdi) + movdqu STATE4_HI, (9 * 16)(%rdi) + +.Ldec_out: + FRAME_END + ret +ENDPROC(crypto_morus1280_sse2_dec) + +/* + * void crypto_morus1280_sse2_dec_tail(void *state, const void *src, void *dst, + * unsigned int length); + */ +ENTRY(crypto_morus1280_sse2_dec_tail) + FRAME_BEGIN + + /* load the state: */ + movdqu (0 * 16)(%rdi), STATE0_LO + movdqu (1 * 16)(%rdi), STATE0_HI + movdqu (2 * 16)(%rdi), STATE1_LO + movdqu (3 * 16)(%rdi), STATE1_HI + movdqu (4 * 16)(%rdi), STATE2_LO + movdqu (5 * 16)(%rdi), STATE2_HI + movdqu (6 * 16)(%rdi), STATE3_LO + movdqu (7 * 16)(%rdi), STATE3_HI + movdqu (8 * 16)(%rdi), STATE4_LO + movdqu (9 * 16)(%rdi), STATE4_HI + + /* decrypt message: */ + call __load_partial + + pxor STATE0_LO, MSG_LO + pxor STATE0_HI, MSG_HI + movdqa STATE1_LO, T1_LO + movdqa STATE1_HI, T1_HI + rol3 T1_HI, T1_LO + pxor T1_LO, MSG_LO + pxor T1_HI, MSG_HI + movdqa STATE2_LO, T1_LO + movdqa STATE2_HI, T1_HI + pand STATE3_LO, T1_LO + pand STATE3_HI, T1_HI + pxor T1_LO, MSG_LO + pxor T1_HI, MSG_HI + movdqa MSG_LO, T0_LO + movdqa MSG_HI, T0_HI + + call __store_partial + + /* mask with byte count: */ + movq %rcx, T0_LO + punpcklbw T0_LO, T0_LO + punpcklbw T0_LO, T0_LO + punpcklbw T0_LO, T0_LO + punpcklbw T0_LO, T0_LO + movdqa T0_LO, T0_HI + movdqa .Lmorus640_counter_0, T1_LO + movdqa .Lmorus640_counter_1, T1_HI + pcmpgtb T1_LO, T0_LO + pcmpgtb T1_HI, T0_HI + pand T0_LO, MSG_LO + pand T0_HI, MSG_HI + + call __morus1280_update + + /* store the state: */ + movdqu STATE0_LO, (0 * 16)(%rdi) + movdqu STATE0_HI, (1 * 16)(%rdi) + movdqu STATE1_LO, (2 * 16)(%rdi) + movdqu STATE1_HI, (3 * 16)(%rdi) + movdqu STATE2_LO, (4 * 16)(%rdi) + movdqu STATE2_HI, (5 * 16)(%rdi) + movdqu STATE3_LO, (6 * 16)(%rdi) + movdqu STATE3_HI, (7 * 16)(%rdi) + movdqu STATE4_LO, (8 * 16)(%rdi) + movdqu STATE4_HI, (9 * 16)(%rdi) + + FRAME_END + ret +ENDPROC(crypto_morus1280_sse2_dec_tail) + +/* + * void crypto_morus1280_sse2_final(void *state, void *tag_xor, + * u64 assoclen, u64 cryptlen); + */ +ENTRY(crypto_morus1280_sse2_final) + FRAME_BEGIN + + /* load the state: */ + movdqu (0 * 16)(%rdi), STATE0_LO + movdqu (1 * 16)(%rdi), STATE0_HI + movdqu (2 * 16)(%rdi), STATE1_LO + movdqu (3 * 16)(%rdi), STATE1_HI + movdqu (4 * 16)(%rdi), STATE2_LO + movdqu (5 * 16)(%rdi), STATE2_HI + movdqu (6 * 16)(%rdi), STATE3_LO + movdqu (7 * 16)(%rdi), STATE3_HI + movdqu (8 * 16)(%rdi), STATE4_LO + movdqu (9 * 16)(%rdi), STATE4_HI + + /* xor state[0] into state[4]: */ + pxor STATE0_LO, STATE4_LO + pxor STATE0_HI, STATE4_HI + + /* prepare length block: */ + movq %rdx, MSG_LO + movq %rcx, T0_LO + pslldq $8, T0_LO + pxor T0_LO, MSG_LO + psllq $3, MSG_LO /* multiply by 8 (to get bit count) */ + pxor MSG_HI, MSG_HI + + /* update state: */ + call __morus1280_update + call __morus1280_update + call __morus1280_update + call __morus1280_update + call __morus1280_update + call __morus1280_update + call __morus1280_update + call __morus1280_update + call __morus1280_update + call __morus1280_update + + /* xor tag: */ + movdqu 0(%rsi), MSG_LO + movdqu 16(%rsi), MSG_HI + + pxor STATE0_LO, MSG_LO + pxor STATE0_HI, MSG_HI + movdqa STATE1_LO, T0_LO + movdqa STATE1_HI, T0_HI + rol3 T0_HI, T0_LO + pxor T0_LO, MSG_LO + pxor T0_HI, MSG_HI + movdqa STATE2_LO, T0_LO + movdqa STATE2_HI, T0_HI + pand STATE3_LO, T0_LO + pand STATE3_HI, T0_HI + pxor T0_LO, MSG_LO + pxor T0_HI, MSG_HI + + movdqu MSG_LO, 0(%rsi) + movdqu MSG_HI, 16(%rsi) + + FRAME_END + ret +ENDPROC(crypto_morus1280_sse2_final) diff --git a/arch/x86/crypto/morus1280-sse2-glue.c b/arch/x86/crypto/morus1280-sse2-glue.c new file mode 100644 index 000000000000..839270aa713c --- /dev/null +++ b/arch/x86/crypto/morus1280-sse2-glue.c @@ -0,0 +1,68 @@ +/* + * The MORUS-1280 Authenticated-Encryption Algorithm + * Glue for SSE2 implementation + * + * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com> + * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <crypto/internal/aead.h> +#include <crypto/morus1280_glue.h> +#include <linux/module.h> +#include <asm/fpu/api.h> +#include <asm/cpu_device_id.h> + +asmlinkage void crypto_morus1280_sse2_init(void *state, const void *key, + const void *iv); +asmlinkage void crypto_morus1280_sse2_ad(void *state, const void *data, + unsigned int length); + +asmlinkage void crypto_morus1280_sse2_enc(void *state, const void *src, + void *dst, unsigned int length); +asmlinkage void crypto_morus1280_sse2_dec(void *state, const void *src, + void *dst, unsigned int length); + +asmlinkage void crypto_morus1280_sse2_enc_tail(void *state, const void *src, + void *dst, unsigned int length); +asmlinkage void crypto_morus1280_sse2_dec_tail(void *state, const void *src, + void *dst, unsigned int length); + +asmlinkage void crypto_morus1280_sse2_final(void *state, void *tag_xor, + u64 assoclen, u64 cryptlen); + +MORUS1280_DECLARE_ALGS(sse2, "morus1280-sse2", 350); + +static const struct x86_cpu_id sse2_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_XMM2), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id); + +static int __init crypto_morus1280_sse2_module_init(void) +{ + if (!x86_match_cpu(sse2_cpu_id)) + return -ENODEV; + + return crypto_register_aeads(crypto_morus1280_sse2_algs, + ARRAY_SIZE(crypto_morus1280_sse2_algs)); +} + +static void __exit crypto_morus1280_sse2_module_exit(void) +{ + crypto_unregister_aeads(crypto_morus1280_sse2_algs, + ARRAY_SIZE(crypto_morus1280_sse2_algs)); +} + +module_init(crypto_morus1280_sse2_module_init); +module_exit(crypto_morus1280_sse2_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); +MODULE_DESCRIPTION("MORUS-1280 AEAD algorithm -- SSE2 implementation"); +MODULE_ALIAS_CRYPTO("morus1280"); +MODULE_ALIAS_CRYPTO("morus1280-sse2"); diff --git a/arch/x86/crypto/morus1280_glue.c b/arch/x86/crypto/morus1280_glue.c new file mode 100644 index 000000000000..0dccdda1eb3a --- /dev/null +++ b/arch/x86/crypto/morus1280_glue.c @@ -0,0 +1,302 @@ +/* + * The MORUS-1280 Authenticated-Encryption Algorithm + * Common x86 SIMD glue skeleton + * + * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com> + * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <crypto/cryptd.h> +#include <crypto/internal/aead.h> +#include <crypto/internal/skcipher.h> +#include <crypto/morus1280_glue.h> +#include <crypto/scatterwalk.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/scatterlist.h> +#include <asm/fpu/api.h> + +struct morus1280_state { + struct morus1280_block s[MORUS_STATE_BLOCKS]; +}; + +struct morus1280_ops { + int (*skcipher_walk_init)(struct skcipher_walk *walk, + struct aead_request *req, bool atomic); + + void (*crypt_blocks)(void *state, const void *src, void *dst, + unsigned int length); + void (*crypt_tail)(void *state, const void *src, void *dst, + unsigned int length); +}; + +static void crypto_morus1280_glue_process_ad( + struct morus1280_state *state, + const struct morus1280_glue_ops *ops, + struct scatterlist *sg_src, unsigned int assoclen) +{ + struct scatter_walk walk; + struct morus1280_block buf; + unsigned int pos = 0; + + scatterwalk_start(&walk, sg_src); + while (assoclen != 0) { + unsigned int size = scatterwalk_clamp(&walk, assoclen); + unsigned int left = size; + void *mapped = scatterwalk_map(&walk); + const u8 *src = (const u8 *)mapped; + + if (pos + size >= MORUS1280_BLOCK_SIZE) { + if (pos > 0) { + unsigned int fill = MORUS1280_BLOCK_SIZE - pos; + memcpy(buf.bytes + pos, src, fill); + ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE); + pos = 0; + left -= fill; + src += fill; + } + + ops->ad(state, src, left); + src += left & ~(MORUS1280_BLOCK_SIZE - 1); + left &= MORUS1280_BLOCK_SIZE - 1; + } + + memcpy(buf.bytes + pos, src, left); + + pos += left; + assoclen -= size; + scatterwalk_unmap(mapped); + scatterwalk_advance(&walk, size); + scatterwalk_done(&walk, 0, assoclen); + } + + if (pos > 0) { + memset(buf.bytes + pos, 0, MORUS1280_BLOCK_SIZE - pos); + ops->ad(state, buf.bytes, MORUS1280_BLOCK_SIZE); + } +} + +static void crypto_morus1280_glue_process_crypt(struct morus1280_state *state, + struct morus1280_ops ops, + struct aead_request *req) +{ + struct skcipher_walk walk; + u8 *cursor_src, *cursor_dst; + unsigned int chunksize, base; + + ops.skcipher_walk_init(&walk, req, false); + + while (walk.nbytes) { + cursor_src = walk.src.virt.addr; + cursor_dst = walk.dst.virt.addr; + chunksize = walk.nbytes; + + ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize); + + base = chunksize & ~(MORUS1280_BLOCK_SIZE - 1); + cursor_src += base; + cursor_dst += base; + chunksize &= MORUS1280_BLOCK_SIZE - 1; + + if (chunksize > 0) + ops.crypt_tail(state, cursor_src, cursor_dst, + chunksize); + + skcipher_walk_done(&walk, 0); + } +} + +int crypto_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int keylen) +{ + struct morus1280_ctx *ctx = crypto_aead_ctx(aead); + + if (keylen == MORUS1280_BLOCK_SIZE) { + memcpy(ctx->key.bytes, key, MORUS1280_BLOCK_SIZE); + } else if (keylen == MORUS1280_BLOCK_SIZE / 2) { + memcpy(ctx->key.bytes, key, keylen); + memcpy(ctx->key.bytes + keylen, key, keylen); + } else { + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + return 0; +} +EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setkey); + +int crypto_morus1280_glue_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL; +} +EXPORT_SYMBOL_GPL(crypto_morus1280_glue_setauthsize); + +static void crypto_morus1280_glue_crypt(struct aead_request *req, + struct morus1280_ops ops, + unsigned int cryptlen, + struct morus1280_block *tag_xor) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct morus1280_ctx *ctx = crypto_aead_ctx(tfm); + struct morus1280_state state; + + kernel_fpu_begin(); + + ctx->ops->init(&state, &ctx->key, req->iv); + crypto_morus1280_glue_process_ad(&state, ctx->ops, req->src, req->assoclen); + crypto_morus1280_glue_process_crypt(&state, ops, req); + ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen); + + kernel_fpu_end(); +} + +int crypto_morus1280_glue_encrypt(struct aead_request *req) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct morus1280_ctx *ctx = crypto_aead_ctx(tfm); + struct morus1280_ops OPS = { + .skcipher_walk_init = skcipher_walk_aead_encrypt, + .crypt_blocks = ctx->ops->enc, + .crypt_tail = ctx->ops->enc_tail, + }; + + struct morus1280_block tag = {}; + unsigned int authsize = crypto_aead_authsize(tfm); + unsigned int cryptlen = req->cryptlen; + + crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag); + + scatterwalk_map_and_copy(tag.bytes, req->dst, + req->assoclen + cryptlen, authsize, 1); + return 0; +} +EXPORT_SYMBOL_GPL(crypto_morus1280_glue_encrypt); + +int crypto_morus1280_glue_decrypt(struct aead_request *req) +{ + static const u8 zeros[MORUS1280_BLOCK_SIZE] = {}; + + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct morus1280_ctx *ctx = crypto_aead_ctx(tfm); + struct morus1280_ops OPS = { + .skcipher_walk_init = skcipher_walk_aead_decrypt, + .crypt_blocks = ctx->ops->dec, + .crypt_tail = ctx->ops->dec_tail, + }; + + struct morus1280_block tag; + unsigned int authsize = crypto_aead_authsize(tfm); + unsigned int cryptlen = req->cryptlen - authsize; + + scatterwalk_map_and_copy(tag.bytes, req->src, + req->assoclen + cryptlen, authsize, 0); + + crypto_morus1280_glue_crypt(req, OPS, cryptlen, &tag); + + return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0; +} +EXPORT_SYMBOL_GPL(crypto_morus1280_glue_decrypt); + +void crypto_morus1280_glue_init_ops(struct crypto_aead *aead, + const struct morus1280_glue_ops *ops) +{ + struct morus1280_ctx *ctx = crypto_aead_ctx(aead); + ctx->ops = ops; +} +EXPORT_SYMBOL_GPL(crypto_morus1280_glue_init_ops); + +int cryptd_morus1280_glue_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int keylen) +{ + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); +} +EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setkey); + +int cryptd_morus1280_glue_setauthsize(struct crypto_aead *aead, + unsigned int authsize) +{ + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); +} +EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_setauthsize); + +int cryptd_morus1280_glue_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + aead = &cryptd_tfm->base; + if (irq_fpu_usable() && (!in_atomic() || + !cryptd_aead_queued(cryptd_tfm))) + aead = cryptd_aead_child(cryptd_tfm); + + aead_request_set_tfm(req, aead); + + return crypto_aead_encrypt(req); +} +EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_encrypt); + +int cryptd_morus1280_glue_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + aead = &cryptd_tfm->base; + if (irq_fpu_usable() && (!in_atomic() || + !cryptd_aead_queued(cryptd_tfm))) + aead = cryptd_aead_child(cryptd_tfm); + + aead_request_set_tfm(req, aead); + + return crypto_aead_decrypt(req); +} +EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_decrypt); + +int cryptd_morus1280_glue_init_tfm(struct crypto_aead *aead) +{ + struct cryptd_aead *cryptd_tfm; + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + const char *name = crypto_aead_alg(aead)->base.cra_driver_name; + char internal_name[CRYPTO_MAX_ALG_NAME]; + + if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name) + >= CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + + cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + + *ctx = cryptd_tfm; + crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); + return 0; +} +EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_init_tfm); + +void cryptd_morus1280_glue_exit_tfm(struct crypto_aead *aead) +{ + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + + cryptd_free_aead(*ctx); +} +EXPORT_SYMBOL_GPL(cryptd_morus1280_glue_exit_tfm); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); +MODULE_DESCRIPTION("MORUS-1280 AEAD mode -- glue for x86 optimizations"); diff --git a/arch/x86/crypto/morus640-sse2-asm.S b/arch/x86/crypto/morus640-sse2-asm.S new file mode 100644 index 000000000000..71c72a0a0862 --- /dev/null +++ b/arch/x86/crypto/morus640-sse2-asm.S @@ -0,0 +1,614 @@ +/* + * SSE2 implementation of MORUS-640 + * + * Copyright (c) 2017-2018 Ondrej Mosnacek <omosnacek@gmail.com> + * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ + +#include <linux/linkage.h> +#include <asm/frame.h> + +#define SHUFFLE_MASK(i0, i1, i2, i3) \ + (i0 | (i1 << 2) | (i2 << 4) | (i3 << 6)) + +#define MASK1 SHUFFLE_MASK(3, 0, 1, 2) +#define MASK2 SHUFFLE_MASK(2, 3, 0, 1) +#define MASK3 SHUFFLE_MASK(1, 2, 3, 0) + +#define STATE0 %xmm0 +#define STATE1 %xmm1 +#define STATE2 %xmm2 +#define STATE3 %xmm3 +#define STATE4 %xmm4 +#define KEY %xmm5 +#define MSG %xmm5 +#define T0 %xmm6 +#define T1 %xmm7 + +.section .rodata.cst16.morus640_const, "aM", @progbits, 32 +.align 16 +.Lmorus640_const_0: + .byte 0x00, 0x01, 0x01, 0x02, 0x03, 0x05, 0x08, 0x0d + .byte 0x15, 0x22, 0x37, 0x59, 0x90, 0xe9, 0x79, 0x62 +.Lmorus640_const_1: + .byte 0xdb, 0x3d, 0x18, 0x55, 0x6d, 0xc2, 0x2f, 0xf1 + .byte 0x20, 0x11, 0x31, 0x42, 0x73, 0xb5, 0x28, 0xdd + +.section .rodata.cst16.morus640_counter, "aM", @progbits, 16 +.align 16 +.Lmorus640_counter: + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + +.text + +.macro morus640_round s0, s1, s2, s3, s4, b, w + movdqa \s1, T0 + pand \s2, T0 + pxor T0, \s0 + pxor \s3, \s0 + movdqa \s0, T0 + pslld $\b, T0 + psrld $(32 - \b), \s0 + pxor T0, \s0 + pshufd $\w, \s3, \s3 +.endm + +/* + * __morus640_update: internal ABI + * input: + * STATE[0-4] - input state + * MSG - message block + * output: + * STATE[0-4] - output state + * changed: + * T0 + */ +__morus640_update: + morus640_round STATE0, STATE1, STATE2, STATE3, STATE4, 5, MASK1 + pxor MSG, STATE1 + morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2 + pxor MSG, STATE2 + morus640_round STATE2, STATE3, STATE4, STATE0, STATE1, 7, MASK3 + pxor MSG, STATE3 + morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2 + pxor MSG, STATE4 + morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1 + ret +ENDPROC(__morus640_update) + + +/* + * __morus640_update_zero: internal ABI + * input: + * STATE[0-4] - input state + * output: + * STATE[0-4] - output state + * changed: + * T0 + */ +__morus640_update_zero: + morus640_round STATE0, STATE1, STATE2, STATE3, STATE4, 5, MASK1 + morus640_round STATE1, STATE2, STATE3, STATE4, STATE0, 31, MASK2 + morus640_round STATE2, STATE3, STATE4, STATE0, STATE1, 7, MASK3 + morus640_round STATE3, STATE4, STATE0, STATE1, STATE2, 22, MASK2 + morus640_round STATE4, STATE0, STATE1, STATE2, STATE3, 13, MASK1 + ret +ENDPROC(__morus640_update_zero) + +/* + * __load_partial: internal ABI + * input: + * %rsi - src + * %rcx - bytes + * output: + * MSG - message block + * changed: + * T0 + * %r8 + * %r9 + */ +__load_partial: + xor %r9, %r9 + pxor MSG, MSG + + mov %rcx, %r8 + and $0x1, %r8 + jz .Lld_partial_1 + + mov %rcx, %r8 + and $0x1E, %r8 + add %rsi, %r8 + mov (%r8), %r9b + +.Lld_partial_1: + mov %rcx, %r8 + and $0x2, %r8 + jz .Lld_partial_2 + + mov %rcx, %r8 + and $0x1C, %r8 + add %rsi, %r8 + shl $16, %r9 + mov (%r8), %r9w + +.Lld_partial_2: + mov %rcx, %r8 + and $0x4, %r8 + jz .Lld_partial_4 + + mov %rcx, %r8 + and $0x18, %r8 + add %rsi, %r8 + shl $32, %r9 + mov (%r8), %r8d + xor %r8, %r9 + +.Lld_partial_4: + movq %r9, MSG + + mov %rcx, %r8 + and $0x8, %r8 + jz .Lld_partial_8 + + mov %rcx, %r8 + and $0x10, %r8 + add %rsi, %r8 + pslldq $8, MSG + movq (%r8), T0 + pxor T0, MSG + +.Lld_partial_8: + ret +ENDPROC(__load_partial) + +/* + * __store_partial: internal ABI + * input: + * %rdx - dst + * %rcx - bytes + * output: + * T0 - message block + * changed: + * %r8 + * %r9 + * %r10 + */ +__store_partial: + mov %rcx, %r8 + mov %rdx, %r9 + + movq T0, %r10 + + cmp $8, %r8 + jl .Lst_partial_8 + + mov %r10, (%r9) + psrldq $8, T0 + movq T0, %r10 + + sub $8, %r8 + add $8, %r9 + +.Lst_partial_8: + cmp $4, %r8 + jl .Lst_partial_4 + + mov %r10d, (%r9) + shr $32, %r10 + + sub $4, %r8 + add $4, %r9 + +.Lst_partial_4: + cmp $2, %r8 + jl .Lst_partial_2 + + mov %r10w, (%r9) + shr $16, %r10 + + sub $2, %r8 + add $2, %r9 + +.Lst_partial_2: + cmp $1, %r8 + jl .Lst_partial_1 + + mov %r10b, (%r9) + +.Lst_partial_1: + ret +ENDPROC(__store_partial) + +/* + * void crypto_morus640_sse2_init(void *state, const void *key, const void *iv); + */ +ENTRY(crypto_morus640_sse2_init) + FRAME_BEGIN + + /* load IV: */ + movdqu (%rdx), STATE0 + /* load key: */ + movdqu (%rsi), KEY + movdqa KEY, STATE1 + /* load all ones: */ + pcmpeqd STATE2, STATE2 + /* load the constants: */ + movdqa .Lmorus640_const_0, STATE3 + movdqa .Lmorus640_const_1, STATE4 + + /* update 16 times with zero: */ + call __morus640_update_zero + call __morus640_update_zero + call __morus640_update_zero + call __morus640_update_zero + call __morus640_update_zero + call __morus640_update_zero + call __morus640_update_zero + call __morus640_update_zero + call __morus640_update_zero + call __morus640_update_zero + call __morus640_update_zero + call __morus640_update_zero + call __morus640_update_zero + call __morus640_update_zero + call __morus640_update_zero + call __morus640_update_zero + + /* xor-in the key again after updates: */ + pxor KEY, STATE1 + + /* store the state: */ + movdqu STATE0, (0 * 16)(%rdi) + movdqu STATE1, (1 * 16)(%rdi) + movdqu STATE2, (2 * 16)(%rdi) + movdqu STATE3, (3 * 16)(%rdi) + movdqu STATE4, (4 * 16)(%rdi) + + FRAME_END + ret +ENDPROC(crypto_morus640_sse2_init) + +/* + * void crypto_morus640_sse2_ad(void *state, const void *data, + * unsigned int length); + */ +ENTRY(crypto_morus640_sse2_ad) + FRAME_BEGIN + + cmp $16, %rdx + jb .Lad_out + + /* load the state: */ + movdqu (0 * 16)(%rdi), STATE0 + movdqu (1 * 16)(%rdi), STATE1 + movdqu (2 * 16)(%rdi), STATE2 + movdqu (3 * 16)(%rdi), STATE3 + movdqu (4 * 16)(%rdi), STATE4 + + mov %rsi, %r8 + and $0xF, %r8 + jnz .Lad_u_loop + +.align 4 +.Lad_a_loop: + movdqa (%rsi), MSG + call __morus640_update + sub $16, %rdx + add $16, %rsi + cmp $16, %rdx + jge .Lad_a_loop + + jmp .Lad_cont +.align 4 +.Lad_u_loop: + movdqu (%rsi), MSG + call __morus640_update + sub $16, %rdx + add $16, %rsi + cmp $16, %rdx + jge .Lad_u_loop + +.Lad_cont: + /* store the state: */ + movdqu STATE0, (0 * 16)(%rdi) + movdqu STATE1, (1 * 16)(%rdi) + movdqu STATE2, (2 * 16)(%rdi) + movdqu STATE3, (3 * 16)(%rdi) + movdqu STATE4, (4 * 16)(%rdi) + +.Lad_out: + FRAME_END + ret +ENDPROC(crypto_morus640_sse2_ad) + +/* + * void crypto_morus640_sse2_enc(void *state, const void *src, void *dst, + * unsigned int length); + */ +ENTRY(crypto_morus640_sse2_enc) + FRAME_BEGIN + + cmp $16, %rcx + jb .Lenc_out + + /* load the state: */ + movdqu (0 * 16)(%rdi), STATE0 + movdqu (1 * 16)(%rdi), STATE1 + movdqu (2 * 16)(%rdi), STATE2 + movdqu (3 * 16)(%rdi), STATE3 + movdqu (4 * 16)(%rdi), STATE4 + + mov %rsi, %r8 + or %rdx, %r8 + and $0xF, %r8 + jnz .Lenc_u_loop + +.align 4 +.Lenc_a_loop: + movdqa (%rsi), MSG + movdqa MSG, T0 + pxor STATE0, T0 + pshufd $MASK3, STATE1, T1 + pxor T1, T0 + movdqa STATE2, T1 + pand STATE3, T1 + pxor T1, T0 + movdqa T0, (%rdx) + + call __morus640_update + sub $16, %rcx + add $16, %rsi + add $16, %rdx + cmp $16, %rcx + jge .Lenc_a_loop + + jmp .Lenc_cont +.align 4 +.Lenc_u_loop: + movdqu (%rsi), MSG + movdqa MSG, T0 + pxor STATE0, T0 + pshufd $MASK3, STATE1, T1 + pxor T1, T0 + movdqa STATE2, T1 + pand STATE3, T1 + pxor T1, T0 + movdqu T0, (%rdx) + + call __morus640_update + sub $16, %rcx + add $16, %rsi + add $16, %rdx + cmp $16, %rcx + jge .Lenc_u_loop + +.Lenc_cont: + /* store the state: */ + movdqu STATE0, (0 * 16)(%rdi) + movdqu STATE1, (1 * 16)(%rdi) + movdqu STATE2, (2 * 16)(%rdi) + movdqu STATE3, (3 * 16)(%rdi) + movdqu STATE4, (4 * 16)(%rdi) + +.Lenc_out: + FRAME_END + ret +ENDPROC(crypto_morus640_sse2_enc) + +/* + * void crypto_morus640_sse2_enc_tail(void *state, const void *src, void *dst, + * unsigned int length); + */ +ENTRY(crypto_morus640_sse2_enc_tail) + FRAME_BEGIN + + /* load the state: */ + movdqu (0 * 16)(%rdi), STATE0 + movdqu (1 * 16)(%rdi), STATE1 + movdqu (2 * 16)(%rdi), STATE2 + movdqu (3 * 16)(%rdi), STATE3 + movdqu (4 * 16)(%rdi), STATE4 + + /* encrypt message: */ + call __load_partial + + movdqa MSG, T0 + pxor STATE0, T0 + pshufd $MASK3, STATE1, T1 + pxor T1, T0 + movdqa STATE2, T1 + pand STATE3, T1 + pxor T1, T0 + + call __store_partial + + call __morus640_update + + /* store the state: */ + movdqu STATE0, (0 * 16)(%rdi) + movdqu STATE1, (1 * 16)(%rdi) + movdqu STATE2, (2 * 16)(%rdi) + movdqu STATE3, (3 * 16)(%rdi) + movdqu STATE4, (4 * 16)(%rdi) + + FRAME_END +ENDPROC(crypto_morus640_sse2_enc_tail) + +/* + * void crypto_morus640_sse2_dec(void *state, const void *src, void *dst, + * unsigned int length); + */ +ENTRY(crypto_morus640_sse2_dec) + FRAME_BEGIN + + cmp $16, %rcx + jb .Ldec_out + + /* load the state: */ + movdqu (0 * 16)(%rdi), STATE0 + movdqu (1 * 16)(%rdi), STATE1 + movdqu (2 * 16)(%rdi), STATE2 + movdqu (3 * 16)(%rdi), STATE3 + movdqu (4 * 16)(%rdi), STATE4 + + mov %rsi, %r8 + or %rdx, %r8 + and $0xF, %r8 + jnz .Ldec_u_loop + +.align 4 +.Ldec_a_loop: + movdqa (%rsi), MSG + pxor STATE0, MSG + pshufd $MASK3, STATE1, T0 + pxor T0, MSG + movdqa STATE2, T0 + pand STATE3, T0 + pxor T0, MSG + movdqa MSG, (%rdx) + + call __morus640_update + sub $16, %rcx + add $16, %rsi + add $16, %rdx + cmp $16, %rcx + jge .Ldec_a_loop + + jmp .Ldec_cont +.align 4 +.Ldec_u_loop: + movdqu (%rsi), MSG + pxor STATE0, MSG + pshufd $MASK3, STATE1, T0 + pxor T0, MSG + movdqa STATE2, T0 + pand STATE3, T0 + pxor T0, MSG + movdqu MSG, (%rdx) + + call __morus640_update + sub $16, %rcx + add $16, %rsi + add $16, %rdx + cmp $16, %rcx + jge .Ldec_u_loop + +.Ldec_cont: + /* store the state: */ + movdqu STATE0, (0 * 16)(%rdi) + movdqu STATE1, (1 * 16)(%rdi) + movdqu STATE2, (2 * 16)(%rdi) + movdqu STATE3, (3 * 16)(%rdi) + movdqu STATE4, (4 * 16)(%rdi) + +.Ldec_out: + FRAME_END + ret +ENDPROC(crypto_morus640_sse2_dec) + +/* + * void crypto_morus640_sse2_dec_tail(void *state, const void *src, void *dst, + * unsigned int length); + */ +ENTRY(crypto_morus640_sse2_dec_tail) + FRAME_BEGIN + + /* load the state: */ + movdqu (0 * 16)(%rdi), STATE0 + movdqu (1 * 16)(%rdi), STATE1 + movdqu (2 * 16)(%rdi), STATE2 + movdqu (3 * 16)(%rdi), STATE3 + movdqu (4 * 16)(%rdi), STATE4 + + /* decrypt message: */ + call __load_partial + + pxor STATE0, MSG + pshufd $MASK3, STATE1, T0 + pxor T0, MSG + movdqa STATE2, T0 + pand STATE3, T0 + pxor T0, MSG + movdqa MSG, T0 + + call __store_partial + + /* mask with byte count: */ + movq %rcx, T0 + punpcklbw T0, T0 + punpcklbw T0, T0 + punpcklbw T0, T0 + punpcklbw T0, T0 + movdqa .Lmorus640_counter, T1 + pcmpgtb T1, T0 + pand T0, MSG + + call __morus640_update + + /* store the state: */ + movdqu STATE0, (0 * 16)(%rdi) + movdqu STATE1, (1 * 16)(%rdi) + movdqu STATE2, (2 * 16)(%rdi) + movdqu STATE3, (3 * 16)(%rdi) + movdqu STATE4, (4 * 16)(%rdi) + + FRAME_END + ret +ENDPROC(crypto_morus640_sse2_dec_tail) + +/* + * void crypto_morus640_sse2_final(void *state, void *tag_xor, + * u64 assoclen, u64 cryptlen); + */ +ENTRY(crypto_morus640_sse2_final) + FRAME_BEGIN + + /* load the state: */ + movdqu (0 * 16)(%rdi), STATE0 + movdqu (1 * 16)(%rdi), STATE1 + movdqu (2 * 16)(%rdi), STATE2 + movdqu (3 * 16)(%rdi), STATE3 + movdqu (4 * 16)(%rdi), STATE4 + + /* xor state[0] into state[4]: */ + pxor STATE0, STATE4 + + /* prepare length block: */ + movq %rdx, MSG + movq %rcx, T0 + pslldq $8, T0 + pxor T0, MSG + psllq $3, MSG /* multiply by 8 (to get bit count) */ + + /* update state: */ + call __morus640_update + call __morus640_update + call __morus640_update + call __morus640_update + call __morus640_update + call __morus640_update + call __morus640_update + call __morus640_update + call __morus640_update + call __morus640_update + + /* xor tag: */ + movdqu (%rsi), MSG + + pxor STATE0, MSG + pshufd $MASK3, STATE1, T0 + pxor T0, MSG + movdqa STATE2, T0 + pand STATE3, T0 + pxor T0, MSG + + movdqu MSG, (%rsi) + + FRAME_END + ret +ENDPROC(crypto_morus640_sse2_final) diff --git a/arch/x86/crypto/morus640-sse2-glue.c b/arch/x86/crypto/morus640-sse2-glue.c new file mode 100644 index 000000000000..26b47e2db8d2 --- /dev/null +++ b/arch/x86/crypto/morus640-sse2-glue.c @@ -0,0 +1,68 @@ +/* + * The MORUS-640 Authenticated-Encryption Algorithm + * Glue for SSE2 implementation + * + * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com> + * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <crypto/internal/aead.h> +#include <crypto/morus640_glue.h> +#include <linux/module.h> +#include <asm/fpu/api.h> +#include <asm/cpu_device_id.h> + +asmlinkage void crypto_morus640_sse2_init(void *state, const void *key, + const void *iv); +asmlinkage void crypto_morus640_sse2_ad(void *state, const void *data, + unsigned int length); + +asmlinkage void crypto_morus640_sse2_enc(void *state, const void *src, + void *dst, unsigned int length); +asmlinkage void crypto_morus640_sse2_dec(void *state, const void *src, + void *dst, unsigned int length); + +asmlinkage void crypto_morus640_sse2_enc_tail(void *state, const void *src, + void *dst, unsigned int length); +asmlinkage void crypto_morus640_sse2_dec_tail(void *state, const void *src, + void *dst, unsigned int length); + +asmlinkage void crypto_morus640_sse2_final(void *state, void *tag_xor, + u64 assoclen, u64 cryptlen); + +MORUS640_DECLARE_ALGS(sse2, "morus640-sse2", 400); + +static const struct x86_cpu_id sse2_cpu_id[] = { + X86_FEATURE_MATCH(X86_FEATURE_XMM2), + {} +}; +MODULE_DEVICE_TABLE(x86cpu, sse2_cpu_id); + +static int __init crypto_morus640_sse2_module_init(void) +{ + if (!x86_match_cpu(sse2_cpu_id)) + return -ENODEV; + + return crypto_register_aeads(crypto_morus640_sse2_algs, + ARRAY_SIZE(crypto_morus640_sse2_algs)); +} + +static void __exit crypto_morus640_sse2_module_exit(void) +{ + crypto_unregister_aeads(crypto_morus640_sse2_algs, + ARRAY_SIZE(crypto_morus640_sse2_algs)); +} + +module_init(crypto_morus640_sse2_module_init); +module_exit(crypto_morus640_sse2_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); +MODULE_DESCRIPTION("MORUS-640 AEAD algorithm -- SSE2 implementation"); +MODULE_ALIAS_CRYPTO("morus640"); +MODULE_ALIAS_CRYPTO("morus640-sse2"); diff --git a/arch/x86/crypto/morus640_glue.c b/arch/x86/crypto/morus640_glue.c new file mode 100644 index 000000000000..7b58fe4d9bd1 --- /dev/null +++ b/arch/x86/crypto/morus640_glue.c @@ -0,0 +1,298 @@ +/* + * The MORUS-640 Authenticated-Encryption Algorithm + * Common x86 SIMD glue skeleton + * + * Copyright (c) 2016-2018 Ondrej Mosnacek <omosnacek@gmail.com> + * Copyright (C) 2017-2018 Red Hat, Inc. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + */ + +#include <crypto/cryptd.h> +#include <crypto/internal/aead.h> +#include <crypto/internal/skcipher.h> +#include <crypto/morus640_glue.h> +#include <crypto/scatterwalk.h> +#include <linux/err.h> +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/scatterlist.h> +#include <asm/fpu/api.h> + +struct morus640_state { + struct morus640_block s[MORUS_STATE_BLOCKS]; +}; + +struct morus640_ops { + int (*skcipher_walk_init)(struct skcipher_walk *walk, + struct aead_request *req, bool atomic); + + void (*crypt_blocks)(void *state, const void *src, void *dst, + unsigned int length); + void (*crypt_tail)(void *state, const void *src, void *dst, + unsigned int length); +}; + +static void crypto_morus640_glue_process_ad( + struct morus640_state *state, + const struct morus640_glue_ops *ops, + struct scatterlist *sg_src, unsigned int assoclen) +{ + struct scatter_walk walk; + struct morus640_block buf; + unsigned int pos = 0; + + scatterwalk_start(&walk, sg_src); + while (assoclen != 0) { + unsigned int size = scatterwalk_clamp(&walk, assoclen); + unsigned int left = size; + void *mapped = scatterwalk_map(&walk); + const u8 *src = (const u8 *)mapped; + + if (pos + size >= MORUS640_BLOCK_SIZE) { + if (pos > 0) { + unsigned int fill = MORUS640_BLOCK_SIZE - pos; + memcpy(buf.bytes + pos, src, fill); + ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE); + pos = 0; + left -= fill; + src += fill; + } + + ops->ad(state, src, left); + src += left & ~(MORUS640_BLOCK_SIZE - 1); + left &= MORUS640_BLOCK_SIZE - 1; + } + + memcpy(buf.bytes + pos, src, left); + + pos += left; + assoclen -= size; + scatterwalk_unmap(mapped); + scatterwalk_advance(&walk, size); + scatterwalk_done(&walk, 0, assoclen); + } + + if (pos > 0) { + memset(buf.bytes + pos, 0, MORUS640_BLOCK_SIZE - pos); + ops->ad(state, buf.bytes, MORUS640_BLOCK_SIZE); + } +} + +static void crypto_morus640_glue_process_crypt(struct morus640_state *state, + struct morus640_ops ops, + struct aead_request *req) +{ + struct skcipher_walk walk; + u8 *cursor_src, *cursor_dst; + unsigned int chunksize, base; + + ops.skcipher_walk_init(&walk, req, false); + + while (walk.nbytes) { + cursor_src = walk.src.virt.addr; + cursor_dst = walk.dst.virt.addr; + chunksize = walk.nbytes; + + ops.crypt_blocks(state, cursor_src, cursor_dst, chunksize); + + base = chunksize & ~(MORUS640_BLOCK_SIZE - 1); + cursor_src += base; + cursor_dst += base; + chunksize &= MORUS640_BLOCK_SIZE - 1; + + if (chunksize > 0) + ops.crypt_tail(state, cursor_src, cursor_dst, + chunksize); + + skcipher_walk_done(&walk, 0); + } +} + +int crypto_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int keylen) +{ + struct morus640_ctx *ctx = crypto_aead_ctx(aead); + + if (keylen != MORUS640_BLOCK_SIZE) { + crypto_aead_set_flags(aead, CRYPTO_TFM_RES_BAD_KEY_LEN); + return -EINVAL; + } + + memcpy(ctx->key.bytes, key, MORUS640_BLOCK_SIZE); + return 0; +} +EXPORT_SYMBOL_GPL(crypto_morus640_glue_setkey); + +int crypto_morus640_glue_setauthsize(struct crypto_aead *tfm, + unsigned int authsize) +{ + return (authsize <= MORUS_MAX_AUTH_SIZE) ? 0 : -EINVAL; +} +EXPORT_SYMBOL_GPL(crypto_morus640_glue_setauthsize); + +static void crypto_morus640_glue_crypt(struct aead_request *req, + struct morus640_ops ops, + unsigned int cryptlen, + struct morus640_block *tag_xor) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct morus640_ctx *ctx = crypto_aead_ctx(tfm); + struct morus640_state state; + + kernel_fpu_begin(); + + ctx->ops->init(&state, &ctx->key, req->iv); + crypto_morus640_glue_process_ad(&state, ctx->ops, req->src, req->assoclen); + crypto_morus640_glue_process_crypt(&state, ops, req); + ctx->ops->final(&state, tag_xor, req->assoclen, cryptlen); + + kernel_fpu_end(); +} + +int crypto_morus640_glue_encrypt(struct aead_request *req) +{ + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct morus640_ctx *ctx = crypto_aead_ctx(tfm); + struct morus640_ops OPS = { + .skcipher_walk_init = skcipher_walk_aead_encrypt, + .crypt_blocks = ctx->ops->enc, + .crypt_tail = ctx->ops->enc_tail, + }; + + struct morus640_block tag = {}; + unsigned int authsize = crypto_aead_authsize(tfm); + unsigned int cryptlen = req->cryptlen; + + crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag); + + scatterwalk_map_and_copy(tag.bytes, req->dst, + req->assoclen + cryptlen, authsize, 1); + return 0; +} +EXPORT_SYMBOL_GPL(crypto_morus640_glue_encrypt); + +int crypto_morus640_glue_decrypt(struct aead_request *req) +{ + static const u8 zeros[MORUS640_BLOCK_SIZE] = {}; + + struct crypto_aead *tfm = crypto_aead_reqtfm(req); + struct morus640_ctx *ctx = crypto_aead_ctx(tfm); + struct morus640_ops OPS = { + .skcipher_walk_init = skcipher_walk_aead_decrypt, + .crypt_blocks = ctx->ops->dec, + .crypt_tail = ctx->ops->dec_tail, + }; + + struct morus640_block tag; + unsigned int authsize = crypto_aead_authsize(tfm); + unsigned int cryptlen = req->cryptlen - authsize; + + scatterwalk_map_and_copy(tag.bytes, req->src, + req->assoclen + cryptlen, authsize, 0); + + crypto_morus640_glue_crypt(req, OPS, cryptlen, &tag); + + return crypto_memneq(tag.bytes, zeros, authsize) ? -EBADMSG : 0; +} +EXPORT_SYMBOL_GPL(crypto_morus640_glue_decrypt); + +void crypto_morus640_glue_init_ops(struct crypto_aead *aead, + const struct morus640_glue_ops *ops) +{ + struct morus640_ctx *ctx = crypto_aead_ctx(aead); + ctx->ops = ops; +} +EXPORT_SYMBOL_GPL(crypto_morus640_glue_init_ops); + +int cryptd_morus640_glue_setkey(struct crypto_aead *aead, const u8 *key, + unsigned int keylen) +{ + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + return crypto_aead_setkey(&cryptd_tfm->base, key, keylen); +} +EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setkey); + +int cryptd_morus640_glue_setauthsize(struct crypto_aead *aead, + unsigned int authsize) +{ + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + return crypto_aead_setauthsize(&cryptd_tfm->base, authsize); +} +EXPORT_SYMBOL_GPL(cryptd_morus640_glue_setauthsize); + +int cryptd_morus640_glue_encrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + aead = &cryptd_tfm->base; + if (irq_fpu_usable() && (!in_atomic() || + !cryptd_aead_queued(cryptd_tfm))) + aead = cryptd_aead_child(cryptd_tfm); + + aead_request_set_tfm(req, aead); + + return crypto_aead_encrypt(req); +} +EXPORT_SYMBOL_GPL(cryptd_morus640_glue_encrypt); + +int cryptd_morus640_glue_decrypt(struct aead_request *req) +{ + struct crypto_aead *aead = crypto_aead_reqtfm(req); + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + struct cryptd_aead *cryptd_tfm = *ctx; + + aead = &cryptd_tfm->base; + if (irq_fpu_usable() && (!in_atomic() || + !cryptd_aead_queued(cryptd_tfm))) + aead = cryptd_aead_child(cryptd_tfm); + + aead_request_set_tfm(req, aead); + + return crypto_aead_decrypt(req); +} +EXPORT_SYMBOL_GPL(cryptd_morus640_glue_decrypt); + +int cryptd_morus640_glue_init_tfm(struct crypto_aead *aead) +{ + struct cryptd_aead *cryptd_tfm; + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + const char *name = crypto_aead_alg(aead)->base.cra_driver_name; + char internal_name[CRYPTO_MAX_ALG_NAME]; + + if (snprintf(internal_name, CRYPTO_MAX_ALG_NAME, "__%s", name) + >= CRYPTO_MAX_ALG_NAME) + return -ENAMETOOLONG; + + cryptd_tfm = cryptd_alloc_aead(internal_name, CRYPTO_ALG_INTERNAL, + CRYPTO_ALG_INTERNAL); + if (IS_ERR(cryptd_tfm)) + return PTR_ERR(cryptd_tfm); + + *ctx = cryptd_tfm; + crypto_aead_set_reqsize(aead, crypto_aead_reqsize(&cryptd_tfm->base)); + return 0; +} +EXPORT_SYMBOL_GPL(cryptd_morus640_glue_init_tfm); + +void cryptd_morus640_glue_exit_tfm(struct crypto_aead *aead) +{ + struct cryptd_aead **ctx = crypto_aead_ctx(aead); + + cryptd_free_aead(*ctx); +} +EXPORT_SYMBOL_GPL(cryptd_morus640_glue_exit_tfm); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Ondrej Mosnacek <omosnacek@gmail.com>"); +MODULE_DESCRIPTION("MORUS-640 AEAD mode -- glue for x86 optimizations"); diff --git a/arch/x86/crypto/salsa20-i586-asm_32.S b/arch/x86/crypto/salsa20-i586-asm_32.S deleted file mode 100644 index 6014b7b9e52a..000000000000 --- a/arch/x86/crypto/salsa20-i586-asm_32.S +++ /dev/null @@ -1,938 +0,0 @@ -# Derived from: -# salsa20_pm.s version 20051229 -# D. J. Bernstein -# Public domain. - -#include <linux/linkage.h> - -.text - -# enter salsa20_encrypt_bytes -ENTRY(salsa20_encrypt_bytes) - mov %esp,%eax - and $31,%eax - add $256,%eax - sub %eax,%esp - # eax_stack = eax - movl %eax,80(%esp) - # ebx_stack = ebx - movl %ebx,84(%esp) - # esi_stack = esi - movl %esi,88(%esp) - # edi_stack = edi - movl %edi,92(%esp) - # ebp_stack = ebp - movl %ebp,96(%esp) - # x = arg1 - movl 4(%esp,%eax),%edx - # m = arg2 - movl 8(%esp,%eax),%esi - # out = arg3 - movl 12(%esp,%eax),%edi - # bytes = arg4 - movl 16(%esp,%eax),%ebx - # bytes -= 0 - sub $0,%ebx - # goto done if unsigned<= - jbe ._done -._start: - # in0 = *(uint32 *) (x + 0) - movl 0(%edx),%eax - # in1 = *(uint32 *) (x + 4) - movl 4(%edx),%ecx - # in2 = *(uint32 *) (x + 8) - movl 8(%edx),%ebp - # j0 = in0 - movl %eax,164(%esp) - # in3 = *(uint32 *) (x + 12) - movl 12(%edx),%eax - # j1 = in1 - movl %ecx,168(%esp) - # in4 = *(uint32 *) (x + 16) - movl 16(%edx),%ecx - # j2 = in2 - movl %ebp,172(%esp) - # in5 = *(uint32 *) (x + 20) - movl 20(%edx),%ebp - # j3 = in3 - movl %eax,176(%esp) - # in6 = *(uint32 *) (x + 24) - movl 24(%edx),%eax - # j4 = in4 - movl %ecx,180(%esp) - # in7 = *(uint32 *) (x + 28) - movl 28(%edx),%ecx - # j5 = in5 - movl %ebp,184(%esp) - # in8 = *(uint32 *) (x + 32) - movl 32(%edx),%ebp - # j6 = in6 - movl %eax,188(%esp) - # in9 = *(uint32 *) (x + 36) - movl 36(%edx),%eax - # j7 = in7 - movl %ecx,192(%esp) - # in10 = *(uint32 *) (x + 40) - movl 40(%edx),%ecx - # j8 = in8 - movl %ebp,196(%esp) - # in11 = *(uint32 *) (x + 44) - movl 44(%edx),%ebp - # j9 = in9 - movl %eax,200(%esp) - # in12 = *(uint32 *) (x + 48) - movl 48(%edx),%eax - # j10 = in10 - movl %ecx,204(%esp) - # in13 = *(uint32 *) (x + 52) - movl 52(%edx),%ecx - # j11 = in11 - movl %ebp,208(%esp) - # in14 = *(uint32 *) (x + 56) - movl 56(%edx),%ebp - # j12 = in12 - movl %eax,212(%esp) - # in15 = *(uint32 *) (x + 60) - movl 60(%edx),%eax - # j13 = in13 - movl %ecx,216(%esp) - # j14 = in14 - movl %ebp,220(%esp) - # j15 = in15 - movl %eax,224(%esp) - # x_backup = x - movl %edx,64(%esp) -._bytesatleast1: - # bytes - 64 - cmp $64,%ebx - # goto nocopy if unsigned>= - jae ._nocopy - # ctarget = out - movl %edi,228(%esp) - # out = &tmp - leal 0(%esp),%edi - # i = bytes - mov %ebx,%ecx - # while (i) { *out++ = *m++; --i } - rep movsb - # out = &tmp - leal 0(%esp),%edi - # m = &tmp - leal 0(%esp),%esi -._nocopy: - # out_backup = out - movl %edi,72(%esp) - # m_backup = m - movl %esi,68(%esp) - # bytes_backup = bytes - movl %ebx,76(%esp) - # in0 = j0 - movl 164(%esp),%eax - # in1 = j1 - movl 168(%esp),%ecx - # in2 = j2 - movl 172(%esp),%edx - # in3 = j3 - movl 176(%esp),%ebx - # x0 = in0 - movl %eax,100(%esp) - # x1 = in1 - movl %ecx,104(%esp) - # x2 = in2 - movl %edx,108(%esp) - # x3 = in3 - movl %ebx,112(%esp) - # in4 = j4 - movl 180(%esp),%eax - # in5 = j5 - movl 184(%esp),%ecx - # in6 = j6 - movl 188(%esp),%edx - # in7 = j7 - movl 192(%esp),%ebx - # x4 = in4 - movl %eax,116(%esp) - # x5 = in5 - movl %ecx,120(%esp) - # x6 = in6 - movl %edx,124(%esp) - # x7 = in7 - movl %ebx,128(%esp) - # in8 = j8 - movl 196(%esp),%eax - # in9 = j9 - movl 200(%esp),%ecx - # in10 = j10 - movl 204(%esp),%edx - # in11 = j11 - movl 208(%esp),%ebx - # x8 = in8 - movl %eax,132(%esp) - # x9 = in9 - movl %ecx,136(%esp) - # x10 = in10 - movl %edx,140(%esp) - # x11 = in11 - movl %ebx,144(%esp) - # in12 = j12 - movl 212(%esp),%eax - # in13 = j13 - movl 216(%esp),%ecx - # in14 = j14 - movl 220(%esp),%edx - # in15 = j15 - movl 224(%esp),%ebx - # x12 = in12 - movl %eax,148(%esp) - # x13 = in13 - movl %ecx,152(%esp) - # x14 = in14 - movl %edx,156(%esp) - # x15 = in15 - movl %ebx,160(%esp) - # i = 20 - mov $20,%ebp - # p = x0 - movl 100(%esp),%eax - # s = x5 - movl 120(%esp),%ecx - # t = x10 - movl 140(%esp),%edx - # w = x15 - movl 160(%esp),%ebx -._mainloop: - # x0 = p - movl %eax,100(%esp) - # x10 = t - movl %edx,140(%esp) - # p += x12 - addl 148(%esp),%eax - # x5 = s - movl %ecx,120(%esp) - # t += x6 - addl 124(%esp),%edx - # x15 = w - movl %ebx,160(%esp) - # r = x1 - movl 104(%esp),%esi - # r += s - add %ecx,%esi - # v = x11 - movl 144(%esp),%edi - # v += w - add %ebx,%edi - # p <<<= 7 - rol $7,%eax - # p ^= x4 - xorl 116(%esp),%eax - # t <<<= 7 - rol $7,%edx - # t ^= x14 - xorl 156(%esp),%edx - # r <<<= 7 - rol $7,%esi - # r ^= x9 - xorl 136(%esp),%esi - # v <<<= 7 - rol $7,%edi - # v ^= x3 - xorl 112(%esp),%edi - # x4 = p - movl %eax,116(%esp) - # x14 = t - movl %edx,156(%esp) - # p += x0 - addl 100(%esp),%eax - # x9 = r - movl %esi,136(%esp) - # t += x10 - addl 140(%esp),%edx - # x3 = v - movl %edi,112(%esp) - # p <<<= 9 - rol $9,%eax - # p ^= x8 - xorl 132(%esp),%eax - # t <<<= 9 - rol $9,%edx - # t ^= x2 - xorl 108(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 9 - rol $9,%ecx - # s ^= x13 - xorl 152(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 9 - rol $9,%ebx - # w ^= x7 - xorl 128(%esp),%ebx - # x8 = p - movl %eax,132(%esp) - # x2 = t - movl %edx,108(%esp) - # p += x4 - addl 116(%esp),%eax - # x13 = s - movl %ecx,152(%esp) - # t += x14 - addl 156(%esp),%edx - # x7 = w - movl %ebx,128(%esp) - # p <<<= 13 - rol $13,%eax - # p ^= x12 - xorl 148(%esp),%eax - # t <<<= 13 - rol $13,%edx - # t ^= x6 - xorl 124(%esp),%edx - # r += s - add %ecx,%esi - # r <<<= 13 - rol $13,%esi - # r ^= x1 - xorl 104(%esp),%esi - # v += w - add %ebx,%edi - # v <<<= 13 - rol $13,%edi - # v ^= x11 - xorl 144(%esp),%edi - # x12 = p - movl %eax,148(%esp) - # x6 = t - movl %edx,124(%esp) - # p += x8 - addl 132(%esp),%eax - # x1 = r - movl %esi,104(%esp) - # t += x2 - addl 108(%esp),%edx - # x11 = v - movl %edi,144(%esp) - # p <<<= 18 - rol $18,%eax - # p ^= x0 - xorl 100(%esp),%eax - # t <<<= 18 - rol $18,%edx - # t ^= x10 - xorl 140(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 18 - rol $18,%ecx - # s ^= x5 - xorl 120(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 18 - rol $18,%ebx - # w ^= x15 - xorl 160(%esp),%ebx - # x0 = p - movl %eax,100(%esp) - # x10 = t - movl %edx,140(%esp) - # p += x3 - addl 112(%esp),%eax - # p <<<= 7 - rol $7,%eax - # x5 = s - movl %ecx,120(%esp) - # t += x9 - addl 136(%esp),%edx - # x15 = w - movl %ebx,160(%esp) - # r = x4 - movl 116(%esp),%esi - # r += s - add %ecx,%esi - # v = x14 - movl 156(%esp),%edi - # v += w - add %ebx,%edi - # p ^= x1 - xorl 104(%esp),%eax - # t <<<= 7 - rol $7,%edx - # t ^= x11 - xorl 144(%esp),%edx - # r <<<= 7 - rol $7,%esi - # r ^= x6 - xorl 124(%esp),%esi - # v <<<= 7 - rol $7,%edi - # v ^= x12 - xorl 148(%esp),%edi - # x1 = p - movl %eax,104(%esp) - # x11 = t - movl %edx,144(%esp) - # p += x0 - addl 100(%esp),%eax - # x6 = r - movl %esi,124(%esp) - # t += x10 - addl 140(%esp),%edx - # x12 = v - movl %edi,148(%esp) - # p <<<= 9 - rol $9,%eax - # p ^= x2 - xorl 108(%esp),%eax - # t <<<= 9 - rol $9,%edx - # t ^= x8 - xorl 132(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 9 - rol $9,%ecx - # s ^= x7 - xorl 128(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 9 - rol $9,%ebx - # w ^= x13 - xorl 152(%esp),%ebx - # x2 = p - movl %eax,108(%esp) - # x8 = t - movl %edx,132(%esp) - # p += x1 - addl 104(%esp),%eax - # x7 = s - movl %ecx,128(%esp) - # t += x11 - addl 144(%esp),%edx - # x13 = w - movl %ebx,152(%esp) - # p <<<= 13 - rol $13,%eax - # p ^= x3 - xorl 112(%esp),%eax - # t <<<= 13 - rol $13,%edx - # t ^= x9 - xorl 136(%esp),%edx - # r += s - add %ecx,%esi - # r <<<= 13 - rol $13,%esi - # r ^= x4 - xorl 116(%esp),%esi - # v += w - add %ebx,%edi - # v <<<= 13 - rol $13,%edi - # v ^= x14 - xorl 156(%esp),%edi - # x3 = p - movl %eax,112(%esp) - # x9 = t - movl %edx,136(%esp) - # p += x2 - addl 108(%esp),%eax - # x4 = r - movl %esi,116(%esp) - # t += x8 - addl 132(%esp),%edx - # x14 = v - movl %edi,156(%esp) - # p <<<= 18 - rol $18,%eax - # p ^= x0 - xorl 100(%esp),%eax - # t <<<= 18 - rol $18,%edx - # t ^= x10 - xorl 140(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 18 - rol $18,%ecx - # s ^= x5 - xorl 120(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 18 - rol $18,%ebx - # w ^= x15 - xorl 160(%esp),%ebx - # x0 = p - movl %eax,100(%esp) - # x10 = t - movl %edx,140(%esp) - # p += x12 - addl 148(%esp),%eax - # x5 = s - movl %ecx,120(%esp) - # t += x6 - addl 124(%esp),%edx - # x15 = w - movl %ebx,160(%esp) - # r = x1 - movl 104(%esp),%esi - # r += s - add %ecx,%esi - # v = x11 - movl 144(%esp),%edi - # v += w - add %ebx,%edi - # p <<<= 7 - rol $7,%eax - # p ^= x4 - xorl 116(%esp),%eax - # t <<<= 7 - rol $7,%edx - # t ^= x14 - xorl 156(%esp),%edx - # r <<<= 7 - rol $7,%esi - # r ^= x9 - xorl 136(%esp),%esi - # v <<<= 7 - rol $7,%edi - # v ^= x3 - xorl 112(%esp),%edi - # x4 = p - movl %eax,116(%esp) - # x14 = t - movl %edx,156(%esp) - # p += x0 - addl 100(%esp),%eax - # x9 = r - movl %esi,136(%esp) - # t += x10 - addl 140(%esp),%edx - # x3 = v - movl %edi,112(%esp) - # p <<<= 9 - rol $9,%eax - # p ^= x8 - xorl 132(%esp),%eax - # t <<<= 9 - rol $9,%edx - # t ^= x2 - xorl 108(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 9 - rol $9,%ecx - # s ^= x13 - xorl 152(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 9 - rol $9,%ebx - # w ^= x7 - xorl 128(%esp),%ebx - # x8 = p - movl %eax,132(%esp) - # x2 = t - movl %edx,108(%esp) - # p += x4 - addl 116(%esp),%eax - # x13 = s - movl %ecx,152(%esp) - # t += x14 - addl 156(%esp),%edx - # x7 = w - movl %ebx,128(%esp) - # p <<<= 13 - rol $13,%eax - # p ^= x12 - xorl 148(%esp),%eax - # t <<<= 13 - rol $13,%edx - # t ^= x6 - xorl 124(%esp),%edx - # r += s - add %ecx,%esi - # r <<<= 13 - rol $13,%esi - # r ^= x1 - xorl 104(%esp),%esi - # v += w - add %ebx,%edi - # v <<<= 13 - rol $13,%edi - # v ^= x11 - xorl 144(%esp),%edi - # x12 = p - movl %eax,148(%esp) - # x6 = t - movl %edx,124(%esp) - # p += x8 - addl 132(%esp),%eax - # x1 = r - movl %esi,104(%esp) - # t += x2 - addl 108(%esp),%edx - # x11 = v - movl %edi,144(%esp) - # p <<<= 18 - rol $18,%eax - # p ^= x0 - xorl 100(%esp),%eax - # t <<<= 18 - rol $18,%edx - # t ^= x10 - xorl 140(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 18 - rol $18,%ecx - # s ^= x5 - xorl 120(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 18 - rol $18,%ebx - # w ^= x15 - xorl 160(%esp),%ebx - # x0 = p - movl %eax,100(%esp) - # x10 = t - movl %edx,140(%esp) - # p += x3 - addl 112(%esp),%eax - # p <<<= 7 - rol $7,%eax - # x5 = s - movl %ecx,120(%esp) - # t += x9 - addl 136(%esp),%edx - # x15 = w - movl %ebx,160(%esp) - # r = x4 - movl 116(%esp),%esi - # r += s - add %ecx,%esi - # v = x14 - movl 156(%esp),%edi - # v += w - add %ebx,%edi - # p ^= x1 - xorl 104(%esp),%eax - # t <<<= 7 - rol $7,%edx - # t ^= x11 - xorl 144(%esp),%edx - # r <<<= 7 - rol $7,%esi - # r ^= x6 - xorl 124(%esp),%esi - # v <<<= 7 - rol $7,%edi - # v ^= x12 - xorl 148(%esp),%edi - # x1 = p - movl %eax,104(%esp) - # x11 = t - movl %edx,144(%esp) - # p += x0 - addl 100(%esp),%eax - # x6 = r - movl %esi,124(%esp) - # t += x10 - addl 140(%esp),%edx - # x12 = v - movl %edi,148(%esp) - # p <<<= 9 - rol $9,%eax - # p ^= x2 - xorl 108(%esp),%eax - # t <<<= 9 - rol $9,%edx - # t ^= x8 - xorl 132(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 9 - rol $9,%ecx - # s ^= x7 - xorl 128(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 9 - rol $9,%ebx - # w ^= x13 - xorl 152(%esp),%ebx - # x2 = p - movl %eax,108(%esp) - # x8 = t - movl %edx,132(%esp) - # p += x1 - addl 104(%esp),%eax - # x7 = s - movl %ecx,128(%esp) - # t += x11 - addl 144(%esp),%edx - # x13 = w - movl %ebx,152(%esp) - # p <<<= 13 - rol $13,%eax - # p ^= x3 - xorl 112(%esp),%eax - # t <<<= 13 - rol $13,%edx - # t ^= x9 - xorl 136(%esp),%edx - # r += s - add %ecx,%esi - # r <<<= 13 - rol $13,%esi - # r ^= x4 - xorl 116(%esp),%esi - # v += w - add %ebx,%edi - # v <<<= 13 - rol $13,%edi - # v ^= x14 - xorl 156(%esp),%edi - # x3 = p - movl %eax,112(%esp) - # x9 = t - movl %edx,136(%esp) - # p += x2 - addl 108(%esp),%eax - # x4 = r - movl %esi,116(%esp) - # t += x8 - addl 132(%esp),%edx - # x14 = v - movl %edi,156(%esp) - # p <<<= 18 - rol $18,%eax - # p ^= x0 - xorl 100(%esp),%eax - # t <<<= 18 - rol $18,%edx - # t ^= x10 - xorl 140(%esp),%edx - # s += r - add %esi,%ecx - # s <<<= 18 - rol $18,%ecx - # s ^= x5 - xorl 120(%esp),%ecx - # w += v - add %edi,%ebx - # w <<<= 18 - rol $18,%ebx - # w ^= x15 - xorl 160(%esp),%ebx - # i -= 4 - sub $4,%ebp - # goto mainloop if unsigned > - ja ._mainloop - # x0 = p - movl %eax,100(%esp) - # x5 = s - movl %ecx,120(%esp) - # x10 = t - movl %edx,140(%esp) - # x15 = w - movl %ebx,160(%esp) - # out = out_backup - movl 72(%esp),%edi - # m = m_backup - movl 68(%esp),%esi - # in0 = x0 - movl 100(%esp),%eax - # in1 = x1 - movl 104(%esp),%ecx - # in0 += j0 - addl 164(%esp),%eax - # in1 += j1 - addl 168(%esp),%ecx - # in0 ^= *(uint32 *) (m + 0) - xorl 0(%esi),%eax - # in1 ^= *(uint32 *) (m + 4) - xorl 4(%esi),%ecx - # *(uint32 *) (out + 0) = in0 - movl %eax,0(%edi) - # *(uint32 *) (out + 4) = in1 - movl %ecx,4(%edi) - # in2 = x2 - movl 108(%esp),%eax - # in3 = x3 - movl 112(%esp),%ecx - # in2 += j2 - addl 172(%esp),%eax - # in3 += j3 - addl 176(%esp),%ecx - # in2 ^= *(uint32 *) (m + 8) - xorl 8(%esi),%eax - # in3 ^= *(uint32 *) (m + 12) - xorl 12(%esi),%ecx - # *(uint32 *) (out + 8) = in2 - movl %eax,8(%edi) - # *(uint32 *) (out + 12) = in3 - movl %ecx,12(%edi) - # in4 = x4 - movl 116(%esp),%eax - # in5 = x5 - movl 120(%esp),%ecx - # in4 += j4 - addl 180(%esp),%eax - # in5 += j5 - addl 184(%esp),%ecx - # in4 ^= *(uint32 *) (m + 16) - xorl 16(%esi),%eax - # in5 ^= *(uint32 *) (m + 20) - xorl 20(%esi),%ecx - # *(uint32 *) (out + 16) = in4 - movl %eax,16(%edi) - # *(uint32 *) (out + 20) = in5 - movl %ecx,20(%edi) - # in6 = x6 - movl 124(%esp),%eax - # in7 = x7 - movl 128(%esp),%ecx - # in6 += j6 - addl 188(%esp),%eax - # in7 += j7 - addl 192(%esp),%ecx - # in6 ^= *(uint32 *) (m + 24) - xorl 24(%esi),%eax - # in7 ^= *(uint32 *) (m + 28) - xorl 28(%esi),%ecx - # *(uint32 *) (out + 24) = in6 - movl %eax,24(%edi) - # *(uint32 *) (out + 28) = in7 - movl %ecx,28(%edi) - # in8 = x8 - movl 132(%esp),%eax - # in9 = x9 - movl 136(%esp),%ecx - # in8 += j8 - addl 196(%esp),%eax - # in9 += j9 - addl 200(%esp),%ecx - # in8 ^= *(uint32 *) (m + 32) - xorl 32(%esi),%eax - # in9 ^= *(uint32 *) (m + 36) - xorl 36(%esi),%ecx - # *(uint32 *) (out + 32) = in8 - movl %eax,32(%edi) - # *(uint32 *) (out + 36) = in9 - movl %ecx,36(%edi) - # in10 = x10 - movl 140(%esp),%eax - # in11 = x11 - movl 144(%esp),%ecx - # in10 += j10 - addl 204(%esp),%eax - # in11 += j11 - addl 208(%esp),%ecx - # in10 ^= *(uint32 *) (m + 40) - xorl 40(%esi),%eax - # in11 ^= *(uint32 *) (m + 44) - xorl 44(%esi),%ecx - # *(uint32 *) (out + 40) = in10 - movl %eax,40(%edi) - # *(uint32 *) (out + 44) = in11 - movl %ecx,44(%edi) - # in12 = x12 - movl 148(%esp),%eax - # in13 = x13 - movl 152(%esp),%ecx - # in12 += j12 - addl 212(%esp),%eax - # in13 += j13 - addl 216(%esp),%ecx - # in12 ^= *(uint32 *) (m + 48) - xorl 48(%esi),%eax - # in13 ^= *(uint32 *) (m + 52) - xorl 52(%esi),%ecx - # *(uint32 *) (out + 48) = in12 - movl %eax,48(%edi) - # *(uint32 *) (out + 52) = in13 - movl %ecx,52(%edi) - # in14 = x14 - movl 156(%esp),%eax - # in15 = x15 - movl 160(%esp),%ecx - # in14 += j14 - addl 220(%esp),%eax - # in15 += j15 - addl 224(%esp),%ecx - # in14 ^= *(uint32 *) (m + 56) - xorl 56(%esi),%eax - # in15 ^= *(uint32 *) (m + 60) - xorl 60(%esi),%ecx - # *(uint32 *) (out + 56) = in14 - movl %eax,56(%edi) - # *(uint32 *) (out + 60) = in15 - movl %ecx,60(%edi) - # bytes = bytes_backup - movl 76(%esp),%ebx - # in8 = j8 - movl 196(%esp),%eax - # in9 = j9 - movl 200(%esp),%ecx - # in8 += 1 - add $1,%eax - # in9 += 0 + carry - adc $0,%ecx - # j8 = in8 - movl %eax,196(%esp) - # j9 = in9 - movl %ecx,200(%esp) - # bytes - 64 - cmp $64,%ebx - # goto bytesatleast65 if unsigned> - ja ._bytesatleast65 - # goto bytesatleast64 if unsigned>= - jae ._bytesatleast64 - # m = out - mov %edi,%esi - # out = ctarget - movl 228(%esp),%edi - # i = bytes - mov %ebx,%ecx - # while (i) { *out++ = *m++; --i } - rep movsb -._bytesatleast64: - # x = x_backup - movl 64(%esp),%eax - # in8 = j8 - movl 196(%esp),%ecx - # in9 = j9 - movl 200(%esp),%edx - # *(uint32 *) (x + 32) = in8 - movl %ecx,32(%eax) - # *(uint32 *) (x + 36) = in9 - movl %edx,36(%eax) -._done: - # eax = eax_stack - movl 80(%esp),%eax - # ebx = ebx_stack - movl 84(%esp),%ebx - # esi = esi_stack - movl 88(%esp),%esi - # edi = edi_stack - movl 92(%esp),%edi - # ebp = ebp_stack - movl 96(%esp),%ebp - # leave - add %eax,%esp - ret -._bytesatleast65: - # bytes -= 64 - sub $64,%ebx - # out += 64 - add $64,%edi - # m += 64 - add $64,%esi - # goto bytesatleast1 - jmp ._bytesatleast1 -ENDPROC(salsa20_encrypt_bytes) diff --git a/arch/x86/crypto/salsa20-x86_64-asm_64.S b/arch/x86/crypto/salsa20-x86_64-asm_64.S deleted file mode 100644 index 03a4918f41ee..000000000000 --- a/arch/x86/crypto/salsa20-x86_64-asm_64.S +++ /dev/null @@ -1,805 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#include <linux/linkage.h> - -# enter salsa20_encrypt_bytes -ENTRY(salsa20_encrypt_bytes) - mov %rsp,%r11 - and $31,%r11 - add $256,%r11 - sub %r11,%rsp - # x = arg1 - mov %rdi,%r8 - # m = arg2 - mov %rsi,%rsi - # out = arg3 - mov %rdx,%rdi - # bytes = arg4 - mov %rcx,%rdx - # unsigned>? bytes - 0 - cmp $0,%rdx - # comment:fp stack unchanged by jump - # goto done if !unsigned> - jbe ._done - # comment:fp stack unchanged by fallthrough -# start: -._start: - # r11_stack = r11 - movq %r11,0(%rsp) - # r12_stack = r12 - movq %r12,8(%rsp) - # r13_stack = r13 - movq %r13,16(%rsp) - # r14_stack = r14 - movq %r14,24(%rsp) - # r15_stack = r15 - movq %r15,32(%rsp) - # rbx_stack = rbx - movq %rbx,40(%rsp) - # rbp_stack = rbp - movq %rbp,48(%rsp) - # in0 = *(uint64 *) (x + 0) - movq 0(%r8),%rcx - # in2 = *(uint64 *) (x + 8) - movq 8(%r8),%r9 - # in4 = *(uint64 *) (x + 16) - movq 16(%r8),%rax - # in6 = *(uint64 *) (x + 24) - movq 24(%r8),%r10 - # in8 = *(uint64 *) (x + 32) - movq 32(%r8),%r11 - # in10 = *(uint64 *) (x + 40) - movq 40(%r8),%r12 - # in12 = *(uint64 *) (x + 48) - movq 48(%r8),%r13 - # in14 = *(uint64 *) (x + 56) - movq 56(%r8),%r14 - # j0 = in0 - movq %rcx,56(%rsp) - # j2 = in2 - movq %r9,64(%rsp) - # j4 = in4 - movq %rax,72(%rsp) - # j6 = in6 - movq %r10,80(%rsp) - # j8 = in8 - movq %r11,88(%rsp) - # j10 = in10 - movq %r12,96(%rsp) - # j12 = in12 - movq %r13,104(%rsp) - # j14 = in14 - movq %r14,112(%rsp) - # x_backup = x - movq %r8,120(%rsp) -# bytesatleast1: -._bytesatleast1: - # unsigned<? bytes - 64 - cmp $64,%rdx - # comment:fp stack unchanged by jump - # goto nocopy if !unsigned< - jae ._nocopy - # ctarget = out - movq %rdi,128(%rsp) - # out = &tmp - leaq 192(%rsp),%rdi - # i = bytes - mov %rdx,%rcx - # while (i) { *out++ = *m++; --i } - rep movsb - # out = &tmp - leaq 192(%rsp),%rdi - # m = &tmp - leaq 192(%rsp),%rsi - # comment:fp stack unchanged by fallthrough -# nocopy: -._nocopy: - # out_backup = out - movq %rdi,136(%rsp) - # m_backup = m - movq %rsi,144(%rsp) - # bytes_backup = bytes - movq %rdx,152(%rsp) - # x1 = j0 - movq 56(%rsp),%rdi - # x0 = x1 - mov %rdi,%rdx - # (uint64) x1 >>= 32 - shr $32,%rdi - # x3 = j2 - movq 64(%rsp),%rsi - # x2 = x3 - mov %rsi,%rcx - # (uint64) x3 >>= 32 - shr $32,%rsi - # x5 = j4 - movq 72(%rsp),%r8 - # x4 = x5 - mov %r8,%r9 - # (uint64) x5 >>= 32 - shr $32,%r8 - # x5_stack = x5 - movq %r8,160(%rsp) - # x7 = j6 - movq 80(%rsp),%r8 - # x6 = x7 - mov %r8,%rax - # (uint64) x7 >>= 32 - shr $32,%r8 - # x9 = j8 - movq 88(%rsp),%r10 - # x8 = x9 - mov %r10,%r11 - # (uint64) x9 >>= 32 - shr $32,%r10 - # x11 = j10 - movq 96(%rsp),%r12 - # x10 = x11 - mov %r12,%r13 - # x10_stack = x10 - movq %r13,168(%rsp) - # (uint64) x11 >>= 32 - shr $32,%r12 - # x13 = j12 - movq 104(%rsp),%r13 - # x12 = x13 - mov %r13,%r14 - # (uint64) x13 >>= 32 - shr $32,%r13 - # x15 = j14 - movq 112(%rsp),%r15 - # x14 = x15 - mov %r15,%rbx - # (uint64) x15 >>= 32 - shr $32,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # i = 20 - mov $20,%r15 -# mainloop: -._mainloop: - # i_backup = i - movq %r15,184(%rsp) - # x5 = x5_stack - movq 160(%rsp),%r15 - # a = x12 + x0 - lea (%r14,%rdx),%rbp - # (uint32) a <<<= 7 - rol $7,%ebp - # x4 ^= a - xor %rbp,%r9 - # b = x1 + x5 - lea (%rdi,%r15),%rbp - # (uint32) b <<<= 7 - rol $7,%ebp - # x9 ^= b - xor %rbp,%r10 - # a = x0 + x4 - lea (%rdx,%r9),%rbp - # (uint32) a <<<= 9 - rol $9,%ebp - # x8 ^= a - xor %rbp,%r11 - # b = x5 + x9 - lea (%r15,%r10),%rbp - # (uint32) b <<<= 9 - rol $9,%ebp - # x13 ^= b - xor %rbp,%r13 - # a = x4 + x8 - lea (%r9,%r11),%rbp - # (uint32) a <<<= 13 - rol $13,%ebp - # x12 ^= a - xor %rbp,%r14 - # b = x9 + x13 - lea (%r10,%r13),%rbp - # (uint32) b <<<= 13 - rol $13,%ebp - # x1 ^= b - xor %rbp,%rdi - # a = x8 + x12 - lea (%r11,%r14),%rbp - # (uint32) a <<<= 18 - rol $18,%ebp - # x0 ^= a - xor %rbp,%rdx - # b = x13 + x1 - lea (%r13,%rdi),%rbp - # (uint32) b <<<= 18 - rol $18,%ebp - # x5 ^= b - xor %rbp,%r15 - # x10 = x10_stack - movq 168(%rsp),%rbp - # x5_stack = x5 - movq %r15,160(%rsp) - # c = x6 + x10 - lea (%rax,%rbp),%r15 - # (uint32) c <<<= 7 - rol $7,%r15d - # x14 ^= c - xor %r15,%rbx - # c = x10 + x14 - lea (%rbp,%rbx),%r15 - # (uint32) c <<<= 9 - rol $9,%r15d - # x2 ^= c - xor %r15,%rcx - # c = x14 + x2 - lea (%rbx,%rcx),%r15 - # (uint32) c <<<= 13 - rol $13,%r15d - # x6 ^= c - xor %r15,%rax - # c = x2 + x6 - lea (%rcx,%rax),%r15 - # (uint32) c <<<= 18 - rol $18,%r15d - # x10 ^= c - xor %r15,%rbp - # x15 = x15_stack - movq 176(%rsp),%r15 - # x10_stack = x10 - movq %rbp,168(%rsp) - # d = x11 + x15 - lea (%r12,%r15),%rbp - # (uint32) d <<<= 7 - rol $7,%ebp - # x3 ^= d - xor %rbp,%rsi - # d = x15 + x3 - lea (%r15,%rsi),%rbp - # (uint32) d <<<= 9 - rol $9,%ebp - # x7 ^= d - xor %rbp,%r8 - # d = x3 + x7 - lea (%rsi,%r8),%rbp - # (uint32) d <<<= 13 - rol $13,%ebp - # x11 ^= d - xor %rbp,%r12 - # d = x7 + x11 - lea (%r8,%r12),%rbp - # (uint32) d <<<= 18 - rol $18,%ebp - # x15 ^= d - xor %rbp,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # x5 = x5_stack - movq 160(%rsp),%r15 - # a = x3 + x0 - lea (%rsi,%rdx),%rbp - # (uint32) a <<<= 7 - rol $7,%ebp - # x1 ^= a - xor %rbp,%rdi - # b = x4 + x5 - lea (%r9,%r15),%rbp - # (uint32) b <<<= 7 - rol $7,%ebp - # x6 ^= b - xor %rbp,%rax - # a = x0 + x1 - lea (%rdx,%rdi),%rbp - # (uint32) a <<<= 9 - rol $9,%ebp - # x2 ^= a - xor %rbp,%rcx - # b = x5 + x6 - lea (%r15,%rax),%rbp - # (uint32) b <<<= 9 - rol $9,%ebp - # x7 ^= b - xor %rbp,%r8 - # a = x1 + x2 - lea (%rdi,%rcx),%rbp - # (uint32) a <<<= 13 - rol $13,%ebp - # x3 ^= a - xor %rbp,%rsi - # b = x6 + x7 - lea (%rax,%r8),%rbp - # (uint32) b <<<= 13 - rol $13,%ebp - # x4 ^= b - xor %rbp,%r9 - # a = x2 + x3 - lea (%rcx,%rsi),%rbp - # (uint32) a <<<= 18 - rol $18,%ebp - # x0 ^= a - xor %rbp,%rdx - # b = x7 + x4 - lea (%r8,%r9),%rbp - # (uint32) b <<<= 18 - rol $18,%ebp - # x5 ^= b - xor %rbp,%r15 - # x10 = x10_stack - movq 168(%rsp),%rbp - # x5_stack = x5 - movq %r15,160(%rsp) - # c = x9 + x10 - lea (%r10,%rbp),%r15 - # (uint32) c <<<= 7 - rol $7,%r15d - # x11 ^= c - xor %r15,%r12 - # c = x10 + x11 - lea (%rbp,%r12),%r15 - # (uint32) c <<<= 9 - rol $9,%r15d - # x8 ^= c - xor %r15,%r11 - # c = x11 + x8 - lea (%r12,%r11),%r15 - # (uint32) c <<<= 13 - rol $13,%r15d - # x9 ^= c - xor %r15,%r10 - # c = x8 + x9 - lea (%r11,%r10),%r15 - # (uint32) c <<<= 18 - rol $18,%r15d - # x10 ^= c - xor %r15,%rbp - # x15 = x15_stack - movq 176(%rsp),%r15 - # x10_stack = x10 - movq %rbp,168(%rsp) - # d = x14 + x15 - lea (%rbx,%r15),%rbp - # (uint32) d <<<= 7 - rol $7,%ebp - # x12 ^= d - xor %rbp,%r14 - # d = x15 + x12 - lea (%r15,%r14),%rbp - # (uint32) d <<<= 9 - rol $9,%ebp - # x13 ^= d - xor %rbp,%r13 - # d = x12 + x13 - lea (%r14,%r13),%rbp - # (uint32) d <<<= 13 - rol $13,%ebp - # x14 ^= d - xor %rbp,%rbx - # d = x13 + x14 - lea (%r13,%rbx),%rbp - # (uint32) d <<<= 18 - rol $18,%ebp - # x15 ^= d - xor %rbp,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # x5 = x5_stack - movq 160(%rsp),%r15 - # a = x12 + x0 - lea (%r14,%rdx),%rbp - # (uint32) a <<<= 7 - rol $7,%ebp - # x4 ^= a - xor %rbp,%r9 - # b = x1 + x5 - lea (%rdi,%r15),%rbp - # (uint32) b <<<= 7 - rol $7,%ebp - # x9 ^= b - xor %rbp,%r10 - # a = x0 + x4 - lea (%rdx,%r9),%rbp - # (uint32) a <<<= 9 - rol $9,%ebp - # x8 ^= a - xor %rbp,%r11 - # b = x5 + x9 - lea (%r15,%r10),%rbp - # (uint32) b <<<= 9 - rol $9,%ebp - # x13 ^= b - xor %rbp,%r13 - # a = x4 + x8 - lea (%r9,%r11),%rbp - # (uint32) a <<<= 13 - rol $13,%ebp - # x12 ^= a - xor %rbp,%r14 - # b = x9 + x13 - lea (%r10,%r13),%rbp - # (uint32) b <<<= 13 - rol $13,%ebp - # x1 ^= b - xor %rbp,%rdi - # a = x8 + x12 - lea (%r11,%r14),%rbp - # (uint32) a <<<= 18 - rol $18,%ebp - # x0 ^= a - xor %rbp,%rdx - # b = x13 + x1 - lea (%r13,%rdi),%rbp - # (uint32) b <<<= 18 - rol $18,%ebp - # x5 ^= b - xor %rbp,%r15 - # x10 = x10_stack - movq 168(%rsp),%rbp - # x5_stack = x5 - movq %r15,160(%rsp) - # c = x6 + x10 - lea (%rax,%rbp),%r15 - # (uint32) c <<<= 7 - rol $7,%r15d - # x14 ^= c - xor %r15,%rbx - # c = x10 + x14 - lea (%rbp,%rbx),%r15 - # (uint32) c <<<= 9 - rol $9,%r15d - # x2 ^= c - xor %r15,%rcx - # c = x14 + x2 - lea (%rbx,%rcx),%r15 - # (uint32) c <<<= 13 - rol $13,%r15d - # x6 ^= c - xor %r15,%rax - # c = x2 + x6 - lea (%rcx,%rax),%r15 - # (uint32) c <<<= 18 - rol $18,%r15d - # x10 ^= c - xor %r15,%rbp - # x15 = x15_stack - movq 176(%rsp),%r15 - # x10_stack = x10 - movq %rbp,168(%rsp) - # d = x11 + x15 - lea (%r12,%r15),%rbp - # (uint32) d <<<= 7 - rol $7,%ebp - # x3 ^= d - xor %rbp,%rsi - # d = x15 + x3 - lea (%r15,%rsi),%rbp - # (uint32) d <<<= 9 - rol $9,%ebp - # x7 ^= d - xor %rbp,%r8 - # d = x3 + x7 - lea (%rsi,%r8),%rbp - # (uint32) d <<<= 13 - rol $13,%ebp - # x11 ^= d - xor %rbp,%r12 - # d = x7 + x11 - lea (%r8,%r12),%rbp - # (uint32) d <<<= 18 - rol $18,%ebp - # x15 ^= d - xor %rbp,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # x5 = x5_stack - movq 160(%rsp),%r15 - # a = x3 + x0 - lea (%rsi,%rdx),%rbp - # (uint32) a <<<= 7 - rol $7,%ebp - # x1 ^= a - xor %rbp,%rdi - # b = x4 + x5 - lea (%r9,%r15),%rbp - # (uint32) b <<<= 7 - rol $7,%ebp - # x6 ^= b - xor %rbp,%rax - # a = x0 + x1 - lea (%rdx,%rdi),%rbp - # (uint32) a <<<= 9 - rol $9,%ebp - # x2 ^= a - xor %rbp,%rcx - # b = x5 + x6 - lea (%r15,%rax),%rbp - # (uint32) b <<<= 9 - rol $9,%ebp - # x7 ^= b - xor %rbp,%r8 - # a = x1 + x2 - lea (%rdi,%rcx),%rbp - # (uint32) a <<<= 13 - rol $13,%ebp - # x3 ^= a - xor %rbp,%rsi - # b = x6 + x7 - lea (%rax,%r8),%rbp - # (uint32) b <<<= 13 - rol $13,%ebp - # x4 ^= b - xor %rbp,%r9 - # a = x2 + x3 - lea (%rcx,%rsi),%rbp - # (uint32) a <<<= 18 - rol $18,%ebp - # x0 ^= a - xor %rbp,%rdx - # b = x7 + x4 - lea (%r8,%r9),%rbp - # (uint32) b <<<= 18 - rol $18,%ebp - # x5 ^= b - xor %rbp,%r15 - # x10 = x10_stack - movq 168(%rsp),%rbp - # x5_stack = x5 - movq %r15,160(%rsp) - # c = x9 + x10 - lea (%r10,%rbp),%r15 - # (uint32) c <<<= 7 - rol $7,%r15d - # x11 ^= c - xor %r15,%r12 - # c = x10 + x11 - lea (%rbp,%r12),%r15 - # (uint32) c <<<= 9 - rol $9,%r15d - # x8 ^= c - xor %r15,%r11 - # c = x11 + x8 - lea (%r12,%r11),%r15 - # (uint32) c <<<= 13 - rol $13,%r15d - # x9 ^= c - xor %r15,%r10 - # c = x8 + x9 - lea (%r11,%r10),%r15 - # (uint32) c <<<= 18 - rol $18,%r15d - # x10 ^= c - xor %r15,%rbp - # x15 = x15_stack - movq 176(%rsp),%r15 - # x10_stack = x10 - movq %rbp,168(%rsp) - # d = x14 + x15 - lea (%rbx,%r15),%rbp - # (uint32) d <<<= 7 - rol $7,%ebp - # x12 ^= d - xor %rbp,%r14 - # d = x15 + x12 - lea (%r15,%r14),%rbp - # (uint32) d <<<= 9 - rol $9,%ebp - # x13 ^= d - xor %rbp,%r13 - # d = x12 + x13 - lea (%r14,%r13),%rbp - # (uint32) d <<<= 13 - rol $13,%ebp - # x14 ^= d - xor %rbp,%rbx - # d = x13 + x14 - lea (%r13,%rbx),%rbp - # (uint32) d <<<= 18 - rol $18,%ebp - # x15 ^= d - xor %rbp,%r15 - # x15_stack = x15 - movq %r15,176(%rsp) - # i = i_backup - movq 184(%rsp),%r15 - # unsigned>? i -= 4 - sub $4,%r15 - # comment:fp stack unchanged by jump - # goto mainloop if unsigned> - ja ._mainloop - # (uint32) x2 += j2 - addl 64(%rsp),%ecx - # x3 <<= 32 - shl $32,%rsi - # x3 += j2 - addq 64(%rsp),%rsi - # (uint64) x3 >>= 32 - shr $32,%rsi - # x3 <<= 32 - shl $32,%rsi - # x2 += x3 - add %rsi,%rcx - # (uint32) x6 += j6 - addl 80(%rsp),%eax - # x7 <<= 32 - shl $32,%r8 - # x7 += j6 - addq 80(%rsp),%r8 - # (uint64) x7 >>= 32 - shr $32,%r8 - # x7 <<= 32 - shl $32,%r8 - # x6 += x7 - add %r8,%rax - # (uint32) x8 += j8 - addl 88(%rsp),%r11d - # x9 <<= 32 - shl $32,%r10 - # x9 += j8 - addq 88(%rsp),%r10 - # (uint64) x9 >>= 32 - shr $32,%r10 - # x9 <<= 32 - shl $32,%r10 - # x8 += x9 - add %r10,%r11 - # (uint32) x12 += j12 - addl 104(%rsp),%r14d - # x13 <<= 32 - shl $32,%r13 - # x13 += j12 - addq 104(%rsp),%r13 - # (uint64) x13 >>= 32 - shr $32,%r13 - # x13 <<= 32 - shl $32,%r13 - # x12 += x13 - add %r13,%r14 - # (uint32) x0 += j0 - addl 56(%rsp),%edx - # x1 <<= 32 - shl $32,%rdi - # x1 += j0 - addq 56(%rsp),%rdi - # (uint64) x1 >>= 32 - shr $32,%rdi - # x1 <<= 32 - shl $32,%rdi - # x0 += x1 - add %rdi,%rdx - # x5 = x5_stack - movq 160(%rsp),%rdi - # (uint32) x4 += j4 - addl 72(%rsp),%r9d - # x5 <<= 32 - shl $32,%rdi - # x5 += j4 - addq 72(%rsp),%rdi - # (uint64) x5 >>= 32 - shr $32,%rdi - # x5 <<= 32 - shl $32,%rdi - # x4 += x5 - add %rdi,%r9 - # x10 = x10_stack - movq 168(%rsp),%r8 - # (uint32) x10 += j10 - addl 96(%rsp),%r8d - # x11 <<= 32 - shl $32,%r12 - # x11 += j10 - addq 96(%rsp),%r12 - # (uint64) x11 >>= 32 - shr $32,%r12 - # x11 <<= 32 - shl $32,%r12 - # x10 += x11 - add %r12,%r8 - # x15 = x15_stack - movq 176(%rsp),%rdi - # (uint32) x14 += j14 - addl 112(%rsp),%ebx - # x15 <<= 32 - shl $32,%rdi - # x15 += j14 - addq 112(%rsp),%rdi - # (uint64) x15 >>= 32 - shr $32,%rdi - # x15 <<= 32 - shl $32,%rdi - # x14 += x15 - add %rdi,%rbx - # out = out_backup - movq 136(%rsp),%rdi - # m = m_backup - movq 144(%rsp),%rsi - # x0 ^= *(uint64 *) (m + 0) - xorq 0(%rsi),%rdx - # *(uint64 *) (out + 0) = x0 - movq %rdx,0(%rdi) - # x2 ^= *(uint64 *) (m + 8) - xorq 8(%rsi),%rcx - # *(uint64 *) (out + 8) = x2 - movq %rcx,8(%rdi) - # x4 ^= *(uint64 *) (m + 16) - xorq 16(%rsi),%r9 - # *(uint64 *) (out + 16) = x4 - movq %r9,16(%rdi) - # x6 ^= *(uint64 *) (m + 24) - xorq 24(%rsi),%rax - # *(uint64 *) (out + 24) = x6 - movq %rax,24(%rdi) - # x8 ^= *(uint64 *) (m + 32) - xorq 32(%rsi),%r11 - # *(uint64 *) (out + 32) = x8 - movq %r11,32(%rdi) - # x10 ^= *(uint64 *) (m + 40) - xorq 40(%rsi),%r8 - # *(uint64 *) (out + 40) = x10 - movq %r8,40(%rdi) - # x12 ^= *(uint64 *) (m + 48) - xorq 48(%rsi),%r14 - # *(uint64 *) (out + 48) = x12 - movq %r14,48(%rdi) - # x14 ^= *(uint64 *) (m + 56) - xorq 56(%rsi),%rbx - # *(uint64 *) (out + 56) = x14 - movq %rbx,56(%rdi) - # bytes = bytes_backup - movq 152(%rsp),%rdx - # in8 = j8 - movq 88(%rsp),%rcx - # in8 += 1 - add $1,%rcx - # j8 = in8 - movq %rcx,88(%rsp) - # unsigned>? unsigned<? bytes - 64 - cmp $64,%rdx - # comment:fp stack unchanged by jump - # goto bytesatleast65 if unsigned> - ja ._bytesatleast65 - # comment:fp stack unchanged by jump - # goto bytesatleast64 if !unsigned< - jae ._bytesatleast64 - # m = out - mov %rdi,%rsi - # out = ctarget - movq 128(%rsp),%rdi - # i = bytes - mov %rdx,%rcx - # while (i) { *out++ = *m++; --i } - rep movsb - # comment:fp stack unchanged by fallthrough -# bytesatleast64: -._bytesatleast64: - # x = x_backup - movq 120(%rsp),%rdi - # in8 = j8 - movq 88(%rsp),%rsi - # *(uint64 *) (x + 32) = in8 - movq %rsi,32(%rdi) - # r11 = r11_stack - movq 0(%rsp),%r11 - # r12 = r12_stack - movq 8(%rsp),%r12 - # r13 = r13_stack - movq 16(%rsp),%r13 - # r14 = r14_stack - movq 24(%rsp),%r14 - # r15 = r15_stack - movq 32(%rsp),%r15 - # rbx = rbx_stack - movq 40(%rsp),%rbx - # rbp = rbp_stack - movq 48(%rsp),%rbp - # comment:fp stack unchanged by fallthrough -# done: -._done: - # leave - add %r11,%rsp - mov %rdi,%rax - mov %rsi,%rdx - ret -# bytesatleast65: -._bytesatleast65: - # bytes -= 64 - sub $64,%rdx - # out += 64 - add $64,%rdi - # m += 64 - add $64,%rsi - # comment:fp stack unchanged by jump - # goto bytesatleast1 - jmp ._bytesatleast1 -ENDPROC(salsa20_encrypt_bytes) diff --git a/arch/x86/crypto/salsa20_glue.c b/arch/x86/crypto/salsa20_glue.c deleted file mode 100644 index b07d7d959806..000000000000 --- a/arch/x86/crypto/salsa20_glue.c +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Glue code for optimized assembly version of Salsa20. - * - * Copyright (c) 2007 Tan Swee Heng <thesweeheng@gmail.com> - * - * The assembly codes are public domain assembly codes written by Daniel. J. - * Bernstein <djb@cr.yp.to>. The codes are modified to include indentation - * and to remove extraneous comments and functions that are not needed. - * - i586 version, renamed as salsa20-i586-asm_32.S - * available from <http://cr.yp.to/snuffle/salsa20/x86-pm/salsa20.s> - * - x86-64 version, renamed as salsa20-x86_64-asm_64.S - * available from <http://cr.yp.to/snuffle/salsa20/amd64-3/salsa20.s> - * - * Also modified to set up the initial state using the generic C code rather - * than in assembly. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the Free - * Software Foundation; either version 2 of the License, or (at your option) - * any later version. - * - */ - -#include <asm/unaligned.h> -#include <crypto/internal/skcipher.h> -#include <crypto/salsa20.h> -#include <linux/module.h> - -asmlinkage void salsa20_encrypt_bytes(u32 state[16], const u8 *src, u8 *dst, - u32 bytes); - -static int salsa20_asm_crypt(struct skcipher_request *req) -{ - struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - const struct salsa20_ctx *ctx = crypto_skcipher_ctx(tfm); - struct skcipher_walk walk; - u32 state[16]; - int err; - - err = skcipher_walk_virt(&walk, req, true); - - crypto_salsa20_init(state, ctx, walk.iv); - - while (walk.nbytes > 0) { - unsigned int nbytes = walk.nbytes; - - if (nbytes < walk.total) - nbytes = round_down(nbytes, walk.stride); - - salsa20_encrypt_bytes(state, walk.src.virt.addr, - walk.dst.virt.addr, nbytes); - err = skcipher_walk_done(&walk, walk.nbytes - nbytes); - } - - return err; -} - -static struct skcipher_alg alg = { - .base.cra_name = "salsa20", - .base.cra_driver_name = "salsa20-asm", - .base.cra_priority = 200, - .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct salsa20_ctx), - .base.cra_module = THIS_MODULE, - - .min_keysize = SALSA20_MIN_KEY_SIZE, - .max_keysize = SALSA20_MAX_KEY_SIZE, - .ivsize = SALSA20_IV_SIZE, - .chunksize = SALSA20_BLOCK_SIZE, - .setkey = crypto_salsa20_setkey, - .encrypt = salsa20_asm_crypt, - .decrypt = salsa20_asm_crypt, -}; - -static int __init init(void) -{ - return crypto_register_skcipher(&alg); -} - -static void __exit fini(void) -{ - crypto_unregister_skcipher(&alg); -} - -module_init(init); -module_exit(fini); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION ("Salsa20 stream cipher algorithm (optimized assembly version)"); -MODULE_ALIAS_CRYPTO("salsa20"); -MODULE_ALIAS_CRYPTO("salsa20-asm"); diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index d6b27dab1b30..14a2f996e543 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -396,3 +396,4 @@ 382 i386 pkey_free sys_pkey_free __ia32_sys_pkey_free 383 i386 statx sys_statx __ia32_sys_statx 384 i386 arch_prctl sys_arch_prctl __ia32_compat_sys_arch_prctl +385 i386 io_pgetevents sys_io_pgetevents __ia32_compat_sys_io_pgetevents diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 4dfe42666d0c..cd36232ab62f 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -341,6 +341,7 @@ 330 common pkey_alloc __x64_sys_pkey_alloc 331 common pkey_free __x64_sys_pkey_free 332 common statx __x64_sys_statx +333 common io_pgetevents __x64_sys_io_pgetevents # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/arch/x86/entry/vdso/Makefile b/arch/x86/entry/vdso/Makefile index d998a487c9b1..261802b1cc50 100644 --- a/arch/x86/entry/vdso/Makefile +++ b/arch/x86/entry/vdso/Makefile @@ -44,14 +44,14 @@ obj-y += $(vdso_img_objs) targets += $(vdso_img_cfiles) targets += $(vdso_img_sodbg) $(vdso_img-y:%=vdso%.so) -export CPPFLAGS_vdso.lds += -P -C +CPPFLAGS_vdso.lds += -P -C VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ -Wl,--no-undefined \ -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 \ $(DISABLE_LTO) -$(obj)/vdso64.so.dbg: $(src)/vdso.lds $(vobjs) FORCE +$(obj)/vdso64.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) HOST_EXTRACFLAGS += -I$(srctree)/tools/include -I$(srctree)/include/uapi -I$(srctree)/arch/$(SUBARCH)/include/uapi @@ -100,11 +100,8 @@ VDSO_LDFLAGS_vdsox32.lds = -Wl,-m,elf32_x86_64 \ -Wl,-z,max-page-size=4096 \ -Wl,-z,common-page-size=4096 -# 64-bit objects to re-brand as x32 -vobjs64-for-x32 := $(filter-out $(vobjs-nox32),$(vobjs-y)) - # x32-rebranded versions -vobjx32s-y := $(vobjs64-for-x32:.o=-x32.o) +vobjx32s-y := $(vobjs-y:.o=-x32.o) # same thing, but in the output directory vobjx32s := $(foreach F,$(vobjx32s-y),$(obj)/$F) @@ -122,7 +119,7 @@ $(obj)/%.so: OBJCOPYFLAGS := -S $(obj)/%.so: $(obj)/%.so.dbg $(call if_changed,objcopy) -$(obj)/vdsox32.so.dbg: $(src)/vdsox32.lds $(vobjx32s) FORCE +$(obj)/vdsox32.so.dbg: $(obj)/vdsox32.lds $(vobjx32s) FORCE $(call if_changed,vdso) CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds) diff --git a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c b/arch/x86/entry/vdso/vdso32/vdso-fakesections.c deleted file mode 100644 index 541468e25265..000000000000 --- a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c +++ /dev/null @@ -1 +0,0 @@ -#include "../vdso-fakesections.c" diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 70b7845434cb..7782cdbcd67d 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -107,7 +107,7 @@ static bool write_ok_or_segv(unsigned long ptr, size_t size) thread->cr2 = ptr; thread->trap_nr = X86_TRAP_PF; - memset(&info, 0, sizeof(info)); + clear_siginfo(&info); info.si_signo = SIGSEGV; info.si_errno = 0; info.si_code = SEGV_MAPERR; diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 786fd875de92..4b98101209a1 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -889,7 +889,7 @@ static void force_ibs_eilvt_setup(void) if (!ibs_eilvt_valid()) goto out; - pr_info("IBS: LVT offset %d assigned\n", offset); + pr_info("LVT offset %d assigned\n", offset); return; out: diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index f5cbbba99283..981ba5e8241b 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -19,6 +19,7 @@ #include <asm/cpufeature.h> #include <asm/perf_event.h> #include <asm/msr.h> +#include <asm/smp.h> #define NUM_COUNTERS_NB 4 #define NUM_COUNTERS_L2 4 @@ -399,26 +400,8 @@ static int amd_uncore_cpu_starting(unsigned int cpu) } if (amd_uncore_llc) { - unsigned int apicid = cpu_data(cpu).apicid; - unsigned int nshared, subleaf, prev_eax = 0; - uncore = *per_cpu_ptr(amd_uncore_llc, cpu); - /* - * Iterate over Cache Topology Definition leaves until no - * more cache descriptions are available. - */ - for (subleaf = 0; subleaf < 5; subleaf++) { - cpuid_count(0x8000001d, subleaf, &eax, &ebx, &ecx, &edx); - - /* EAX[0:4] gives type of cache */ - if (!(eax & 0x1f)) - break; - - prev_eax = eax; - } - nshared = ((prev_eax >> 14) & 0xfff) + 1; - - uncore->id = apicid - (apicid % nshared); + uncore->id = per_cpu(cpu_llc_id, cpu); uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc); *per_cpu_ptr(amd_uncore_llc, cpu) = uncore; diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index a6006e7bb729..6e461fb1e0d4 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -27,6 +27,7 @@ #include <linux/cpu.h> #include <linux/bitops.h> #include <linux/device.h> +#include <linux/nospec.h> #include <asm/apic.h> #include <asm/stacktrace.h> @@ -304,17 +305,20 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) config = attr->config; - cache_type = (config >> 0) & 0xff; + cache_type = (config >> 0) & 0xff; if (cache_type >= PERF_COUNT_HW_CACHE_MAX) return -EINVAL; + cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX); cache_op = (config >> 8) & 0xff; if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) return -EINVAL; + cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX); cache_result = (config >> 16) & 0xff; if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; + cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX); val = hw_cache_event_ids[cache_type][cache_op][cache_result]; @@ -421,6 +425,8 @@ int x86_setup_perfctr(struct perf_event *event) if (attr->config >= x86_pmu.max_events) return -EINVAL; + attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events); + /* * The generic map: */ @@ -2391,7 +2397,7 @@ static unsigned long get_segment_base(unsigned int segment) #ifdef CONFIG_IA32_EMULATION -#include <asm/compat.h> +#include <linux/compat.h> static inline int perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *entry) diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 9aca448bb8e6..9f8084f18d58 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -92,6 +92,7 @@ #include <linux/module.h> #include <linux/slab.h> #include <linux/perf_event.h> +#include <linux/nospec.h> #include <asm/cpu_device_id.h> #include <asm/intel-family.h> #include "../perf_event.h" @@ -302,6 +303,7 @@ static int cstate_pmu_event_init(struct perf_event *event) } else if (event->pmu == &cstate_pkg_pmu) { if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) return -EINVAL; + cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX); if (!pkg_msr[cfg].attr) return -EINVAL; event->hw.event_base = pkg_msr[cfg].msr; diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 3b993942a0e4..8d016ce5b80d 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -1194,7 +1194,7 @@ static int pt_event_addr_filters_validate(struct list_head *filters) filter->action == PERF_ADDR_FILTER_ACTION_START) return -EOPNOTSUPP; - if (!filter->inode) { + if (!filter->path.dentry) { if (!valid_kernel_ip(filter->offset)) return -EINVAL; @@ -1221,7 +1221,7 @@ static void pt_event_addr_filters_sync(struct perf_event *event) return; list_for_each_entry(filter, &head->list, entry) { - if (filter->inode && !offs[range]) { + if (filter->path.dentry && !offs[range]) { msr_a = msr_b = 0; } else { /* apply the offset */ diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index a7956fc7ca1d..15b07379e72d 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -203,7 +203,7 @@ static void uncore_assign_hw_event(struct intel_uncore_box *box, hwc->idx = idx; hwc->last_tag = ++box->tags[idx]; - if (hwc->idx == UNCORE_PMC_IDX_FIXED) { + if (uncore_pmc_fixed(hwc->idx)) { hwc->event_base = uncore_fixed_ctr(box); hwc->config_base = uncore_fixed_ctl(box); return; @@ -218,7 +218,9 @@ void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *e u64 prev_count, new_count, delta; int shift; - if (event->hw.idx >= UNCORE_PMC_IDX_FIXED) + if (uncore_pmc_freerunning(event->hw.idx)) + shift = 64 - uncore_freerunning_bits(box, event); + else if (uncore_pmc_fixed(event->hw.idx)) shift = 64 - uncore_fixed_ctr_bits(box); else shift = 64 - uncore_perf_ctr_bits(box); @@ -449,15 +451,30 @@ static int uncore_assign_events(struct intel_uncore_box *box, int assign[], int return ret ? -EINVAL : 0; } -static void uncore_pmu_event_start(struct perf_event *event, int flags) +void uncore_pmu_event_start(struct perf_event *event, int flags) { struct intel_uncore_box *box = uncore_event_to_box(event); int idx = event->hw.idx; - if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX)) return; - if (WARN_ON_ONCE(idx == -1 || idx >= UNCORE_PMC_IDX_MAX)) + /* + * Free running counter is read-only and always active. + * Use the current counter value as start point. + * There is no overflow interrupt for free running counter. + * Use hrtimer to periodically poll the counter to avoid overflow. + */ + if (uncore_pmc_freerunning(event->hw.idx)) { + list_add_tail(&event->active_entry, &box->active_list); + local64_set(&event->hw.prev_count, + uncore_read_counter(box, event)); + if (box->n_active++ == 0) + uncore_pmu_start_hrtimer(box); + return; + } + + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) return; event->hw.state = 0; @@ -474,11 +491,20 @@ static void uncore_pmu_event_start(struct perf_event *event, int flags) } } -static void uncore_pmu_event_stop(struct perf_event *event, int flags) +void uncore_pmu_event_stop(struct perf_event *event, int flags) { struct intel_uncore_box *box = uncore_event_to_box(event); struct hw_perf_event *hwc = &event->hw; + /* Cannot disable free running counter which is read-only */ + if (uncore_pmc_freerunning(hwc->idx)) { + list_del(&event->active_entry); + if (--box->n_active == 0) + uncore_pmu_cancel_hrtimer(box); + uncore_perf_event_update(box, event); + return; + } + if (__test_and_clear_bit(hwc->idx, box->active_mask)) { uncore_disable_event(box, event); box->n_active--; @@ -502,7 +528,7 @@ static void uncore_pmu_event_stop(struct perf_event *event, int flags) } } -static int uncore_pmu_event_add(struct perf_event *event, int flags) +int uncore_pmu_event_add(struct perf_event *event, int flags) { struct intel_uncore_box *box = uncore_event_to_box(event); struct hw_perf_event *hwc = &event->hw; @@ -512,6 +538,17 @@ static int uncore_pmu_event_add(struct perf_event *event, int flags) if (!box) return -ENODEV; + /* + * The free funning counter is assigned in event_init(). + * The free running counter event and free running counter + * are 1:1 mapped. It doesn't need to be tracked in event_list. + */ + if (uncore_pmc_freerunning(hwc->idx)) { + if (flags & PERF_EF_START) + uncore_pmu_event_start(event, 0); + return 0; + } + ret = n = uncore_collect_events(box, event, false); if (ret < 0) return ret; @@ -563,13 +600,21 @@ static int uncore_pmu_event_add(struct perf_event *event, int flags) return 0; } -static void uncore_pmu_event_del(struct perf_event *event, int flags) +void uncore_pmu_event_del(struct perf_event *event, int flags) { struct intel_uncore_box *box = uncore_event_to_box(event); int i; uncore_pmu_event_stop(event, PERF_EF_UPDATE); + /* + * The event for free running counter is not tracked by event_list. + * It doesn't need to force event->hw.idx = -1 to reassign the counter. + * Because the event and the free running counter are 1:1 mapped. + */ + if (uncore_pmc_freerunning(event->hw.idx)) + return; + for (i = 0; i < box->n_events; i++) { if (event == box->event_list[i]) { uncore_put_event_constraint(box, event); @@ -603,6 +648,10 @@ static int uncore_validate_group(struct intel_uncore_pmu *pmu, struct intel_uncore_box *fake_box; int ret = -EINVAL, n; + /* The free running counter is always active. */ + if (uncore_pmc_freerunning(event->hw.idx)) + return 0; + fake_box = uncore_alloc_box(pmu->type, NUMA_NO_NODE); if (!fake_box) return -ENOMEM; @@ -690,6 +739,17 @@ static int uncore_pmu_event_init(struct perf_event *event) /* fixed counters have event field hardcoded to zero */ hwc->config = 0ULL; + } else if (is_freerunning_event(event)) { + if (!check_valid_freerunning_event(box, event)) + return -EINVAL; + event->hw.idx = UNCORE_PMC_IDX_FREERUNNING; + /* + * The free running counter event and free running counter + * are always 1:1 mapped. + * The free running counter is always active. + * Assign the free running counter here. + */ + event->hw.event_base = uncore_freerunning_counter(box, event); } else { hwc->config = event->attr.config & (pmu->type->event_mask | ((u64)pmu->type->event_mask_ext << 32)); diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index 414dc7e7c950..c9e1e0bef3c3 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -12,8 +12,13 @@ #define UNCORE_FIXED_EVENT 0xff #define UNCORE_PMC_IDX_MAX_GENERIC 8 +#define UNCORE_PMC_IDX_MAX_FIXED 1 +#define UNCORE_PMC_IDX_MAX_FREERUNNING 1 #define UNCORE_PMC_IDX_FIXED UNCORE_PMC_IDX_MAX_GENERIC -#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FIXED + 1) +#define UNCORE_PMC_IDX_FREERUNNING (UNCORE_PMC_IDX_FIXED + \ + UNCORE_PMC_IDX_MAX_FIXED) +#define UNCORE_PMC_IDX_MAX (UNCORE_PMC_IDX_FREERUNNING + \ + UNCORE_PMC_IDX_MAX_FREERUNNING) #define UNCORE_PCI_DEV_FULL_DATA(dev, func, type, idx) \ ((dev << 24) | (func << 16) | (type << 8) | idx) @@ -35,6 +40,7 @@ struct intel_uncore_ops; struct intel_uncore_pmu; struct intel_uncore_box; struct uncore_event_desc; +struct freerunning_counters; struct intel_uncore_type { const char *name; @@ -42,6 +48,7 @@ struct intel_uncore_type { int num_boxes; int perf_ctr_bits; int fixed_ctr_bits; + int num_freerunning_types; unsigned perf_ctr; unsigned event_ctl; unsigned event_mask; @@ -59,6 +66,7 @@ struct intel_uncore_type { struct intel_uncore_pmu *pmus; struct intel_uncore_ops *ops; struct uncore_event_desc *event_descs; + struct freerunning_counters *freerunning; const struct attribute_group *attr_groups[4]; struct pmu *pmu; /* for custom pmu ops */ }; @@ -129,6 +137,14 @@ struct uncore_event_desc { const char *config; }; +struct freerunning_counters { + unsigned int counter_base; + unsigned int counter_offset; + unsigned int box_offset; + unsigned int num_counters; + unsigned int bits; +}; + struct pci2phy_map { struct list_head list; int segment; @@ -157,6 +173,16 @@ static ssize_t __uncore_##_var##_show(struct kobject *kobj, \ static struct kobj_attribute format_attr_##_var = \ __ATTR(_name, 0444, __uncore_##_var##_show, NULL) +static inline bool uncore_pmc_fixed(int idx) +{ + return idx == UNCORE_PMC_IDX_FIXED; +} + +static inline bool uncore_pmc_freerunning(int idx) +{ + return idx == UNCORE_PMC_IDX_FREERUNNING; +} + static inline unsigned uncore_pci_box_ctl(struct intel_uncore_box *box) { return box->pmu->type->box_ctl; @@ -214,6 +240,60 @@ static inline unsigned uncore_msr_fixed_ctr(struct intel_uncore_box *box) return box->pmu->type->fixed_ctr + uncore_msr_box_offset(box); } + +/* + * In the uncore document, there is no event-code assigned to free running + * counters. Some events need to be defined to indicate the free running + * counters. The events are encoded as event-code + umask-code. + * + * The event-code for all free running counters is 0xff, which is the same as + * the fixed counters. + * + * The umask-code is used to distinguish a fixed counter and a free running + * counter, and different types of free running counters. + * - For fixed counters, the umask-code is 0x0X. + * X indicates the index of the fixed counter, which starts from 0. + * - For free running counters, the umask-code uses the rest of the space. + * It would bare the format of 0xXY. + * X stands for the type of free running counters, which starts from 1. + * Y stands for the index of free running counters of same type, which + * starts from 0. + * + * For example, there are three types of IIO free running counters on Skylake + * server, IO CLOCKS counters, BANDWIDTH counters and UTILIZATION counters. + * The event-code for all the free running counters is 0xff. + * 'ioclk' is the first counter of IO CLOCKS. IO CLOCKS is the first type, + * which umask-code starts from 0x10. + * So 'ioclk' is encoded as event=0xff,umask=0x10 + * 'bw_in_port2' is the third counter of BANDWIDTH counters. BANDWIDTH is + * the second type, which umask-code starts from 0x20. + * So 'bw_in_port2' is encoded as event=0xff,umask=0x22 + */ +static inline unsigned int uncore_freerunning_idx(u64 config) +{ + return ((config >> 8) & 0xf); +} + +#define UNCORE_FREERUNNING_UMASK_START 0x10 + +static inline unsigned int uncore_freerunning_type(u64 config) +{ + return ((((config >> 8) - UNCORE_FREERUNNING_UMASK_START) >> 4) & 0xf); +} + +static inline +unsigned int uncore_freerunning_counter(struct intel_uncore_box *box, + struct perf_event *event) +{ + unsigned int type = uncore_freerunning_type(event->attr.config); + unsigned int idx = uncore_freerunning_idx(event->attr.config); + struct intel_uncore_pmu *pmu = box->pmu; + + return pmu->type->freerunning[type].counter_base + + pmu->type->freerunning[type].counter_offset * idx + + pmu->type->freerunning[type].box_offset * pmu->pmu_idx; +} + static inline unsigned uncore_msr_event_ctl(struct intel_uncore_box *box, int idx) { @@ -276,11 +356,52 @@ static inline int uncore_fixed_ctr_bits(struct intel_uncore_box *box) return box->pmu->type->fixed_ctr_bits; } +static inline +unsigned int uncore_freerunning_bits(struct intel_uncore_box *box, + struct perf_event *event) +{ + unsigned int type = uncore_freerunning_type(event->attr.config); + + return box->pmu->type->freerunning[type].bits; +} + +static inline int uncore_num_freerunning(struct intel_uncore_box *box, + struct perf_event *event) +{ + unsigned int type = uncore_freerunning_type(event->attr.config); + + return box->pmu->type->freerunning[type].num_counters; +} + +static inline int uncore_num_freerunning_types(struct intel_uncore_box *box, + struct perf_event *event) +{ + return box->pmu->type->num_freerunning_types; +} + +static inline bool check_valid_freerunning_event(struct intel_uncore_box *box, + struct perf_event *event) +{ + unsigned int type = uncore_freerunning_type(event->attr.config); + unsigned int idx = uncore_freerunning_idx(event->attr.config); + + return (type < uncore_num_freerunning_types(box, event)) && + (idx < uncore_num_freerunning(box, event)); +} + static inline int uncore_num_counters(struct intel_uncore_box *box) { return box->pmu->type->num_counters; } +static inline bool is_freerunning_event(struct perf_event *event) +{ + u64 cfg = event->attr.config; + + return ((cfg & UNCORE_FIXED_EVENT) == UNCORE_FIXED_EVENT) && + (((cfg >> 8) & 0xff) >= UNCORE_FREERUNNING_UMASK_START); +} + static inline void uncore_disable_box(struct intel_uncore_box *box) { if (box->pmu->type->ops->disable_box) @@ -346,6 +467,10 @@ struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event); void uncore_pmu_start_hrtimer(struct intel_uncore_box *box); void uncore_pmu_cancel_hrtimer(struct intel_uncore_box *box); +void uncore_pmu_event_start(struct perf_event *event, int flags); +void uncore_pmu_event_stop(struct perf_event *event, int flags); +int uncore_pmu_event_add(struct perf_event *event, int flags); +void uncore_pmu_event_del(struct perf_event *event, int flags); void uncore_pmu_event_read(struct perf_event *event); void uncore_perf_event_update(struct intel_uncore_box *box, struct perf_event *event); struct event_constraint * diff --git a/arch/x86/events/intel/uncore_nhmex.c b/arch/x86/events/intel/uncore_nhmex.c index 93e7a8397cde..173e2674be6e 100644 --- a/arch/x86/events/intel/uncore_nhmex.c +++ b/arch/x86/events/intel/uncore_nhmex.c @@ -246,7 +246,7 @@ static void nhmex_uncore_msr_enable_event(struct intel_uncore_box *box, struct p { struct hw_perf_event *hwc = &event->hw; - if (hwc->idx >= UNCORE_PMC_IDX_FIXED) + if (hwc->idx == UNCORE_PMC_IDX_FIXED) wrmsrl(hwc->config_base, NHMEX_PMON_CTL_EN_BIT0); else if (box->pmu->type->event_mask & NHMEX_PMON_CTL_EN_BIT0) wrmsrl(hwc->config_base, hwc->config | NHMEX_PMON_CTL_EN_BIT22); diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index aee5e8496be4..8527c3e1038b 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -285,6 +285,15 @@ static struct uncore_event_desc snb_uncore_imc_events[] = { #define SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE 0x5054 #define SNB_UNCORE_PCI_IMC_CTR_BASE SNB_UNCORE_PCI_IMC_DATA_READS_BASE +enum perf_snb_uncore_imc_freerunning_types { + SNB_PCI_UNCORE_IMC_DATA = 0, + SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX, +}; + +static struct freerunning_counters snb_uncore_imc_freerunning[] = { + [SNB_PCI_UNCORE_IMC_DATA] = { SNB_UNCORE_PCI_IMC_DATA_READS_BASE, 0x4, 0x0, 2, 32 }, +}; + static struct attribute *snb_uncore_imc_formats_attr[] = { &format_attr_event.attr, NULL, @@ -341,9 +350,8 @@ static u64 snb_uncore_imc_read_counter(struct intel_uncore_box *box, struct perf } /* - * custom event_init() function because we define our own fixed, free - * running counters, so we do not want to conflict with generic uncore - * logic. Also simplifies processing + * Keep the custom event_init() function compatible with old event + * encoding for free running counters. */ static int snb_uncore_imc_event_init(struct perf_event *event) { @@ -405,11 +413,11 @@ static int snb_uncore_imc_event_init(struct perf_event *event) switch (cfg) { case SNB_UNCORE_PCI_IMC_DATA_READS: base = SNB_UNCORE_PCI_IMC_DATA_READS_BASE; - idx = UNCORE_PMC_IDX_FIXED; + idx = UNCORE_PMC_IDX_FREERUNNING; break; case SNB_UNCORE_PCI_IMC_DATA_WRITES: base = SNB_UNCORE_PCI_IMC_DATA_WRITES_BASE; - idx = UNCORE_PMC_IDX_FIXED + 1; + idx = UNCORE_PMC_IDX_FREERUNNING; break; default: return -EINVAL; @@ -430,75 +438,6 @@ static int snb_uncore_imc_hw_config(struct intel_uncore_box *box, struct perf_ev return 0; } -static void snb_uncore_imc_event_start(struct perf_event *event, int flags) -{ - struct intel_uncore_box *box = uncore_event_to_box(event); - u64 count; - - if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) - return; - - event->hw.state = 0; - box->n_active++; - - list_add_tail(&event->active_entry, &box->active_list); - - count = snb_uncore_imc_read_counter(box, event); - local64_set(&event->hw.prev_count, count); - - if (box->n_active == 1) - uncore_pmu_start_hrtimer(box); -} - -static void snb_uncore_imc_event_stop(struct perf_event *event, int flags) -{ - struct intel_uncore_box *box = uncore_event_to_box(event); - struct hw_perf_event *hwc = &event->hw; - - if (!(hwc->state & PERF_HES_STOPPED)) { - box->n_active--; - - WARN_ON_ONCE(hwc->state & PERF_HES_STOPPED); - hwc->state |= PERF_HES_STOPPED; - - list_del(&event->active_entry); - - if (box->n_active == 0) - uncore_pmu_cancel_hrtimer(box); - } - - if ((flags & PERF_EF_UPDATE) && !(hwc->state & PERF_HES_UPTODATE)) { - /* - * Drain the remaining delta count out of a event - * that we are disabling: - */ - uncore_perf_event_update(box, event); - hwc->state |= PERF_HES_UPTODATE; - } -} - -static int snb_uncore_imc_event_add(struct perf_event *event, int flags) -{ - struct intel_uncore_box *box = uncore_event_to_box(event); - struct hw_perf_event *hwc = &event->hw; - - if (!box) - return -ENODEV; - - hwc->state = PERF_HES_UPTODATE | PERF_HES_STOPPED; - if (!(flags & PERF_EF_START)) - hwc->state |= PERF_HES_ARCH; - - snb_uncore_imc_event_start(event, 0); - - return 0; -} - -static void snb_uncore_imc_event_del(struct perf_event *event, int flags) -{ - snb_uncore_imc_event_stop(event, PERF_EF_UPDATE); -} - int snb_pci2phy_map_init(int devid) { struct pci_dev *dev = NULL; @@ -530,10 +469,10 @@ int snb_pci2phy_map_init(int devid) static struct pmu snb_uncore_imc_pmu = { .task_ctx_nr = perf_invalid_context, .event_init = snb_uncore_imc_event_init, - .add = snb_uncore_imc_event_add, - .del = snb_uncore_imc_event_del, - .start = snb_uncore_imc_event_start, - .stop = snb_uncore_imc_event_stop, + .add = uncore_pmu_event_add, + .del = uncore_pmu_event_del, + .start = uncore_pmu_event_start, + .stop = uncore_pmu_event_stop, .read = uncore_pmu_event_read, }; @@ -552,12 +491,10 @@ static struct intel_uncore_type snb_uncore_imc = { .name = "imc", .num_counters = 2, .num_boxes = 1, - .fixed_ctr_bits = 32, - .fixed_ctr = SNB_UNCORE_PCI_IMC_CTR_BASE, + .num_freerunning_types = SNB_PCI_UNCORE_IMC_FREERUNNING_TYPE_MAX, + .freerunning = snb_uncore_imc_freerunning, .event_descs = snb_uncore_imc_events, .format_group = &snb_uncore_imc_format_group, - .perf_ctr = SNB_UNCORE_PCI_IMC_DATA_READS_BASE, - .event_mask = SNB_UNCORE_PCI_IMC_EVENT_MASK, .ops = &snb_uncore_imc_ops, .pmu = &snb_uncore_imc_pmu, }; diff --git a/arch/x86/events/intel/uncore_snbep.c b/arch/x86/events/intel/uncore_snbep.c index 77076a102e34..87dc0263a2e1 100644 --- a/arch/x86/events/intel/uncore_snbep.c +++ b/arch/x86/events/intel/uncore_snbep.c @@ -3522,6 +3522,87 @@ static struct intel_uncore_type skx_uncore_iio = { .format_group = &skx_uncore_iio_format_group, }; +enum perf_uncore_iio_freerunning_type_id { + SKX_IIO_MSR_IOCLK = 0, + SKX_IIO_MSR_BW = 1, + SKX_IIO_MSR_UTIL = 2, + + SKX_IIO_FREERUNNING_TYPE_MAX, +}; + + +static struct freerunning_counters skx_iio_freerunning[] = { + [SKX_IIO_MSR_IOCLK] = { 0xa45, 0x1, 0x20, 1, 36 }, + [SKX_IIO_MSR_BW] = { 0xb00, 0x1, 0x10, 8, 36 }, + [SKX_IIO_MSR_UTIL] = { 0xb08, 0x1, 0x10, 8, 36 }, +}; + +static struct uncore_event_desc skx_uncore_iio_freerunning_events[] = { + /* Free-Running IO CLOCKS Counter */ + INTEL_UNCORE_EVENT_DESC(ioclk, "event=0xff,umask=0x10"), + /* Free-Running IIO BANDWIDTH Counters */ + INTEL_UNCORE_EVENT_DESC(bw_in_port0, "event=0xff,umask=0x20"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1, "event=0xff,umask=0x21"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2, "event=0xff,umask=0x22"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3, "event=0xff,umask=0x23"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_in_port3.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port0, "event=0xff,umask=0x24"), + INTEL_UNCORE_EVENT_DESC(bw_out_port0.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port0.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1, "event=0xff,umask=0x25"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port1.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2, "event=0xff,umask=0x26"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port2.unit, "MiB"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3, "event=0xff,umask=0x27"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3.scale, "3.814697266e-6"), + INTEL_UNCORE_EVENT_DESC(bw_out_port3.unit, "MiB"), + /* Free-running IIO UTILIZATION Counters */ + INTEL_UNCORE_EVENT_DESC(util_in_port0, "event=0xff,umask=0x30"), + INTEL_UNCORE_EVENT_DESC(util_out_port0, "event=0xff,umask=0x31"), + INTEL_UNCORE_EVENT_DESC(util_in_port1, "event=0xff,umask=0x32"), + INTEL_UNCORE_EVENT_DESC(util_out_port1, "event=0xff,umask=0x33"), + INTEL_UNCORE_EVENT_DESC(util_in_port2, "event=0xff,umask=0x34"), + INTEL_UNCORE_EVENT_DESC(util_out_port2, "event=0xff,umask=0x35"), + INTEL_UNCORE_EVENT_DESC(util_in_port3, "event=0xff,umask=0x36"), + INTEL_UNCORE_EVENT_DESC(util_out_port3, "event=0xff,umask=0x37"), + { /* end: all zeroes */ }, +}; + +static struct intel_uncore_ops skx_uncore_iio_freerunning_ops = { + .read_counter = uncore_msr_read_counter, +}; + +static struct attribute *skx_uncore_iio_freerunning_formats_attr[] = { + &format_attr_event.attr, + &format_attr_umask.attr, + NULL, +}; + +static const struct attribute_group skx_uncore_iio_freerunning_format_group = { + .name = "format", + .attrs = skx_uncore_iio_freerunning_formats_attr, +}; + +static struct intel_uncore_type skx_uncore_iio_free_running = { + .name = "iio_free_running", + .num_counters = 17, + .num_boxes = 6, + .num_freerunning_types = SKX_IIO_FREERUNNING_TYPE_MAX, + .freerunning = skx_iio_freerunning, + .ops = &skx_uncore_iio_freerunning_ops, + .event_descs = skx_uncore_iio_freerunning_events, + .format_group = &skx_uncore_iio_freerunning_format_group, +}; + static struct attribute *skx_uncore_formats_attr[] = { &format_attr_event.attr, &format_attr_umask.attr, @@ -3595,6 +3676,7 @@ static struct intel_uncore_type *skx_msr_uncores[] = { &skx_uncore_ubox, &skx_uncore_chabox, &skx_uncore_iio, + &skx_uncore_iio_free_running, &skx_uncore_irp, &skx_uncore_pcu, NULL, diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index e7edf19e64c2..b4771a6ddbc1 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <linux/perf_event.h> +#include <linux/nospec.h> #include <asm/intel-family.h> enum perf_msr_id { @@ -158,9 +159,6 @@ static int msr_event_init(struct perf_event *event) if (event->attr.type != event->pmu->type) return -ENOENT; - if (cfg >= PERF_MSR_EVENT_MAX) - return -EINVAL; - /* unsupported modes and filters */ if (event->attr.exclude_user || event->attr.exclude_kernel || @@ -171,6 +169,11 @@ static int msr_event_init(struct perf_event *event) event->attr.sample_period) /* no sampling */ return -EINVAL; + if (cfg >= PERF_MSR_EVENT_MAX) + return -EINVAL; + + cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX); + if (!msr[cfg].attr) return -EINVAL; diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile index 367a8203cfcf..b173d404e3df 100644 --- a/arch/x86/hyperv/Makefile +++ b/arch/x86/hyperv/Makefile @@ -1 +1,2 @@ -obj-y := hv_init.o mmu.o +obj-y := hv_init.o mmu.o +obj-$(CONFIG_X86_64) += hv_apic.o diff --git a/arch/x86/hyperv/hv_apic.c b/arch/x86/hyperv/hv_apic.c new file mode 100644 index 000000000000..f68855499391 --- /dev/null +++ b/arch/x86/hyperv/hv_apic.c @@ -0,0 +1,256 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Hyper-V specific APIC code. + * + * Copyright (C) 2018, Microsoft, Inc. + * + * Author : K. Y. Srinivasan <kys@microsoft.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + */ + +#include <linux/types.h> +#include <linux/version.h> +#include <linux/vmalloc.h> +#include <linux/mm.h> +#include <linux/clockchips.h> +#include <linux/hyperv.h> +#include <linux/slab.h> +#include <linux/cpuhotplug.h> +#include <asm/hypervisor.h> +#include <asm/mshyperv.h> +#include <asm/apic.h> + +static struct apic orig_apic; + +static u64 hv_apic_icr_read(void) +{ + u64 reg_val; + + rdmsrl(HV_X64_MSR_ICR, reg_val); + return reg_val; +} + +static void hv_apic_icr_write(u32 low, u32 id) +{ + u64 reg_val; + + reg_val = SET_APIC_DEST_FIELD(id); + reg_val = reg_val << 32; + reg_val |= low; + + wrmsrl(HV_X64_MSR_ICR, reg_val); +} + +static u32 hv_apic_read(u32 reg) +{ + u32 reg_val, hi; + + switch (reg) { + case APIC_EOI: + rdmsr(HV_X64_MSR_EOI, reg_val, hi); + return reg_val; + case APIC_TASKPRI: + rdmsr(HV_X64_MSR_TPR, reg_val, hi); + return reg_val; + + default: + return native_apic_mem_read(reg); + } +} + +static void hv_apic_write(u32 reg, u32 val) +{ + switch (reg) { + case APIC_EOI: + wrmsr(HV_X64_MSR_EOI, val, 0); + break; + case APIC_TASKPRI: + wrmsr(HV_X64_MSR_TPR, val, 0); + break; + default: + native_apic_mem_write(reg, val); + } +} + +static void hv_apic_eoi_write(u32 reg, u32 val) +{ + wrmsr(HV_X64_MSR_EOI, val, 0); +} + +/* + * IPI implementation on Hyper-V. + */ +static bool __send_ipi_mask_ex(const struct cpumask *mask, int vector) +{ + struct ipi_arg_ex **arg; + struct ipi_arg_ex *ipi_arg; + unsigned long flags; + int nr_bank = 0; + int ret = 1; + + local_irq_save(flags); + arg = (struct ipi_arg_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); + + ipi_arg = *arg; + if (unlikely(!ipi_arg)) + goto ipi_mask_ex_done; + + ipi_arg->vector = vector; + ipi_arg->reserved = 0; + ipi_arg->vp_set.valid_bank_mask = 0; + + if (!cpumask_equal(mask, cpu_present_mask)) { + ipi_arg->vp_set.format = HV_GENERIC_SET_SPARSE_4K; + nr_bank = cpumask_to_vpset(&(ipi_arg->vp_set), mask); + } + if (!nr_bank) + ipi_arg->vp_set.format = HV_GENERIC_SET_ALL; + + ret = hv_do_rep_hypercall(HVCALL_SEND_IPI_EX, 0, nr_bank, + ipi_arg, NULL); + +ipi_mask_ex_done: + local_irq_restore(flags); + return ((ret == 0) ? true : false); +} + +static bool __send_ipi_mask(const struct cpumask *mask, int vector) +{ + int cur_cpu, vcpu; + struct ipi_arg_non_ex **arg; + struct ipi_arg_non_ex *ipi_arg; + int ret = 1; + unsigned long flags; + + if (cpumask_empty(mask)) + return true; + + if (!hv_hypercall_pg) + return false; + + if ((vector < HV_IPI_LOW_VECTOR) || (vector > HV_IPI_HIGH_VECTOR)) + return false; + + if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) + return __send_ipi_mask_ex(mask, vector); + + local_irq_save(flags); + arg = (struct ipi_arg_non_ex **)this_cpu_ptr(hyperv_pcpu_input_arg); + + ipi_arg = *arg; + if (unlikely(!ipi_arg)) + goto ipi_mask_done; + + ipi_arg->vector = vector; + ipi_arg->reserved = 0; + ipi_arg->cpu_mask = 0; + + for_each_cpu(cur_cpu, mask) { + vcpu = hv_cpu_number_to_vp_number(cur_cpu); + /* + * This particular version of the IPI hypercall can + * only target upto 64 CPUs. + */ + if (vcpu >= 64) + goto ipi_mask_done; + + __set_bit(vcpu, (unsigned long *)&ipi_arg->cpu_mask); + } + + ret = hv_do_hypercall(HVCALL_SEND_IPI, ipi_arg, NULL); + +ipi_mask_done: + local_irq_restore(flags); + return ((ret == 0) ? true : false); +} + +static bool __send_ipi_one(int cpu, int vector) +{ + struct cpumask mask = CPU_MASK_NONE; + + cpumask_set_cpu(cpu, &mask); + return __send_ipi_mask(&mask, vector); +} + +static void hv_send_ipi(int cpu, int vector) +{ + if (!__send_ipi_one(cpu, vector)) + orig_apic.send_IPI(cpu, vector); +} + +static void hv_send_ipi_mask(const struct cpumask *mask, int vector) +{ + if (!__send_ipi_mask(mask, vector)) + orig_apic.send_IPI_mask(mask, vector); +} + +static void hv_send_ipi_mask_allbutself(const struct cpumask *mask, int vector) +{ + unsigned int this_cpu = smp_processor_id(); + struct cpumask new_mask; + const struct cpumask *local_mask; + + cpumask_copy(&new_mask, mask); + cpumask_clear_cpu(this_cpu, &new_mask); + local_mask = &new_mask; + if (!__send_ipi_mask(local_mask, vector)) + orig_apic.send_IPI_mask_allbutself(mask, vector); +} + +static void hv_send_ipi_allbutself(int vector) +{ + hv_send_ipi_mask_allbutself(cpu_online_mask, vector); +} + +static void hv_send_ipi_all(int vector) +{ + if (!__send_ipi_mask(cpu_online_mask, vector)) + orig_apic.send_IPI_all(vector); +} + +static void hv_send_ipi_self(int vector) +{ + if (!__send_ipi_one(smp_processor_id(), vector)) + orig_apic.send_IPI_self(vector); +} + +void __init hv_apic_init(void) +{ + if (ms_hyperv.hints & HV_X64_CLUSTER_IPI_RECOMMENDED) { + if ((ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) + pr_info("Hyper-V: Using ext hypercalls for IPI\n"); + else + pr_info("Hyper-V: Using IPI hypercalls\n"); + /* + * Set the IPI entry points. + */ + orig_apic = *apic; + + apic->send_IPI = hv_send_ipi; + apic->send_IPI_mask = hv_send_ipi_mask; + apic->send_IPI_mask_allbutself = hv_send_ipi_mask_allbutself; + apic->send_IPI_allbutself = hv_send_ipi_allbutself; + apic->send_IPI_all = hv_send_ipi_all; + apic->send_IPI_self = hv_send_ipi_self; + } + + if (ms_hyperv.hints & HV_X64_APIC_ACCESS_RECOMMENDED) { + pr_info("Hyper-V: Using MSR based APIC access\n"); + apic_set_eoi_write(hv_apic_eoi_write); + apic->read = hv_apic_read; + apic->write = hv_apic_write; + apic->icr_write = hv_apic_icr_write; + apic->icr_read = hv_apic_icr_read; + } +} diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index cfecc2272f2d..4c431e1c1eff 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -91,12 +91,19 @@ EXPORT_SYMBOL_GPL(hv_vp_index); struct hv_vp_assist_page **hv_vp_assist_page; EXPORT_SYMBOL_GPL(hv_vp_assist_page); +void __percpu **hyperv_pcpu_input_arg; +EXPORT_SYMBOL_GPL(hyperv_pcpu_input_arg); + u32 hv_max_vp_index; static int hv_cpu_init(unsigned int cpu) { u64 msr_vp_index; struct hv_vp_assist_page **hvp = &hv_vp_assist_page[smp_processor_id()]; + void **input_arg; + + input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); + *input_arg = page_address(alloc_page(GFP_KERNEL)); hv_get_vp_index(msr_vp_index); @@ -217,6 +224,16 @@ static int hv_cpu_die(unsigned int cpu) { struct hv_reenlightenment_control re_ctrl; unsigned int new_cpu; + unsigned long flags; + void **input_arg; + void *input_pg = NULL; + + local_irq_save(flags); + input_arg = (void **)this_cpu_ptr(hyperv_pcpu_input_arg); + input_pg = *input_arg; + *input_arg = NULL; + local_irq_restore(flags); + free_page((unsigned long)input_pg); if (hv_vp_assist_page && hv_vp_assist_page[cpu]) wrmsrl(HV_X64_MSR_VP_ASSIST_PAGE, 0); @@ -242,8 +259,9 @@ static int hv_cpu_die(unsigned int cpu) * * 1. Setup the hypercall page. * 2. Register Hyper-V specific clocksource. + * 3. Setup Hyper-V specific APIC entry points. */ -void hyperv_init(void) +void __init hyperv_init(void) { u64 guest_id, required_msrs; union hv_x64_msr_hypercall_contents hypercall_msr; @@ -259,6 +277,16 @@ void hyperv_init(void) if ((ms_hyperv.features & required_msrs) != required_msrs) return; + /* + * Allocate the per-CPU state for the hypercall input arg. + * If this allocation fails, we will not be able to setup + * (per-CPU) hypercall input page and thus this failure is + * fatal on Hyper-V. + */ + hyperv_pcpu_input_arg = alloc_percpu(void *); + + BUG_ON(hyperv_pcpu_input_arg == NULL); + /* Allocate percpu VP index */ hv_vp_index = kmalloc_array(num_possible_cpus(), sizeof(*hv_vp_index), GFP_KERNEL); @@ -296,7 +324,7 @@ void hyperv_init(void) hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); - hyper_alloc_mmu(); + hv_apic_init(); /* * Register Hyper-V specific clocksource. diff --git a/arch/x86/hyperv/mmu.c b/arch/x86/hyperv/mmu.c index 56c9ebac946f..5f053d7d1bd9 100644 --- a/arch/x86/hyperv/mmu.c +++ b/arch/x86/hyperv/mmu.c @@ -25,20 +25,13 @@ struct hv_flush_pcpu { struct hv_flush_pcpu_ex { u64 address_space; u64 flags; - struct { - u64 format; - u64 valid_bank_mask; - u64 bank_contents[]; - } hv_vp_set; + struct hv_vpset hv_vp_set; u64 gva_list[]; }; /* Each gva in gva_list encodes up to 4096 pages to flush */ #define HV_TLB_FLUSH_UNIT (4096 * PAGE_SIZE) -static struct hv_flush_pcpu __percpu **pcpu_flush; - -static struct hv_flush_pcpu_ex __percpu **pcpu_flush_ex; /* * Fills in gva_list starting from offset. Returns the number of items added. @@ -70,41 +63,6 @@ static inline int fill_gva_list(u64 gva_list[], int offset, return gva_n - offset; } -/* Return the number of banks in the resulting vp_set */ -static inline int cpumask_to_vp_set(struct hv_flush_pcpu_ex *flush, - const struct cpumask *cpus) -{ - int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; - - /* valid_bank_mask can represent up to 64 banks */ - if (hv_max_vp_index / 64 >= 64) - return 0; - - /* - * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex - * structs are not cleared between calls, we risk flushing unneeded - * vCPUs otherwise. - */ - for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++) - flush->hv_vp_set.bank_contents[vcpu_bank] = 0; - - /* - * Some banks may end up being empty but this is acceptable. - */ - for_each_cpu(cpu, cpus) { - vcpu = hv_cpu_number_to_vp_number(cpu); - vcpu_bank = vcpu / 64; - vcpu_offset = vcpu % 64; - __set_bit(vcpu_offset, (unsigned long *) - &flush->hv_vp_set.bank_contents[vcpu_bank]); - if (vcpu_bank >= nr_bank) - nr_bank = vcpu_bank + 1; - } - flush->hv_vp_set.valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0); - - return nr_bank; -} - static void hyperv_flush_tlb_others(const struct cpumask *cpus, const struct flush_tlb_info *info) { @@ -116,7 +74,7 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, trace_hyperv_mmu_flush_tlb_others(cpus, info); - if (!pcpu_flush || !hv_hypercall_pg) + if (!hv_hypercall_pg) goto do_native; if (cpumask_empty(cpus)) @@ -124,10 +82,8 @@ static void hyperv_flush_tlb_others(const struct cpumask *cpus, local_irq_save(flags); - flush_pcpu = this_cpu_ptr(pcpu_flush); - - if (unlikely(!*flush_pcpu)) - *flush_pcpu = page_address(alloc_page(GFP_ATOMIC)); + flush_pcpu = (struct hv_flush_pcpu **) + this_cpu_ptr(hyperv_pcpu_input_arg); flush = *flush_pcpu; @@ -203,7 +159,7 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, trace_hyperv_mmu_flush_tlb_others(cpus, info); - if (!pcpu_flush_ex || !hv_hypercall_pg) + if (!hv_hypercall_pg) goto do_native; if (cpumask_empty(cpus)) @@ -211,10 +167,8 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, local_irq_save(flags); - flush_pcpu = this_cpu_ptr(pcpu_flush_ex); - - if (unlikely(!*flush_pcpu)) - *flush_pcpu = page_address(alloc_page(GFP_ATOMIC)); + flush_pcpu = (struct hv_flush_pcpu_ex **) + this_cpu_ptr(hyperv_pcpu_input_arg); flush = *flush_pcpu; @@ -239,8 +193,8 @@ static void hyperv_flush_tlb_others_ex(const struct cpumask *cpus, flush->hv_vp_set.valid_bank_mask = 0; if (!cpumask_equal(cpus, cpu_present_mask)) { - flush->hv_vp_set.format = HV_GENERIC_SET_SPARCE_4K; - nr_bank = cpumask_to_vp_set(flush, cpus); + flush->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K; + nr_bank = cpumask_to_vpset(&(flush->hv_vp_set), cpus); } if (!nr_bank) { @@ -296,14 +250,3 @@ void hyperv_setup_mmu_ops(void) pv_mmu_ops.flush_tlb_others = hyperv_flush_tlb_others_ex; } } - -void hyper_alloc_mmu(void) -{ - if (!(ms_hyperv.hints & HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED)) - return; - - if (!(ms_hyperv.hints & HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED)) - pcpu_flush = alloc_percpu(struct hv_flush_pcpu *); - else - pcpu_flush_ex = alloc_percpu(struct hv_flush_pcpu_ex *); -} diff --git a/arch/x86/include/asm/cacheinfo.h b/arch/x86/include/asm/cacheinfo.h new file mode 100644 index 000000000000..e958e28f7ab5 --- /dev/null +++ b/arch/x86/include/asm/cacheinfo.h @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CACHEINFO_H +#define _ASM_X86_CACHEINFO_H + +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id); + +#endif /* _ASM_X86_CACHEINFO_H */ diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h index e1c8dab86670..fb97cf7c4137 100644 --- a/arch/x86/include/asm/compat.h +++ b/arch/x86/include/asm/compat.h @@ -17,7 +17,6 @@ typedef u32 compat_size_t; typedef s32 compat_ssize_t; -typedef s32 compat_time_t; typedef s32 compat_clock_t; typedef s32 compat_pid_t; typedef u16 __compat_uid_t; @@ -46,16 +45,6 @@ typedef u32 compat_u32; typedef u64 __attribute__((aligned(4))) compat_u64; typedef u32 compat_uptr_t; -struct compat_timespec { - compat_time_t tv_sec; - s32 tv_nsec; -}; - -struct compat_timeval { - compat_time_t tv_sec; - s32 tv_usec; -}; - struct compat_stat { compat_dev_t st_dev; u16 __pad1; @@ -145,10 +134,10 @@ struct compat_ipc64_perm { struct compat_semid64_ds { struct compat_ipc64_perm sem_perm; - compat_time_t sem_otime; - compat_ulong_t __unused1; - compat_time_t sem_ctime; - compat_ulong_t __unused2; + compat_ulong_t sem_otime; + compat_ulong_t sem_otime_high; + compat_ulong_t sem_ctime; + compat_ulong_t sem_ctime_high; compat_ulong_t sem_nsems; compat_ulong_t __unused3; compat_ulong_t __unused4; @@ -156,12 +145,12 @@ struct compat_semid64_ds { struct compat_msqid64_ds { struct compat_ipc64_perm msg_perm; - compat_time_t msg_stime; - compat_ulong_t __unused1; - compat_time_t msg_rtime; - compat_ulong_t __unused2; - compat_time_t msg_ctime; - compat_ulong_t __unused3; + compat_ulong_t msg_stime; + compat_ulong_t msg_stime_high; + compat_ulong_t msg_rtime; + compat_ulong_t msg_rtime_high; + compat_ulong_t msg_ctime; + compat_ulong_t msg_ctime_high; compat_ulong_t msg_cbytes; compat_ulong_t msg_qnum; compat_ulong_t msg_qbytes; @@ -174,12 +163,12 @@ struct compat_msqid64_ds { struct compat_shmid64_ds { struct compat_ipc64_perm shm_perm; compat_size_t shm_segsz; - compat_time_t shm_atime; - compat_ulong_t __unused1; - compat_time_t shm_dtime; - compat_ulong_t __unused2; - compat_time_t shm_ctime; - compat_ulong_t __unused3; + compat_ulong_t shm_atime; + compat_ulong_t shm_atime_high; + compat_ulong_t shm_dtime; + compat_ulong_t shm_dtime_high; + compat_ulong_t shm_ctime; + compat_ulong_t shm_ctime_high; compat_pid_t shm_cpid; compat_pid_t shm_lpid; compat_ulong_t shm_nattch; diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index b27da9602a6d..aced6c9290d6 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -140,6 +140,20 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) +#if defined(__clang__) && !defined(CC_HAVE_ASM_GOTO) + +/* + * Workaround for the sake of BPF compilation which utilizes kernel + * headers, but clang does not support ASM GOTO and fails the build. + */ +#ifndef __BPF_TRACING__ +#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments" +#endif + +#define static_cpu_has(bit) boot_cpu_has(bit) + +#else + /* * Static testing of CPU features. Used the same as boot_cpu_has(). * These will statically patch the target code for additional @@ -195,6 +209,7 @@ t_no: boot_cpu_has(bit) : \ _static_cpu_has(bit) \ ) +#endif #define cpu_has_bug(c, bit) cpu_has(c, (bit)) #define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 578793e97431..fb00a2fca990 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -198,7 +198,6 @@ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ #define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ - #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ @@ -207,13 +206,19 @@ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_CDP_L2 ( 7*32+15) /* Code and Data Prioritization L2 */ - +#define X86_FEATURE_MSR_SPEC_CTRL ( 7*32+16) /* "" MSR SPEC_CTRL is implemented */ +#define X86_FEATURE_SSBD ( 7*32+17) /* Speculative Store Bypass Disable */ #define X86_FEATURE_MBA ( 7*32+18) /* Memory Bandwidth Allocation */ #define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* "" Fill RSB on context switches */ #define X86_FEATURE_SEV ( 7*32+20) /* AMD Secure Encrypted Virtualization */ - #define X86_FEATURE_USE_IBPB ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */ #define X86_FEATURE_USE_IBRS_FW ( 7*32+22) /* "" Use IBRS during runtime firmware calls */ +#define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE ( 7*32+23) /* "" Disable Speculative Store Bypass. */ +#define X86_FEATURE_LS_CFG_SSBD ( 7*32+24) /* "" AMD SSBD implementation via LS_CFG MSR */ +#define X86_FEATURE_IBRS ( 7*32+25) /* Indirect Branch Restricted Speculation */ +#define X86_FEATURE_IBPB ( 7*32+26) /* Indirect Branch Prediction Barrier */ +#define X86_FEATURE_STIBP ( 7*32+27) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_ZEN ( 7*32+28) /* "" CPU is AMD family 0x17 (Zen) */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ @@ -274,9 +279,10 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* Always save/restore FP error pointers */ -#define X86_FEATURE_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ -#define X86_FEATURE_IBRS (13*32+14) /* Indirect Branch Restricted Speculation */ -#define X86_FEATURE_STIBP (13*32+15) /* Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_AMD_IBPB (13*32+12) /* "" Indirect Branch Prediction Barrier */ +#define X86_FEATURE_AMD_IBRS (13*32+14) /* "" Indirect Branch Restricted Speculation */ +#define X86_FEATURE_AMD_STIBP (13*32+15) /* "" Single Thread Indirect Branch Predictors */ +#define X86_FEATURE_VIRT_SSBD (13*32+25) /* Virtualized Speculative Store Bypass Disable */ /* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */ #define X86_FEATURE_DTHERM (14*32+ 0) /* Digital Thermal Sensor */ @@ -334,6 +340,7 @@ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_ARCH_CAPABILITIES (18*32+29) /* IA32_ARCH_CAPABILITIES MSR (Intel) */ +#define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* "" Speculative Store Bypass Disable */ /* * BUG word(s) @@ -363,5 +370,6 @@ #define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ #define X86_BUG_SPECTRE_V1 X86_BUG(15) /* CPU is affected by Spectre variant 1 attack with conditional branches */ #define X86_BUG_SPECTRE_V2 X86_BUG(16) /* CPU is affected by Spectre variant 2 attack with indirect branches */ +#define X86_BUG_SPEC_STORE_BYPASS X86_BUG(17) /* CPU is affected by speculative store bypass attack */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 89ce4bfd241f..ce4d176b3d13 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -30,10 +30,7 @@ static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) return dma_ops; } -int arch_dma_supported(struct device *dev, u64 mask); -#define arch_dma_supported arch_dma_supported - -bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp); +bool arch_dma_alloc_attrs(struct device **dev); #define arch_dma_alloc_attrs arch_dma_alloc_attrs #endif diff --git a/arch/x86/include/asm/ftrace.h b/arch/x86/include/asm/ftrace.h index cc8f8fcf9b4a..c18ed65287d5 100644 --- a/arch/x86/include/asm/ftrace.h +++ b/arch/x86/include/asm/ftrace.h @@ -63,7 +63,7 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name #ifndef COMPILE_OFFSETS #if defined(CONFIG_FTRACE_SYSCALLS) && defined(CONFIG_IA32_EMULATION) -#include <asm/compat.h> +#include <linux/compat.h> /* * Because ia32 syscalls do not map to x86_64 syscall numbers diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 5ea2afd4c871..740a428acf1e 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h @@ -50,14 +50,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); #define inc_irq_stat(member) this_cpu_inc(irq_stat.member) -#define local_softirq_pending() this_cpu_read(irq_stat.__softirq_pending) - -#define __ARCH_SET_SOFTIRQ_PENDING - -#define set_softirq_pending(x) \ - this_cpu_write(irq_stat.__softirq_pending, (x)) -#define or_softirq_pending(x) this_cpu_or(irq_stat.__softirq_pending, (x)) - extern void ack_bad_irq(unsigned int irq); extern u64 arch_irq_stat_cpu(unsigned int cpu); diff --git a/arch/x86/include/asm/hyperv-tlfs.h b/arch/x86/include/asm/hyperv-tlfs.h index 416cb0e0c496..3bfa92c2793c 100644 --- a/arch/x86/include/asm/hyperv-tlfs.h +++ b/arch/x86/include/asm/hyperv-tlfs.h @@ -164,6 +164,11 @@ */ #define HV_X64_DEPRECATING_AEOI_RECOMMENDED (1 << 9) +/* + * Recommend using cluster IPI hypercalls. + */ +#define HV_X64_CLUSTER_IPI_RECOMMENDED (1 << 10) + /* Recommend using the newer ExProcessorMasks interface */ #define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED (1 << 11) @@ -329,12 +334,17 @@ struct hv_tsc_emulation_status { #define HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_MASK \ (~((1ull << HV_X64_MSR_HYPERCALL_PAGE_ADDRESS_SHIFT) - 1)) +#define HV_IPI_LOW_VECTOR 0x10 +#define HV_IPI_HIGH_VECTOR 0xff + /* Declare the various hypercall operations. */ #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003 #define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008 +#define HVCALL_SEND_IPI 0x000b #define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013 #define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014 +#define HVCALL_SEND_IPI_EX 0x0015 #define HVCALL_POST_MESSAGE 0x005c #define HVCALL_SIGNAL_EVENT 0x005d @@ -360,7 +370,7 @@ struct hv_tsc_emulation_status { #define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3) enum HV_GENERIC_SET_FORMAT { - HV_GENERIC_SET_SPARCE_4K, + HV_GENERIC_SET_SPARSE_4K, HV_GENERIC_SET_ALL, }; @@ -706,4 +716,22 @@ struct hv_enlightened_vmcs { #define HV_STIMER_AUTOENABLE (1ULL << 3) #define HV_STIMER_SINT(config) (__u8)(((config) >> 16) & 0x0F) +struct ipi_arg_non_ex { + u32 vector; + u32 reserved; + u64 cpu_mask; +}; + +struct hv_vpset { + u64 format; + u64 valid_bank_mask; + u64 bank_contents[]; +}; + +struct ipi_arg_ex { + u32 vector; + u32 reserved; + struct hv_vpset vp_set; +}; + #endif diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index b3e32b010ab1..c2c01f84df75 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +#define POP_SS_OPCODE 0x1f +#define MOV_SREG_OPCODE 0x8e + +/* + * Intel SDM Vol.3A 6.8.3 states; + * "Any single-step trap that would be delivered following the MOV to SS + * instruction or POP to SS instruction (because EFLAGS.TF is 1) is + * suppressed." + * This function returns true if @insn is MOV SS or POP SS. On these + * instructions, single stepping is suppressed. + */ +static inline int insn_masking_exception(struct insn *insn) +{ + return insn->opcode.bytes[0] == POP_SS_OPCODE || + (insn->opcode.bytes[0] == MOV_SREG_OPCODE && + X86_MODRM_REG(insn->modrm.bytes[0]) == 2); +} + #endif /* _ASM_X86_INSN_H */ diff --git a/arch/x86/include/asm/intel_mid_vrtc.h b/arch/x86/include/asm/intel_mid_vrtc.h index 35555016b1be..0b44b1abe4d9 100644 --- a/arch/x86/include/asm/intel_mid_vrtc.h +++ b/arch/x86/include/asm/intel_mid_vrtc.h @@ -4,7 +4,7 @@ extern unsigned char vrtc_cmos_read(unsigned char reg); extern void vrtc_cmos_write(unsigned char val, unsigned char reg); -extern void vrtc_get_time(struct timespec *now); -extern int vrtc_set_mmss(const struct timespec *now); +extern void vrtc_get_time(struct timespec64 *now); +extern int vrtc_set_mmss(const struct timespec64 *now); #endif diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index f6e5b9375d8c..6de64840dd22 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -94,10 +94,10 @@ build_mmio_write(__writel, "l", unsigned int, "r", ) #ifdef CONFIG_X86_64 -build_mmio_read(readq, "q", unsigned long, "=r", :"memory") -build_mmio_read(__readq, "q", unsigned long, "=r", ) -build_mmio_write(writeq, "q", unsigned long, "r", :"memory") -build_mmio_write(__writeq, "q", unsigned long, "r", ) +build_mmio_read(readq, "q", u64, "=r", :"memory") +build_mmio_read(__readq, "q", u64, "=r", ) +build_mmio_write(writeq, "q", u64, "r", :"memory") +build_mmio_write(__writeq, "q", u64, "r", ) #define readq_relaxed(a) __readq(a) #define writeq_relaxed(v, a) __writeq(v, a) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index c25775fad4ed..f4b2588865e9 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -924,7 +924,7 @@ struct kvm_x86_ops { int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ bool (*cpu_has_accelerated_tpr)(void); - bool (*cpu_has_high_real_mode_segbase)(void); + bool (*has_emulated_msr)(int index); void (*cpuid_update)(struct kvm_vcpu *vcpu); struct kvm *(*vm_alloc)(void); diff --git a/arch/x86/include/asm/mc146818rtc.h b/arch/x86/include/asm/mc146818rtc.h index 1775a32f7ea6..97198001e567 100644 --- a/arch/x86/include/asm/mc146818rtc.h +++ b/arch/x86/include/asm/mc146818rtc.h @@ -95,8 +95,8 @@ static inline unsigned char current_lock_cmos_reg(void) unsigned char rtc_cmos_read(unsigned char addr); void rtc_cmos_write(unsigned char val, unsigned char addr); -extern int mach_set_rtc_mmss(const struct timespec *now); -extern void mach_get_cmos_time(struct timespec *now); +extern int mach_set_rtc_mmss(const struct timespec64 *now); +extern void mach_get_cmos_time(struct timespec64 *now); #define RTC_IRQ 8 diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 57e3785d0d26..cf9911b5a53c 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -193,7 +193,7 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { - /* pkey 0 is the default and always allocated */ + /* pkey 0 is the default and allocated implicitly */ mm->context.pkey_allocation_map = 0x1; /* -1 means unallocated or invalid */ mm->context.execute_only_pkey = -1; diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index b90e79610cf7..997192131b7b 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -122,6 +122,7 @@ static inline void hv_disable_stimer0_percpu_irq(int irq) {} #if IS_ENABLED(CONFIG_HYPERV) extern struct clocksource *hyperv_cs; extern void *hv_hypercall_pg; +extern void __percpu **hyperv_pcpu_input_arg; static inline u64 hv_do_hypercall(u64 control, void *input, void *output) { @@ -258,9 +259,41 @@ static inline int hv_cpu_number_to_vp_number(int cpu_number) return hv_vp_index[cpu_number]; } -void hyperv_init(void); +static inline int cpumask_to_vpset(struct hv_vpset *vpset, + const struct cpumask *cpus) +{ + int cpu, vcpu, vcpu_bank, vcpu_offset, nr_bank = 1; + + /* valid_bank_mask can represent up to 64 banks */ + if (hv_max_vp_index / 64 >= 64) + return 0; + + /* + * Clear all banks up to the maximum possible bank as hv_flush_pcpu_ex + * structs are not cleared between calls, we risk flushing unneeded + * vCPUs otherwise. + */ + for (vcpu_bank = 0; vcpu_bank <= hv_max_vp_index / 64; vcpu_bank++) + vpset->bank_contents[vcpu_bank] = 0; + + /* + * Some banks may end up being empty but this is acceptable. + */ + for_each_cpu(cpu, cpus) { + vcpu = hv_cpu_number_to_vp_number(cpu); + vcpu_bank = vcpu / 64; + vcpu_offset = vcpu % 64; + __set_bit(vcpu_offset, (unsigned long *) + &vpset->bank_contents[vcpu_bank]); + if (vcpu_bank >= nr_bank) + nr_bank = vcpu_bank + 1; + } + vpset->valid_bank_mask = GENMASK_ULL(nr_bank - 1, 0); + return nr_bank; +} + +void __init hyperv_init(void); void hyperv_setup_mmu_ops(void); -void hyper_alloc_mmu(void); void hyperv_report_panic(struct pt_regs *regs, long err); bool hv_is_hyperv_initialized(void); void hyperv_cleanup(void); @@ -269,6 +302,13 @@ void hyperv_reenlightenment_intr(struct pt_regs *regs); void set_hv_tscchange_cb(void (*cb)(void)); void clear_hv_tscchange_cb(void); void hyperv_stop_tsc_emulation(void); + +#ifdef CONFIG_X86_64 +void hv_apic_init(void); +#else +static inline void hv_apic_init(void) {} +#endif + #else /* CONFIG_HYPERV */ static inline void hyperv_init(void) {} static inline bool hv_is_hyperv_initialized(void) { return false; } diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 53d5b1b9255e..68b2c3150de1 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -42,6 +42,8 @@ #define MSR_IA32_SPEC_CTRL 0x00000048 /* Speculation Control */ #define SPEC_CTRL_IBRS (1 << 0) /* Indirect Branch Restricted Speculation */ #define SPEC_CTRL_STIBP (1 << 1) /* Single Thread Indirect Branch Predictors */ +#define SPEC_CTRL_SSBD_SHIFT 2 /* Speculative Store Bypass Disable bit */ +#define SPEC_CTRL_SSBD (1 << SPEC_CTRL_SSBD_SHIFT) /* Speculative Store Bypass Disable */ #define MSR_IA32_PRED_CMD 0x00000049 /* Prediction Command */ #define PRED_CMD_IBPB (1 << 0) /* Indirect Branch Prediction Barrier */ @@ -60,14 +62,19 @@ #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) #define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) -#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) -#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C1_AUTO_UNDEMOTE (1UL << 28) #define MSR_MTRRcap 0x000000fe #define MSR_IA32_ARCH_CAPABILITIES 0x0000010a #define ARCH_CAP_RDCL_NO (1 << 0) /* Not susceptible to Meltdown */ #define ARCH_CAP_IBRS_ALL (1 << 1) /* Enhanced IBRS support */ +#define ARCH_CAP_SSB_NO (1 << 4) /* + * Not susceptible to Speculative Store Bypass + * attack, so no Speculative Store Bypass + * control required. + */ #define MSR_IA32_BBL_CR_CTL 0x00000119 #define MSR_IA32_BBL_CR_CTL3 0x0000011e @@ -340,6 +347,8 @@ #define MSR_AMD64_SEV_ENABLED_BIT 0 #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f + /* Fam 17h MSRs */ #define MSR_F17H_IRPERF 0xc00000e9 diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index f928ad9b143f..8b38df98548e 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -217,6 +217,14 @@ enum spectre_v2_mitigation { SPECTRE_V2_IBRS, }; +/* The Speculative Store Bypass disable variants */ +enum ssb_mitigation { + SPEC_STORE_BYPASS_NONE, + SPEC_STORE_BYPASS_DISABLE, + SPEC_STORE_BYPASS_PRCTL, + SPEC_STORE_BYPASS_SECCOMP, +}; + extern char __indirect_thunk_start[]; extern char __indirect_thunk_end[]; @@ -241,22 +249,27 @@ static inline void vmexit_fill_RSB(void) #endif } -#define alternative_msr_write(_msr, _val, _feature) \ - asm volatile(ALTERNATIVE("", \ - "movl %[msr], %%ecx\n\t" \ - "movl %[val], %%eax\n\t" \ - "movl $0, %%edx\n\t" \ - "wrmsr", \ - _feature) \ - : : [msr] "i" (_msr), [val] "i" (_val) \ - : "eax", "ecx", "edx", "memory") +static __always_inline +void alternative_msr_write(unsigned int msr, u64 val, unsigned int feature) +{ + asm volatile(ALTERNATIVE("", "wrmsr", %c[feature]) + : : "c" (msr), + "a" ((u32)val), + "d" ((u32)(val >> 32)), + [feature] "i" (feature) + : "memory"); +} static inline void indirect_branch_prediction_barrier(void) { - alternative_msr_write(MSR_IA32_PRED_CMD, PRED_CMD_IBPB, - X86_FEATURE_USE_IBPB); + u64 val = PRED_CMD_IBPB; + + alternative_msr_write(MSR_IA32_PRED_CMD, val, X86_FEATURE_USE_IBPB); } +/* The Intel SPEC CTRL MSR base value cache */ +extern u64 x86_spec_ctrl_base; + /* * With retpoline, we must use IBRS to restrict branch prediction * before calling into firmware. @@ -265,14 +278,18 @@ static inline void indirect_branch_prediction_barrier(void) */ #define firmware_restrict_branch_speculation_start() \ do { \ + u64 val = x86_spec_ctrl_base | SPEC_CTRL_IBRS; \ + \ preempt_disable(); \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, SPEC_CTRL_IBRS, \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ X86_FEATURE_USE_IBRS_FW); \ } while (0) #define firmware_restrict_branch_speculation_end() \ do { \ - alternative_msr_write(MSR_IA32_SPEC_CTRL, 0, \ + u64 val = x86_spec_ctrl_base; \ + \ + alternative_msr_write(MSR_IA32_SPEC_CTRL, val, \ X86_FEATURE_USE_IBRS_FW); \ preempt_enable(); \ } while (0) diff --git a/arch/x86/include/asm/page_64_types.h b/arch/x86/include/asm/page_64_types.h index 2c5a966dc222..6afac386a434 100644 --- a/arch/x86/include/asm/page_64_types.h +++ b/arch/x86/include/asm/page_64_types.h @@ -53,7 +53,7 @@ #define __PHYSICAL_MASK_SHIFT 52 #ifdef CONFIG_X86_5LEVEL -#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled ? 56 : 47) +#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled() ? 56 : 47) #else #define __VIRTUAL_MASK_SHIFT 47 #endif diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 9be2bf13825b..d49bbf4bb5c8 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -574,14 +574,14 @@ static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd) } #define set_pgd(pgdp, pgdval) do { \ - if (pgtable_l5_enabled) \ + if (pgtable_l5_enabled()) \ __set_pgd(pgdp, pgdval); \ else \ set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd }); \ } while (0) #define pgd_clear(pgdp) do { \ - if (pgtable_l5_enabled) \ + if (pgtable_l5_enabled()) \ set_pgd(pgdp, __pgd(0)); \ } while (0) diff --git a/arch/x86/include/asm/pci.h b/arch/x86/include/asm/pci.h index d32175e30259..662963681ea6 100644 --- a/arch/x86/include/asm/pci.h +++ b/arch/x86/include/asm/pci.h @@ -117,9 +117,6 @@ void native_restore_msi_irqs(struct pci_dev *dev); #define native_setup_msi_irqs NULL #define native_teardown_msi_irq NULL #endif - -#define PCI_DMA_BUS_IS_PHYS (dma_ops->is_phys) - #endif /* __KERNEL__ */ #ifdef CONFIG_X86_64 diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index 263c142a6a6c..ada6410fd2ec 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -167,7 +167,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, #if CONFIG_PGTABLE_LEVELS > 4 static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d) { - if (!pgtable_l5_enabled) + if (!pgtable_l5_enabled()) return; paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT); set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d))); @@ -193,7 +193,7 @@ extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d); static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d, unsigned long address) { - if (pgtable_l5_enabled) + if (pgtable_l5_enabled()) ___p4d_free_tlb(tlb, p4d); } diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index f1633de5a675..99ecde23c3ec 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -65,7 +65,7 @@ extern pmdval_t early_pmd_flags; #ifndef __PAGETABLE_P4D_FOLDED #define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd) -#define pgd_clear(pgd) (pgtable_l5_enabled ? native_pgd_clear(pgd) : 0) +#define pgd_clear(pgd) (pgtable_l5_enabled() ? native_pgd_clear(pgd) : 0) #endif #ifndef set_p4d @@ -881,7 +881,7 @@ static inline unsigned long p4d_index(unsigned long address) #if CONFIG_PGTABLE_LEVELS > 4 static inline int pgd_present(pgd_t pgd) { - if (!pgtable_l5_enabled) + if (!pgtable_l5_enabled()) return 1; return pgd_flags(pgd) & _PAGE_PRESENT; } @@ -898,9 +898,9 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd) #define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd)) /* to find an entry in a page-table-directory. */ -static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +static __always_inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) { - if (!pgtable_l5_enabled) + if (!pgtable_l5_enabled()) return (p4d_t *)pgd; return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address); } @@ -909,7 +909,7 @@ static inline int pgd_bad(pgd_t pgd) { unsigned long ignore_flags = _PAGE_USER; - if (!pgtable_l5_enabled) + if (!pgtable_l5_enabled()) return 0; if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) @@ -920,7 +920,7 @@ static inline int pgd_bad(pgd_t pgd) static inline int pgd_none(pgd_t pgd) { - if (!pgtable_l5_enabled) + if (!pgtable_l5_enabled()) return 0; /* * There is no need to do a workaround for the KNL stray diff --git a/arch/x86/include/asm/pgtable_32_types.h b/arch/x86/include/asm/pgtable_32_types.h index e3225e83db7d..d9a001a4a872 100644 --- a/arch/x86/include/asm/pgtable_32_types.h +++ b/arch/x86/include/asm/pgtable_32_types.h @@ -15,7 +15,7 @@ # include <asm/pgtable-2level_types.h> #endif -#define pgtable_l5_enabled 0 +#define pgtable_l5_enabled() 0 #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE - 1)) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 877bc27718ae..3c5385f9a88f 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -220,7 +220,7 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d) { pgd_t pgd; - if (pgtable_l5_enabled || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { + if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) { *p4dp = p4d; return; } diff --git a/arch/x86/include/asm/pgtable_64_types.h b/arch/x86/include/asm/pgtable_64_types.h index adb47552e6bb..054765ab2da2 100644 --- a/arch/x86/include/asm/pgtable_64_types.h +++ b/arch/x86/include/asm/pgtable_64_types.h @@ -22,12 +22,23 @@ typedef struct { pteval_t pte; } pte_t; #ifdef CONFIG_X86_5LEVEL extern unsigned int __pgtable_l5_enabled; -#ifndef pgtable_l5_enabled -#define pgtable_l5_enabled cpu_feature_enabled(X86_FEATURE_LA57) -#endif + +#ifdef USE_EARLY_PGTABLE_L5 +/* + * cpu_feature_enabled() is not available in early boot code. + * Use variable instead. + */ +static inline bool pgtable_l5_enabled(void) +{ + return __pgtable_l5_enabled; +} #else -#define pgtable_l5_enabled 0 -#endif +#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57) +#endif /* USE_EARLY_PGTABLE_L5 */ + +#else +#define pgtable_l5_enabled() 0 +#endif /* CONFIG_X86_5LEVEL */ extern unsigned int pgdir_shift; extern unsigned int ptrs_per_p4d; @@ -102,7 +113,7 @@ extern unsigned int ptrs_per_p4d; #define LDT_PGD_ENTRY_L4 -3UL #define LDT_PGD_ENTRY_L5 -112UL -#define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4) +#define LDT_PGD_ENTRY (pgtable_l5_enabled() ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4) #define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT) #define __VMALLOC_BASE_L4 0xffffc90000000000UL @@ -116,7 +127,7 @@ extern unsigned int ptrs_per_p4d; #ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT # define VMALLOC_START vmalloc_base -# define VMALLOC_SIZE_TB (pgtable_l5_enabled ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4) +# define VMALLOC_SIZE_TB (pgtable_l5_enabled() ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4) # define VMEMMAP_START vmemmap_base #else # define VMALLOC_START __VMALLOC_BASE_L4 diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index a0ba1ffda0df..851c04b7a092 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_PKEYS_H #define _ASM_X86_PKEYS_H +#define ARCH_DEFAULT_PKEY 0 + #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, @@ -15,7 +17,7 @@ extern int __execute_only_pkey(struct mm_struct *mm); static inline int execute_only_pkey(struct mm_struct *mm) { if (!boot_cpu_has(X86_FEATURE_OSPKE)) - return 0; + return ARCH_DEFAULT_PKEY; return __execute_only_pkey(mm); } @@ -49,13 +51,21 @@ bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) { /* * "Allocated" pkeys are those that have been returned - * from pkey_alloc(). pkey 0 is special, and never - * returned from pkey_alloc(). + * from pkey_alloc() or pkey 0 which is allocated + * implicitly when the mm is created. */ - if (pkey <= 0) + if (pkey < 0) return false; if (pkey >= arch_max_pkey()) return false; + /* + * The exec-only pkey is set in the allocation map, but + * is not available to any of the user interfaces like + * mprotect_pkey(). + */ + if (pkey == mm->context.execute_only_pkey) + return false; + return mm_pkey_allocation_map(mm) & (1U << pkey); } diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 21a114914ba4..e28add6b791f 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -186,15 +186,6 @@ extern void identify_boot_cpu(void); extern void identify_secondary_cpu(struct cpuinfo_x86 *); extern void print_cpu_info(struct cpuinfo_x86 *); void print_cpu_msr(struct cpuinfo_x86 *); -extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); -extern u32 get_scattered_cpuid_leaf(unsigned int level, - unsigned int sub_leaf, - enum cpuid_regs_idx reg); -extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); -extern void init_amd_cacheinfo(struct cpuinfo_x86 *c); - -extern void detect_extended_topology(struct cpuinfo_x86 *c); -extern void detect_ht(struct cpuinfo_x86 *c); #ifdef CONFIG_X86_32 extern int have_cpuid_p(void); diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index a7471dcd2205..b6033680d458 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -12,7 +12,7 @@ void pvclock_set_flags(u8 flags); unsigned long pvclock_tsc_khz(struct pvclock_vcpu_time_info *src); void pvclock_read_wallclock(struct pvclock_wall_clock *wall, struct pvclock_vcpu_time_info *vcpu, - struct timespec *ts); + struct timespec64 *ts); void pvclock_resume(void); void pvclock_touch_watchdogs(void); diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index 5e16b5d40d32..3e70bed8a978 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -7,6 +7,14 @@ #include <asm-generic/qspinlock_types.h> #include <asm/paravirt.h> +#define _Q_PENDING_LOOPS (1 << 9) + +#ifdef CONFIG_PARAVIRT_SPINLOCKS +extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __pv_init_lock_hash(void); +extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); +extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock); + #define queued_spin_unlock queued_spin_unlock /** * queued_spin_unlock - release a queued spinlock @@ -16,15 +24,9 @@ */ static inline void native_queued_spin_unlock(struct qspinlock *lock) { - smp_store_release((u8 *)lock, 0); + smp_store_release(&lock->locked, 0); } -#ifdef CONFIG_PARAVIRT_SPINLOCKS -extern void native_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -extern void __pv_init_lock_hash(void); -extern void __pv_queued_spin_lock_slowpath(struct qspinlock *lock, u32 val); -extern void __raw_callee_save___pv_queued_spin_unlock(struct qspinlock *lock); - static inline void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) { pv_queued_spin_lock_slowpath(lock, val); @@ -40,11 +42,6 @@ static inline bool vcpu_is_preempted(long cpu) { return pv_vcpu_is_preempted(cpu); } -#else -static inline void queued_spin_unlock(struct qspinlock *lock) -{ - native_queued_spin_unlock(lock); -} #endif #ifdef CONFIG_PARAVIRT diff --git a/arch/x86/include/asm/qspinlock_paravirt.h b/arch/x86/include/asm/qspinlock_paravirt.h index 923307ea11c7..9ef5ee03d2d7 100644 --- a/arch/x86/include/asm/qspinlock_paravirt.h +++ b/arch/x86/include/asm/qspinlock_paravirt.h @@ -22,8 +22,7 @@ PV_CALLEE_SAVE_REGS_THUNK(__pv_queued_spin_unlock_slowpath); * * void __pv_queued_spin_unlock(struct qspinlock *lock) * { - * struct __qspinlock *l = (void *)lock; - * u8 lockval = cmpxchg(&l->locked, _Q_LOCKED_VAL, 0); + * u8 lockval = cmpxchg(&lock->locked, _Q_LOCKED_VAL, 0); * * if (likely(lockval == _Q_LOCKED_VAL)) * return; diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index f75bff8f9d82..547c4fe50711 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -171,7 +171,6 @@ static inline int wbinvd_on_all_cpus(void) wbinvd(); return 0; } -#define smp_num_siblings 1 #endif /* CONFIG_SMP */ extern unsigned disabled_cpus; diff --git a/arch/x86/include/asm/sparsemem.h b/arch/x86/include/asm/sparsemem.h index 4617a2bf123c..199218719a86 100644 --- a/arch/x86/include/asm/sparsemem.h +++ b/arch/x86/include/asm/sparsemem.h @@ -27,8 +27,8 @@ # endif #else /* CONFIG_X86_32 */ # define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */ -# define MAX_PHYSADDR_BITS (pgtable_l5_enabled ? 52 : 44) -# define MAX_PHYSMEM_BITS (pgtable_l5_enabled ? 52 : 46) +# define MAX_PHYSADDR_BITS (pgtable_l5_enabled() ? 52 : 44) +# define MAX_PHYSMEM_BITS (pgtable_l5_enabled() ? 52 : 46) #endif #endif /* CONFIG_SPARSEMEM */ diff --git a/arch/x86/include/asm/spec-ctrl.h b/arch/x86/include/asm/spec-ctrl.h new file mode 100644 index 000000000000..ae7c2c5cd7f0 --- /dev/null +++ b/arch/x86/include/asm/spec-ctrl.h @@ -0,0 +1,80 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_SPECCTRL_H_ +#define _ASM_X86_SPECCTRL_H_ + +#include <linux/thread_info.h> +#include <asm/nospec-branch.h> + +/* + * On VMENTER we must preserve whatever view of the SPEC_CTRL MSR + * the guest has, while on VMEXIT we restore the host view. This + * would be easier if SPEC_CTRL were architecturally maskable or + * shadowable for guests but this is not (currently) the case. + * Takes the guest view of SPEC_CTRL MSR as a parameter and also + * the guest's version of VIRT_SPEC_CTRL, if emulated. + */ +extern void x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool guest); + +/** + * x86_spec_ctrl_set_guest - Set speculation control registers for the guest + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +static inline +void x86_spec_ctrl_set_guest(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +{ + x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, true); +} + +/** + * x86_spec_ctrl_restore_host - Restore host speculation control registers + * @guest_spec_ctrl: The guest content of MSR_SPEC_CTRL + * @guest_virt_spec_ctrl: The guest controlled bits of MSR_VIRT_SPEC_CTRL + * (may get translated to MSR_AMD64_LS_CFG bits) + * + * Avoids writing to the MSR if the content/bits are the same + */ +static inline +void x86_spec_ctrl_restore_host(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl) +{ + x86_virt_spec_ctrl(guest_spec_ctrl, guest_virt_spec_ctrl, false); +} + +/* AMD specific Speculative Store Bypass MSR data */ +extern u64 x86_amd_ls_cfg_base; +extern u64 x86_amd_ls_cfg_ssbd_mask; + +static inline u64 ssbd_tif_to_spec_ctrl(u64 tifn) +{ + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); + return (tifn & _TIF_SSBD) >> (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); +} + +static inline unsigned long ssbd_spec_ctrl_to_tif(u64 spec_ctrl) +{ + BUILD_BUG_ON(TIF_SSBD < SPEC_CTRL_SSBD_SHIFT); + return (spec_ctrl & SPEC_CTRL_SSBD) << (TIF_SSBD - SPEC_CTRL_SSBD_SHIFT); +} + +static inline u64 ssbd_tif_to_amd_ls_cfg(u64 tifn) +{ + return (tifn & _TIF_SSBD) ? x86_amd_ls_cfg_ssbd_mask : 0ULL; +} + +#ifdef CONFIG_SMP +extern void speculative_store_bypass_ht_init(void); +#else +static inline void speculative_store_bypass_ht_init(void) { } +#endif + +extern void speculative_store_bypass_update(unsigned long tif); + +static inline void speculative_store_bypass_update_current(void) +{ + speculative_store_bypass_update(current_thread_info()->flags); +} + +#endif diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 133d9425fced..b6dc698f992a 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -111,4 +111,6 @@ static inline unsigned long caller_frame_pointer(void) return (unsigned long)frame; } +void show_opcodes(u8 *rip, const char *loglvl); +void show_ip(struct pt_regs *regs, const char *loglvl); #endif /* _ASM_X86_STACKTRACE_H */ diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h index 533f74c300c2..d33f92b9fa22 100644 --- a/arch/x86/include/asm/string_64.h +++ b/arch/x86/include/asm/string_64.h @@ -116,7 +116,8 @@ int strcmp(const char *cs, const char *ct); #endif #define __HAVE_ARCH_MEMCPY_MCSAFE 1 -__must_check int memcpy_mcsafe_unrolled(void *dst, const void *src, size_t cnt); +__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src, + size_t cnt); DECLARE_STATIC_KEY_FALSE(mcsafe_key); /** @@ -131,14 +132,15 @@ DECLARE_STATIC_KEY_FALSE(mcsafe_key); * actually do machine check recovery. Everyone else can just * use memcpy(). * - * Return 0 for success, -EFAULT for fail + * Return 0 for success, or number of bytes not copied if there was an + * exception. */ -static __always_inline __must_check int +static __always_inline __must_check unsigned long memcpy_mcsafe(void *dst, const void *src, size_t cnt) { #ifdef CONFIG_X86_MCE if (static_branch_unlikely(&mcsafe_key)) - return memcpy_mcsafe_unrolled(dst, src, cnt); + return __memcpy_mcsafe(dst, src, cnt); else #endif memcpy(dst, src, cnt); diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index a5d9521bb2cb..2ff2a30a264f 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -79,6 +79,7 @@ struct thread_info { #define TIF_SIGPENDING 2 /* signal pending */ #define TIF_NEED_RESCHED 3 /* rescheduling necessary */ #define TIF_SINGLESTEP 4 /* reenable singlestep on user return*/ +#define TIF_SSBD 5 /* Reduced data speculation */ #define TIF_SYSCALL_EMU 6 /* syscall emulation active */ #define TIF_SYSCALL_AUDIT 7 /* syscall auditing active */ #define TIF_SECCOMP 8 /* secure computing */ @@ -105,6 +106,7 @@ struct thread_info { #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_SINGLESTEP (1 << TIF_SINGLESTEP) +#define _TIF_SSBD (1 << TIF_SSBD) #define _TIF_SYSCALL_EMU (1 << TIF_SYSCALL_EMU) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SECCOMP (1 << TIF_SECCOMP) @@ -144,7 +146,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP) + (_TIF_IO_BITMAP|_TIF_NOCPUID|_TIF_NOTSC|_TIF_BLOCKSTEP|_TIF_SSBD) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW) diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h index 62546b3a398e..62acb613114b 100644 --- a/arch/x86/include/asm/uaccess_64.h +++ b/arch/x86/include/asm/uaccess_64.h @@ -47,6 +47,17 @@ copy_user_generic(void *to, const void *from, unsigned len) } static __always_inline __must_check unsigned long +copy_to_user_mcsafe(void *to, const void *from, unsigned len) +{ + unsigned long ret; + + __uaccess_begin(); + ret = memcpy_mcsafe(to, from, len); + __uaccess_end(); + return ret; +} + +static __always_inline __must_check unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long size) { int ret = 0; @@ -194,4 +205,7 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size) unsigned long copy_user_handle_tail(char *to, char *from, unsigned len); +unsigned long +mcsafe_handle_tail(char *to, char *from, unsigned len); + #endif /* _ASM_X86_UACCESS_64_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index ce8b4da07e35..2d27236c16a3 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -170,7 +170,7 @@ struct x86_cpuinit_ops { void (*fixup_cpu_id)(struct cpuinfo_x86 *c, int node); }; -struct timespec; +struct timespec64; /** * struct x86_legacy_devices - legacy x86 devices @@ -264,8 +264,8 @@ struct x86_hyper_runtime { struct x86_platform_ops { unsigned long (*calibrate_cpu)(void); unsigned long (*calibrate_tsc)(void); - void (*get_wallclock)(struct timespec *ts); - int (*set_wallclock)(const struct timespec *ts); + void (*get_wallclock)(struct timespec64 *ts); + int (*set_wallclock)(const struct timespec64 *ts); void (*iommu_shutdown)(void); bool (*is_untracked_pat_range)(u64 start, u64 end); void (*nmi_init)(void); diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 4c851ebb3ceb..0ede697c3961 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -29,7 +29,7 @@ #define KVM_FEATURE_PV_TLB_FLUSH 9 #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 -#define KVM_HINTS_DEDICATED 0 +#define KVM_HINTS_REALTIME 0 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. diff --git a/arch/x86/include/uapi/asm/sembuf.h b/arch/x86/include/uapi/asm/sembuf.h index cabd7476bd6c..89de6cd9f0a7 100644 --- a/arch/x86/include/uapi/asm/sembuf.h +++ b/arch/x86/include/uapi/asm/sembuf.h @@ -8,15 +8,24 @@ * between kernel and user space. * * Pad space is left for: - * - 64-bit time_t to solve y2038 problem * - 2 miscellaneous 32-bit values + * + * x86_64 and x32 incorrectly added padding here, so the structures + * are still incompatible with the padding on x86. */ struct semid64_ds { struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ +#ifdef __i386__ + unsigned long sem_otime; /* last semop time */ + unsigned long sem_otime_high; + unsigned long sem_ctime; /* last change time */ + unsigned long sem_ctime_high; +#else __kernel_time_t sem_otime; /* last semop time */ __kernel_ulong_t __unused1; __kernel_time_t sem_ctime; /* last change time */ __kernel_ulong_t __unused2; +#endif __kernel_ulong_t sem_nsems; /* no. of semaphores in array */ __kernel_ulong_t __unused3; __kernel_ulong_t __unused4; diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index c88e0b127810..b481b95bd8f6 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -14,8 +14,11 @@ #include <asm/amd_nb.h> #define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 +#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 #define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec /* Protect the PCI config register pairs used for SMN and DF indirect access. */ static DEFINE_MUTEX(smn_mutex); @@ -24,6 +27,7 @@ static u32 *flush_words; static const struct pci_device_id amd_root_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) }, {} }; @@ -39,6 +43,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, {} }; @@ -51,6 +56,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, {} }; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 8b04234e010b..7685444a106b 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -116,6 +116,7 @@ static void init_x2apic_ldr(void) goto update; } cmsk = cluster_hotplug_mask; + cmsk->clusterid = cluster; cluster_hotplug_mask = NULL; update: this_cpu_write(cluster_masks, cmsk); diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index dfcbe6924eaf..5d0de79fdab0 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -1715,19 +1715,6 @@ static int proc_apm_show(struct seq_file *m, void *v) return 0; } -static int proc_apm_open(struct inode *inode, struct file *file) -{ - return single_open(file, proc_apm_show, NULL); -} - -static const struct file_operations apm_file_ops = { - .owner = THIS_MODULE, - .open = proc_apm_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static int apm(void *unused) { unsigned short bx; @@ -2360,7 +2347,7 @@ static int __init apm_init(void) set_desc_base(&gdt[APM_DS >> 3], (unsigned long)__va((unsigned long)apm_info.bios.dseg << 4)); - proc_create("apm", 0, NULL, &apm_file_ops); + proc_create_single("apm", 0, NULL, proc_apm_show); kapmd_task = kthread_create(apm, NULL, "kapmd"); if (IS_ERR(kapmd_task)) { @@ -2446,7 +2433,7 @@ MODULE_PARM_DESC(idle_threshold, "System idle percentage above which to make APM BIOS idle calls"); module_param(idle_period, int, 0444); MODULE_PARM_DESC(idle_period, - "Period (in sec/100) over which to caculate the idle percentage"); + "Period (in sec/100) over which to calculate the idle percentage"); module_param(smp, bool, 0444); MODULE_PARM_DESC(smp, "Set this to enable APM use on an SMP platform. Use with caution on older systems"); diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index a66229f51b12..7a40196967cb 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -17,7 +17,7 @@ KCOV_INSTRUMENT_perf_event.o := n nostackp := $(call cc-option, -fno-stack-protector) CFLAGS_common.o := $(nostackp) -obj-y := intel_cacheinfo.o scattered.o topology.o +obj-y := cacheinfo.o scattered.o topology.o obj-y += common.o obj-y += rdrand.o obj-y += match.o diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 12bc0a1139da..082d7875cef8 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -9,7 +9,9 @@ #include <linux/random.h> #include <asm/processor.h> #include <asm/apic.h> +#include <asm/cacheinfo.h> #include <asm/cpu.h> +#include <asm/spec-ctrl.h> #include <asm/smp.h> #include <asm/pci-direct.h> #include <asm/delay.h> @@ -297,7 +299,6 @@ static int nearby_node(int apicid) } #endif -#ifdef CONFIG_SMP /* * Fix up cpu_core_id for pre-F17h systems to be in the * [0 .. cores_per_node - 1] range. Not really needed but @@ -327,6 +328,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c) /* get information required for multi-node processors */ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { + int err; u32 eax, ebx, ecx, edx; cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); @@ -345,21 +347,15 @@ static void amd_get_topology(struct cpuinfo_x86 *c) } /* - * We may have multiple LLCs if L3 caches exist, so check if we - * have an L3 cache by looking at the L3 cache CPUID leaf. + * In case leaf B is available, use it to derive + * topology information. */ - if (cpuid_edx(0x80000006)) { - if (c->x86 == 0x17) { - /* - * LLC is at the core complex level. - * Core complex id is ApicId[3]. - */ - per_cpu(cpu_llc_id, cpu) = c->apicid >> 3; - } else { - /* LLC is at the node level. */ - per_cpu(cpu_llc_id, cpu) = node_id; - } - } + err = detect_extended_topology(c); + if (!err) + c->x86_coreid_bits = get_count_order(c->x86_max_cores); + + cacheinfo_amd_init_llc_id(c, cpu, node_id); + } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; @@ -375,7 +371,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c) legacy_fixup_core_id(c); } } -#endif /* * On a AMD dual core setup the lower bits of the APIC id distinguish the cores. @@ -383,7 +378,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c) */ static void amd_detect_cmp(struct cpuinfo_x86 *c) { -#ifdef CONFIG_SMP unsigned bits; int cpu = smp_processor_id(); @@ -394,17 +388,11 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) c->phys_proc_id = c->initial_apicid >> bits; /* use socket ID also for last level cache */ per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; - amd_get_topology(c); -#endif } u16 amd_get_nb_id(int cpu) { - u16 id = 0; -#ifdef CONFIG_SMP - id = per_cpu(cpu_llc_id, cpu); -#endif - return id; + return per_cpu(cpu_llc_id, cpu); } EXPORT_SYMBOL_GPL(amd_get_nb_id); @@ -554,6 +542,26 @@ static void bsp_init_amd(struct cpuinfo_x86 *c) rdmsrl(MSR_FAM10H_NODE_ID, value); nodes_per_socket = ((value >> 3) & 7) + 1; } + + if (c->x86 >= 0x15 && c->x86 <= 0x17) { + unsigned int bit; + + switch (c->x86) { + case 0x15: bit = 54; break; + case 0x16: bit = 33; break; + case 0x17: bit = 10; break; + default: return; + } + /* + * Try to cache the base value so further operations can + * avoid RMW. If that faults, do not enable SSBD. + */ + if (!rdmsrl_safe(MSR_AMD64_LS_CFG, &x86_amd_ls_cfg_base)) { + setup_force_cpu_cap(X86_FEATURE_LS_CFG_SSBD); + setup_force_cpu_cap(X86_FEATURE_SSBD); + x86_amd_ls_cfg_ssbd_mask = 1ULL << bit; + } + } } static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) @@ -791,6 +799,7 @@ static void init_amd_bd(struct cpuinfo_x86 *c) static void init_amd_zn(struct cpuinfo_x86 *c) { + set_cpu_cap(c, X86_FEATURE_ZEN); /* * Fix erratum 1076: CPB feature bit not being set in CPUID. It affects * all up to and including B1. @@ -842,6 +851,7 @@ static void init_amd(struct cpuinfo_x86 *c) /* Multi core CPU? */ if (c->extended_cpuid_level >= 0x80000008) { amd_detect_cmp(c); + amd_get_topology(c); srat_detect_node(c); } diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index bfca937bdcc3..7416fc206b4a 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -12,8 +12,10 @@ #include <linux/utsname.h> #include <linux/cpu.h> #include <linux/module.h> +#include <linux/nospec.h> +#include <linux/prctl.h> -#include <asm/nospec-branch.h> +#include <asm/spec-ctrl.h> #include <asm/cmdline.h> #include <asm/bugs.h> #include <asm/processor.h> @@ -27,6 +29,27 @@ #include <asm/intel-family.h> static void __init spectre_v2_select_mitigation(void); +static void __init ssb_select_mitigation(void); + +/* + * Our boot-time value of the SPEC_CTRL MSR. We read it once so that any + * writes to SPEC_CTRL contain whatever reserved bits have been set. + */ +u64 __ro_after_init x86_spec_ctrl_base; +EXPORT_SYMBOL_GPL(x86_spec_ctrl_base); + +/* + * The vendor and possibly platform specific bits which can be modified in + * x86_spec_ctrl_base. + */ +static u64 __ro_after_init x86_spec_ctrl_mask = SPEC_CTRL_IBRS; + +/* + * AMD specific MSR info for Speculative Store Bypass control. + * x86_amd_ls_cfg_ssbd_mask is initialized in identify_boot_cpu(). + */ +u64 __ro_after_init x86_amd_ls_cfg_base; +u64 __ro_after_init x86_amd_ls_cfg_ssbd_mask; void __init check_bugs(void) { @@ -37,9 +60,27 @@ void __init check_bugs(void) print_cpu_info(&boot_cpu_data); } + /* + * Read the SPEC_CTRL MSR to account for reserved bits which may + * have unknown values. AMD64_LS_CFG MSR is cached in the early AMD + * init code as it is not enumerated and depends on the family. + */ + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + rdmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + + /* Allow STIBP in MSR_SPEC_CTRL if supported */ + if (boot_cpu_has(X86_FEATURE_STIBP)) + x86_spec_ctrl_mask |= SPEC_CTRL_STIBP; + /* Select the proper spectre mitigation before patching alternatives */ spectre_v2_select_mitigation(); + /* + * Select proper mitigation for any exposure to the Speculative Store + * Bypass vulnerability. + */ + ssb_select_mitigation(); + #ifdef CONFIG_X86_32 /* * Check whether we are able to run this kernel safely on SMP. @@ -93,7 +134,76 @@ static const char *spectre_v2_strings[] = { #undef pr_fmt #define pr_fmt(fmt) "Spectre V2 : " fmt -static enum spectre_v2_mitigation spectre_v2_enabled = SPECTRE_V2_NONE; +static enum spectre_v2_mitigation spectre_v2_enabled __ro_after_init = + SPECTRE_V2_NONE; + +void +x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool setguest) +{ + u64 msrval, guestval, hostval = x86_spec_ctrl_base; + struct thread_info *ti = current_thread_info(); + + /* Is MSR_SPEC_CTRL implemented ? */ + if (static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) { + /* + * Restrict guest_spec_ctrl to supported values. Clear the + * modifiable bits in the host base value and or the + * modifiable bits from the guest value. + */ + guestval = hostval & ~x86_spec_ctrl_mask; + guestval |= guest_spec_ctrl & x86_spec_ctrl_mask; + + /* SSBD controlled in MSR_SPEC_CTRL */ + if (static_cpu_has(X86_FEATURE_SPEC_CTRL_SSBD)) + hostval |= ssbd_tif_to_spec_ctrl(ti->flags); + + if (hostval != guestval) { + msrval = setguest ? guestval : hostval; + wrmsrl(MSR_IA32_SPEC_CTRL, msrval); + } + } + + /* + * If SSBD is not handled in MSR_SPEC_CTRL on AMD, update + * MSR_AMD64_L2_CFG or MSR_VIRT_SPEC_CTRL if supported. + */ + if (!static_cpu_has(X86_FEATURE_LS_CFG_SSBD) && + !static_cpu_has(X86_FEATURE_VIRT_SSBD)) + return; + + /* + * If the host has SSBD mitigation enabled, force it in the host's + * virtual MSR value. If its not permanently enabled, evaluate + * current's TIF_SSBD thread flag. + */ + if (static_cpu_has(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE)) + hostval = SPEC_CTRL_SSBD; + else + hostval = ssbd_tif_to_spec_ctrl(ti->flags); + + /* Sanitize the guest value */ + guestval = guest_virt_spec_ctrl & SPEC_CTRL_SSBD; + + if (hostval != guestval) { + unsigned long tif; + + tif = setguest ? ssbd_spec_ctrl_to_tif(guestval) : + ssbd_spec_ctrl_to_tif(hostval); + + speculative_store_bypass_update(tif); + } +} +EXPORT_SYMBOL_GPL(x86_virt_spec_ctrl); + +static void x86_amd_ssb_disable(void) +{ + u64 msrval = x86_amd_ls_cfg_base | x86_amd_ls_cfg_ssbd_mask; + + if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) + wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, SPEC_CTRL_SSBD); + else if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + wrmsrl(MSR_AMD64_LS_CFG, msrval); +} #ifdef RETPOLINE static bool spectre_v2_bad_module; @@ -312,32 +422,289 @@ retpoline_auto: } #undef pr_fmt +#define pr_fmt(fmt) "Speculative Store Bypass: " fmt + +static enum ssb_mitigation ssb_mode __ro_after_init = SPEC_STORE_BYPASS_NONE; + +/* The kernel command line selection */ +enum ssb_mitigation_cmd { + SPEC_STORE_BYPASS_CMD_NONE, + SPEC_STORE_BYPASS_CMD_AUTO, + SPEC_STORE_BYPASS_CMD_ON, + SPEC_STORE_BYPASS_CMD_PRCTL, + SPEC_STORE_BYPASS_CMD_SECCOMP, +}; + +static const char *ssb_strings[] = { + [SPEC_STORE_BYPASS_NONE] = "Vulnerable", + [SPEC_STORE_BYPASS_DISABLE] = "Mitigation: Speculative Store Bypass disabled", + [SPEC_STORE_BYPASS_PRCTL] = "Mitigation: Speculative Store Bypass disabled via prctl", + [SPEC_STORE_BYPASS_SECCOMP] = "Mitigation: Speculative Store Bypass disabled via prctl and seccomp", +}; + +static const struct { + const char *option; + enum ssb_mitigation_cmd cmd; +} ssb_mitigation_options[] = { + { "auto", SPEC_STORE_BYPASS_CMD_AUTO }, /* Platform decides */ + { "on", SPEC_STORE_BYPASS_CMD_ON }, /* Disable Speculative Store Bypass */ + { "off", SPEC_STORE_BYPASS_CMD_NONE }, /* Don't touch Speculative Store Bypass */ + { "prctl", SPEC_STORE_BYPASS_CMD_PRCTL }, /* Disable Speculative Store Bypass via prctl */ + { "seccomp", SPEC_STORE_BYPASS_CMD_SECCOMP }, /* Disable Speculative Store Bypass via prctl and seccomp */ +}; + +static enum ssb_mitigation_cmd __init ssb_parse_cmdline(void) +{ + enum ssb_mitigation_cmd cmd = SPEC_STORE_BYPASS_CMD_AUTO; + char arg[20]; + int ret, i; + + if (cmdline_find_option_bool(boot_command_line, "nospec_store_bypass_disable")) { + return SPEC_STORE_BYPASS_CMD_NONE; + } else { + ret = cmdline_find_option(boot_command_line, "spec_store_bypass_disable", + arg, sizeof(arg)); + if (ret < 0) + return SPEC_STORE_BYPASS_CMD_AUTO; + + for (i = 0; i < ARRAY_SIZE(ssb_mitigation_options); i++) { + if (!match_option(arg, ret, ssb_mitigation_options[i].option)) + continue; + + cmd = ssb_mitigation_options[i].cmd; + break; + } + + if (i >= ARRAY_SIZE(ssb_mitigation_options)) { + pr_err("unknown option (%s). Switching to AUTO select\n", arg); + return SPEC_STORE_BYPASS_CMD_AUTO; + } + } + + return cmd; +} + +static enum ssb_mitigation __init __ssb_select_mitigation(void) +{ + enum ssb_mitigation mode = SPEC_STORE_BYPASS_NONE; + enum ssb_mitigation_cmd cmd; + + if (!boot_cpu_has(X86_FEATURE_SSBD)) + return mode; + + cmd = ssb_parse_cmdline(); + if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS) && + (cmd == SPEC_STORE_BYPASS_CMD_NONE || + cmd == SPEC_STORE_BYPASS_CMD_AUTO)) + return mode; + + switch (cmd) { + case SPEC_STORE_BYPASS_CMD_AUTO: + case SPEC_STORE_BYPASS_CMD_SECCOMP: + /* + * Choose prctl+seccomp as the default mode if seccomp is + * enabled. + */ + if (IS_ENABLED(CONFIG_SECCOMP)) + mode = SPEC_STORE_BYPASS_SECCOMP; + else + mode = SPEC_STORE_BYPASS_PRCTL; + break; + case SPEC_STORE_BYPASS_CMD_ON: + mode = SPEC_STORE_BYPASS_DISABLE; + break; + case SPEC_STORE_BYPASS_CMD_PRCTL: + mode = SPEC_STORE_BYPASS_PRCTL; + break; + case SPEC_STORE_BYPASS_CMD_NONE: + break; + } + + /* + * We have three CPU feature flags that are in play here: + * - X86_BUG_SPEC_STORE_BYPASS - CPU is susceptible. + * - X86_FEATURE_SSBD - CPU is able to turn off speculative store bypass + * - X86_FEATURE_SPEC_STORE_BYPASS_DISABLE - engage the mitigation + */ + if (mode == SPEC_STORE_BYPASS_DISABLE) { + setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE); + /* + * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses + * a completely different MSR and bit dependent on family. + */ + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + x86_spec_ctrl_base |= SPEC_CTRL_SSBD; + x86_spec_ctrl_mask |= SPEC_CTRL_SSBD; + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + break; + case X86_VENDOR_AMD: + x86_amd_ssb_disable(); + break; + } + } + + return mode; +} + +static void ssb_select_mitigation(void) +{ + ssb_mode = __ssb_select_mitigation(); + + if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + pr_info("%s\n", ssb_strings[ssb_mode]); +} + +#undef pr_fmt +#define pr_fmt(fmt) "Speculation prctl: " fmt + +static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + bool update; + + if (ssb_mode != SPEC_STORE_BYPASS_PRCTL && + ssb_mode != SPEC_STORE_BYPASS_SECCOMP) + return -ENXIO; + + switch (ctrl) { + case PR_SPEC_ENABLE: + /* If speculation is force disabled, enable is not allowed */ + if (task_spec_ssb_force_disable(task)) + return -EPERM; + task_clear_spec_ssb_disable(task); + update = test_and_clear_tsk_thread_flag(task, TIF_SSBD); + break; + case PR_SPEC_DISABLE: + task_set_spec_ssb_disable(task); + update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + break; + case PR_SPEC_FORCE_DISABLE: + task_set_spec_ssb_disable(task); + task_set_spec_ssb_force_disable(task); + update = !test_and_set_tsk_thread_flag(task, TIF_SSBD); + break; + default: + return -ERANGE; + } + + /* + * If being set on non-current task, delay setting the CPU + * mitigation until it is next scheduled. + */ + if (task == current && update) + speculative_store_bypass_update_current(); + + return 0; +} + +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_set(task, ctrl); + default: + return -ENODEV; + } +} + +#ifdef CONFIG_SECCOMP +void arch_seccomp_spec_mitigate(struct task_struct *task) +{ + if (ssb_mode == SPEC_STORE_BYPASS_SECCOMP) + ssb_prctl_set(task, PR_SPEC_FORCE_DISABLE); +} +#endif + +static int ssb_prctl_get(struct task_struct *task) +{ + switch (ssb_mode) { + case SPEC_STORE_BYPASS_DISABLE: + return PR_SPEC_DISABLE; + case SPEC_STORE_BYPASS_SECCOMP: + case SPEC_STORE_BYPASS_PRCTL: + if (task_spec_ssb_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ssb_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + default: + if (boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) + return PR_SPEC_ENABLE; + return PR_SPEC_NOT_AFFECTED; + } +} + +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssb_prctl_get(task); + default: + return -ENODEV; + } +} + +void x86_spec_ctrl_setup_ap(void) +{ + if (boot_cpu_has(X86_FEATURE_MSR_SPEC_CTRL)) + wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base); + + if (ssb_mode == SPEC_STORE_BYPASS_DISABLE) + x86_amd_ssb_disable(); +} #ifdef CONFIG_SYSFS -ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) + +static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr, + char *buf, unsigned int bug) { - if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) + if (!boot_cpu_has_bug(bug)) return sprintf(buf, "Not affected\n"); - if (boot_cpu_has(X86_FEATURE_PTI)) - return sprintf(buf, "Mitigation: PTI\n"); + + switch (bug) { + case X86_BUG_CPU_MELTDOWN: + if (boot_cpu_has(X86_FEATURE_PTI)) + return sprintf(buf, "Mitigation: PTI\n"); + + break; + + case X86_BUG_SPECTRE_V1: + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + + case X86_BUG_SPECTRE_V2: + return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], + boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", + boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", + spectre_v2_module_string()); + + case X86_BUG_SPEC_STORE_BYPASS: + return sprintf(buf, "%s\n", ssb_strings[ssb_mode]); + + default: + break; + } + return sprintf(buf, "Vulnerable\n"); } +ssize_t cpu_show_meltdown(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_CPU_MELTDOWN); +} + ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V1)) - return sprintf(buf, "Not affected\n"); - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); + return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V1); } ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf) { - if (!boot_cpu_has_bug(X86_BUG_SPECTRE_V2)) - return sprintf(buf, "Not affected\n"); + return cpu_show_common(dev, attr, buf, X86_BUG_SPECTRE_V2); +} - return sprintf(buf, "%s%s%s%s\n", spectre_v2_strings[spectre_v2_enabled], - boot_cpu_has(X86_FEATURE_USE_IBPB) ? ", IBPB" : "", - boot_cpu_has(X86_FEATURE_USE_IBRS_FW) ? ", IBRS_FW" : "", - spectre_v2_module_string()); +ssize_t cpu_show_spec_store_bypass(struct device *dev, struct device_attribute *attr, char *buf) +{ + return cpu_show_common(dev, attr, buf, X86_BUG_SPEC_STORE_BYPASS); } #endif diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 54d04d574148..38354c66df81 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -20,6 +20,8 @@ #include <asm/amd_nb.h> #include <asm/smp.h> +#include "cpu.h" + #define LVL_1_INST 1 #define LVL_1_DATA 2 #define LVL_2 3 @@ -637,6 +639,45 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c) return i; } +void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id) +{ + /* + * We may have multiple LLCs if L3 caches exist, so check if we + * have an L3 cache by looking at the L3 cache CPUID leaf. + */ + if (!cpuid_edx(0x80000006)) + return; + + if (c->x86 < 0x17) { + /* LLC is at the node level. */ + per_cpu(cpu_llc_id, cpu) = node_id; + } else if (c->x86 == 0x17 && + c->x86_model >= 0 && c->x86_model <= 0x1F) { + /* + * LLC is at the core complex level. + * Core complex ID is ApicId[3] for these processors. + */ + per_cpu(cpu_llc_id, cpu) = c->apicid >> 3; + } else { + /* + * LLC ID is calculated from the number of threads sharing the + * cache. + * */ + u32 eax, ebx, ecx, edx, num_sharing_cache = 0; + u32 llc_index = find_num_cache_leaves(c) - 1; + + cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx); + if (eax) + num_sharing_cache = ((eax >> 14) & 0xfff) + 1; + + if (num_sharing_cache) { + int bits = get_count_order(num_sharing_cache) - 1; + + per_cpu(cpu_llc_id, cpu) = c->apicid >> bits; + } + } +} + void init_amd_cacheinfo(struct cpuinfo_x86 *c) { @@ -650,7 +691,7 @@ void init_amd_cacheinfo(struct cpuinfo_x86 *c) } } -unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c) +void init_intel_cacheinfo(struct cpuinfo_x86 *c) { /* Cache sizes */ unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; @@ -802,7 +843,8 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c) c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); - return l2; + if (!l2) + cpu_detect_cache_sizes(c); } static int __cache_amd_cpumap_setup(unsigned int cpu, int index, diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index e5ec0f11c0de..14433ff5b828 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -18,6 +18,13 @@ #define RNG_ENABLED (1 << 3) #define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ +#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000 +#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000 +#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000 +#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001 +#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002 +#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020 + static void init_c3(struct cpuinfo_x86 *c) { u32 lo, hi; @@ -112,6 +119,31 @@ static void early_init_centaur(struct cpuinfo_x86 *c) } } +static void centaur_detect_vmx_virtcap(struct cpuinfo_x86 *c) +{ + u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2; + + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high); + msr_ctl = vmx_msr_high | vmx_msr_low; + + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW) + set_cpu_cap(c, X86_FEATURE_TPR_SHADOW); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI) + set_cpu_cap(c, X86_FEATURE_VNMI); + if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) { + rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2, + vmx_msr_low, vmx_msr_high); + msr_ctl2 = vmx_msr_high | vmx_msr_low; + if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) && + (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)) + set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT) + set_cpu_cap(c, X86_FEATURE_EPT); + if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID) + set_cpu_cap(c, X86_FEATURE_VPID); + } +} + static void init_centaur(struct cpuinfo_x86 *c) { #ifdef CONFIG_X86_32 @@ -128,6 +160,24 @@ static void init_centaur(struct cpuinfo_x86 *c) clear_cpu_cap(c, 0*32+31); #endif early_init_centaur(c); + init_intel_cacheinfo(c); + detect_num_cpu_cores(c); +#ifdef CONFIG_X86_32 + detect_ht(c); +#endif + + if (c->cpuid_level > 9) { + unsigned int eax = cpuid_eax(10); + + /* + * Check for version and the number of counters + * Version(eax[7:0]) can't be 0; + * Counters(eax[15:8]) should be greater than 1; + */ + if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1)) + set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); + } + switch (c->x86) { #ifdef CONFIG_X86_32 case 5: @@ -199,6 +249,9 @@ static void init_centaur(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); #endif + + if (cpu_has(c, X86_FEATURE_VMX)) + centaur_detect_vmx_virtcap(c); } #ifdef CONFIG_X86_32 diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 8a5b185735e1..95c8e507580d 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -66,6 +66,13 @@ cpumask_var_t cpu_callin_mask; /* representing cpus for which sibling maps can be computed */ cpumask_var_t cpu_sibling_setup_mask; +/* Number of siblings per CPU package */ +int smp_num_siblings = 1; +EXPORT_SYMBOL(smp_num_siblings); + +/* Last level cache ID of each logical CPU */ +DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; + /* correctly size the local cpu masks */ void __init setup_cpu_local_masks(void) { @@ -577,6 +584,19 @@ static void get_model_name(struct cpuinfo_x86 *c) *(s + 1) = '\0'; } +void detect_num_cpu_cores(struct cpuinfo_x86 *c) +{ + unsigned int eax, ebx, ecx, edx; + + c->x86_max_cores = 1; + if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4) + return; + + cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); + if (eax & 0x1f) + c->x86_max_cores = (eax >> 26) + 1; +} + void cpu_detect_cache_sizes(struct cpuinfo_x86 *c) { unsigned int n, dummy, ebx, ecx, edx, l2size; @@ -757,17 +777,32 @@ static void init_speculation_control(struct cpuinfo_x86 *c) * and they also have a different bit for STIBP support. Also, * a hypervisor might have set the individual AMD bits even on * Intel CPUs, for finer-grained selection of what's available. - * - * We use the AMD bits in 0x8000_0008 EBX as the generic hardware - * features, which are visible in /proc/cpuinfo and used by the - * kernel. So set those accordingly from the Intel bits. */ if (cpu_has(c, X86_FEATURE_SPEC_CTRL)) { set_cpu_cap(c, X86_FEATURE_IBRS); set_cpu_cap(c, X86_FEATURE_IBPB); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); } + if (cpu_has(c, X86_FEATURE_INTEL_STIBP)) set_cpu_cap(c, X86_FEATURE_STIBP); + + if (cpu_has(c, X86_FEATURE_SPEC_CTRL_SSBD) || + cpu_has(c, X86_FEATURE_VIRT_SSBD)) + set_cpu_cap(c, X86_FEATURE_SSBD); + + if (cpu_has(c, X86_FEATURE_AMD_IBRS)) { + set_cpu_cap(c, X86_FEATURE_IBRS); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + } + + if (cpu_has(c, X86_FEATURE_AMD_IBPB)) + set_cpu_cap(c, X86_FEATURE_IBPB); + + if (cpu_has(c, X86_FEATURE_AMD_STIBP)) { + set_cpu_cap(c, X86_FEATURE_STIBP); + set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL); + } } void get_cpu_cap(struct cpuinfo_x86 *c) @@ -848,6 +883,11 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_power = edx; } + if (c->extended_cpuid_level >= 0x80000008) { + cpuid(0x80000008, &eax, &ebx, &ecx, &edx); + c->x86_capability[CPUID_8000_0008_EBX] = ebx; + } + if (c->extended_cpuid_level >= 0x8000000a) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); @@ -871,7 +911,6 @@ static void get_cpu_address_sizes(struct cpuinfo_x86 *c) c->x86_virt_bits = (eax >> 8) & 0xff; c->x86_phys_bits = eax & 0xff; - c->x86_capability[CPUID_8000_0008_EBX] = ebx; } #ifdef CONFIG_X86_32 else if (cpu_has(c, X86_FEATURE_PAE) || cpu_has(c, X86_FEATURE_PSE36)) @@ -923,21 +962,47 @@ static const __initconst struct x86_cpu_id cpu_no_meltdown[] = { {} }; -static bool __init cpu_vulnerable_to_meltdown(struct cpuinfo_x86 *c) +/* Only list CPUs which speculate but are non susceptible to SSB */ +static const __initconst struct x86_cpu_id cpu_no_spec_store_bypass[] = { + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT1 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_AIRMONT }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_SILVERMONT2 }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_ATOM_MERRIFIELD }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_CORE_YONAH }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNL }, + { X86_VENDOR_INTEL, 6, INTEL_FAM6_XEON_PHI_KNM }, + { X86_VENDOR_AMD, 0x12, }, + { X86_VENDOR_AMD, 0x11, }, + { X86_VENDOR_AMD, 0x10, }, + { X86_VENDOR_AMD, 0xf, }, + {} +}; + +static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) { u64 ia32_cap = 0; - if (x86_match_cpu(cpu_no_meltdown)) - return false; + if (x86_match_cpu(cpu_no_speculation)) + return; + + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); + if (!x86_match_cpu(cpu_no_spec_store_bypass) && + !(ia32_cap & ARCH_CAP_SSB_NO)) + setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); + + if (x86_match_cpu(cpu_no_meltdown)) + return; + /* Rogue Data Cache Load? No! */ if (ia32_cap & ARCH_CAP_RDCL_NO) - return false; + return; - return true; + setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); } /* @@ -988,12 +1053,7 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) setup_force_cpu_cap(X86_FEATURE_ALWAYS); - if (!x86_match_cpu(cpu_no_speculation)) { - if (cpu_vulnerable_to_meltdown(c)) - setup_force_cpu_bug(X86_BUG_CPU_MELTDOWN); - setup_force_cpu_bug(X86_BUG_SPECTRE_V1); - setup_force_cpu_bug(X86_BUG_SPECTRE_V2); - } + cpu_set_bug_bits(c); fpu__init_system(c); @@ -1004,6 +1064,21 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) */ setup_clear_cpu_cap(X86_FEATURE_PCID); #endif + + /* + * Later in the boot process pgtable_l5_enabled() relies on + * cpu_feature_enabled(X86_FEATURE_LA57). If 5-level paging is not + * enabled by this point we need to clear the feature bit to avoid + * false-positives at the later stage. + * + * pgtable_l5_enabled() can be false here for several reasons: + * - 5-level paging is disabled compile-time; + * - it's 32-bit kernel; + * - machine doesn't support 5-level paging; + * - user specified 'no5lvl' in kernel command line. + */ + if (!pgtable_l5_enabled()) + setup_clear_cpu_cap(X86_FEATURE_LA57); } void __init early_cpu_init(void) @@ -1355,6 +1430,7 @@ void identify_secondary_cpu(struct cpuinfo_x86 *c) #endif mtrr_ap_init(); validate_apic_and_package_id(c); + x86_spec_ctrl_setup_ap(); } static __init int setup_noclflush(char *arg) diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index e806b11a99af..38216f678fc3 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -47,7 +47,19 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[], extern void get_cpu_cap(struct cpuinfo_x86 *c); extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); +extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); +extern u32 get_scattered_cpuid_leaf(unsigned int level, + unsigned int sub_leaf, + enum cpuid_regs_idx reg); +extern void init_intel_cacheinfo(struct cpuinfo_x86 *c); +extern void init_amd_cacheinfo(struct cpuinfo_x86 *c); + +extern void detect_num_cpu_cores(struct cpuinfo_x86 *c); +extern int detect_extended_topology(struct cpuinfo_x86 *c); +extern void detect_ht(struct cpuinfo_x86 *c); unsigned int aperfmperf_get_khz(int cpu); +extern void x86_spec_ctrl_setup_ap(void); + #endif /* ARCH_X86_CPU_H */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 60d1897041da..eb75564f2d25 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -188,7 +188,10 @@ static void early_init_intel(struct cpuinfo_x86 *c) setup_clear_cpu_cap(X86_FEATURE_IBPB); setup_clear_cpu_cap(X86_FEATURE_STIBP); setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL); + setup_clear_cpu_cap(X86_FEATURE_MSR_SPEC_CTRL); setup_clear_cpu_cap(X86_FEATURE_INTEL_STIBP); + setup_clear_cpu_cap(X86_FEATURE_SSBD); + setup_clear_cpu_cap(X86_FEATURE_SPEC_CTRL_SSBD); } /* @@ -453,24 +456,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c) #endif } -/* - * find out the number of processor cores on the die - */ -static int intel_num_cpu_cores(struct cpuinfo_x86 *c) -{ - unsigned int eax, ebx, ecx, edx; - - if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4) - return 1; - - /* Intel has a non-standard dependency on %ecx for this CPUID level. */ - cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); - if (eax & 0x1f) - return (eax >> 26) + 1; - else - return 1; -} - static void detect_vmx_virtcap(struct cpuinfo_x86 *c) { /* Intel VMX MSR indicated features */ @@ -653,8 +638,6 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c) static void init_intel(struct cpuinfo_x86 *c) { - unsigned int l2 = 0; - early_init_intel(c); intel_workarounds(c); @@ -671,19 +654,13 @@ static void init_intel(struct cpuinfo_x86 *c) * let's use the legacy cpuid vector 0x1 and 0x4 for topology * detection. */ - c->x86_max_cores = intel_num_cpu_cores(c); + detect_num_cpu_cores(c); #ifdef CONFIG_X86_32 detect_ht(c); #endif } - l2 = init_intel_cacheinfo(c); - - /* Detect legacy cache sizes if init_intel_cacheinfo did not */ - if (l2 == 0) { - cpu_detect_cache_sizes(c); - l2 = c->x86_cache_size; - } + init_intel_cacheinfo(c); if (c->cpuid_level > 9) { unsigned eax = cpuid_eax(10); @@ -696,7 +673,8 @@ static void init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); if (boot_cpu_has(X86_FEATURE_DS)) { - unsigned int l1; + unsigned int l1, l2; + rdmsr(MSR_IA32_MISC_ENABLE, l1, l2); if (!(l1 & (1<<11))) set_cpu_cap(c, X86_FEATURE_BTS); @@ -724,6 +702,7 @@ static void init_intel(struct cpuinfo_x86 *c) * Dixon is NOT a Celeron. */ if (c->x86 == 6) { + unsigned int l2 = c->x86_cache_size; char *p = NULL; switch (c->x86_model) { diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c index 589b948e6e01..24bfa63e86cf 100644 --- a/arch/x86/kernel/cpu/intel_rdt.c +++ b/arch/x86/kernel/cpu/intel_rdt.c @@ -33,8 +33,8 @@ #include <asm/intel_rdt_sched.h> #include "intel_rdt.h" -#define MAX_MBA_BW 100u #define MBA_IS_LINEAR 0x4 +#define MBA_MAX_MBPS U32_MAX /* Mutex to protect rdtgroup access. */ DEFINE_MUTEX(rdtgroup_mutex); @@ -178,7 +178,7 @@ struct rdt_resource rdt_resources_all[] = { .msr_update = mba_wrmsr, .cache_level = 3, .parse_ctrlval = parse_bw, - .format_str = "%d=%*d", + .format_str = "%d=%*u", .fflags = RFTYPE_RES_MB, }, }; @@ -230,6 +230,14 @@ static inline void cache_alloc_hsw_probe(void) rdt_alloc_capable = true; } +bool is_mba_sc(struct rdt_resource *r) +{ + if (!r) + return rdt_resources_all[RDT_RESOURCE_MBA].membw.mba_sc; + + return r->membw.mba_sc; +} + /* * rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values * exposed to user interface and the h/w understandable delay values. @@ -341,7 +349,7 @@ static int get_cache_id(int cpu, int level) * that can be written to QOS_MSRs. * There are currently no SKUs which support non linear delay values. */ -static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) +u32 delay_bw_map(unsigned long bw, struct rdt_resource *r) { if (r->membw.delay_linear) return MAX_MBA_BW - bw; @@ -431,25 +439,40 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id, return NULL; } +void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm) +{ + int i; + + /* + * Initialize the Control MSRs to having no control. + * For Cache Allocation: Set all bits in cbm + * For Memory Allocation: Set b/w requested to 100% + * and the bandwidth in MBps to U32_MAX + */ + for (i = 0; i < r->num_closid; i++, dc++, dm++) { + *dc = r->default_ctrl; + *dm = MBA_MAX_MBPS; + } +} + static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d) { struct msr_param m; - u32 *dc; - int i; + u32 *dc, *dm; dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL); if (!dc) return -ENOMEM; - d->ctrl_val = dc; + dm = kmalloc_array(r->num_closid, sizeof(*d->mbps_val), GFP_KERNEL); + if (!dm) { + kfree(dc); + return -ENOMEM; + } - /* - * Initialize the Control MSRs to having no control. - * For Cache Allocation: Set all bits in cbm - * For Memory Allocation: Set b/w requested to 100 - */ - for (i = 0; i < r->num_closid; i++, dc++) - *dc = r->default_ctrl; + d->ctrl_val = dc; + d->mbps_val = dm; + setup_default_ctrlval(r, dc, dm); m.low = 0; m.high = r->num_closid; @@ -588,6 +611,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r) } kfree(d->ctrl_val); + kfree(d->mbps_val); kfree(d->rmid_busy_llc); kfree(d->mbm_total); kfree(d->mbm_local); diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h index 3fd7a70ee04a..39752825e376 100644 --- a/arch/x86/kernel/cpu/intel_rdt.h +++ b/arch/x86/kernel/cpu/intel_rdt.h @@ -28,6 +28,7 @@ #define MBM_CNTR_WIDTH 24 #define MBM_OVERFLOW_INTERVAL 1000 +#define MAX_MBA_BW 100u #define RMID_VAL_ERROR BIT_ULL(63) #define RMID_VAL_UNAVAIL BIT_ULL(62) @@ -180,10 +181,20 @@ struct rftype { * struct mbm_state - status for each MBM counter in each domain * @chunks: Total data moved (multiply by rdt_group.mon_scale to get bytes) * @prev_msr Value of IA32_QM_CTR for this RMID last time we read it + * @chunks_bw Total local data moved. Used for bandwidth calculation + * @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting + * @prev_bw The most recent bandwidth in MBps + * @delta_bw Difference between the current and previous bandwidth + * @delta_comp Indicates whether to compute the delta_bw */ struct mbm_state { u64 chunks; u64 prev_msr; + u64 chunks_bw; + u64 prev_bw_msr; + u32 prev_bw; + u32 delta_bw; + bool delta_comp; }; /** @@ -202,6 +213,7 @@ struct mbm_state { * @cqm_work_cpu: * worker cpu for CQM h/w counters * @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID) + * @mbps_val: When mba_sc is enabled, this holds the bandwidth in MBps * @new_ctrl: new ctrl value to be loaded * @have_new_ctrl: did user provide new_ctrl for this domain */ @@ -217,6 +229,7 @@ struct rdt_domain { int mbm_work_cpu; int cqm_work_cpu; u32 *ctrl_val; + u32 *mbps_val; u32 new_ctrl; bool have_new_ctrl; }; @@ -259,6 +272,7 @@ struct rdt_cache { * @min_bw: Minimum memory bandwidth percentage user can request * @bw_gran: Granularity at which the memory bandwidth is allocated * @delay_linear: True if memory B/W delay is in linear scale + * @mba_sc: True if MBA software controller(mba_sc) is enabled * @mb_map: Mapping of memory B/W percentage to memory B/W delay */ struct rdt_membw { @@ -266,6 +280,7 @@ struct rdt_membw { u32 min_bw; u32 bw_gran; u32 delay_linear; + bool mba_sc; u32 *mb_map; }; @@ -445,6 +460,9 @@ void mon_event_read(struct rmid_read *rr, struct rdt_domain *d, void mbm_setup_overflow_handler(struct rdt_domain *dom, unsigned long delay_ms); void mbm_handle_overflow(struct work_struct *work); +bool is_mba_sc(struct rdt_resource *r); +void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm); +u32 delay_bw_map(unsigned long bw, struct rdt_resource *r); void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms); void cqm_handle_limbo(struct work_struct *work); bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d); diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c index 23e1d5c249c6..116d57b248d3 100644 --- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c +++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c @@ -53,7 +53,8 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r) return false; } - if (bw < r->membw.min_bw || bw > r->default_ctrl) { + if ((bw < r->membw.min_bw || bw > r->default_ctrl) && + !is_mba_sc(r)) { rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw, r->membw.min_bw, r->default_ctrl); return false; @@ -179,6 +180,8 @@ static int update_domains(struct rdt_resource *r, int closid) struct msr_param msr_param; cpumask_var_t cpu_mask; struct rdt_domain *d; + bool mba_sc; + u32 *dc; int cpu; if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL)) @@ -188,13 +191,20 @@ static int update_domains(struct rdt_resource *r, int closid) msr_param.high = msr_param.low + 1; msr_param.res = r; + mba_sc = is_mba_sc(r); list_for_each_entry(d, &r->domains, list) { - if (d->have_new_ctrl && d->new_ctrl != d->ctrl_val[closid]) { + dc = !mba_sc ? d->ctrl_val : d->mbps_val; + if (d->have_new_ctrl && d->new_ctrl != dc[closid]) { cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask); - d->ctrl_val[closid] = d->new_ctrl; + dc[closid] = d->new_ctrl; } } - if (cpumask_empty(cpu_mask)) + + /* + * Avoid writing the control msr with control values when + * MBA software controller is enabled + */ + if (cpumask_empty(cpu_mask) || mba_sc) goto done; cpu = get_cpu(); /* Update CBM on this cpu if it's in cpu_mask. */ @@ -282,13 +292,17 @@ static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid) { struct rdt_domain *dom; bool sep = false; + u32 ctrl_val; seq_printf(s, "%*s:", max_name_width, r->name); list_for_each_entry(dom, &r->domains, list) { if (sep) seq_puts(s, ";"); + + ctrl_val = (!is_mba_sc(r) ? dom->ctrl_val[closid] : + dom->mbps_val[closid]); seq_printf(s, r->format_str, dom->id, max_data_width, - dom->ctrl_val[closid]); + ctrl_val); sep = true; } seq_puts(s, "\n"); diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c index 681450eee428..b0f3aed76b75 100644 --- a/arch/x86/kernel/cpu/intel_rdt_monitor.c +++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c @@ -225,10 +225,18 @@ void free_rmid(u32 rmid) list_add_tail(&entry->list, &rmid_free_lru); } +static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr) +{ + u64 shift = 64 - MBM_CNTR_WIDTH, chunks; + + chunks = (cur_msr << shift) - (prev_msr << shift); + return chunks >>= shift; +} + static int __mon_event_count(u32 rmid, struct rmid_read *rr) { - u64 chunks, shift, tval; struct mbm_state *m; + u64 chunks, tval; tval = __rmid_read(rmid, rr->evtid); if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) { @@ -254,14 +262,12 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr) } if (rr->first) { - m->prev_msr = tval; - m->chunks = 0; + memset(m, 0, sizeof(struct mbm_state)); + m->prev_bw_msr = m->prev_msr = tval; return 0; } - shift = 64 - MBM_CNTR_WIDTH; - chunks = (tval << shift) - (m->prev_msr << shift); - chunks >>= shift; + chunks = mbm_overflow_count(m->prev_msr, tval); m->chunks += chunks; m->prev_msr = tval; @@ -270,6 +276,32 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr) } /* + * Supporting function to calculate the memory bandwidth + * and delta bandwidth in MBps. + */ +static void mbm_bw_count(u32 rmid, struct rmid_read *rr) +{ + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3]; + struct mbm_state *m = &rr->d->mbm_local[rmid]; + u64 tval, cur_bw, chunks; + + tval = __rmid_read(rmid, rr->evtid); + if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) + return; + + chunks = mbm_overflow_count(m->prev_bw_msr, tval); + m->chunks_bw += chunks; + m->chunks = m->chunks_bw; + cur_bw = (chunks * r->mon_scale) >> 20; + + if (m->delta_comp) + m->delta_bw = abs(cur_bw - m->prev_bw); + m->delta_comp = false; + m->prev_bw = cur_bw; + m->prev_bw_msr = tval; +} + +/* * This is called via IPI to read the CQM/MBM counters * on a domain. */ @@ -297,6 +329,118 @@ void mon_event_count(void *info) } } +/* + * Feedback loop for MBA software controller (mba_sc) + * + * mba_sc is a feedback loop where we periodically read MBM counters and + * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so + * that: + * + * current bandwdith(cur_bw) < user specified bandwidth(user_bw) + * + * This uses the MBM counters to measure the bandwidth and MBA throttle + * MSRs to control the bandwidth for a particular rdtgrp. It builds on the + * fact that resctrl rdtgroups have both monitoring and control. + * + * The frequency of the checks is 1s and we just tag along the MBM overflow + * timer. Having 1s interval makes the calculation of bandwidth simpler. + * + * Although MBA's goal is to restrict the bandwidth to a maximum, there may + * be a need to increase the bandwidth to avoid uncecessarily restricting + * the L2 <-> L3 traffic. + * + * Since MBA controls the L2 external bandwidth where as MBM measures the + * L3 external bandwidth the following sequence could lead to such a + * situation. + * + * Consider an rdtgroup which had high L3 <-> memory traffic in initial + * phases -> mba_sc kicks in and reduced bandwidth percentage values -> but + * after some time rdtgroup has mostly L2 <-> L3 traffic. + * + * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its + * throttle MSRs already have low percentage values. To avoid + * unnecessarily restricting such rdtgroups, we also increase the bandwidth. + */ +static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm) +{ + u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val; + struct mbm_state *pmbm_data, *cmbm_data; + u32 cur_bw, delta_bw, user_bw; + struct rdt_resource *r_mba; + struct rdt_domain *dom_mba; + struct list_head *head; + struct rdtgroup *entry; + + r_mba = &rdt_resources_all[RDT_RESOURCE_MBA]; + closid = rgrp->closid; + rmid = rgrp->mon.rmid; + pmbm_data = &dom_mbm->mbm_local[rmid]; + + dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba); + if (!dom_mba) { + pr_warn_once("Failure to get domain for MBA update\n"); + return; + } + + cur_bw = pmbm_data->prev_bw; + user_bw = dom_mba->mbps_val[closid]; + delta_bw = pmbm_data->delta_bw; + cur_msr_val = dom_mba->ctrl_val[closid]; + + /* + * For Ctrl groups read data from child monitor groups. + */ + head = &rgrp->mon.crdtgrp_list; + list_for_each_entry(entry, head, mon.crdtgrp_list) { + cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid]; + cur_bw += cmbm_data->prev_bw; + delta_bw += cmbm_data->delta_bw; + } + + /* + * Scale up/down the bandwidth linearly for the ctrl group. The + * bandwidth step is the bandwidth granularity specified by the + * hardware. + * + * The delta_bw is used when increasing the bandwidth so that we + * dont alternately increase and decrease the control values + * continuously. + * + * For ex: consider cur_bw = 90MBps, user_bw = 100MBps and if + * bandwidth step is 20MBps(> user_bw - cur_bw), we would keep + * switching between 90 and 110 continuously if we only check + * cur_bw < user_bw. + */ + if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) { + new_msr_val = cur_msr_val - r_mba->membw.bw_gran; + } else if (cur_msr_val < MAX_MBA_BW && + (user_bw > (cur_bw + delta_bw))) { + new_msr_val = cur_msr_val + r_mba->membw.bw_gran; + } else { + return; + } + + cur_msr = r_mba->msr_base + closid; + wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba)); + dom_mba->ctrl_val[closid] = new_msr_val; + + /* + * Delta values are updated dynamically package wise for each + * rdtgrp everytime the throttle MSR changes value. + * + * This is because (1)the increase in bandwidth is not perfectly + * linear and only "approximately" linear even when the hardware + * says it is linear.(2)Also since MBA is a core specific + * mechanism, the delta values vary based on number of cores used + * by the rdtgrp. + */ + pmbm_data->delta_comp = true; + list_for_each_entry(entry, head, mon.crdtgrp_list) { + cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid]; + cmbm_data->delta_comp = true; + } +} + static void mbm_update(struct rdt_domain *d, int rmid) { struct rmid_read rr; @@ -314,7 +458,16 @@ static void mbm_update(struct rdt_domain *d, int rmid) } if (is_mbm_local_enabled()) { rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID; - __mon_event_count(rmid, &rr); + + /* + * Call the MBA software controller only for the + * control groups and when user has enabled + * the software controller explicitly. + */ + if (!is_mba_sc(NULL)) + __mon_event_count(rmid, &rr); + else + mbm_bw_count(rmid, &rr); } } @@ -385,6 +538,9 @@ void mbm_handle_overflow(struct work_struct *work) head = &prgrp->mon.crdtgrp_list; list_for_each_entry(crgrp, head, mon.crdtgrp_list) mbm_update(d, crgrp->mon.rmid); + + if (is_mba_sc(NULL)) + update_mba_bw(prgrp, d); } schedule_delayed_work_on(cpu, &d->mbm_over, delay); diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index fca759d272a1..749856a2e736 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -1005,6 +1005,11 @@ static void l2_qos_cfg_update(void *arg) wrmsrl(IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL); } +static inline bool is_mba_linear(void) +{ + return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear; +} + static int set_cache_qos_cfg(int level, bool enable) { void (*update)(void *arg); @@ -1041,6 +1046,28 @@ static int set_cache_qos_cfg(int level, bool enable) return 0; } +/* + * Enable or disable the MBA software controller + * which helps user specify bandwidth in MBps. + * MBA software controller is supported only if + * MBM is supported and MBA is in linear scale. + */ +static int set_mba_sc(bool mba_sc) +{ + struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA]; + struct rdt_domain *d; + + if (!is_mbm_enabled() || !is_mba_linear() || + mba_sc == is_mba_sc(r)) + return -EINVAL; + + r->membw.mba_sc = mba_sc; + list_for_each_entry(d, &r->domains, list) + setup_default_ctrlval(r, d->ctrl_val, d->mbps_val); + + return 0; +} + static int cdp_enable(int level, int data_type, int code_type) { struct rdt_resource *r_ldata = &rdt_resources_all[data_type]; @@ -1123,6 +1150,10 @@ static int parse_rdtgroupfs_options(char *data) ret = cdpl2_enable(); if (ret) goto out; + } else if (!strcmp(token, "mba_MBps")) { + ret = set_mba_sc(true); + if (ret) + goto out; } else { ret = -EINVAL; goto out; @@ -1445,6 +1476,8 @@ static void rdt_kill_sb(struct super_block *sb) cpus_read_lock(); mutex_lock(&rdtgroup_mutex); + set_mba_sc(false); + /*Put everything back to default values. */ for_each_alloc_enabled_rdt_resource(r) reset_all_ctrls(r); diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 475cb4f5f14f..c805a06e14c3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -48,7 +48,7 @@ static struct dentry *dfs_inj; static u8 n_banks; -#define MAX_FLAG_OPT_SIZE 3 +#define MAX_FLAG_OPT_SIZE 4 #define NBCFG 0x44 enum injection_type { diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 42cf2880d0ed..cd76380af79f 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1727,6 +1727,21 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c) } } +static void mce_centaur_feature_init(struct cpuinfo_x86 *c) +{ + struct mca_config *cfg = &mca_cfg; + + /* + * All newer Centaur CPUs support MCE broadcasting. Enable + * synchronization with a one second timeout. + */ + if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) || + c->x86 > 6) { + if (cfg->monarch_timeout < 0) + cfg->monarch_timeout = USEC_PER_SEC; + } +} + static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) { switch (c->x86_vendor) { @@ -1739,6 +1754,9 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) mce_amd_feature_init(c); break; } + case X86_VENDOR_CENTAUR: + mce_centaur_feature_init(c); + break; default: break; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index f7666eef4a87..f591b01930db 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -94,6 +94,11 @@ static struct smca_bank_name smca_names[] = { [SMCA_SMU] = { "smu", "System Management Unit" }, }; +static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init = +{ + [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 } +}; + const char *smca_get_name(enum smca_bank_types t) { if (t >= N_SMCA_BANK_TYPES) @@ -431,8 +436,7 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c) wrmsr(MSR_CU_DEF_ERR, low, high); } -static u32 smca_get_block_address(unsigned int cpu, unsigned int bank, - unsigned int block) +static u32 smca_get_block_address(unsigned int bank, unsigned int block) { u32 low, high; u32 addr = 0; @@ -443,24 +447,30 @@ static u32 smca_get_block_address(unsigned int cpu, unsigned int bank, if (!block) return MSR_AMD64_SMCA_MCx_MISC(bank); + /* Check our cache first: */ + if (smca_bank_addrs[bank][block] != -1) + return smca_bank_addrs[bank][block]; + /* * For SMCA enabled processors, BLKPTR field of the first MISC register * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4). */ - if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) - return addr; + if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) + goto out; if (!(low & MCI_CONFIG_MCAX)) - return addr; + goto out; - if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && + if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && (low & MASK_BLKPTR_LO)) - return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); + addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); +out: + smca_bank_addrs[bank][block] = addr; return addr; } -static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high, +static u32 get_block_address(u32 current_addr, u32 low, u32 high, unsigned int bank, unsigned int block) { u32 addr = 0, offset = 0; @@ -468,20 +478,8 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS)) return addr; - /* Get address from already initialized block. */ - if (per_cpu(threshold_banks, cpu)) { - struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank]; - - if (bankp && bankp->blocks) { - struct threshold_block *blockp = &bankp->blocks[block]; - - if (blockp) - return blockp->address; - } - } - if (mce_flags.smca) - return smca_get_block_address(cpu, bank, block); + return smca_get_block_address(bank, block); /* Fall back to method we used for older processors: */ switch (block) { @@ -559,7 +557,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c) smca_configure(bank, cpu); for (block = 0; block < NR_BLOCKS; ++block) { - address = get_block_address(cpu, address, low, high, bank, block); + address = get_block_address(address, low, high, bank, block); if (!address) break; @@ -1176,7 +1174,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank, if (err) goto out_free; recurse: - address = get_block_address(cpu, address, low, high, bank, ++block); + address = get_block_address(address, low, high, bank, ++block); if (!address) return 0; diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile index ad9e5ed81181..2ad9107ee980 100644 --- a/arch/x86/kernel/cpu/mtrr/Makefile +++ b/arch/x86/kernel/cpu/mtrr/Makefile @@ -1,3 +1,3 @@ -obj-y := main.o if.o generic.o cleanup.o +obj-y := mtrr.o if.o generic.o cleanup.o obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/mtrr.c index 7468de429087..9a19c800fe40 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/mtrr.c @@ -46,6 +46,7 @@ #include <linux/pci.h> #include <linux/smp.h> #include <linux/syscore_ops.h> +#include <linux/rcupdate.h> #include <asm/cpufeature.h> #include <asm/e820/api.h> @@ -100,7 +101,7 @@ static int have_wrcomb(void) if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && dev->device == PCI_DEVICE_ID_SERVERWORKS_LE && dev->revision <= 5) { - pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); + pr_info("Serverworks LE rev < 6 detected. Write-combining disabled.\n"); pci_dev_put(dev); return 0; } @@ -110,7 +111,7 @@ static int have_wrcomb(void) */ if (dev->vendor == PCI_VENDOR_ID_INTEL && dev->device == PCI_DEVICE_ID_INTEL_82451NX) { - pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); + pr_info("Intel 450NX MMC detected. Write-combining disabled.\n"); pci_dev_put(dev); return 0; } @@ -312,24 +313,24 @@ int mtrr_add_page(unsigned long base, unsigned long size, return error; if (type >= MTRR_NUM_TYPES) { - pr_warn("mtrr: type: %u invalid\n", type); + pr_warn("type: %u invalid\n", type); return -EINVAL; } /* If the type is WC, check that this processor supports it */ if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { - pr_warn("mtrr: your processor doesn't support write-combining\n"); + pr_warn("your processor doesn't support write-combining\n"); return -ENOSYS; } if (!size) { - pr_warn("mtrr: zero sized request\n"); + pr_warn("zero sized request\n"); return -EINVAL; } if ((base | (base + size - 1)) >> (boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) { - pr_warn("mtrr: base or size exceeds the MTRR width\n"); + pr_warn("base or size exceeds the MTRR width\n"); return -EINVAL; } @@ -360,8 +361,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, } else if (types_compatible(type, ltype)) continue; } - pr_warn("mtrr: 0x%lx000,0x%lx000 overlaps existing" - " 0x%lx000,0x%lx000\n", base, size, lbase, + pr_warn("0x%lx000,0x%lx000 overlaps existing 0x%lx000,0x%lx000\n", base, size, lbase, lsize); goto out; } @@ -369,7 +369,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, if (ltype != type) { if (types_compatible(type, ltype)) continue; - pr_warn("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", + pr_warn("type mismatch for %lx000,%lx000 old: %s new: %s\n", base, size, mtrr_attrib_to_str(ltype), mtrr_attrib_to_str(type)); goto out; @@ -395,7 +395,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, } } } else { - pr_info("mtrr: no more MTRRs available\n"); + pr_info("no more MTRRs available\n"); } error = i; out: @@ -407,8 +407,8 @@ int mtrr_add_page(unsigned long base, unsigned long size, static int mtrr_check(unsigned long base, unsigned long size) { if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - pr_warn("mtrr: size and base must be multiples of 4 kiB\n"); - pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base); + pr_warn("size and base must be multiples of 4 kiB\n"); + pr_debug("size: 0x%lx base: 0x%lx\n", size, base); dump_stack(); return -1; } @@ -499,22 +499,22 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) } } if (reg < 0) { - pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n", + pr_debug("no MTRR for %lx000,%lx000 found\n", base, size); goto out; } } if (reg >= max) { - pr_warn("mtrr: register: %d too big\n", reg); + pr_warn("register: %d too big\n", reg); goto out; } mtrr_if->get(reg, &lbase, &lsize, <ype); if (lsize < 1) { - pr_warn("mtrr: MTRR %d not used\n", reg); + pr_warn("MTRR %d not used\n", reg); goto out; } if (mtrr_usage_table[reg] < 1) { - pr_warn("mtrr: reg: %d has count=0\n", reg); + pr_warn("reg: %d has count=0\n", reg); goto out; } if (--mtrr_usage_table[reg] < 1) @@ -775,7 +775,7 @@ void __init mtrr_bp_init(void) } if (!mtrr_enabled()) { - pr_info("MTRR: Disabled\n"); + pr_info("Disabled\n"); /* * PAT initialization relies on MTRR's rendezvous handler. @@ -793,6 +793,9 @@ void mtrr_ap_init(void) if (!use_intel() || mtrr_aps_delayed_init) return; + + rcu_cpu_starting(smp_processor_id()); + /* * Ideally we should hold mtrr_mutex here to avoid mtrr entries * changed, but this routine will be called in cpu boot time, diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c index b099024d339c..81c0afb39d0a 100644 --- a/arch/x86/kernel/cpu/topology.c +++ b/arch/x86/kernel/cpu/topology.c @@ -27,7 +27,7 @@ * exists, use it for populating initial_apicid and cpu topology * detection. */ -void detect_extended_topology(struct cpuinfo_x86 *c) +int detect_extended_topology(struct cpuinfo_x86 *c) { #ifdef CONFIG_SMP unsigned int eax, ebx, ecx, edx, sub_index; @@ -36,7 +36,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c) static bool printed; if (c->cpuid_level < 0xb) - return; + return -1; cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); @@ -44,7 +44,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c) * check if the cpuid leaf 0xb is actually implemented. */ if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) - return; + return -1; set_cpu_cap(c, X86_FEATURE_XTOPOLOGY); @@ -95,6 +95,6 @@ void detect_extended_topology(struct cpuinfo_x86 *c) c->cpu_core_id); printed = 1; } - return; #endif + return 0; } diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 18fa9d74c182..666a284116ac 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -22,11 +22,14 @@ #include <asm/stacktrace.h> #include <asm/unwind.h> +#define OPCODE_BUFSIZE 64 + int panic_on_unrecovered_nmi; int panic_on_io_nmi; -static unsigned int code_bytes = 64; static int die_counter; +static struct pt_regs exec_summary_regs; + bool in_task_stack(unsigned long *stack, struct task_struct *task, struct stack_info *info) { @@ -69,9 +72,62 @@ static void printk_stack_address(unsigned long address, int reliable, printk("%s %s%pB\n", log_lvl, reliable ? "" : "? ", (void *)address); } +/* + * There are a couple of reasons for the 2/3rd prologue, courtesy of Linus: + * + * In case where we don't have the exact kernel image (which, if we did, we can + * simply disassemble and navigate to the RIP), the purpose of the bigger + * prologue is to have more context and to be able to correlate the code from + * the different toolchains better. + * + * In addition, it helps in recreating the register allocation of the failing + * kernel and thus make sense of the register dump. + * + * What is more, the additional complication of a variable length insn arch like + * x86 warrants having longer byte sequence before rIP so that the disassembler + * can "sync" up properly and find instruction boundaries when decoding the + * opcode bytes. + * + * Thus, the 2/3rds prologue and 64 byte OPCODE_BUFSIZE is just a random + * guesstimate in attempt to achieve all of the above. + */ +void show_opcodes(u8 *rip, const char *loglvl) +{ + unsigned int code_prologue = OPCODE_BUFSIZE * 2 / 3; + u8 opcodes[OPCODE_BUFSIZE]; + u8 *ip; + int i; + + printk("%sCode: ", loglvl); + + ip = (u8 *)rip - code_prologue; + if (probe_kernel_read(opcodes, ip, OPCODE_BUFSIZE)) { + pr_cont("Bad RIP value.\n"); + return; + } + + for (i = 0; i < OPCODE_BUFSIZE; i++, ip++) { + if (ip == rip) + pr_cont("<%02x> ", opcodes[i]); + else + pr_cont("%02x ", opcodes[i]); + } + pr_cont("\n"); +} + +void show_ip(struct pt_regs *regs, const char *loglvl) +{ +#ifdef CONFIG_X86_32 + printk("%sEIP: %pS\n", loglvl, (void *)regs->ip); +#else + printk("%sRIP: %04x:%pS\n", loglvl, (int)regs->cs, (void *)regs->ip); +#endif + show_opcodes((u8 *)regs->ip, loglvl); +} + void show_iret_regs(struct pt_regs *regs) { - printk(KERN_DEFAULT "RIP: %04x:%pS\n", (int)regs->cs, (void *)regs->ip); + show_ip(regs, KERN_DEFAULT); printk(KERN_DEFAULT "RSP: %04x:%016lx EFLAGS: %08lx", (int)regs->ss, regs->sp, regs->flags); } @@ -267,7 +323,6 @@ unsigned long oops_begin(void) bust_spinlocks(1); return flags; } -EXPORT_SYMBOL_GPL(oops_begin); NOKPROBE_SYMBOL(oops_begin); void __noreturn rewind_stack_do_exit(int signr); @@ -287,6 +342,9 @@ void oops_end(unsigned long flags, struct pt_regs *regs, int signr) raw_local_irq_restore(flags); oops_exit(); + /* Executive summary in case the oops scrolled away */ + __show_regs(&exec_summary_regs, true); + if (!signr) return; if (in_interrupt()) @@ -305,10 +363,10 @@ NOKPROBE_SYMBOL(oops_end); int __die(const char *str, struct pt_regs *regs, long err) { -#ifdef CONFIG_X86_32 - unsigned short ss; - unsigned long sp; -#endif + /* Save the regs of the first oops for the executive summary later. */ + if (!die_counter) + exec_summary_regs = *regs; + printk(KERN_DEFAULT "%s: %04lx [#%d]%s%s%s%s%s\n", str, err & 0xffff, ++die_counter, IS_ENABLED(CONFIG_PREEMPT) ? " PREEMPT" : "", @@ -318,26 +376,13 @@ int __die(const char *str, struct pt_regs *regs, long err) IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION) ? (boot_cpu_has(X86_FEATURE_PTI) ? " PTI" : " NOPTI") : ""); + show_regs(regs); + print_modules(); + if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_nr, SIGSEGV) == NOTIFY_STOP) return 1; - print_modules(); - show_regs(regs); -#ifdef CONFIG_X86_32 - if (user_mode(regs)) { - sp = regs->sp; - ss = regs->ss; - } else { - sp = kernel_stack_pointer(regs); - savesegment(ss, ss); - } - printk(KERN_EMERG "EIP: %pS SS:ESP: %04x:%08lx\n", - (void *)regs->ip, ss, sp); -#else - /* Executive summary in case the oops scrolled away */ - printk(KERN_ALERT "RIP: %pS RSP: %016lx\n", (void *)regs->ip, regs->sp); -#endif return 0; } NOKPROBE_SYMBOL(__die); @@ -356,30 +401,9 @@ void die(const char *str, struct pt_regs *regs, long err) oops_end(flags, regs, sig); } -static int __init code_bytes_setup(char *s) -{ - ssize_t ret; - unsigned long val; - - if (!s) - return -EINVAL; - - ret = kstrtoul(s, 0, &val); - if (ret) - return ret; - - code_bytes = val; - if (code_bytes > 8192) - code_bytes = 8192; - - return 1; -} -__setup("code_bytes=", code_bytes_setup); - void show_regs(struct pt_regs *regs) { bool all = true; - int i; show_regs_print_info(KERN_DEFAULT); @@ -389,36 +413,8 @@ void show_regs(struct pt_regs *regs) __show_regs(regs, all); /* - * When in-kernel, we also print out the stack and code at the - * time of the fault.. + * When in-kernel, we also print out the stack at the time of the fault.. */ - if (!user_mode(regs)) { - unsigned int code_prologue = code_bytes * 43 / 64; - unsigned int code_len = code_bytes; - unsigned char c; - u8 *ip; - + if (!user_mode(regs)) show_trace_log_lvl(current, regs, NULL, KERN_DEFAULT); - - printk(KERN_DEFAULT "Code: "); - - ip = (u8 *)regs->ip - code_prologue; - if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { - /* try starting at IP */ - ip = (u8 *)regs->ip; - code_len = code_len - code_prologue + 1; - } - for (i = 0; i < code_len; i++, ip++) { - if (ip < (u8 *)PAGE_OFFSET || - probe_kernel_address(ip, c)) { - pr_cont(" Bad RIP value."); - break; - } - if (ip == (u8 *)regs->ip) - pr_cont("<%02x> ", c); - else - pr_cont("%02x ", c); - } - } - pr_cont("\n"); } diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 6a2cb1442e05..d1f25c831447 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -155,7 +155,8 @@ static void __init __e820__range_add(struct e820_table *table, u64 start, u64 si int x = table->nr_entries; if (x >= ARRAY_SIZE(table->entries)) { - pr_err("e820: too many entries; ignoring [mem %#010llx-%#010llx]\n", start, start + size - 1); + pr_err("too many entries; ignoring [mem %#010llx-%#010llx]\n", + start, start + size - 1); return; } @@ -190,9 +191,10 @@ void __init e820__print_table(char *who) int i; for (i = 0; i < e820_table->nr_entries; i++) { - pr_info("%s: [mem %#018Lx-%#018Lx] ", who, - e820_table->entries[i].addr, - e820_table->entries[i].addr + e820_table->entries[i].size - 1); + pr_info("%s: [mem %#018Lx-%#018Lx] ", + who, + e820_table->entries[i].addr, + e820_table->entries[i].addr + e820_table->entries[i].size - 1); e820_print_type(e820_table->entries[i].type); pr_cont("\n"); @@ -574,7 +576,7 @@ void __init e820__update_table_print(void) if (e820__update_table(e820_table)) return; - pr_info("e820: modified physical RAM map:\n"); + pr_info("modified physical RAM map:\n"); e820__print_table("modified"); } @@ -636,9 +638,8 @@ __init void e820__setup_pci_gap(void) if (!found) { #ifdef CONFIG_X86_64 gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; - pr_err( - "e820: Cannot find an available gap in the 32-bit address range\n" - "e820: PCI devices with unassigned 32-bit BARs may not work!\n"); + pr_err("Cannot find an available gap in the 32-bit address range\n"); + pr_err("PCI devices with unassigned 32-bit BARs may not work!\n"); #else gapstart = 0x10000000; #endif @@ -649,7 +650,8 @@ __init void e820__setup_pci_gap(void) */ pci_mem_start = gapstart; - pr_info("e820: [mem %#010lx-%#010lx] available for PCI devices\n", gapstart, gapstart + gapsize - 1); + pr_info("[mem %#010lx-%#010lx] available for PCI devices\n", + gapstart, gapstart + gapsize - 1); } /* @@ -711,7 +713,7 @@ void __init e820__memory_setup_extended(u64 phys_addr, u32 data_len) memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware)); early_memunmap(sdata, data_len); - pr_info("e820: extended physical RAM map:\n"); + pr_info("extended physical RAM map:\n"); e820__print_table("extended"); } @@ -780,7 +782,7 @@ u64 __init e820__memblock_alloc_reserved(u64 size, u64 align) addr = __memblock_alloc_base(size, align, MEMBLOCK_ALLOC_ACCESSIBLE); if (addr) { e820__range_update_kexec(addr, size, E820_TYPE_RAM, E820_TYPE_RESERVED); - pr_info("e820: update e820_table_kexec for e820__memblock_alloc_reserved()\n"); + pr_info("update e820_table_kexec for e820__memblock_alloc_reserved()\n"); e820__update_table_kexec(); } @@ -830,8 +832,8 @@ static unsigned long __init e820_end_pfn(unsigned long limit_pfn, enum e820_type if (last_pfn > max_arch_pfn) last_pfn = max_arch_pfn; - pr_info("e820: last_pfn = %#lx max_arch_pfn = %#lx\n", - last_pfn, max_arch_pfn); + pr_info("last_pfn = %#lx max_arch_pfn = %#lx\n", + last_pfn, max_arch_pfn); return last_pfn; } @@ -1005,7 +1007,7 @@ void __init e820__finish_early_params(void) if (e820__update_table(e820_table) < 0) early_panic("Invalid user supplied memory map"); - pr_info("e820: user-defined physical RAM map:\n"); + pr_info("user-defined physical RAM map:\n"); e820__print_table("user"); } } @@ -1238,7 +1240,7 @@ void __init e820__memory_setup(void) memcpy(e820_table_kexec, e820_table, sizeof(*e820_table_kexec)); memcpy(e820_table_firmware, e820_table, sizeof(*e820_table_firmware)); - pr_info("e820: BIOS-provided physical RAM map:\n"); + pr_info("BIOS-provided physical RAM map:\n"); e820__print_table(who); } diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c index bae0d32e327b..da5d8ac60062 100644 --- a/arch/x86/kernel/early-quirks.c +++ b/arch/x86/kernel/early-quirks.c @@ -28,8 +28,6 @@ #include <asm/irq_remapping.h> #include <asm/early_ioremap.h> -#define dev_err(msg) pr_err("pci 0000:%02x:%02x.%d: %s", bus, slot, func, msg) - static void __init fix_hypertransport_config(int num, int slot, int func) { u32 htcfg; @@ -617,7 +615,8 @@ static void __init apple_airport_reset(int bus, int slot, int func) pmcsr = read_pci_config_16(bus, slot, func, BCM4331_PM_CAP + PCI_PM_CTRL); if ((pmcsr & PCI_PM_CTRL_STATE_MASK) != PCI_D0) { - dev_err("Cannot power up Apple AirPort card\n"); + pr_err("pci 0000:%02x:%02x.%d: Cannot power up Apple AirPort card\n", + bus, slot, func); return; } } @@ -628,7 +627,8 @@ static void __init apple_airport_reset(int bus, int slot, int func) mmio = early_ioremap(addr, BCM4331_MMIO_SIZE); if (!mmio) { - dev_err("Cannot iomap Apple AirPort card\n"); + pr_err("pci 0000:%02x:%02x.%d: Cannot iomap Apple AirPort card\n", + bus, slot, func); return; } diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 0c408f8c4ed4..a21d6ace648e 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -6,6 +6,10 @@ */ #define DISABLE_BRANCH_PROFILING + +/* cpu_feature_enabled() cannot be used this early */ +#define USE_EARLY_PGTABLE_L5 + #include <linux/init.h> #include <linux/linkage.h> #include <linux/types.h> @@ -32,11 +36,6 @@ #include <asm/microcode.h> #include <asm/kasan.h> -#ifdef CONFIG_X86_5LEVEL -#undef pgtable_l5_enabled -#define pgtable_l5_enabled __pgtable_l5_enabled -#endif - /* * Manage page tables very early on. */ @@ -45,8 +44,7 @@ static unsigned int __initdata next_early_pgt; pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX); #ifdef CONFIG_X86_5LEVEL -unsigned int __pgtable_l5_enabled __ro_after_init; -EXPORT_SYMBOL(__pgtable_l5_enabled); +unsigned int __pgtable_l5_enabled __initdata; unsigned int pgdir_shift __ro_after_init = 39; EXPORT_SYMBOL(pgdir_shift); unsigned int ptrs_per_p4d __ro_after_init = 1; @@ -82,13 +80,14 @@ static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr) static bool __head check_la57_support(unsigned long physaddr) { - if (native_cpuid_eax(0) < 7) - return false; - - if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) + /* + * 5-level paging is detected and enabled at kernel decomression + * stage. Only check if it has been enabled there. + */ + if (!(native_read_cr4() & X86_CR4_LA57)) return false; - *fixup_int(&pgtable_l5_enabled, physaddr) = 1; + *fixup_int(&__pgtable_l5_enabled, physaddr) = 1; *fixup_int(&pgdir_shift, physaddr) = 48; *fixup_int(&ptrs_per_p4d, physaddr) = 512; *fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5; @@ -104,6 +103,12 @@ static bool __head check_la57_support(unsigned long physaddr) } #endif +/* Code in __startup_64() can be relocated during execution, but the compiler + * doesn't have to generate PC-relative relocations when accessing globals from + * that function. Clang actually does not generate them, which leads to + * boot-time crashes. To work around this problem, every global pointer must + * be adjusted using fixup_pointer(). + */ unsigned long __head __startup_64(unsigned long physaddr, struct boot_params *bp) { @@ -113,6 +118,7 @@ unsigned long __head __startup_64(unsigned long physaddr, p4dval_t *p4d; pudval_t *pud; pmdval_t *pmd, pmd_entry; + pteval_t *mask_ptr; bool la57; int i; unsigned int *next_pgt_ptr; @@ -196,7 +202,8 @@ unsigned long __head __startup_64(unsigned long physaddr, pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; /* Filter out unsupported __PAGE_KERNEL_* bits: */ - pmd_entry &= __supported_pte_mask; + mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr); + pmd_entry &= *mask_ptr; pmd_entry += sme_get_me_mask(); pmd_entry += physaddr; @@ -273,7 +280,7 @@ again: * critical -- __PAGE_OFFSET would point us back into the dynamic * range and we might end up looping forever... */ - if (!pgtable_l5_enabled) + if (!pgtable_l5_enabled()) p4d_p = pgd_p; else if (pgd) p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base); diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 8ce4212e2b8d..b6be34ee88e9 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -975,8 +975,7 @@ int __init hpet_enable(void) cfg &= ~(HPET_CFG_ENABLE | HPET_CFG_LEGACY); hpet_writel(cfg, HPET_CFG); if (cfg) - pr_warn("HPET: Unrecognized bits %#x set in global cfg\n", - cfg); + pr_warn("Unrecognized bits %#x set in global cfg\n", cfg); for (i = 0; i <= last; ++i) { cfg = hpet_readl(HPET_Tn_CFG(i)); @@ -988,7 +987,7 @@ int __init hpet_enable(void) | HPET_TN_64BIT_CAP | HPET_TN_32BIT | HPET_TN_ROUTE | HPET_TN_FSB | HPET_TN_FSB_CAP); if (cfg) - pr_warn("HPET: Unrecognized bits %#x set in cfg#%u\n", + pr_warn("Unrecognized bits %#x set in cfg#%u\n", cfg, i); } hpet_print_config(); diff --git a/arch/x86/kernel/jailhouse.c b/arch/x86/kernel/jailhouse.c index a15fe0e92cf9..108c48d0d40e 100644 --- a/arch/x86/kernel/jailhouse.c +++ b/arch/x86/kernel/jailhouse.c @@ -37,7 +37,7 @@ static uint32_t __init jailhouse_detect(void) return jailhouse_cpuid_base(); } -static void jailhouse_get_wallclock(struct timespec *now) +static void jailhouse_get_wallclock(struct timespec64 *now) { memset(now, 0, sizeof(*now)); } diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 0715f827607c..6f4d42377fe5 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -370,6 +370,10 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION) return 0; + /* We should not singlestep on the exception masking instructions */ + if (insn_masking_exception(insn)) + return 0; + #ifdef CONFIG_X86_64 /* Only x86_64 has RIP relative instructions */ if (insn_rip_relative(insn)) { diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 7867417cfaff..5b2300b818af 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -457,7 +457,7 @@ static void __init sev_map_percpu_data(void) static void __init kvm_smp_prepare_cpus(unsigned int max_cpus) { native_smp_prepare_cpus(max_cpus); - if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) + if (kvm_para_has_hint(KVM_HINTS_REALTIME)) static_branch_disable(&virt_spin_lock_key); } @@ -553,7 +553,7 @@ static void __init kvm_guest_init(void) } if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && - !kvm_para_has_hint(KVM_HINTS_DEDICATED) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; @@ -649,7 +649,7 @@ static __init int kvm_setup_pv_tlb_flush(void) int cpu; if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && - !kvm_para_has_hint(KVM_HINTS_DEDICATED) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { for_each_possible_cpu(cpu) { zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), @@ -745,7 +745,7 @@ void __init kvm_spinlock_init(void) if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) return; - if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) + if (kvm_para_has_hint(KVM_HINTS_REALTIME)) return; __pv_init_lock_hash(); diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 8b26c9e01cc4..bf8d1eb7fca3 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -53,7 +53,7 @@ static struct pvclock_wall_clock *wall_clock; * have elapsed since the hypervisor wrote the data. So we try to account for * that with system time */ -static void kvm_get_wallclock(struct timespec *now) +static void kvm_get_wallclock(struct timespec64 *now) { struct pvclock_vcpu_time_info *vcpu_time; int low, high; @@ -72,7 +72,7 @@ static void kvm_get_wallclock(struct timespec *now) put_cpu(); } -static int kvm_set_wallclock(const struct timespec *now) +static int kvm_set_wallclock(const struct timespec64 *now) { return -ENODEV; } diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 60cdec6628b0..d1ab07ec8c9a 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -57,12 +57,17 @@ static void load_segments(void) static void machine_kexec_free_page_tables(struct kimage *image) { free_page((unsigned long)image->arch.pgd); + image->arch.pgd = NULL; #ifdef CONFIG_X86_PAE free_page((unsigned long)image->arch.pmd0); + image->arch.pmd0 = NULL; free_page((unsigned long)image->arch.pmd1); + image->arch.pmd1 = NULL; #endif free_page((unsigned long)image->arch.pte0); + image->arch.pte0 = NULL; free_page((unsigned long)image->arch.pte1); + image->arch.pte1 = NULL; } static int machine_kexec_alloc_page_tables(struct kimage *image) @@ -79,7 +84,6 @@ static int machine_kexec_alloc_page_tables(struct kimage *image) !image->arch.pmd0 || !image->arch.pmd1 || #endif !image->arch.pte0 || !image->arch.pte1) { - machine_kexec_free_page_tables(image); return -ENOMEM; } return 0; diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index a5e55d832d0a..4c8acdfdc5a7 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -39,9 +39,13 @@ const struct kexec_file_ops * const kexec_file_loaders[] = { static void free_transition_pgtable(struct kimage *image) { free_page((unsigned long)image->arch.p4d); + image->arch.p4d = NULL; free_page((unsigned long)image->arch.pud); + image->arch.pud = NULL; free_page((unsigned long)image->arch.pmd); + image->arch.pmd = NULL; free_page((unsigned long)image->arch.pte); + image->arch.pte = NULL; } static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) @@ -91,7 +95,6 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC)); return 0; err: - free_transition_pgtable(image); return result; } @@ -351,7 +354,8 @@ void arch_crash_save_vmcoreinfo(void) { VMCOREINFO_NUMBER(phys_base); VMCOREINFO_SYMBOL(init_top_pgt); - VMCOREINFO_NUMBER(pgtable_l5_enabled); + vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n", + pgtable_l5_enabled()); #ifdef CONFIG_NUMA VMCOREINFO_SYMBOL(node_data); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 77625b60a510..ab5d9dd668d2 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -15,13 +15,11 @@ #include <asm/x86_init.h> #include <asm/iommu_table.h> -static int forbid_dac __read_mostly; +static bool disable_dac_quirk __read_mostly; const struct dma_map_ops *dma_ops = &dma_direct_ops; EXPORT_SYMBOL(dma_ops); -static int iommu_sac_force __read_mostly; - #ifdef CONFIG_IOMMU_DEBUG int panic_on_overflow __read_mostly = 1; int force_iommu __read_mostly = 1; @@ -55,9 +53,6 @@ struct device x86_dma_fallback_dev = { }; EXPORT_SYMBOL(x86_dma_fallback_dev); -/* Number of entries preallocated for DMA-API debugging */ -#define PREALLOC_DMA_DEBUG_ENTRIES 65536 - void __init pci_iommu_alloc(void) { struct iommu_table_entry *p; @@ -76,7 +71,7 @@ void __init pci_iommu_alloc(void) } } -bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp) +bool arch_dma_alloc_attrs(struct device **dev) { if (!*dev) *dev = &x86_dma_fallback_dev; @@ -125,13 +120,13 @@ static __init int iommu_setup(char *p) if (!strncmp(p, "nomerge", 7)) iommu_merge = 0; if (!strncmp(p, "forcesac", 8)) - iommu_sac_force = 1; + pr_warn("forcesac option ignored.\n"); if (!strncmp(p, "allowdac", 8)) - forbid_dac = 0; + pr_warn("allowdac option ignored.\n"); if (!strncmp(p, "nodac", 5)) - forbid_dac = 1; + pr_warn("nodac option ignored.\n"); if (!strncmp(p, "usedac", 6)) { - forbid_dac = -1; + disable_dac_quirk = true; return 1; } #ifdef CONFIG_SWIOTLB @@ -156,40 +151,9 @@ static __init int iommu_setup(char *p) } early_param("iommu", iommu_setup); -int arch_dma_supported(struct device *dev, u64 mask) -{ -#ifdef CONFIG_PCI - if (mask > 0xffffffff && forbid_dac > 0) { - dev_info(dev, "PCI: Disallowing DAC for device\n"); - return 0; - } -#endif - - /* Tell the device to use SAC when IOMMU force is on. This - allows the driver to use cheaper accesses in some cases. - - Problem with this is that if we overflow the IOMMU area and - return DAC as fallback address the device may not handle it - correctly. - - As a special case some controllers have a 39bit address - mode that is as efficient as 32bit (aic79xx). Don't force - SAC for these. Assume all masks <= 40 bits are of this - type. Normally this doesn't make any difference, but gives - more gentle handling of IOMMU overflow. */ - if (iommu_sac_force && (mask >= DMA_BIT_MASK(40))) { - dev_info(dev, "Force SAC with mask %Lx\n", mask); - return 0; - } - - return 1; -} -EXPORT_SYMBOL(arch_dma_supported); - static int __init pci_iommu_init(void) { struct iommu_table_entry *p; - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); #ifdef CONFIG_PCI dma_debug_add_bus(&pci_bus_type); @@ -209,11 +173,17 @@ rootfs_initcall(pci_iommu_init); #ifdef CONFIG_PCI /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ +static int via_no_dac_cb(struct pci_dev *pdev, void *data) +{ + pdev->dev.dma_32bit_limit = true; + return 0; +} + static void via_no_dac(struct pci_dev *dev) { - if (forbid_dac == 0) { + if (!disable_dac_quirk) { dev_info(&dev->dev, "disabling DAC on VIA PCI bridge\n"); - forbid_dac = 1; + pci_walk_bus(dev->subordinate, via_no_dac_cb, NULL); } } DECLARE_PCI_FIXUP_CLASS_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index e47b2dbbdef3..c06c4c16c6b6 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -151,17 +151,19 @@ void perf_get_regs_user(struct perf_regs *regs_user, regs_user_copy->sp = user_regs->sp; regs_user_copy->cs = user_regs->cs; regs_user_copy->ss = user_regs->ss; - /* - * Most system calls don't save these registers, don't report them. + * Store user space frame-pointer value on sample + * to facilitate stack unwinding for cases when + * user space executable code has such support + * enabled at compile time: */ + regs_user_copy->bp = user_regs->bp; + regs_user_copy->bx = -1; - regs_user_copy->bp = -1; regs_user_copy->r12 = -1; regs_user_copy->r13 = -1; regs_user_copy->r14 = -1; regs_user_copy->r15 = -1; - /* * For this to be at all useful, we need a reasonable guess for * the ABI. Be careful: we're in NMI context, and we're diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 03408b942adb..30ca2d1a9231 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -38,6 +38,7 @@ #include <asm/switch_to.h> #include <asm/desc.h> #include <asm/prctl.h> +#include <asm/spec-ctrl.h> /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -278,6 +279,148 @@ static inline void switch_to_bitmap(struct tss_struct *tss, } } +#ifdef CONFIG_SMP + +struct ssb_state { + struct ssb_state *shared_state; + raw_spinlock_t lock; + unsigned int disable_state; + unsigned long local_state; +}; + +#define LSTATE_SSB 0 + +static DEFINE_PER_CPU(struct ssb_state, ssb_state); + +void speculative_store_bypass_ht_init(void) +{ + struct ssb_state *st = this_cpu_ptr(&ssb_state); + unsigned int this_cpu = smp_processor_id(); + unsigned int cpu; + + st->local_state = 0; + + /* + * Shared state setup happens once on the first bringup + * of the CPU. It's not destroyed on CPU hotunplug. + */ + if (st->shared_state) + return; + + raw_spin_lock_init(&st->lock); + + /* + * Go over HT siblings and check whether one of them has set up the + * shared state pointer already. + */ + for_each_cpu(cpu, topology_sibling_cpumask(this_cpu)) { + if (cpu == this_cpu) + continue; + + if (!per_cpu(ssb_state, cpu).shared_state) + continue; + + /* Link it to the state of the sibling: */ + st->shared_state = per_cpu(ssb_state, cpu).shared_state; + return; + } + + /* + * First HT sibling to come up on the core. Link shared state of + * the first HT sibling to itself. The siblings on the same core + * which come up later will see the shared state pointer and link + * themself to the state of this CPU. + */ + st->shared_state = st; +} + +/* + * Logic is: First HT sibling enables SSBD for both siblings in the core + * and last sibling to disable it, disables it for the whole core. This how + * MSR_SPEC_CTRL works in "hardware": + * + * CORE_SPEC_CTRL = THREAD0_SPEC_CTRL | THREAD1_SPEC_CTRL + */ +static __always_inline void amd_set_core_ssb_state(unsigned long tifn) +{ + struct ssb_state *st = this_cpu_ptr(&ssb_state); + u64 msr = x86_amd_ls_cfg_base; + + if (!static_cpu_has(X86_FEATURE_ZEN)) { + msr |= ssbd_tif_to_amd_ls_cfg(tifn); + wrmsrl(MSR_AMD64_LS_CFG, msr); + return; + } + + if (tifn & _TIF_SSBD) { + /* + * Since this can race with prctl(), block reentry on the + * same CPU. + */ + if (__test_and_set_bit(LSTATE_SSB, &st->local_state)) + return; + + msr |= x86_amd_ls_cfg_ssbd_mask; + + raw_spin_lock(&st->shared_state->lock); + /* First sibling enables SSBD: */ + if (!st->shared_state->disable_state) + wrmsrl(MSR_AMD64_LS_CFG, msr); + st->shared_state->disable_state++; + raw_spin_unlock(&st->shared_state->lock); + } else { + if (!__test_and_clear_bit(LSTATE_SSB, &st->local_state)) + return; + + raw_spin_lock(&st->shared_state->lock); + st->shared_state->disable_state--; + if (!st->shared_state->disable_state) + wrmsrl(MSR_AMD64_LS_CFG, msr); + raw_spin_unlock(&st->shared_state->lock); + } +} +#else +static __always_inline void amd_set_core_ssb_state(unsigned long tifn) +{ + u64 msr = x86_amd_ls_cfg_base | ssbd_tif_to_amd_ls_cfg(tifn); + + wrmsrl(MSR_AMD64_LS_CFG, msr); +} +#endif + +static __always_inline void amd_set_ssb_virt_state(unsigned long tifn) +{ + /* + * SSBD has the same definition in SPEC_CTRL and VIRT_SPEC_CTRL, + * so ssbd_tif_to_spec_ctrl() just works. + */ + wrmsrl(MSR_AMD64_VIRT_SPEC_CTRL, ssbd_tif_to_spec_ctrl(tifn)); +} + +static __always_inline void intel_set_ssb_state(unsigned long tifn) +{ + u64 msr = x86_spec_ctrl_base | ssbd_tif_to_spec_ctrl(tifn); + + wrmsrl(MSR_IA32_SPEC_CTRL, msr); +} + +static __always_inline void __speculative_store_bypass_update(unsigned long tifn) +{ + if (static_cpu_has(X86_FEATURE_VIRT_SSBD)) + amd_set_ssb_virt_state(tifn); + else if (static_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + amd_set_core_ssb_state(tifn); + else + intel_set_ssb_state(tifn); +} + +void speculative_store_bypass_update(unsigned long tif) +{ + preempt_disable(); + __speculative_store_bypass_update(tif); + preempt_enable(); +} + void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, struct tss_struct *tss) { @@ -309,6 +452,9 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, if ((tifp ^ tifn) & _TIF_NOCPUID) set_cpuid_faulting(!!(tifn & _TIF_NOCPUID)); + + if ((tifp ^ tifn) & _TIF_SSBD) + __speculative_store_bypass_update(tifn); } /* diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 5224c6099184..0ae659de21eb 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -76,16 +76,14 @@ void __show_regs(struct pt_regs *regs, int all) savesegment(gs, gs); } - printk(KERN_DEFAULT "EIP: %pS\n", (void *)regs->ip); - printk(KERN_DEFAULT "EFLAGS: %08lx CPU: %d\n", regs->flags, - raw_smp_processor_id()); + show_ip(regs, KERN_DEFAULT); printk(KERN_DEFAULT "EAX: %08lx EBX: %08lx ECX: %08lx EDX: %08lx\n", regs->ax, regs->bx, regs->cx, regs->dx); printk(KERN_DEFAULT "ESI: %08lx EDI: %08lx EBP: %08lx ESP: %08lx\n", regs->si, regs->di, regs->bp, sp); - printk(KERN_DEFAULT " DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x\n", - (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss); + printk(KERN_DEFAULT "DS: %04x ES: %04x FS: %04x GS: %04x SS: %04x EFLAGS: %08lx\n", + (u16)regs->ds, (u16)regs->es, (u16)regs->fs, gs, ss, regs->flags); if (!all) return; diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4b100fe0f508..12bb445fb98d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -542,6 +542,7 @@ void set_personality_64bit(void) clear_thread_flag(TIF_X32); /* Pretend that this comes from a 64bit execve */ task_pt_regs(current)->orig_ax = __NR_execve; + current_thread_info()->status &= ~TS_COMPAT; /* Ensure the corresponding mm is not marked. */ if (current->mm) diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index ed5c4cdf0a34..e2ee403865eb 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1377,7 +1377,6 @@ static void fill_sigtrap_info(struct task_struct *tsk, tsk->thread.trap_nr = X86_TRAP_DB; tsk->thread.error_code = error_code; - memset(info, 0, sizeof(*info)); info->si_signo = SIGTRAP; info->si_code = si_code; info->si_addr = user_mode(regs) ? (void __user *)regs->ip : NULL; @@ -1395,6 +1394,7 @@ void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, { struct siginfo info; + clear_siginfo(&info); fill_sigtrap_info(tsk, regs, error_code, si_code, &info); /* Send us the fake SIGTRAP */ force_sig_info(SIGTRAP, &info, tsk); diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 761f6af6efa5..637982efecd8 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -123,28 +123,35 @@ u64 pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) void pvclock_read_wallclock(struct pvclock_wall_clock *wall_clock, struct pvclock_vcpu_time_info *vcpu_time, - struct timespec *ts) + struct timespec64 *ts) { u32 version; u64 delta; - struct timespec now; + struct timespec64 now; /* get wallclock at system boot */ do { version = wall_clock->version; rmb(); /* fetch version before time */ + /* + * Note: wall_clock->sec is a u32 value, so it can + * only store dates between 1970 and 2106. To allow + * times beyond that, we need to create a new hypercall + * interface with an extended pvclock_wall_clock structure + * like ARM has. + */ now.tv_sec = wall_clock->sec; now.tv_nsec = wall_clock->nsec; rmb(); /* fetch time before checking version */ } while ((wall_clock->version & 1) || (version != wall_clock->version)); delta = pvclock_clocksource_read(vcpu_time); /* time since system boot */ - delta += now.tv_sec * (u64)NSEC_PER_SEC + now.tv_nsec; + delta += now.tv_sec * NSEC_PER_SEC + now.tv_nsec; now.tv_nsec = do_div(delta, NSEC_PER_SEC); now.tv_sec = delta; - set_normalized_timespec(ts, now.tv_sec, now.tv_nsec); + set_normalized_timespec64(ts, now.tv_sec, now.tv_nsec); } void pvclock_set_pvti_cpu0_va(struct pvclock_vsyscall_time_info *pvti) diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index f7b82ed7b5b5..586f718b8e95 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c @@ -39,7 +39,7 @@ EXPORT_SYMBOL(rtc_lock); * jump to the next second precisely 500 ms later. Check the Motorola * MC146818A or Dallas DS12887 data sheet for details. */ -int mach_set_rtc_mmss(const struct timespec *now) +int mach_set_rtc_mmss(const struct timespec64 *now) { unsigned long long nowtime = now->tv_sec; struct rtc_time tm; @@ -60,7 +60,7 @@ int mach_set_rtc_mmss(const struct timespec *now) return retval; } -void mach_get_cmos_time(struct timespec *now) +void mach_get_cmos_time(struct timespec64 *now) { unsigned int status, year, mon, day, hour, min, sec, century = 0; unsigned long flags; @@ -118,7 +118,7 @@ void mach_get_cmos_time(struct timespec *now) } else year += CMOS_YEARS_OFFS; - now->tv_sec = mktime(year, mon, day, hour, min, sec); + now->tv_sec = mktime64(year, mon, day, hour, min, sec); now->tv_nsec = 0; } @@ -145,13 +145,13 @@ void rtc_cmos_write(unsigned char val, unsigned char addr) } EXPORT_SYMBOL(rtc_cmos_write); -int update_persistent_clock(struct timespec now) +int update_persistent_clock64(struct timespec64 now) { return x86_platform.set_wallclock(&now); } /* not static: needed by APM */ -void read_persistent_clock(struct timespec *ts) +void read_persistent_clock64(struct timespec64 *ts) { x86_platform.get_wallclock(ts); } diff --git a/arch/x86/kernel/signal_compat.c b/arch/x86/kernel/signal_compat.c index 14c057f29979..9ccbf0576cd0 100644 --- a/arch/x86/kernel/signal_compat.c +++ b/arch/x86/kernel/signal_compat.c @@ -29,7 +29,7 @@ static inline void signal_compat_build_tests(void) BUILD_BUG_ON(NSIGFPE != 15); BUILD_BUG_ON(NSIGSEGV != 7); BUILD_BUG_ON(NSIGBUS != 5); - BUILD_BUG_ON(NSIGTRAP != 4); + BUILD_BUG_ON(NSIGTRAP != 5); BUILD_BUG_ON(NSIGCHLD != 6); BUILD_BUG_ON(NSIGSYS != 1); diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 0f1cbb042f49..c2f7d1d2a5c3 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -79,13 +79,7 @@ #include <asm/qspinlock.h> #include <asm/intel-family.h> #include <asm/cpu_device_id.h> - -/* Number of siblings per CPU package */ -int smp_num_siblings = 1; -EXPORT_SYMBOL(smp_num_siblings); - -/* Last level cache ID of each logical CPU */ -DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID; +#include <asm/spec-ctrl.h> /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); @@ -244,6 +238,8 @@ static void notrace start_secondary(void *unused) */ check_tsc_sync_target(); + speculative_store_bypass_ht_init(); + /* * Lock vector_lock, set CPU online and bring the vector * allocator online. Online must be set with vector_lock held @@ -1292,6 +1288,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) set_mtrr_aps_delayed_init(); smp_quirk_init_udelay(); + + speculative_store_bypass_ht_init(); } void arch_enable_nonboot_cpus_begin(void) diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index a3f15ed545b5..6a78d4b36a79 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/compat.h> #include <linux/errno.h> #include <linux/sched.h> #include <linux/sched/mm.h> @@ -19,7 +20,6 @@ #include <linux/elf.h> #include <asm/elf.h> -#include <asm/compat.h> #include <asm/ia32.h> #include <asm/syscalls.h> #include <asm/mpx.h> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 03f3d7695dac..a535dd64de63 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -299,6 +299,7 @@ static void do_error_trap(struct pt_regs *regs, long error_code, char *str, if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) != NOTIFY_STOP) { cond_local_irq_enable(regs); + clear_siginfo(&info); do_trap(trapnr, signr, str, regs, error_code, fill_trap_info(regs, signr, trapnr, &info)); } @@ -854,6 +855,7 @@ static void math_error(struct pt_regs *regs, int error_code, int trapnr) task->thread.trap_nr = trapnr; task->thread.error_code = error_code; + clear_siginfo(&info); info.si_signo = SIGFPE; info.si_errno = 0; info.si_addr = (void __user *)uprobe_get_trap_addr(regs); @@ -929,6 +931,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code) RCU_LOCKDEP_WARN(!rcu_is_watching(), "entry code didn't wake RCU"); local_irq_enable(); + clear_siginfo(&info); info.si_signo = SIGILL; info.si_errno = 0; info.si_code = ILL_BADSTK; diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 91e6da48cbb6..74392d9d51e0 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1067,6 +1067,7 @@ static struct clocksource clocksource_tsc_early = { .resume = tsc_resume, .mark_unstable = tsc_cs_mark_unstable, .tick_stable = tsc_cs_tick_stable, + .list = LIST_HEAD_INIT(clocksource_tsc_early.list), }; /* @@ -1086,6 +1087,7 @@ static struct clocksource clocksource_tsc = { .resume = tsc_resume, .mark_unstable = tsc_cs_mark_unstable, .tick_stable = tsc_cs_tick_stable, + .list = LIST_HEAD_INIT(clocksource_tsc.list), }; void mark_tsc_unstable(char *reason) @@ -1098,13 +1100,9 @@ void mark_tsc_unstable(char *reason) clear_sched_clock_stable(); disable_sched_clock_irqtime(); pr_info("Marking TSC unstable due to %s\n", reason); - /* Change only the rating, when not registered */ - if (clocksource_tsc.mult) { - clocksource_mark_unstable(&clocksource_tsc); - } else { - clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; - clocksource_tsc.rating = 0; - } + + clocksource_mark_unstable(&clocksource_tsc_early); + clocksource_mark_unstable(&clocksource_tsc); } EXPORT_SYMBOL_GPL(mark_tsc_unstable); @@ -1244,7 +1242,7 @@ static void tsc_refine_calibration_work(struct work_struct *work) /* Don't bother refining TSC on unstable systems */ if (tsc_unstable) - return; + goto unreg; /* * Since the work is started early in boot, we may be @@ -1297,11 +1295,12 @@ static void tsc_refine_calibration_work(struct work_struct *work) out: if (tsc_unstable) - return; + goto unreg; if (boot_cpu_has(X86_FEATURE_ART)) art_related_clocksource = &clocksource_tsc; clocksource_register_khz(&clocksource_tsc, tsc_khz); +unreg: clocksource_unregister(&clocksource_tsc_early); } @@ -1311,8 +1310,8 @@ static int __init init_tsc_clocksource(void) if (!boot_cpu_has(X86_FEATURE_TSC) || tsc_disabled > 0 || !tsc_khz) return 0; - if (check_tsc_unstable()) - return 0; + if (tsc_unstable) + goto unreg; if (tsc_clocksource_reliable) clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY; @@ -1328,6 +1327,7 @@ static int __init init_tsc_clocksource(void) if (boot_cpu_has(X86_FEATURE_ART)) art_related_clocksource = &clocksource_tsc; clocksource_register_khz(&clocksource_tsc, tsc_khz); +unreg: clocksource_unregister(&clocksource_tsc_early); return 0; } diff --git a/arch/x86/kernel/umip.c b/arch/x86/kernel/umip.c index f44ce0fb3583..ff20b35e98dd 100644 --- a/arch/x86/kernel/umip.c +++ b/arch/x86/kernel/umip.c @@ -278,6 +278,7 @@ static void force_sig_info_umip_fault(void __user *addr, struct pt_regs *regs) tsk->thread.error_code = X86_PF_USER | X86_PF_WRITE; tsk->thread.trap_nr = X86_TRAP_PF; + clear_siginfo(&info); info.si_signo = SIGSEGV; info.si_errno = 0; info.si_code = SEGV_MAPERR; diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 85c7ef23d99f..58d8d800875d 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -299,6 +299,10 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool if (is_prefix_bad(insn)) return -ENOTSUPP; + /* We should not singlestep on the exception masking instructions */ + if (insn_masking_exception(insn)) + return -ENOTSUPP; + if (x86_64) good_insns = good_insns_64; else @@ -1079,8 +1083,8 @@ arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr, struct pt_regs return orig_ret_vaddr; if (nleft != rasize) { - pr_err("uprobe: return address clobbered: pid=%d, %%sp=%#lx, " - "%%ip=%#lx\n", current->pid, regs->sp, regs->ip); + pr_err("return address clobbered: pid=%d, %%sp=%#lx, %%ip=%#lx\n", + current->pid, regs->sp, regs->ip); force_sig_info(SIGSEGV, SEND_SIG_FORCED, current); } diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 795f3a80e576..5e1458f609a1 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -117,11 +117,11 @@ SECTIONS #ifdef CONFIG_X86_64 . = ALIGN(PAGE_SIZE); - VMLINUX_SYMBOL(__entry_trampoline_start) = .; + __entry_trampoline_start = .; _entry_trampoline = .; *(.entry_trampoline) . = ALIGN(PAGE_SIZE); - VMLINUX_SYMBOL(__entry_trampoline_end) = .; + __entry_trampoline_end = .; ASSERT(. - _entry_trampoline == PAGE_SIZE, "entry trampoline is too big"); #endif diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 82055b90a8b3..92bf2f2e7cdd 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -379,7 +379,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 0x80000008.ebx */ const u32 kvm_cpuid_8000_0008_ebx_x86_features = - F(IBPB) | F(IBRS); + F(AMD_IBPB) | F(AMD_IBRS) | F(VIRT_SSBD); /* cpuid 0xC0000001.edx */ const u32 kvm_cpuid_C000_0001_edx_x86_features = @@ -408,7 +408,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) | - F(ARCH_CAPABILITIES); + F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES); /* all calls to cpuid_count() should be made on the same cpu */ get_cpu(); @@ -495,6 +495,11 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, entry->ecx &= ~F(PKU); entry->edx &= kvm_cpuid_7_0_edx_x86_features; cpuid_mask(&entry->edx, CPUID_7_EDX); + /* + * We emulate ARCH_CAPABILITIES in software even + * if the host doesn't support it. + */ + entry->edx |= F(ARCH_CAPABILITIES); } else { entry->ebx = 0; entry->ecx = 0; @@ -647,13 +652,20 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, g_phys_as = phys_as; entry->eax = g_phys_as | (virt_as << 8); entry->edx = 0; - /* IBRS and IBPB aren't necessarily present in hardware cpuid */ - if (boot_cpu_has(X86_FEATURE_IBPB)) - entry->ebx |= F(IBPB); - if (boot_cpu_has(X86_FEATURE_IBRS)) - entry->ebx |= F(IBRS); + /* + * IBRS, IBPB and VIRT_SSBD aren't necessarily present in + * hardware cpuid + */ + if (boot_cpu_has(X86_FEATURE_AMD_IBPB)) + entry->ebx |= F(AMD_IBPB); + if (boot_cpu_has(X86_FEATURE_AMD_IBRS)) + entry->ebx |= F(AMD_IBRS); + if (boot_cpu_has(X86_FEATURE_VIRT_SSBD)) + entry->ebx |= F(VIRT_SSBD); entry->ebx &= kvm_cpuid_8000_0008_ebx_x86_features; cpuid_mask(&entry->ebx, CPUID_8000_0008_EBX); + if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD)) + entry->ebx |= F(VIRT_SSBD); break; } case 0x80000019: diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 98618e397342..46ff64da44ca 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1260,12 +1260,16 @@ static void kvm_hv_hypercall_set_result(struct kvm_vcpu *vcpu, u64 result) } } -static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) +static int kvm_hv_hypercall_complete(struct kvm_vcpu *vcpu, u64 result) { - struct kvm_run *run = vcpu->run; + kvm_hv_hypercall_set_result(vcpu, result); + ++vcpu->stat.hypercalls; + return kvm_skip_emulated_instruction(vcpu); +} - kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result); - return 1; +static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) +{ + return kvm_hv_hypercall_complete(vcpu, vcpu->run->hyperv.u.hcall.result); } static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param) @@ -1296,8 +1300,10 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param) if (param & ~KVM_HYPERV_CONN_ID_MASK) return HV_STATUS_INVALID_HYPERCALL_INPUT; - /* conn_to_evt is protected by vcpu->kvm->srcu */ + /* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */ + rcu_read_lock(); eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param); + rcu_read_unlock(); if (!eventfd) return HV_STATUS_INVALID_PORT_ID; @@ -1348,7 +1354,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) /* Hypercall continuation is not supported yet */ if (rep_cnt || rep_idx) { ret = HV_STATUS_INVALID_HYPERCALL_CODE; - goto set_result; + goto out; } switch (code) { @@ -1379,9 +1385,8 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu) break; } -set_result: - kvm_hv_hypercall_set_result(vcpu, ret); - return 1; +out: + return kvm_hv_hypercall_complete(vcpu, ret); } void kvm_hv_init_vm(struct kvm *kvm) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 70dcb5548022..3773c4625114 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1463,23 +1463,6 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) local_irq_restore(flags); } -static void start_sw_period(struct kvm_lapic *apic) -{ - if (!apic->lapic_timer.period) - return; - - if (apic_lvtt_oneshot(apic) && - ktime_after(ktime_get(), - apic->lapic_timer.target_expiration)) { - apic_timer_expired(apic); - return; - } - - hrtimer_start(&apic->lapic_timer.timer, - apic->lapic_timer.target_expiration, - HRTIMER_MODE_ABS_PINNED); -} - static void update_target_expiration(struct kvm_lapic *apic, uint32_t old_divisor) { ktime_t now, remaining; @@ -1539,11 +1522,43 @@ static bool set_target_expiration(struct kvm_lapic *apic) static void advance_periodic_target_expiration(struct kvm_lapic *apic) { - apic->lapic_timer.tscdeadline += - nsec_to_cycles(apic->vcpu, apic->lapic_timer.period); + ktime_t now = ktime_get(); + u64 tscl = rdtsc(); + ktime_t delta; + + /* + * Synchronize both deadlines to the same time source or + * differences in the periods (caused by differences in the + * underlying clocks or numerical approximation errors) will + * cause the two to drift apart over time as the errors + * accumulate. + */ apic->lapic_timer.target_expiration = ktime_add_ns(apic->lapic_timer.target_expiration, apic->lapic_timer.period); + delta = ktime_sub(apic->lapic_timer.target_expiration, now); + apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) + + nsec_to_cycles(apic->vcpu, delta); +} + +static void start_sw_period(struct kvm_lapic *apic) +{ + if (!apic->lapic_timer.period) + return; + + if (ktime_after(ktime_get(), + apic->lapic_timer.target_expiration)) { + apic_timer_expired(apic); + + if (apic_lvtt_oneshot(apic)) + return; + + advance_periodic_target_expiration(apic); + } + + hrtimer_start(&apic->lapic_timer.timer, + apic->lapic_timer.target_expiration, + HRTIMER_MODE_ABS_PINNED); } bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 8494dbae41b9..d634f0332c0f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3007,6 +3007,7 @@ static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct * { siginfo_t info; + clear_siginfo(&info); info.si_signo = SIGBUS; info.si_errno = 0; info.si_code = BUS_MCEERR_AR; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 1fc05e428aba..26110c202b19 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -49,7 +49,7 @@ #include <asm/debugreg.h> #include <asm/kvm_para.h> #include <asm/irq_remapping.h> -#include <asm/nospec-branch.h> +#include <asm/spec-ctrl.h> #include <asm/virtext.h> #include "trace.h" @@ -213,6 +213,12 @@ struct vcpu_svm { } host; u64 spec_ctrl; + /* + * Contains guest-controlled bits of VIRT_SPEC_CTRL, which will be + * translated into the appropriate L2_CFG bits on the host to + * perform speculative control. + */ + u64 virt_spec_ctrl; u32 *msrpm; @@ -2060,6 +2066,7 @@ static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.microcode_version = 0x01000065; svm->spec_ctrl = 0; + svm->virt_spec_ctrl = 0; if (!init_event) { svm->vcpu.arch.apic_base = APIC_DEFAULT_PHYS_BASE | @@ -4108,11 +4115,18 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) return 1; msr_info->data = svm->spec_ctrl; break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr_info->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD)) + return 1; + + msr_info->data = svm->virt_spec_ctrl; + break; case MSR_F15H_IC_CFG: { int family, model; @@ -4203,7 +4217,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_SPEC_CTRL: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBRS)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ @@ -4230,7 +4244,7 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; case MSR_IA32_PRED_CMD: if (!msr->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBPB)) + !guest_cpuid_has(vcpu, X86_FEATURE_AMD_IBPB)) return 1; if (data & ~PRED_CMD_IBPB) @@ -4244,6 +4258,16 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr) break; set_msr_interception(svm->msrpm, MSR_IA32_PRED_CMD, 0, 1); break; + case MSR_AMD64_VIRT_SPEC_CTRL: + if (!msr->host_initiated && + !guest_cpuid_has(vcpu, X86_FEATURE_VIRT_SSBD)) + return 1; + + if (data & ~SPEC_CTRL_SSBD) + return 1; + + svm->virt_spec_ctrl = data; + break; case MSR_STAR: svm->vmcb->save.star = data; break; @@ -5557,8 +5581,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - if (svm->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl); + x86_spec_ctrl_set_guest(svm->spec_ctrl, svm->virt_spec_ctrl); asm volatile ( "push %%" _ASM_BP "; \n\t" @@ -5652,6 +5675,18 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) #endif ); + /* Eliminate branch target predictions from guest mode */ + vmexit_fill_RSB(); + +#ifdef CONFIG_X86_64 + wrmsrl(MSR_GS_BASE, svm->host.gs_base); +#else + loadsegment(fs, svm->host.fs); +#ifndef CONFIG_X86_32_LAZY_GS + loadsegment(gs, svm->host.gs); +#endif +#endif + /* * We do not use IBRS in the kernel. If this vCPU has used the * SPEC_CTRL MSR it may have left it on; save the value and @@ -5670,20 +5705,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - if (svm->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); - - /* Eliminate branch target predictions from guest mode */ - vmexit_fill_RSB(); - -#ifdef CONFIG_X86_64 - wrmsrl(MSR_GS_BASE, svm->host.gs_base); -#else - loadsegment(fs, svm->host.fs); -#ifndef CONFIG_X86_32_LAZY_GS - loadsegment(gs, svm->host.gs); -#endif -#endif + x86_spec_ctrl_restore_host(svm->spec_ctrl, svm->virt_spec_ctrl); reload_tss(vcpu); @@ -5786,7 +5808,7 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } -static bool svm_has_high_real_mode_segbase(void) +static bool svm_has_emulated_msr(int index) { return true; } @@ -7012,7 +7034,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .hardware_enable = svm_hardware_enable, .hardware_disable = svm_hardware_disable, .cpu_has_accelerated_tpr = svm_cpu_has_accelerated_tpr, - .cpu_has_high_real_mode_segbase = svm_has_high_real_mode_segbase, + .has_emulated_msr = svm_has_emulated_msr, .vcpu_create = svm_create_vcpu, .vcpu_free = svm_free_vcpu, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c7668806163f..40aa29204baf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -51,7 +51,7 @@ #include <asm/apic.h> #include <asm/irq_remapping.h> #include <asm/mmu_context.h> -#include <asm/nospec-branch.h> +#include <asm/spec-ctrl.h> #include <asm/mshyperv.h> #include "trace.h" @@ -1494,6 +1494,12 @@ static inline bool cpu_has_vmx_vmfunc(void) SECONDARY_EXEC_ENABLE_VMFUNC; } +static bool vmx_umip_emulated(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_DESC; +} + static inline bool report_flexpriority(void) { return flexpriority_enabled; @@ -3523,7 +3529,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) return kvm_get_msr_common(vcpu, msr_info); case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; @@ -3642,12 +3647,11 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_SPEC_CTRL: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBRS) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; /* The STIBP bit doesn't fault even if it's not advertised */ - if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP)) + if (data & ~(SPEC_CTRL_IBRS | SPEC_CTRL_STIBP | SPEC_CTRL_SSBD)) return 1; vmx->spec_ctrl = data; @@ -3673,7 +3677,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info) break; case MSR_IA32_PRED_CMD: if (!msr_info->host_initiated && - !guest_cpuid_has(vcpu, X86_FEATURE_IBPB) && !guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL)) return 1; @@ -4761,14 +4764,16 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) else hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON; - if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) { - vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, - SECONDARY_EXEC_DESC); - hw_cr4 &= ~X86_CR4_UMIP; - } else if (!is_guest_mode(vcpu) || - !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) - vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, + if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) { + if (cr4 & X86_CR4_UMIP) { + vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_DESC); + hw_cr4 &= ~X86_CR4_UMIP; + } else if (!is_guest_mode(vcpu) || + !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) + vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, + SECONDARY_EXEC_DESC); + } if (cr4 & X86_CR4_VMXE) { /* @@ -9480,9 +9485,21 @@ static void vmx_handle_external_intr(struct kvm_vcpu *vcpu) } STACK_FRAME_NON_STANDARD(vmx_handle_external_intr); -static bool vmx_has_high_real_mode_segbase(void) +static bool vmx_has_emulated_msr(int index) { - return enable_unrestricted_guest || emulate_invalid_guest_state; + switch (index) { + case MSR_IA32_SMBASE: + /* + * We cannot do SMM unless we can run the guest in big + * real mode. + */ + return enable_unrestricted_guest || emulate_invalid_guest_state; + case MSR_AMD64_VIRT_SPEC_CTRL: + /* This is AMD only. */ + return false; + default: + return true; + } } static bool vmx_mpx_supported(void) @@ -9497,12 +9514,6 @@ static bool vmx_xsaves_supported(void) SECONDARY_EXEC_XSAVES; } -static bool vmx_umip_emulated(void) -{ - return vmcs_config.cpu_based_2nd_exec_ctrl & - SECONDARY_EXEC_DESC; -} - static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -9720,8 +9731,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) * is no need to worry about the conditional branch over the wrmsr * being speculatively taken. */ - if (vmx->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, vmx->spec_ctrl); + x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); vmx->__launched = vmx->loaded_vmcs->launched; @@ -9869,8 +9879,7 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu) if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL))) vmx->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL); - if (vmx->spec_ctrl) - native_wrmsrl(MSR_IA32_SPEC_CTRL, 0); + x86_spec_ctrl_restore_host(vmx->spec_ctrl, 0); /* Eliminate branch target predictions from guest mode */ vmexit_fill_RSB(); @@ -12630,7 +12639,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .hardware_enable = hardware_enable, .hardware_disable = hardware_disable, .cpu_has_accelerated_tpr = report_flexpriority, - .cpu_has_high_real_mode_segbase = vmx_has_high_real_mode_segbase, + .has_emulated_msr = vmx_has_emulated_msr, .vm_init = vmx_vm_init, .vm_alloc = vmx_vm_alloc, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 51ecd381793b..71e7cda6d014 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -114,7 +114,7 @@ module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); static bool __read_mostly report_ignored_msrs = true; module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR); -unsigned int min_timer_period_us = 500; +unsigned int min_timer_period_us = 200; module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); static bool __read_mostly kvmclock_periodic_sync = true; @@ -843,7 +843,10 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { #ifdef CONFIG_X86_64 - cr3 &= ~CR3_PCID_INVD; + bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE); + + if (pcid_enabled) + cr3 &= ~CR3_PCID_INVD; #endif if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { @@ -1058,6 +1061,7 @@ static u32 emulated_msrs[] = { MSR_SMI_COUNT, MSR_PLATFORM_INFO, MSR_MISC_FEATURES_ENABLES, + MSR_AMD64_VIRT_SPEC_CTRL, }; static unsigned num_emulated_msrs; @@ -2903,7 +2907,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) * fringe case that is not enabled except via specific settings * of the module parameters. */ - r = kvm_x86_ops->cpu_has_high_real_mode_segbase(); + r = kvm_x86_ops->has_emulated_msr(MSR_IA32_SMBASE); break; case KVM_CAP_VAPIC: r = !kvm_x86_ops->cpu_has_accelerated_tpr(); @@ -4603,14 +4607,8 @@ static void kvm_init_msr_list(void) num_msrs_to_save = j; for (i = j = 0; i < ARRAY_SIZE(emulated_msrs); i++) { - switch (emulated_msrs[i]) { - case MSR_IA32_SMBASE: - if (!kvm_x86_ops->cpu_has_high_real_mode_segbase()) - continue; - break; - default: - break; - } + if (!kvm_x86_ops->has_emulated_msr(emulated_msrs[i])) + continue; if (j < i) emulated_msrs[j] = emulated_msrs[i]; @@ -6671,9 +6669,7 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { unsigned long nr, a0, a1, a2, a3, ret; - int op_64_bit, r; - - r = kvm_skip_emulated_instruction(vcpu); + int op_64_bit; if (kvm_hv_hypercall_enabled(vcpu->kvm)) return kvm_hv_hypercall(vcpu); @@ -6721,8 +6717,9 @@ out: if (!op_64_bit) ret = (u32)ret; kvm_register_write(vcpu, VCPU_REGS_RAX, ret); + ++vcpu->stat.hypercalls; - return r; + return kvm_skip_emulated_instruction(vcpu); } EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); @@ -7979,6 +7976,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) { struct msr_data apic_base_msr; int mmu_reset_needed = 0; + int cpuid_update_needed = 0; int pending_vec, max_bits, idx; struct desc_ptr dt; int ret = -EINVAL; @@ -8017,8 +8015,10 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs) vcpu->arch.cr0 = sregs->cr0; mmu_reset_needed |= kvm_read_cr4(vcpu) != sregs->cr4; + cpuid_update_needed |= ((kvm_read_cr4(vcpu) ^ sregs->cr4) & + (X86_CR4_OSXSAVE | X86_CR4_PKE)); kvm_x86_ops->set_cr4(vcpu, sregs->cr4); - if (sregs->cr4 & (X86_CR4_OSXSAVE | X86_CR4_PKE)) + if (cpuid_update_needed) kvm_update_cpuid(vcpu); idx = srcu_read_lock(&vcpu->kvm->srcu); diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 9a53a06e5a3e..c3b527a9f95d 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -184,11 +184,11 @@ ENDPROC(memcpy_orig) #ifndef CONFIG_UML /* - * memcpy_mcsafe_unrolled - memory copy with machine check exception handling + * __memcpy_mcsafe - memory copy with machine check exception handling * Note that we only catch machine checks when reading the source addresses. * Writes to target are posted and don't generate machine checks. */ -ENTRY(memcpy_mcsafe_unrolled) +ENTRY(__memcpy_mcsafe) cmpl $8, %edx /* Less than 8 bytes? Go to byte copy loop */ jb .L_no_whole_words @@ -204,58 +204,29 @@ ENTRY(memcpy_mcsafe_unrolled) subl $8, %ecx negl %ecx subl %ecx, %edx -.L_copy_leading_bytes: +.L_read_leading_bytes: movb (%rsi), %al +.L_write_leading_bytes: movb %al, (%rdi) incq %rsi incq %rdi decl %ecx - jnz .L_copy_leading_bytes + jnz .L_read_leading_bytes .L_8byte_aligned: - /* Figure out how many whole cache lines (64-bytes) to copy */ - movl %edx, %ecx - andl $63, %edx - shrl $6, %ecx - jz .L_no_whole_cache_lines - - /* Loop copying whole cache lines */ -.L_cache_w0: movq (%rsi), %r8 -.L_cache_w1: movq 1*8(%rsi), %r9 -.L_cache_w2: movq 2*8(%rsi), %r10 -.L_cache_w3: movq 3*8(%rsi), %r11 - movq %r8, (%rdi) - movq %r9, 1*8(%rdi) - movq %r10, 2*8(%rdi) - movq %r11, 3*8(%rdi) -.L_cache_w4: movq 4*8(%rsi), %r8 -.L_cache_w5: movq 5*8(%rsi), %r9 -.L_cache_w6: movq 6*8(%rsi), %r10 -.L_cache_w7: movq 7*8(%rsi), %r11 - movq %r8, 4*8(%rdi) - movq %r9, 5*8(%rdi) - movq %r10, 6*8(%rdi) - movq %r11, 7*8(%rdi) - leaq 64(%rsi), %rsi - leaq 64(%rdi), %rdi - decl %ecx - jnz .L_cache_w0 - - /* Are there any trailing 8-byte words? */ -.L_no_whole_cache_lines: movl %edx, %ecx andl $7, %edx shrl $3, %ecx jz .L_no_whole_words - /* Copy trailing words */ -.L_copy_trailing_words: +.L_read_words: movq (%rsi), %r8 - mov %r8, (%rdi) - leaq 8(%rsi), %rsi - leaq 8(%rdi), %rdi +.L_write_words: + movq %r8, (%rdi) + addq $8, %rsi + addq $8, %rdi decl %ecx - jnz .L_copy_trailing_words + jnz .L_read_words /* Any trailing bytes? */ .L_no_whole_words: @@ -264,38 +235,53 @@ ENTRY(memcpy_mcsafe_unrolled) /* Copy trailing bytes */ movl %edx, %ecx -.L_copy_trailing_bytes: +.L_read_trailing_bytes: movb (%rsi), %al +.L_write_trailing_bytes: movb %al, (%rdi) incq %rsi incq %rdi decl %ecx - jnz .L_copy_trailing_bytes + jnz .L_read_trailing_bytes /* Copy successful. Return zero */ .L_done_memcpy_trap: xorq %rax, %rax ret -ENDPROC(memcpy_mcsafe_unrolled) -EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) +ENDPROC(__memcpy_mcsafe) +EXPORT_SYMBOL_GPL(__memcpy_mcsafe) .section .fixup, "ax" - /* Return -EFAULT for any failure */ -.L_memcpy_mcsafe_fail: - mov $-EFAULT, %rax + /* + * Return number of bytes not copied for any failure. Note that + * there is no "tail" handling since the source buffer is 8-byte + * aligned and poison is cacheline aligned. + */ +.E_read_words: + shll $3, %ecx +.E_leading_bytes: + addl %edx, %ecx +.E_trailing_bytes: + mov %ecx, %eax ret + /* + * For write fault handling, given the destination is unaligned, + * we handle faults on multi-byte writes with a byte-by-byte + * copy up to the write-protected page. + */ +.E_write_words: + shll $3, %ecx + addl %edx, %ecx + movl %ecx, %edx + jmp mcsafe_handle_tail + .previous - _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w6, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w7, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_copy_trailing_words, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_copy_trailing_bytes, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes) + _ASM_EXTABLE_FAULT(.L_read_words, .E_read_words) + _ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes) + _ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes) + _ASM_EXTABLE(.L_write_words, .E_write_words) + _ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes) #endif diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c index 75d3776123cc..9c5606d88f61 100644 --- a/arch/x86/lib/usercopy_64.c +++ b/arch/x86/lib/usercopy_64.c @@ -23,13 +23,13 @@ unsigned long __clear_user(void __user *addr, unsigned long size) asm volatile( " testq %[size8],%[size8]\n" " jz 4f\n" - "0: movq %[zero],(%[dst])\n" - " addq %[eight],%[dst]\n" + "0: movq $0,(%[dst])\n" + " addq $8,%[dst]\n" " decl %%ecx ; jnz 0b\n" "4: movq %[size1],%%rcx\n" " testl %%ecx,%%ecx\n" " jz 2f\n" - "1: movb %b[zero],(%[dst])\n" + "1: movb $0,(%[dst])\n" " incq %[dst]\n" " decl %%ecx ; jnz 1b\n" "2:\n" @@ -40,8 +40,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size) _ASM_EXTABLE(0b,3b) _ASM_EXTABLE(1b,2b) : [size8] "=&c"(size), [dst] "=&D" (__d0) - : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), - [zero] "r" (0UL), [eight] "r" (8UL)); + : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr)); clac(); return size; } @@ -75,6 +74,27 @@ copy_user_handle_tail(char *to, char *from, unsigned len) return len; } +/* + * Similar to copy_user_handle_tail, probe for the write fault point, + * but reuse __memcpy_mcsafe in case a new read error is encountered. + * clac() is handled in _copy_to_iter_mcsafe(). + */ +__visible unsigned long +mcsafe_handle_tail(char *to, char *from, unsigned len) +{ + for (; len; --len, to++, from++) { + /* + * Call the assembly routine back directly since + * memcpy_mcsafe() may silently fallback to memcpy. + */ + unsigned long rem = __memcpy_mcsafe(to, from, 1); + + if (rem) + break; + } + return len; +} + #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE /** * clean_cache_range - write back a cache range with CLWB diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index cc7ff5957194..2f3c9196b834 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -360,7 +360,7 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st, void *pt) { if (__pa(pt) == __pa(kasan_zero_pmd) || - (pgtable_l5_enabled && __pa(pt) == __pa(kasan_zero_p4d)) || + (pgtable_l5_enabled() && __pa(pt) == __pa(kasan_zero_p4d)) || __pa(pt) == __pa(kasan_zero_pud)) { pgprotval_t prot = pte_flags(kasan_zero_pte[0]); note_page(m, st, __pgprot(prot), 0, 5); @@ -476,8 +476,8 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr, } } -#define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a)))) -#define pgd_none(a) (pgtable_l5_enabled ? pgd_none(a) : p4d_none(__p4d(pgd_val(a)))) +#define pgd_large(a) (pgtable_l5_enabled() ? pgd_large(a) : p4d_large(__p4d(pgd_val(a)))) +#define pgd_none(a) (pgtable_l5_enabled() ? pgd_none(a) : p4d_none(__p4d(pgd_val(a)))) static inline bool is_hypervisor_range(int idx) { diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index 73bd8c95ac71..9a84a0d08727 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -209,6 +209,7 @@ force_sig_info_fault(int si_signo, int si_code, unsigned long address, unsigned lsb = 0; siginfo_t info; + clear_siginfo(&info); info.si_signo = si_signo; info.si_errno = 0; info.si_code = si_code; @@ -439,7 +440,7 @@ static noinline int vmalloc_fault(unsigned long address) if (pgd_none(*pgd_k)) return -1; - if (pgtable_l5_enabled) { + if (pgtable_l5_enabled()) { if (pgd_none(*pgd)) { set_pgd(pgd, *pgd_k); arch_flush_lazy_mmu_mode(); @@ -454,7 +455,7 @@ static noinline int vmalloc_fault(unsigned long address) if (p4d_none(*p4d_k)) return -1; - if (p4d_none(*p4d) && !pgtable_l5_enabled) { + if (p4d_none(*p4d) && !pgtable_l5_enabled()) { set_p4d(p4d, *p4d_k); arch_flush_lazy_mmu_mode(); } else { @@ -828,6 +829,8 @@ static inline void show_signal_msg(struct pt_regs *regs, unsigned long error_code, unsigned long address, struct task_struct *tsk) { + const char *loglvl = task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG; + if (!unhandled_signal(tsk, SIGSEGV)) return; @@ -835,13 +838,14 @@ show_signal_msg(struct pt_regs *regs, unsigned long error_code, return; printk("%s%s[%d]: segfault at %lx ip %px sp %px error %lx", - task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG, - tsk->comm, task_pid_nr(tsk), address, + loglvl, tsk->comm, task_pid_nr(tsk), address, (void *)regs->ip, (void *)regs->sp, error_code); print_vma_addr(KERN_CONT " in ", regs->ip); printk(KERN_CONT "\n"); + + show_opcodes((u8 *)regs->ip, loglvl); } static void diff --git a/arch/x86/mm/ident_map.c b/arch/x86/mm/ident_map.c index a2f0c7e20fb0..fe7a12599d8e 100644 --- a/arch/x86/mm/ident_map.c +++ b/arch/x86/mm/ident_map.c @@ -123,7 +123,7 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page, result = ident_p4d_init(info, p4d, addr, next); if (result) return result; - if (pgtable_l5_enabled) { + if (pgtable_l5_enabled()) { set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag)); } else { /* diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 0a400606dea0..17383f9677fa 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -180,7 +180,7 @@ static void sync_global_pgds_l4(unsigned long start, unsigned long end) */ void sync_global_pgds(unsigned long start, unsigned long end) { - if (pgtable_l5_enabled) + if (pgtable_l5_enabled()) sync_global_pgds_l5(start, end); else sync_global_pgds_l4(start, end); @@ -643,7 +643,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end, unsigned long vaddr = (unsigned long)__va(paddr); int i = p4d_index(vaddr); - if (!pgtable_l5_enabled) + if (!pgtable_l5_enabled()) return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask); for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) { @@ -723,7 +723,7 @@ kernel_physical_mapping_init(unsigned long paddr_start, page_size_mask); spin_lock(&init_mm.page_table_lock); - if (pgtable_l5_enabled) + if (pgtable_l5_enabled()) pgd_populate(&init_mm, pgd, p4d); else p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d); @@ -1100,7 +1100,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end, * 5-level case we should free them. This code will have to change * to adapt for boot-time switching between 4 and 5 level page tables. */ - if (pgtable_l5_enabled) + if (pgtable_l5_enabled()) free_pud_table(pud_base, p4d); } diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 980dbebd0ca7..e3e77527f8df 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -2,10 +2,8 @@ #define DISABLE_BRANCH_PROFILING #define pr_fmt(fmt) "kasan: " fmt -#ifdef CONFIG_X86_5LEVEL -/* Too early to use cpu_feature_enabled() */ -#define pgtable_l5_enabled __pgtable_l5_enabled -#endif +/* cpu_feature_enabled() cannot be used this early */ +#define USE_EARLY_PGTABLE_L5 #include <linux/bootmem.h> #include <linux/kasan.h> @@ -182,7 +180,7 @@ static void __init clear_pgds(unsigned long start, * With folded p4d, pgd_clear() is nop, use p4d_clear() * instead. */ - if (pgtable_l5_enabled) + if (pgtable_l5_enabled()) pgd_clear(pgd); else p4d_clear(p4d_offset(pgd, start)); @@ -197,7 +195,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr) { unsigned long p4d; - if (!pgtable_l5_enabled) + if (!pgtable_l5_enabled()) return (p4d_t *)pgd; p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK; @@ -284,7 +282,7 @@ void __init kasan_early_init(void) for (i = 0; i < PTRS_PER_PUD; i++) kasan_zero_pud[i] = __pud(pud_val); - for (i = 0; pgtable_l5_enabled && i < PTRS_PER_P4D; i++) + for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++) kasan_zero_p4d[i] = __p4d(p4d_val); kasan_map_early_shadow(early_top_pgt); @@ -315,7 +313,7 @@ void __init kasan_init(void) * bunch of things like kernel code, modules, EFI mapping, etc. * We need to take extra steps to not overwrite them. */ - if (pgtable_l5_enabled) { + if (pgtable_l5_enabled()) { void *ptr; ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END)); diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 615cc03ced84..61db77b0eda9 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -78,7 +78,7 @@ void __init kernel_randomize_memory(void) struct rnd_state rand_state; unsigned long remain_entropy; - vaddr_start = pgtable_l5_enabled ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4; + vaddr_start = pgtable_l5_enabled() ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4; vaddr = vaddr_start; /* @@ -124,7 +124,7 @@ void __init kernel_randomize_memory(void) */ entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i); prandom_bytes_state(&rand_state, &rand, sizeof(rand)); - if (pgtable_l5_enabled) + if (pgtable_l5_enabled()) entropy = (rand % (entropy + 1)) & P4D_MASK; else entropy = (rand % (entropy + 1)) & PUD_MASK; @@ -136,7 +136,7 @@ void __init kernel_randomize_memory(void) * randomization alignment. */ vaddr += get_padding(&kaslr_regions[i]); - if (pgtable_l5_enabled) + if (pgtable_l5_enabled()) vaddr = round_up(vaddr + 1, P4D_SIZE); else vaddr = round_up(vaddr + 1, PUD_SIZE); @@ -212,7 +212,7 @@ void __meminit init_trampoline(void) return; } - if (pgtable_l5_enabled) + if (pgtable_l5_enabled()) init_trampoline_p4d(); else init_trampoline_pud(); diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 25504d5aa816..fa150855647c 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -136,13 +136,13 @@ static int __init numa_add_memblk_to(int nid, u64 start, u64 end, /* whine about and ignore invalid blks */ if (start > end || nid < 0 || nid >= MAX_NUMNODES) { - pr_warning("NUMA: Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", - nid, start, end - 1); + pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n", + nid, start, end - 1); return 0; } if (mi->nr_blks >= NR_NODE_MEMBLKS) { - pr_err("NUMA: too many memblk ranges\n"); + pr_err("too many memblk ranges\n"); return -EINVAL; } @@ -267,14 +267,14 @@ int __init numa_cleanup_meminfo(struct numa_meminfo *mi) */ if (bi->end > bj->start && bi->start < bj->end) { if (bi->nid != bj->nid) { - pr_err("NUMA: node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n", + pr_err("node %d [mem %#010Lx-%#010Lx] overlaps with node %d [mem %#010Lx-%#010Lx]\n", bi->nid, bi->start, bi->end - 1, bj->nid, bj->start, bj->end - 1); return -EINVAL; } - pr_warning("NUMA: Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n", - bi->nid, bi->start, bi->end - 1, - bj->start, bj->end - 1); + pr_warn("Warning: node %d [mem %#010Lx-%#010Lx] overlaps with itself [mem %#010Lx-%#010Lx]\n", + bi->nid, bi->start, bi->end - 1, + bj->start, bj->end - 1); } /* @@ -364,7 +364,7 @@ static int __init numa_alloc_distance(void) phys = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), size, PAGE_SIZE); if (!phys) { - pr_warning("NUMA: Warning: can't allocate distance table!\n"); + pr_warn("Warning: can't allocate distance table!\n"); /* don't retry until explicitly reset */ numa_distance = (void *)1LU; return -ENOMEM; @@ -410,14 +410,14 @@ void __init numa_set_distance(int from, int to, int distance) if (from >= numa_distance_cnt || to >= numa_distance_cnt || from < 0 || to < 0) { - pr_warn_once("NUMA: Warning: node ids are out of bound, from=%d to=%d distance=%d\n", - from, to, distance); + pr_warn_once("Warning: node ids are out of bound, from=%d to=%d distance=%d\n", + from, to, distance); return; } if ((u8)distance != distance || (from == to && distance != LOCAL_DISTANCE)) { - pr_warn_once("NUMA: Warning: invalid distance parameter, from=%d to=%d distance=%d\n", + pr_warn_once("Warning: invalid distance parameter, from=%d to=%d distance=%d\n", from, to, distance); return; } diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index d7bc0eea20a5..6e98e0a7c923 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -94,26 +94,27 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey */ if (pkey != -1) return pkey; - /* - * Look for a protection-key-drive execute-only mapping - * which is now being given permissions that are not - * execute-only. Move it back to the default pkey. - */ - if (vma_is_pkey_exec_only(vma) && - (prot & (PROT_READ|PROT_WRITE))) { - return 0; - } + /* * The mapping is execute-only. Go try to get the * execute-only protection key. If we fail to do that, * fall through as if we do not have execute-only - * support. + * support in this mm. */ if (prot == PROT_EXEC) { pkey = execute_only_pkey(vma->vm_mm); if (pkey > 0) return pkey; + } else if (vma_is_pkey_exec_only(vma)) { + /* + * Protections are *not* PROT_EXEC, but the mapping + * is using the exec-only pkey. This mapping was + * PROT_EXEC and will no longer be. Move back to + * the default pkey. + */ + return ARCH_DEFAULT_PKEY; } + /* * This is a vanilla, non-pkey mprotect (or we failed to * setup execute-only), inherit the pkey from the VMA we diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c index e055d1a06699..6eb1f34c3c85 100644 --- a/arch/x86/mm/tlb.c +++ b/arch/x86/mm/tlb.c @@ -157,7 +157,7 @@ static void sync_current_stack_to_mm(struct mm_struct *mm) unsigned long sp = current_stack_pointer; pgd_t *pgd = pgd_offset(mm, sp); - if (pgtable_l5_enabled) { + if (pgtable_l5_enabled()) { if (unlikely(pgd_none(*pgd))) { pgd_t *pgd_ref = pgd_offset_k(sp); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b725154182cc..d765acedc05c 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1,4 +1,5 @@ -/* bpf_jit_comp.c : BPF JIT compiler +/* + * bpf_jit_comp.c: BPF JIT compiler * * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) * Internal BPF Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com @@ -17,7 +18,7 @@ #include <asm/nospec-branch.h> /* - * assembly code in arch/x86/net/bpf_jit.S + * Assembly code in arch/x86/net/bpf_jit.S */ extern u8 sk_load_word[], sk_load_half[], sk_load_byte[]; extern u8 sk_load_word_positive_offset[], sk_load_half_positive_offset[]; @@ -45,14 +46,15 @@ static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) #define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) #define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) #define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) + #define EMIT1_off32(b1, off) \ - do {EMIT1(b1); EMIT(off, 4); } while (0) + do { EMIT1(b1); EMIT(off, 4); } while (0) #define EMIT2_off32(b1, b2, off) \ - do {EMIT2(b1, b2); EMIT(off, 4); } while (0) + do { EMIT2(b1, b2); EMIT(off, 4); } while (0) #define EMIT3_off32(b1, b2, b3, off) \ - do {EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) + do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) #define EMIT4_off32(b1, b2, b3, b4, off) \ - do {EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) + do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) static bool is_imm8(int value) { @@ -70,9 +72,10 @@ static bool is_uimm32(u64 value) } /* mov dst, src */ -#define EMIT_mov(DST, SRC) \ - do {if (DST != SRC) \ - EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ +#define EMIT_mov(DST, SRC) \ + do { \ + if (DST != SRC) \ + EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ } while (0) static int bpf_size_to_x86_bytes(int bpf_size) @@ -89,7 +92,8 @@ static int bpf_size_to_x86_bytes(int bpf_size) return 0; } -/* list of x86 cond jumps opcodes (. + s8) +/* + * List of x86 cond jumps opcodes (. + s8) * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) */ #define X86_JB 0x72 @@ -106,35 +110,37 @@ static int bpf_size_to_x86_bytes(int bpf_size) #define CHOOSE_LOAD_FUNC(K, func) \ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) -/* pick a register outside of BPF range for JIT internal work */ +/* Pick a register outside of BPF range for JIT internal work */ #define AUX_REG (MAX_BPF_JIT_REG + 1) -/* The following table maps BPF registers to x64 registers. +/* + * The following table maps BPF registers to x86-64 registers. * - * x64 register r12 is unused, since if used as base address + * x86-64 register R12 is unused, since if used as base address * register in load/store instructions, it always needs an * extra byte of encoding and is callee saved. * - * r9 caches skb->len - skb->data_len - * r10 caches skb->data, and used for blinding (if enabled) + * R9 caches skb->len - skb->data_len + * R10 caches skb->data, and used for blinding (if enabled) */ static const int reg2hex[] = { - [BPF_REG_0] = 0, /* rax */ - [BPF_REG_1] = 7, /* rdi */ - [BPF_REG_2] = 6, /* rsi */ - [BPF_REG_3] = 2, /* rdx */ - [BPF_REG_4] = 1, /* rcx */ - [BPF_REG_5] = 0, /* r8 */ - [BPF_REG_6] = 3, /* rbx callee saved */ - [BPF_REG_7] = 5, /* r13 callee saved */ - [BPF_REG_8] = 6, /* r14 callee saved */ - [BPF_REG_9] = 7, /* r15 callee saved */ - [BPF_REG_FP] = 5, /* rbp readonly */ - [BPF_REG_AX] = 2, /* r10 temp register */ - [AUX_REG] = 3, /* r11 temp register */ + [BPF_REG_0] = 0, /* RAX */ + [BPF_REG_1] = 7, /* RDI */ + [BPF_REG_2] = 6, /* RSI */ + [BPF_REG_3] = 2, /* RDX */ + [BPF_REG_4] = 1, /* RCX */ + [BPF_REG_5] = 0, /* R8 */ + [BPF_REG_6] = 3, /* RBX callee saved */ + [BPF_REG_7] = 5, /* R13 callee saved */ + [BPF_REG_8] = 6, /* R14 callee saved */ + [BPF_REG_9] = 7, /* R15 callee saved */ + [BPF_REG_FP] = 5, /* RBP readonly */ + [BPF_REG_AX] = 2, /* R10 temp register */ + [AUX_REG] = 3, /* R11 temp register */ }; -/* is_ereg() == true if BPF register 'reg' maps to x64 r8..r15 +/* + * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15 * which need extra byte of encoding. * rax,rcx,...,rbp have simpler encoding */ @@ -153,7 +159,7 @@ static bool is_axreg(u32 reg) return reg == BPF_REG_0; } -/* add modifiers if 'reg' maps to x64 registers r8..r15 */ +/* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */ static u8 add_1mod(u8 byte, u32 reg) { if (is_ereg(reg)) @@ -170,13 +176,13 @@ static u8 add_2mod(u8 byte, u32 r1, u32 r2) return byte; } -/* encode 'dst_reg' register into x64 opcode 'byte' */ +/* Encode 'dst_reg' register into x86-64 opcode 'byte' */ static u8 add_1reg(u8 byte, u32 dst_reg) { return byte + reg2hex[dst_reg]; } -/* encode 'dst_reg' and 'src_reg' registers into x64 opcode 'byte' */ +/* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) { return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); @@ -184,27 +190,28 @@ static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) static void jit_fill_hole(void *area, unsigned int size) { - /* fill whole space with int3 instructions */ + /* Fill whole space with INT3 instructions */ memset(area, 0xcc, size); } struct jit_context { - int cleanup_addr; /* epilogue code offset */ + int cleanup_addr; /* Epilogue code offset */ bool seen_ld_abs; bool seen_ax_reg; }; -/* maximum number of bytes emitted while JITing one eBPF insn */ +/* Maximum number of bytes emitted while JITing one eBPF insn */ #define BPF_MAX_INSN_SIZE 128 #define BPF_INSN_SAFETY 64 #define AUX_STACK_SPACE \ - (32 /* space for rbx, r13, r14, r15 */ + \ - 8 /* space for skb_copy_bits() buffer */) + (32 /* Space for RBX, R13, R14, R15 */ + \ + 8 /* Space for skb_copy_bits() buffer */) #define PROLOGUE_SIZE 37 -/* emit x64 prologue code for BPF program and check it's size. +/* + * Emit x86-64 prologue code for BPF program and check its size. * bpf_tail_call helper will skip it while jumping into another program */ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) @@ -212,8 +219,11 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) u8 *prog = *pprog; int cnt = 0; - EMIT1(0x55); /* push rbp */ - EMIT3(0x48, 0x89, 0xE5); /* mov rbp,rsp */ + /* push rbp */ + EMIT1(0x55); + + /* mov rbp,rsp */ + EMIT3(0x48, 0x89, 0xE5); /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */ EMIT3_off32(0x48, 0x81, 0xEC, @@ -222,14 +232,15 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) /* sub rbp, AUX_STACK_SPACE */ EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE); - /* all classic BPF filters use R6(rbx) save it */ + /* All classic BPF filters use R6(rbx) save it */ /* mov qword ptr [rbp+0],rbx */ EMIT4(0x48, 0x89, 0x5D, 0); - /* bpf_convert_filter() maps classic BPF register X to R7 and uses R8 - * as temporary, so all tcpdump filters need to spill/fill R7(r13) and - * R8(r14). R9(r15) spill could be made conditional, but there is only + /* + * bpf_convert_filter() maps classic BPF register X to R7 and uses R8 + * as temporary, so all tcpdump filters need to spill/fill R7(R13) and + * R8(R14). R9(R15) spill could be made conditional, but there is only * one 'bpf_error' return path out of helper functions inside bpf_jit.S * The overhead of extra spill is negligible for any filter other * than synthetic ones. Therefore not worth adding complexity. @@ -243,9 +254,10 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) EMIT4(0x4C, 0x89, 0x7D, 24); if (!ebpf_from_cbpf) { - /* Clear the tail call counter (tail_call_cnt): for eBPF tail + /* + * Clear the tail call counter (tail_call_cnt): for eBPF tail * calls we need to reset the counter to 0. It's done in two - * instructions, resetting rax register to 0, and moving it + * instructions, resetting RAX register to 0, and moving it * to the counter location. */ @@ -260,7 +272,9 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf) *pprog = prog; } -/* generate the following code: +/* + * Generate the following code: + * * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... * if (index >= array->map.max_entries) * goto out; @@ -278,23 +292,26 @@ static void emit_bpf_tail_call(u8 **pprog) int label1, label2, label3; int cnt = 0; - /* rdi - pointer to ctx + /* + * rdi - pointer to ctx * rsi - pointer to bpf_array * rdx - index in bpf_array */ - /* if (index >= array->map.max_entries) - * goto out; + /* + * if (index >= array->map.max_entries) + * goto out; */ EMIT2(0x89, 0xD2); /* mov edx, edx */ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ offsetof(struct bpf_array, map.max_entries)); -#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */ +#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; - /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) - * goto out; + /* + * if (tail_call_cnt > MAX_TAIL_CALL_CNT) + * goto out; */ EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ @@ -308,8 +325,9 @@ static void emit_bpf_tail_call(u8 **pprog) EMIT4_off32(0x48, 0x8B, 0x84, 0xD6, /* mov rax, [rsi + rdx * 8 + offsetof(...)] */ offsetof(struct bpf_array, ptrs)); - /* if (prog == NULL) - * goto out; + /* + * if (prog == NULL) + * goto out; */ EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ #define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) @@ -321,7 +339,8 @@ static void emit_bpf_tail_call(u8 **pprog) offsetof(struct bpf_prog, bpf_func)); EMIT4(0x48, 0x83, 0xC0, PROLOGUE_SIZE); /* add rax, prologue_size */ - /* now we're ready to jump into next BPF program + /* + * Wow we're ready to jump into next BPF program * rdi == ctx (1st arg) * rax == prog->bpf_func + prologue_size */ @@ -340,7 +359,8 @@ static void emit_load_skb_data_hlen(u8 **pprog) u8 *prog = *pprog; int cnt = 0; - /* r9d = skb->len - skb->data_len (headlen) + /* + * r9d = skb->len - skb->data_len (headlen) * r10 = skb->data */ /* mov %r9d, off32(%rdi) */ @@ -361,7 +381,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate, u8 b1, b2, b3; int cnt = 0; - /* optimization: if imm32 is positive, use 'mov %eax, imm32' + /* + * Optimization: if imm32 is positive, use 'mov %eax, imm32' * (which zero-extends imm32) to save 2 bytes. */ if (sign_propagate && (s32)imm32 < 0) { @@ -373,7 +394,8 @@ static void emit_mov_imm32(u8 **pprog, bool sign_propagate, goto done; } - /* optimization: if imm32 is zero, use 'xor %eax, %eax' + /* + * Optimization: if imm32 is zero, use 'xor %eax, %eax' * to save 3 bytes. */ if (imm32 == 0) { @@ -400,7 +422,8 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg, int cnt = 0; if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) { - /* For emitting plain u32, where sign bit must not be + /* + * For emitting plain u32, where sign bit must not be * propagated LLVM tends to load imm64 over mov32 * directly, so save couple of bytes by just doing * 'mov %eax, imm32' instead. @@ -525,7 +548,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, else if (is_ereg(dst_reg)) EMIT1(add_1mod(0x40, dst_reg)); - /* b3 holds 'normal' opcode, b2 short form only valid + /* + * b3 holds 'normal' opcode, b2 short form only valid * in case dst is eax/rax. */ switch (BPF_OP(insn->code)) { @@ -593,7 +617,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, /* mov rax, dst_reg */ EMIT_mov(BPF_REG_0, dst_reg); - /* xor edx, edx + /* + * xor edx, edx * equivalent to 'xor rdx, rdx', but one byte less */ EMIT2(0x31, 0xd2); @@ -655,7 +680,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, } break; } - /* shifts */ + /* Shifts */ case BPF_ALU | BPF_LSH | BPF_K: case BPF_ALU | BPF_RSH | BPF_K: case BPF_ALU | BPF_ARSH | BPF_K: @@ -686,7 +711,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU64 | BPF_RSH | BPF_X: case BPF_ALU64 | BPF_ARSH | BPF_X: - /* check for bad case when dst_reg == rcx */ + /* Check for bad case when dst_reg == rcx */ if (dst_reg == BPF_REG_4) { /* mov r11, dst_reg */ EMIT_mov(AUX_REG, dst_reg); @@ -724,13 +749,13 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU | BPF_END | BPF_FROM_BE: switch (imm32) { case 16: - /* emit 'ror %ax, 8' to swap lower 2 bytes */ + /* Emit 'ror %ax, 8' to swap lower 2 bytes */ EMIT1(0x66); if (is_ereg(dst_reg)) EMIT1(0x41); EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); - /* emit 'movzwl eax, ax' */ + /* Emit 'movzwl eax, ax' */ if (is_ereg(dst_reg)) EMIT3(0x45, 0x0F, 0xB7); else @@ -738,7 +763,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); break; case 32: - /* emit 'bswap eax' to swap lower 4 bytes */ + /* Emit 'bswap eax' to swap lower 4 bytes */ if (is_ereg(dst_reg)) EMIT2(0x41, 0x0F); else @@ -746,7 +771,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, EMIT1(add_1reg(0xC8, dst_reg)); break; case 64: - /* emit 'bswap rax' to swap 8 bytes */ + /* Emit 'bswap rax' to swap 8 bytes */ EMIT3(add_1mod(0x48, dst_reg), 0x0F, add_1reg(0xC8, dst_reg)); break; @@ -756,7 +781,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, case BPF_ALU | BPF_END | BPF_FROM_LE: switch (imm32) { case 16: - /* emit 'movzwl eax, ax' to zero extend 16-bit + /* + * Emit 'movzwl eax, ax' to zero extend 16-bit * into 64 bit */ if (is_ereg(dst_reg)) @@ -766,7 +792,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); break; case 32: - /* emit 'mov eax, eax' to clear upper 32-bits */ + /* Emit 'mov eax, eax' to clear upper 32-bits */ if (is_ereg(dst_reg)) EMIT1(0x45); EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); @@ -809,9 +835,9 @@ st: if (is_imm8(insn->off)) /* STX: *(u8*)(dst_reg + off) = src_reg */ case BPF_STX | BPF_MEM | BPF_B: - /* emit 'mov byte ptr [rax + off], al' */ + /* Emit 'mov byte ptr [rax + off], al' */ if (is_ereg(dst_reg) || is_ereg(src_reg) || - /* have to add extra byte for x86 SIL, DIL regs */ + /* We have to add extra byte for x86 SIL, DIL regs */ src_reg == BPF_REG_1 || src_reg == BPF_REG_2) EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); else @@ -840,25 +866,26 @@ stx: if (is_imm8(insn->off)) /* LDX: dst_reg = *(u8*)(src_reg + off) */ case BPF_LDX | BPF_MEM | BPF_B: - /* emit 'movzx rax, byte ptr [rax + off]' */ + /* Emit 'movzx rax, byte ptr [rax + off]' */ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); goto ldx; case BPF_LDX | BPF_MEM | BPF_H: - /* emit 'movzx rax, word ptr [rax + off]' */ + /* Emit 'movzx rax, word ptr [rax + off]' */ EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); goto ldx; case BPF_LDX | BPF_MEM | BPF_W: - /* emit 'mov eax, dword ptr [rax+0x14]' */ + /* Emit 'mov eax, dword ptr [rax+0x14]' */ if (is_ereg(dst_reg) || is_ereg(src_reg)) EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); else EMIT1(0x8B); goto ldx; case BPF_LDX | BPF_MEM | BPF_DW: - /* emit 'mov rax, qword ptr [rax+0x14]' */ + /* Emit 'mov rax, qword ptr [rax+0x14]' */ EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); -ldx: /* if insn->off == 0 we can save one extra byte, but - * special case of x86 r13 which always needs an offset +ldx: /* + * If insn->off == 0 we can save one extra byte, but + * special case of x86 R13 which always needs an offset * is not worth the hassle */ if (is_imm8(insn->off)) @@ -870,7 +897,7 @@ ldx: /* if insn->off == 0 we can save one extra byte, but /* STX XADD: lock *(u32*)(dst_reg + off) += src_reg */ case BPF_STX | BPF_XADD | BPF_W: - /* emit 'lock add dword ptr [rax + off], eax' */ + /* Emit 'lock add dword ptr [rax + off], eax' */ if (is_ereg(dst_reg) || is_ereg(src_reg)) EMIT3(0xF0, add_2mod(0x40, dst_reg, src_reg), 0x01); else @@ -897,14 +924,15 @@ xadd: if (is_imm8(insn->off)) } else { EMIT2(0x41, 0x52); /* push %r10 */ EMIT2(0x41, 0x51); /* push %r9 */ - /* need to adjust jmp offset, since + /* + * We need to adjust jmp offset, since * pop %r9, pop %r10 take 4 bytes after call insn */ jmp_offset += 4; } } if (!imm32 || !is_simm32(jmp_offset)) { - pr_err("unsupported bpf func %d addr %p image %p\n", + pr_err("unsupported BPF func %d addr %p image %p\n", imm32, func, image); return -EINVAL; } @@ -970,7 +998,7 @@ xadd: if (is_imm8(insn->off)) else EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); -emit_cond_jmp: /* convert BPF opcode to x86 */ +emit_cond_jmp: /* Convert BPF opcode to x86 */ switch (BPF_OP(insn->code)) { case BPF_JEQ: jmp_cond = X86_JE; @@ -996,22 +1024,22 @@ emit_cond_jmp: /* convert BPF opcode to x86 */ jmp_cond = X86_JBE; break; case BPF_JSGT: - /* signed '>', GT in x86 */ + /* Signed '>', GT in x86 */ jmp_cond = X86_JG; break; case BPF_JSLT: - /* signed '<', LT in x86 */ + /* Signed '<', LT in x86 */ jmp_cond = X86_JL; break; case BPF_JSGE: - /* signed '>=', GE in x86 */ + /* Signed '>=', GE in x86 */ jmp_cond = X86_JGE; break; case BPF_JSLE: - /* signed '<=', LE in x86 */ + /* Signed '<=', LE in x86 */ jmp_cond = X86_JLE; break; - default: /* to silence gcc warning */ + default: /* to silence GCC warning */ return -EFAULT; } jmp_offset = addrs[i + insn->off] - addrs[i]; @@ -1027,9 +1055,19 @@ emit_cond_jmp: /* convert BPF opcode to x86 */ break; case BPF_JMP | BPF_JA: - jmp_offset = addrs[i + insn->off] - addrs[i]; + if (insn->off == -1) + /* -1 jmp instructions will always jump + * backwards two bytes. Explicitly handling + * this case avoids wasting too many passes + * when there are long sequences of replaced + * dead code. + */ + jmp_offset = -2; + else + jmp_offset = addrs[i + insn->off] - addrs[i]; + if (!jmp_offset) - /* optimize out nop jumps */ + /* Optimize out nop jumps */ break; emit_jmp: if (is_imm8(jmp_offset)) { @@ -1051,7 +1089,7 @@ common_load: ctx->seen_ld_abs = seen_ld_abs = true; jmp_offset = func - (image + addrs[i]); if (!func || !is_simm32(jmp_offset)) { - pr_err("unsupported bpf func %d addr %p image %p\n", + pr_err("unsupported BPF func %d addr %p image %p\n", imm32, func, image); return -EINVAL; } @@ -1070,7 +1108,8 @@ common_load: EMIT2_off32(0x81, 0xC6, imm32); } } - /* skb pointer is in R6 (%rbx), it will be copied into + /* + * skb pointer is in R6 (%rbx), it will be copied into * %rdi if skb_copy_bits() call is necessary. * sk_load_* helpers also use %r10 and %r9d. * See bpf_jit.S @@ -1101,7 +1140,7 @@ common_load: goto emit_jmp; } seen_exit = true; - /* update cleanup_addr */ + /* Update cleanup_addr */ ctx->cleanup_addr = proglen; /* mov rbx, qword ptr [rbp+0] */ EMIT4(0x48, 0x8B, 0x5D, 0); @@ -1119,10 +1158,11 @@ common_load: break; default: - /* By design x64 JIT should support all BPF instructions + /* + * By design x86-64 JIT should support all BPF instructions. * This error will be seen if new instruction was added - * to interpreter, but not to JIT - * or if there is junk in bpf_prog + * to the interpreter, but not to the JIT, or if there is + * junk in bpf_prog. */ pr_err("bpf_jit: unknown opcode %02x\n", insn->code); return -EINVAL; @@ -1174,7 +1214,8 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) return orig_prog; tmp = bpf_jit_blind_constants(prog); - /* If blinding was requested and we failed during blinding, + /* + * If blinding was requested and we failed during blinding, * we must fall back to the interpreter. */ if (IS_ERR(tmp)) @@ -1208,8 +1249,9 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) goto out_addrs; } - /* Before first pass, make a rough estimation of addrs[] - * each bpf instruction is translated to less than 64 bytes + /* + * Before first pass, make a rough estimation of addrs[] + * each BPF instruction is translated to less than 64 bytes */ for (proglen = 0, i = 0; i < prog->len; i++) { proglen += 64; @@ -1218,14 +1260,16 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) ctx.cleanup_addr = proglen; skip_init_addrs: - /* JITed image shrinks with every pass and the loop iterates - * until the image stops shrinking. Very large bpf programs + /* + * JITed image shrinks with every pass and the loop iterates + * until the image stops shrinking. Very large BPF programs * may converge on the last pass. In such case do one more - * pass to emit the final image + * pass to emit the final image. */ for (pass = 0; pass < 20 || image; pass++) { proglen = do_jit(prog, addrs, image, oldproglen, &ctx); if (proglen <= 0) { +out_image: image = NULL; if (header) bpf_jit_binary_free(header); @@ -1236,8 +1280,7 @@ skip_init_addrs: if (proglen != oldproglen) { pr_err("bpf_jit: proglen=%d != oldproglen=%d\n", proglen, oldproglen); - prog = orig_prog; - goto out_addrs; + goto out_image; } break; } @@ -1273,7 +1316,7 @@ skip_init_addrs: prog = orig_prog; } - if (!prog->is_func || extra_pass) { + if (!image || !prog->is_func || extra_pass) { out_addrs: kfree(addrs); kfree(jit_data); diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index bed7e7f4e44c..e01f7ceb9e7a 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -225,7 +225,7 @@ int __init efi_alloc_page_tables(void) pud = pud_alloc(&init_mm, p4d, EFI_VA_END); if (!pud) { - if (pgtable_l5_enabled) + if (pgtable_l5_enabled()) free_page((unsigned long) pgd_page_vaddr(*pgd)); free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER); return -ENOMEM; diff --git a/arch/x86/platform/intel-mid/intel_mid_vrtc.c b/arch/x86/platform/intel-mid/intel_mid_vrtc.c index 58024862a7eb..a52914aa3b6c 100644 --- a/arch/x86/platform/intel-mid/intel_mid_vrtc.c +++ b/arch/x86/platform/intel-mid/intel_mid_vrtc.c @@ -57,7 +57,7 @@ void vrtc_cmos_write(unsigned char val, unsigned char reg) } EXPORT_SYMBOL_GPL(vrtc_cmos_write); -void vrtc_get_time(struct timespec *now) +void vrtc_get_time(struct timespec64 *now) { u8 sec, min, hour, mday, mon; unsigned long flags; @@ -83,18 +83,18 @@ void vrtc_get_time(struct timespec *now) pr_info("vRTC: sec: %d min: %d hour: %d day: %d " "mon: %d year: %d\n", sec, min, hour, mday, mon, year); - now->tv_sec = mktime(year, mon, mday, hour, min, sec); + now->tv_sec = mktime64(year, mon, mday, hour, min, sec); now->tv_nsec = 0; } -int vrtc_set_mmss(const struct timespec *now) +int vrtc_set_mmss(const struct timespec64 *now) { unsigned long flags; struct rtc_time tm; int year; int retval = 0; - rtc_time_to_tm(now->tv_sec, &tm); + rtc_time64_to_tm(now->tv_sec, &tm); if (!rtc_valid_tm(&tm) && tm.tm_year >= 72) { /* * tm.year is the number of years since 1900, and the @@ -110,8 +110,8 @@ int vrtc_set_mmss(const struct timespec *now) vrtc_cmos_write(tm.tm_sec, RTC_SECONDS); spin_unlock_irqrestore(&rtc_lock, flags); } else { - pr_err("%s: Invalid vRTC value: write of %lx to vRTC failed\n", - __func__, now->tv_sec); + pr_err("%s: Invalid vRTC value: write of %llx to vRTC failed\n", + __func__, (s64)now->tv_sec); retval = -EINVAL; } return retval; diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c index ccf4a49bb065..67ccf64c8bd8 100644 --- a/arch/x86/power/hibernate_64.c +++ b/arch/x86/power/hibernate_64.c @@ -72,7 +72,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd) * tables used by the image kernel. */ - if (pgtable_l5_enabled) { + if (pgtable_l5_enabled()) { p4d = (p4d_t *)get_safe_page(GFP_ATOMIC); if (!p4d) return -ENOMEM; diff --git a/arch/x86/um/vdso/Makefile b/arch/x86/um/vdso/Makefile index 10003359e633..b2d6967262b2 100644 --- a/arch/x86/um/vdso/Makefile +++ b/arch/x86/um/vdso/Makefile @@ -23,14 +23,14 @@ $(obj)/vdso.o: $(obj)/vdso.so targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y) -export CPPFLAGS_vdso.lds += -P -C +CPPFLAGS_vdso.lds += -P -C VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \ -Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096 $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so -$(obj)/vdso.so.dbg: $(src)/vdso.lds $(vobjs) FORCE +$(obj)/vdso.so.dbg: $(obj)/vdso.lds $(vobjs) FORCE $(call if_changed,vdso) $(obj)/%.so: OBJCOPYFLAGS := -S diff --git a/arch/x86/xen/efi.c b/arch/x86/xen/efi.c index a18703be9ead..1804b27f9632 100644 --- a/arch/x86/xen/efi.c +++ b/arch/x86/xen/efi.c @@ -115,6 +115,61 @@ static efi_system_table_t __init *xen_efi_probe(void) return &efi_systab_xen; } +/* + * Determine whether we're in secure boot mode. + * + * Please keep the logic in sync with + * drivers/firmware/efi/libstub/secureboot.c:efi_get_secureboot(). + */ +static enum efi_secureboot_mode xen_efi_get_secureboot(void) +{ + static efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID; + static efi_guid_t shim_guid = EFI_SHIM_LOCK_GUID; + efi_status_t status; + u8 moksbstate, secboot, setupmode; + unsigned long size; + + size = sizeof(secboot); + status = efi.get_variable(L"SecureBoot", &efi_variable_guid, + NULL, &size, &secboot); + + if (status == EFI_NOT_FOUND) + return efi_secureboot_mode_disabled; + + if (status != EFI_SUCCESS) + goto out_efi_err; + + size = sizeof(setupmode); + status = efi.get_variable(L"SetupMode", &efi_variable_guid, + NULL, &size, &setupmode); + + if (status != EFI_SUCCESS) + goto out_efi_err; + + if (secboot == 0 || setupmode == 1) + return efi_secureboot_mode_disabled; + + /* See if a user has put the shim into insecure mode. */ + size = sizeof(moksbstate); + status = efi.get_variable(L"MokSBStateRT", &shim_guid, + NULL, &size, &moksbstate); + + /* If it fails, we don't care why. Default to secure. */ + if (status != EFI_SUCCESS) + goto secure_boot_enabled; + + if (moksbstate == 1) + return efi_secureboot_mode_disabled; + + secure_boot_enabled: + pr_info("UEFI Secure Boot is enabled.\n"); + return efi_secureboot_mode_enabled; + + out_efi_err: + pr_err("Could not determine UEFI Secure Boot status.\n"); + return efi_secureboot_mode_unknown; +} + void __init xen_efi_init(void) { efi_system_table_t *efi_systab_xen; @@ -129,6 +184,8 @@ void __init xen_efi_init(void) boot_params.efi_info.efi_systab = (__u32)__pa(efi_systab_xen); boot_params.efi_info.efi_systab_hi = (__u32)(__pa(efi_systab_xen) >> 32); + boot_params.secure_boot = xen_efi_get_secureboot(); + set_bit(EFI_BOOT, &efi.flags); set_bit(EFI_PARAVIRT, &efi.flags); set_bit(EFI_64BIT, &efi.flags); diff --git a/arch/x86/xen/enlighten_hvm.c b/arch/x86/xen/enlighten_hvm.c index 826898701045..19c1ff542387 100644 --- a/arch/x86/xen/enlighten_hvm.c +++ b/arch/x86/xen/enlighten_hvm.c @@ -65,6 +65,19 @@ static void __init xen_hvm_init_mem_mapping(void) { early_memunmap(HYPERVISOR_shared_info, PAGE_SIZE); HYPERVISOR_shared_info = __va(PFN_PHYS(shared_info_pfn)); + + /* + * The virtual address of the shared_info page has changed, so + * the vcpu_info pointer for VCPU 0 is now stale. + * + * The prepare_boot_cpu callback will re-initialize it via + * xen_vcpu_setup, but we can't rely on that to be called for + * old Xen versions (xen_have_vector_callback == 0). + * + * It is, in any case, bad to have a stale vcpu_info pointer + * so reset it now. + */ + xen_vcpu_info_reset(0); } static void __init init_hvm_pv_info(void) diff --git a/arch/x86/xen/enlighten_pv.c b/arch/x86/xen/enlighten_pv.c index c36d23aa6c35..357969a3697c 100644 --- a/arch/x86/xen/enlighten_pv.c +++ b/arch/x86/xen/enlighten_pv.c @@ -421,45 +421,33 @@ static void xen_load_gdt(const struct desc_ptr *dtr) { unsigned long va = dtr->address; unsigned int size = dtr->size + 1; - unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE); - unsigned long frames[pages]; - int f; - - /* - * A GDT can be up to 64k in size, which corresponds to 8192 - * 8-byte entries, or 16 4k pages.. - */ + unsigned long pfn, mfn; + int level; + pte_t *ptep; + void *virt; - BUG_ON(size > 65536); + /* @size should be at most GDT_SIZE which is smaller than PAGE_SIZE. */ + BUG_ON(size > PAGE_SIZE); BUG_ON(va & ~PAGE_MASK); - for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { - int level; - pte_t *ptep; - unsigned long pfn, mfn; - void *virt; - - /* - * The GDT is per-cpu and is in the percpu data area. - * That can be virtually mapped, so we need to do a - * page-walk to get the underlying MFN for the - * hypercall. The page can also be in the kernel's - * linear range, so we need to RO that mapping too. - */ - ptep = lookup_address(va, &level); - BUG_ON(ptep == NULL); - - pfn = pte_pfn(*ptep); - mfn = pfn_to_mfn(pfn); - virt = __va(PFN_PHYS(pfn)); + /* + * The GDT is per-cpu and is in the percpu data area. + * That can be virtually mapped, so we need to do a + * page-walk to get the underlying MFN for the + * hypercall. The page can also be in the kernel's + * linear range, so we need to RO that mapping too. + */ + ptep = lookup_address(va, &level); + BUG_ON(ptep == NULL); - frames[f] = mfn; + pfn = pte_pfn(*ptep); + mfn = pfn_to_mfn(pfn); + virt = __va(PFN_PHYS(pfn)); - make_lowmem_page_readonly((void *)va); - make_lowmem_page_readonly(virt); - } + make_lowmem_page_readonly((void *)va); + make_lowmem_page_readonly(virt); - if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct))) + if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct))) BUG(); } @@ -470,34 +458,22 @@ static void __init xen_load_gdt_boot(const struct desc_ptr *dtr) { unsigned long va = dtr->address; unsigned int size = dtr->size + 1; - unsigned pages = DIV_ROUND_UP(size, PAGE_SIZE); - unsigned long frames[pages]; - int f; - - /* - * A GDT can be up to 64k in size, which corresponds to 8192 - * 8-byte entries, or 16 4k pages.. - */ + unsigned long pfn, mfn; + pte_t pte; - BUG_ON(size > 65536); + /* @size should be at most GDT_SIZE which is smaller than PAGE_SIZE. */ + BUG_ON(size > PAGE_SIZE); BUG_ON(va & ~PAGE_MASK); - for (f = 0; va < dtr->address + size; va += PAGE_SIZE, f++) { - pte_t pte; - unsigned long pfn, mfn; + pfn = virt_to_pfn(va); + mfn = pfn_to_mfn(pfn); - pfn = virt_to_pfn(va); - mfn = pfn_to_mfn(pfn); + pte = pfn_pte(pfn, PAGE_KERNEL_RO); - pte = pfn_pte(pfn, PAGE_KERNEL_RO); - - if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0)) - BUG(); - - frames[f] = mfn; - } + if (HYPERVISOR_update_va_mapping((unsigned long)va, pte, 0)) + BUG(); - if (HYPERVISOR_set_gdt(frames, size / sizeof(struct desc_struct))) + if (HYPERVISOR_set_gdt(&mfn, size / sizeof(struct desc_struct))) BUG(); } diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index d33e7dbe3129..2d76106788a3 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -42,13 +42,11 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr) } EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine); -static void xen_flush_tlb_all(void) +static noinline void xen_flush_tlb_all(void) { struct mmuext_op *op; struct multicall_space mcs; - trace_xen_mmu_flush_tlb_all(0); - preempt_disable(); mcs = xen_mc_entry(sizeof(*op)); diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 486c0a34d00b..2c30cabfda90 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1310,13 +1310,11 @@ unsigned long xen_read_cr2_direct(void) return this_cpu_read(xen_vcpu_info.arch.cr2); } -static void xen_flush_tlb(void) +static noinline void xen_flush_tlb(void) { struct mmuext_op *op; struct multicall_space mcs; - trace_xen_mmu_flush_tlb(0); - preempt_disable(); mcs = xen_mc_entry(sizeof(*op)); diff --git a/arch/x86/xen/time.c b/arch/x86/xen/time.c index 29163c43ebbd..e0f1bcf01d63 100644 --- a/arch/x86/xen/time.c +++ b/arch/x86/xen/time.c @@ -57,7 +57,7 @@ static u64 xen_clocksource_get_cycles(struct clocksource *cs) return xen_clocksource_read(); } -static void xen_read_wallclock(struct timespec *ts) +static void xen_read_wallclock(struct timespec64 *ts) { struct shared_info *s = HYPERVISOR_shared_info; struct pvclock_wall_clock *wall_clock = &(s->wc); @@ -68,12 +68,12 @@ static void xen_read_wallclock(struct timespec *ts) put_cpu_var(xen_vcpu); } -static void xen_get_wallclock(struct timespec *now) +static void xen_get_wallclock(struct timespec64 *now) { xen_read_wallclock(now); } -static int xen_set_wallclock(const struct timespec *now) +static int xen_set_wallclock(const struct timespec64 *now) { return -ENODEV; } @@ -461,7 +461,7 @@ static void __init xen_time_init(void) { struct pvclock_vcpu_time_info *pvti; int cpu = smp_processor_id(); - struct timespec tp; + struct timespec64 tp; /* As Dom0 is never moved, no penalty on using TSC there */ if (xen_initial_domain()) @@ -479,7 +479,7 @@ static void __init xen_time_init(void) /* Set initial system time with full resolution */ xen_read_wallclock(&tp); - do_settimeofday(&tp); + do_settimeofday64(&tp); setup_force_cpu_cap(X86_FEATURE_TSC); diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index c921e8bccdc8..17df332269b2 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -19,7 +19,6 @@ config XTENSA select HAVE_ARCH_KASAN if MMU select HAVE_CC_STACKPROTECTOR select HAVE_DEBUG_KMEMLEAK - select HAVE_DMA_API_DEBUG select HAVE_DMA_CONTIGUOUS select HAVE_EXIT_THREAD select HAVE_FUNCTION_TRACER diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 436b20337168..e5e1e61c538c 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -1,4 +1,5 @@ generic-y += bug.h +generic-y += compat.h generic-y += device.h generic-y += div64.h generic-y += dma-contiguous.h diff --git a/arch/xtensa/include/asm/pci.h b/arch/xtensa/include/asm/pci.h index d5a82153a7c5..6ddf0a30c60d 100644 --- a/arch/xtensa/include/asm/pci.h +++ b/arch/xtensa/include/asm/pci.h @@ -42,8 +42,6 @@ extern struct pci_controller* pcibios_alloc_controller(void); * decisions. */ -#define PCI_DMA_BUS_IS_PHYS (1) - /* Tell PCI code what kind of PCI resource mappings we support */ #define HAVE_PCI_MMAP 1 #define ARCH_GENERIC_PCI_MMAP_RESOURCE 1 diff --git a/arch/xtensa/include/uapi/asm/msgbuf.h b/arch/xtensa/include/uapi/asm/msgbuf.h index 36e2e103ca38..d6915e9f071c 100644 --- a/arch/xtensa/include/uapi/asm/msgbuf.h +++ b/arch/xtensa/include/uapi/asm/msgbuf.h @@ -7,7 +7,6 @@ * between kernel and user space. * * Pad space is left for: - * - 64-bit time_t to solve y2038 problem * - 2 miscellaneous 32-bit values * * This file is subject to the terms and conditions of the GNU General @@ -21,19 +20,19 @@ struct msqid64_ds { struct ipc64_perm msg_perm; #ifdef __XTENSA_EB__ - unsigned int __unused1; - __kernel_time_t msg_stime; /* last msgsnd time */ - unsigned int __unused2; - __kernel_time_t msg_rtime; /* last msgrcv time */ - unsigned int __unused3; - __kernel_time_t msg_ctime; /* last change time */ + unsigned long msg_stime_high; + unsigned long msg_stime; /* last msgsnd time */ + unsigned long msg_rtime_high; + unsigned long msg_rtime; /* last msgrcv time */ + unsigned long msg_ctime_high; + unsigned long msg_ctime; /* last change time */ #elif defined(__XTENSA_EL__) - __kernel_time_t msg_stime; /* last msgsnd time */ - unsigned int __unused1; - __kernel_time_t msg_rtime; /* last msgrcv time */ - unsigned int __unused2; - __kernel_time_t msg_ctime; /* last change time */ - unsigned int __unused3; + unsigned long msg_stime; /* last msgsnd time */ + unsigned long msg_stime_high; + unsigned long msg_rtime; /* last msgrcv time */ + unsigned long msg_rtime_high; + unsigned long msg_ctime; /* last change time */ + unsigned long msg_ctime_high; #else # error processor byte order undefined! #endif diff --git a/arch/xtensa/include/uapi/asm/sembuf.h b/arch/xtensa/include/uapi/asm/sembuf.h index f61b6331a10c..09f348d643f1 100644 --- a/arch/xtensa/include/uapi/asm/sembuf.h +++ b/arch/xtensa/include/uapi/asm/sembuf.h @@ -14,7 +14,6 @@ * between kernel and user space. * * Pad space is left for: - * - 64-bit time_t to solve y2038 problem * - 2 miscellaneous 32-bit values * */ @@ -27,15 +26,15 @@ struct semid64_ds { struct ipc64_perm sem_perm; /* permissions .. see ipc.h */ #ifdef __XTENSA_EL__ - __kernel_time_t sem_otime; /* last semop time */ - unsigned long __unused1; - __kernel_time_t sem_ctime; /* last change time */ - unsigned long __unused2; + unsigned long sem_otime; /* last semop time */ + unsigned long sem_otime_high; + unsigned long sem_ctime; /* last change time */ + unsigned long sem_ctime_high; #else - unsigned long __unused1; - __kernel_time_t sem_otime; /* last semop time */ - unsigned long __unused2; - __kernel_time_t sem_ctime; /* last change time */ + unsigned long sem_otime_high; + unsigned long sem_otime; /* last semop time */ + unsigned long sem_ctime_high; + unsigned long sem_ctime; /* last change time */ #endif unsigned long sem_nsems; /* no. of semaphores in array */ unsigned long __unused3; diff --git a/arch/xtensa/include/uapi/asm/shmbuf.h b/arch/xtensa/include/uapi/asm/shmbuf.h index 26550bdc8430..554a57a6a90f 100644 --- a/arch/xtensa/include/uapi/asm/shmbuf.h +++ b/arch/xtensa/include/uapi/asm/shmbuf.h @@ -4,10 +4,10 @@ * * The shmid64_ds structure for Xtensa architecture. * Note extra padding because this structure is passed back and forth - * between kernel and user space. + * between kernel and user space, but the padding is on the wrong + * side for big-endian xtensa, for historic reasons. * * Pad space is left for: - * - 64-bit time_t to solve y2038 problem * - 2 miscellaneous 32-bit values * * This file is subject to the terms and conditions of the GNU General Public @@ -20,42 +20,21 @@ #ifndef _XTENSA_SHMBUF_H #define _XTENSA_SHMBUF_H -#if defined (__XTENSA_EL__) struct shmid64_ds { struct ipc64_perm shm_perm; /* operation perms */ size_t shm_segsz; /* size of segment (bytes) */ - __kernel_time_t shm_atime; /* last attach time */ - unsigned long __unused1; - __kernel_time_t shm_dtime; /* last detach time */ - unsigned long __unused2; - __kernel_time_t shm_ctime; /* last change time */ - unsigned long __unused3; + unsigned long shm_atime; /* last attach time */ + unsigned long shm_atime_high; + unsigned long shm_dtime; /* last detach time */ + unsigned long shm_dtime_high; + unsigned long shm_ctime; /* last change time */ + unsigned long shm_ctime_high; __kernel_pid_t shm_cpid; /* pid of creator */ __kernel_pid_t shm_lpid; /* pid of last operator */ unsigned long shm_nattch; /* no. of current attaches */ unsigned long __unused4; unsigned long __unused5; }; -#elif defined (__XTENSA_EB__) -struct shmid64_ds { - struct ipc64_perm shm_perm; /* operation perms */ - size_t shm_segsz; /* size of segment (bytes) */ - __kernel_time_t shm_atime; /* last attach time */ - unsigned long __unused1; - __kernel_time_t shm_dtime; /* last detach time */ - unsigned long __unused2; - __kernel_time_t shm_ctime; /* last change time */ - unsigned long __unused3; - __kernel_pid_t shm_cpid; /* pid of creator */ - __kernel_pid_t shm_lpid; /* pid of last operator */ - unsigned long shm_nattch; /* no. of current attaches */ - unsigned long __unused4; - unsigned long __unused5; -}; -#else -# error endian order not defined -#endif - struct shminfo64 { unsigned long shmmax; diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 732631ce250f..392b4a80ebc2 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -261,12 +261,3 @@ const struct dma_map_ops xtensa_dma_map_ops = { .mapping_error = xtensa_dma_mapping_error, }; EXPORT_SYMBOL(xtensa_dma_map_ops); - -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) - -static int __init xtensa_dma_init(void) -{ - dma_debug_init(PREALLOC_DMA_DEBUG_ENTRIES); - return 0; -} -fs_initcall(xtensa_dma_init); diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 32c5207f1226..86507fa7c2d7 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -323,8 +323,6 @@ do_illegal_instruction(struct pt_regs *regs) void do_unaligned_user (struct pt_regs *regs) { - siginfo_t info; - __die_if_kernel("Unhandled unaligned exception in kernel", regs, SIGKILL); @@ -334,12 +332,7 @@ do_unaligned_user (struct pt_regs *regs) "(pid = %d, pc = %#010lx)\n", regs->excvaddr, current->comm, task_pid_nr(current), regs->pc); - info.si_signo = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRALN; - info.si_addr = (void *) regs->excvaddr; - force_sig_info(SIGSEGV, &info, current); - + force_sig_fault(SIGBUS, BUS_ADRALN, (void *) regs->excvaddr, current); } #endif diff --git a/arch/xtensa/mm/fault.c b/arch/xtensa/mm/fault.c index 8b9b6f44bb06..c111a833205a 100644 --- a/arch/xtensa/mm/fault.c +++ b/arch/xtensa/mm/fault.c @@ -39,13 +39,13 @@ void do_page_fault(struct pt_regs *regs) struct mm_struct *mm = current->mm; unsigned int exccause = regs->exccause; unsigned int address = regs->excvaddr; - siginfo_t info; + int code; int is_write, is_exec; int fault; unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE; - info.si_code = SEGV_MAPERR; + code = SEGV_MAPERR; /* We fault-in kernel-space virtual memory on-demand. The * 'reference' page table is init_mm.pgd. @@ -91,7 +91,7 @@ retry: */ good_area: - info.si_code = SEGV_ACCERR; + code = SEGV_ACCERR; if (is_write) { if (!(vma->vm_flags & VM_WRITE)) @@ -157,11 +157,7 @@ bad_area: if (user_mode(regs)) { current->thread.bad_vaddr = address; current->thread.error_code = is_write; - info.si_signo = SIGSEGV; - info.si_errno = 0; - /* info.si_code has been set above */ - info.si_addr = (void *) address; - force_sig_info(SIGSEGV, &info, current); + force_sig_fault(SIGSEGV, code, (void *) address, current); return; } bad_page_fault(regs, address, SIGSEGV); @@ -186,11 +182,7 @@ do_sigbus: * or user mode. */ current->thread.bad_vaddr = address; - info.si_code = SIGBUS; - info.si_errno = 0; - info.si_code = BUS_ADRERR; - info.si_addr = (void *) address; - force_sig_info(SIGBUS, &info, current); + force_sig_fault(SIGBUS, BUS_ADRERR, (void *) address, current); /* Kernel mode? Handle exceptions or die */ if (!user_mode(regs)) diff --git a/arch/xtensa/platforms/iss/console.c b/arch/xtensa/platforms/iss/console.c index 92f567f9a21e..af81a62faba6 100644 --- a/arch/xtensa/platforms/iss/console.c +++ b/arch/xtensa/platforms/iss/console.c @@ -153,19 +153,6 @@ static int rs_proc_show(struct seq_file *m, void *v) return 0; } -static int rs_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, rs_proc_show, NULL); -} - -static const struct file_operations rs_proc_fops = { - .owner = THIS_MODULE, - .open = rs_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - static const struct tty_operations serial_ops = { .open = rs_open, .close = rs_close, @@ -176,7 +163,7 @@ static const struct tty_operations serial_ops = { .chars_in_buffer = rs_chars_in_buffer, .hangup = rs_hangup, .wait_until_sent = rs_wait_until_sent, - .proc_fops = &rs_proc_fops, + .proc_show = rs_proc_show, }; int __init rs_init(void) |