diff options
Diffstat (limited to 'arch')
338 files changed, 5493 insertions, 2655 deletions
diff --git a/arch/Kconfig b/arch/Kconfig index 678a80713b21..17709ac1030a 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -599,21 +599,22 @@ config STACKPROTECTOR_STRONG config ARCH_SUPPORTS_SHADOW_CALL_STACK bool help - An architecture should select this if it supports Clang's Shadow - Call Stack and implements runtime support for shadow stack + An architecture should select this if it supports the compiler's + Shadow Call Stack and implements runtime support for shadow stack switching. config SHADOW_CALL_STACK - bool "Clang Shadow Call Stack" - depends on CC_IS_CLANG && ARCH_SUPPORTS_SHADOW_CALL_STACK + bool "Shadow Call Stack" + depends on ARCH_SUPPORTS_SHADOW_CALL_STACK depends on DYNAMIC_FTRACE_WITH_REGS || !FUNCTION_GRAPH_TRACER help - This option enables Clang's Shadow Call Stack, which uses a - shadow stack to protect function return addresses from being - overwritten by an attacker. More information can be found in - Clang's documentation: + This option enables the compiler's Shadow Call Stack, which + uses a shadow stack to protect function return addresses from + being overwritten by an attacker. More information can be found + in the compiler's documentation: - https://clang.llvm.org/docs/ShadowCallStack.html + - Clang: https://clang.llvm.org/docs/ShadowCallStack.html + - GCC: https://gcc.gnu.org/onlinedocs/gcc/Instrumentation-Options.html#Instrumentation-Options Note that security guarantees in the kernel differ from the ones documented for user space. The kernel must store addresses @@ -1159,16 +1160,30 @@ config HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET to the compiler, so it will attempt to add canary checks regardless of the static branch state. -config RANDOMIZE_KSTACK_OFFSET_DEFAULT - bool "Randomize kernel stack offset on syscall entry" +config RANDOMIZE_KSTACK_OFFSET + bool "Support for randomizing kernel stack offset on syscall entry" if EXPERT + default y depends on HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET + depends on INIT_STACK_NONE || !CC_IS_CLANG || CLANG_VERSION >= 140000 help The kernel stack offset can be randomized (after pt_regs) by roughly 5 bits of entropy, frustrating memory corruption attacks that depend on stack address determinism or - cross-syscall address exposures. This feature is controlled - by kernel boot param "randomize_kstack_offset=on/off", and this - config chooses the default boot state. + cross-syscall address exposures. + + The feature is controlled via the "randomize_kstack_offset=on/off" + kernel boot param, and if turned off has zero overhead due to its use + of static branches (see JUMP_LABEL). + + If unsure, say Y. + +config RANDOMIZE_KSTACK_OFFSET_DEFAULT + bool "Default state of kernel stack offset randomization" + depends on RANDOMIZE_KSTACK_OFFSET + help + Kernel stack offset randomization is controlled by kernel boot param + "randomize_kstack_offset=on/off", and this config chooses the default + boot state. config ARCH_OPTIONAL_KERNEL_RWX def_bool n @@ -1278,12 +1293,41 @@ config HAVE_STATIC_CALL_INLINE config HAVE_PREEMPT_DYNAMIC bool + +config HAVE_PREEMPT_DYNAMIC_CALL + bool depends on HAVE_STATIC_CALL - depends on GENERIC_ENTRY + select HAVE_PREEMPT_DYNAMIC help - Select this if the architecture support boot time preempt setting - on top of static calls. It is strongly advised to support inline - static call to avoid any overhead. + An architecture should select this if it can handle the preemption + model being selected at boot time using static calls. + + Where an architecture selects HAVE_STATIC_CALL_INLINE, any call to a + preemption function will be patched directly. + + Where an architecture does not select HAVE_STATIC_CALL_INLINE, any + call to a preemption function will go through a trampoline, and the + trampoline will be patched. + + It is strongly advised to support inline static call to avoid any + overhead. + +config HAVE_PREEMPT_DYNAMIC_KEY + bool + depends on HAVE_ARCH_JUMP_LABEL && CC_HAS_ASM_GOTO + select HAVE_PREEMPT_DYNAMIC + help + An architecture should select this if it can handle the preemption + model being selected at boot time using static keys. + + Each preemption function will be given an early return based on a + static key. This should have slightly lower overhead than non-inline + static calls, as this effectively inlines each trampoline into the + start of its callee. This may avoid redundant work, and may + integrate better with CFI schemes. + + This will have greater overhead than using inline static calls as + the call to the preemption function cannot be entirely elided. config ARCH_WANT_LD_ORPHAN_WARN bool diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig index 4e87783c90ad..14c97acea351 100644 --- a/arch/alpha/Kconfig +++ b/arch/alpha/Kconfig @@ -12,7 +12,6 @@ config ALPHA select FORCE_PCI if !ALPHA_JENSEN select PCI_DOMAINS if PCI select PCI_SYSCALL if PCI - select HAVE_AOUT select HAVE_ASM_MODVERSIONS select HAVE_PCSPKR_PLATFORM select HAVE_PERF_EVENTS diff --git a/arch/alpha/configs/defconfig b/arch/alpha/configs/defconfig index 7f1ca30b115b..7e9336930880 100644 --- a/arch/alpha/configs/defconfig +++ b/arch/alpha/configs/defconfig @@ -62,7 +62,6 @@ CONFIG_TMPFS=y CONFIG_NFS_FS=m CONFIG_NFS_V3=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_NLS_CODEPAGE_437=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/alpha/include/asm/xor.h b/arch/alpha/include/asm/xor.h index 5aeb4fb3cb7c..e0de0c233ab9 100644 --- a/arch/alpha/include/asm/xor.h +++ b/arch/alpha/include/asm/xor.h @@ -5,24 +5,43 @@ * Optimized RAID-5 checksumming functions for alpha EV5 and EV6 */ -extern void xor_alpha_2(unsigned long, unsigned long *, unsigned long *); -extern void xor_alpha_3(unsigned long, unsigned long *, unsigned long *, - unsigned long *); -extern void xor_alpha_4(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *); -extern void xor_alpha_5(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *, unsigned long *); +extern void +xor_alpha_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +extern void +xor_alpha_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +extern void +xor_alpha_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +extern void +xor_alpha_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); -extern void xor_alpha_prefetch_2(unsigned long, unsigned long *, - unsigned long *); -extern void xor_alpha_prefetch_3(unsigned long, unsigned long *, - unsigned long *, unsigned long *); -extern void xor_alpha_prefetch_4(unsigned long, unsigned long *, - unsigned long *, unsigned long *, - unsigned long *); -extern void xor_alpha_prefetch_5(unsigned long, unsigned long *, - unsigned long *, unsigned long *, - unsigned long *, unsigned long *); +extern void +xor_alpha_prefetch_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +extern void +xor_alpha_prefetch_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +extern void +xor_alpha_prefetch_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +extern void +xor_alpha_prefetch_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); asm(" \n\ .text \n\ diff --git a/arch/alpha/mm/init.c b/arch/alpha/mm/init.c index f6114d03357c..7511723b7669 100644 --- a/arch/alpha/mm/init.c +++ b/arch/alpha/mm/init.c @@ -76,14 +76,14 @@ pgd_alloc(struct mm_struct *mm) pmd_t * __bad_pagetable(void) { - memset((void *) EMPTY_PGT, 0, PAGE_SIZE); + memset(absolute_pointer(EMPTY_PGT), 0, PAGE_SIZE); return (pmd_t *) EMPTY_PGT; } pte_t __bad_page(void) { - memset((void *) EMPTY_PGE, 0, PAGE_SIZE); + memset(absolute_pointer(EMPTY_PGE), 0, PAGE_SIZE); return pte_mkdirty(mk_pte(virt_to_page(EMPTY_PGE), PAGE_SHARED)); } @@ -253,7 +253,7 @@ void __init paging_init(void) free_area_init(max_zone_pfn); /* Initialize the kernel's ZERO_PGE. */ - memset((void *)ZERO_PGE, 0, PAGE_SIZE); + memset(absolute_pointer(ZERO_PGE), 0, PAGE_SIZE); } #if defined(CONFIG_ALPHA_GENERIC) || defined(CONFIG_ALPHA_SRM) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index ba6ba78a9cb6..49652ef9eff4 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -5,6 +5,7 @@ config ARM select ARCH_32BIT_OFF_T select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE if HAVE_KRETPROBES && FRAME_POINTER && !ARM_UNWIND select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL if MMU select ARCH_HAS_DMA_WRITE_COMBINE if !ARM_DMA_MEM_BUFFERABLE select ARCH_HAS_ELF_RANDOMIZE diff --git a/arch/arm/boot/dts/dra7-l4.dtsi b/arch/arm/boot/dts/dra7-l4.dtsi index 956a26d52a4c..0a11bacffc1f 100644 --- a/arch/arm/boot/dts/dra7-l4.dtsi +++ b/arch/arm/boot/dts/dra7-l4.dtsi @@ -3482,8 +3482,7 @@ ti,timer-pwm; }; }; - - target-module@2c000 { /* 0x4882c000, ap 17 02.0 */ + timer15_target: target-module@2c000 { /* 0x4882c000, ap 17 02.0 */ compatible = "ti,sysc-omap4-timer", "ti,sysc"; reg = <0x2c000 0x4>, <0x2c010 0x4>; @@ -3511,7 +3510,7 @@ }; }; - target-module@2e000 { /* 0x4882e000, ap 19 14.0 */ + timer16_target: target-module@2e000 { /* 0x4882e000, ap 19 14.0 */ compatible = "ti,sysc-omap4-timer", "ti,sysc"; reg = <0x2e000 0x4>, <0x2e010 0x4>; diff --git a/arch/arm/boot/dts/dra7.dtsi b/arch/arm/boot/dts/dra7.dtsi index 42bff117656c..97ce0c4f1df7 100644 --- a/arch/arm/boot/dts/dra7.dtsi +++ b/arch/arm/boot/dts/dra7.dtsi @@ -1339,20 +1339,20 @@ }; /* Local timers, see ARM architected timer wrap erratum i940 */ -&timer3_target { +&timer15_target { ti,no-reset-on-init; ti,no-idle; timer@0 { - assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER3_CLKCTRL 24>; + assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER15_CLKCTRL 24>; assigned-clock-parents = <&timer_sys_clk_div>; }; }; -&timer4_target { +&timer16_target { ti,no-reset-on-init; ti,no-idle; timer@0 { - assigned-clocks = <&l4per_clkctrl DRA7_L4PER_TIMER4_CLKCTRL 24>; + assigned-clocks = <&l4per3_clkctrl DRA7_L4PER3_TIMER16_CLKCTRL 24>; assigned-clock-parents = <&timer_sys_clk_div>; }; }; diff --git a/arch/arm/configs/davinci_all_defconfig b/arch/arm/configs/davinci_all_defconfig index e849367c0566..b58d45a03607 100644 --- a/arch/arm/configs/davinci_all_defconfig +++ b/arch/arm/configs/davinci_all_defconfig @@ -258,7 +258,6 @@ CONFIG_MINIX_FS=m CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=m CONFIG_NLS_ISO8859_1=y diff --git a/arch/arm/configs/ezx_defconfig b/arch/arm/configs/ezx_defconfig index ec84d80096b1..0788a892e160 100644 --- a/arch/arm/configs/ezx_defconfig +++ b/arch/arm/configs/ezx_defconfig @@ -309,7 +309,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_NFSD_V3_ACL=y CONFIG_SMB_FS=m CONFIG_CIFS=m diff --git a/arch/arm/configs/imote2_defconfig b/arch/arm/configs/imote2_defconfig index 6db871d4e077..015b7ef237de 100644 --- a/arch/arm/configs/imote2_defconfig +++ b/arch/arm/configs/imote2_defconfig @@ -283,7 +283,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_NFS_V3_ACL=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_NFSD_V3_ACL=y CONFIG_SMB_FS=m CONFIG_CIFS=m diff --git a/arch/arm/configs/integrator_defconfig b/arch/arm/configs/integrator_defconfig index 4dfe321a79f6..5b485722ccf9 100644 --- a/arch/arm/configs/integrator_defconfig +++ b/arch/arm/configs/integrator_defconfig @@ -79,7 +79,6 @@ CONFIG_CRAMFS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_NFSD_V3=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_MAGIC_SYSRQ=y diff --git a/arch/arm/configs/iop32x_defconfig b/arch/arm/configs/iop32x_defconfig index 18a21faa834c..de2cc6759cae 100644 --- a/arch/arm/configs/iop32x_defconfig +++ b/arch/arm/configs/iop32x_defconfig @@ -94,7 +94,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_NFSD_V3=y CONFIG_PARTITION_ADVANCED=y CONFIG_MAGIC_SYSRQ=y CONFIG_DEBUG_KERNEL=y diff --git a/arch/arm/configs/keystone_defconfig b/arch/arm/configs/keystone_defconfig index 33c917df7b32..b1bcb858216b 100644 --- a/arch/arm/configs/keystone_defconfig +++ b/arch/arm/configs/keystone_defconfig @@ -213,7 +213,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_NFSD_V3=y CONFIG_NFSD_V3_ACL=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y diff --git a/arch/arm/configs/lart_defconfig b/arch/arm/configs/lart_defconfig index b6ddb9884326..2dfa33d7dca3 100644 --- a/arch/arm/configs/lart_defconfig +++ b/arch/arm/configs/lart_defconfig @@ -59,7 +59,6 @@ CONFIG_CRAMFS=m CONFIG_NFS_FS=m CONFIG_NFS_V3=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_NLS=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m diff --git a/arch/arm/configs/netwinder_defconfig b/arch/arm/configs/netwinder_defconfig index 2e3b20ef0db1..c3c171cec91b 100644 --- a/arch/arm/configs/netwinder_defconfig +++ b/arch/arm/configs/netwinder_defconfig @@ -71,7 +71,6 @@ CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_NFSD_V3=y CONFIG_SMB_FS=y CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=y diff --git a/arch/arm/configs/versatile_defconfig b/arch/arm/configs/versatile_defconfig index d06aa64e05a1..c2d79a67f81f 100644 --- a/arch/arm/configs/versatile_defconfig +++ b/arch/arm/configs/versatile_defconfig @@ -86,7 +86,6 @@ CONFIG_ROMFS_FS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_NFSD_V3=y CONFIG_NLS_CODEPAGE_850=m CONFIG_NLS_ISO8859_1=m CONFIG_MAGIC_SYSRQ=y diff --git a/arch/arm/configs/viper_defconfig b/arch/arm/configs/viper_defconfig index 989599ce5300..c28539bfd128 100644 --- a/arch/arm/configs/viper_defconfig +++ b/arch/arm/configs/viper_defconfig @@ -145,7 +145,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m diff --git a/arch/arm/configs/zeus_defconfig b/arch/arm/configs/zeus_defconfig index d3b98c4d225b..25bb6995f105 100644 --- a/arch/arm/configs/zeus_defconfig +++ b/arch/arm/configs/zeus_defconfig @@ -160,7 +160,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_850=m diff --git a/arch/arm/crypto/aes-neonbs-core.S b/arch/arm/crypto/aes-neonbs-core.S index 7d0cc7f226a5..7b61032f29fa 100644 --- a/arch/arm/crypto/aes-neonbs-core.S +++ b/arch/arm/crypto/aes-neonbs-core.S @@ -758,29 +758,24 @@ ENTRY(aesbs_cbc_decrypt) ENDPROC(aesbs_cbc_decrypt) .macro next_ctr, q - vmov.32 \q\()h[1], r10 + vmov \q\()h, r9, r10 adds r10, r10, #1 - vmov.32 \q\()h[0], r9 adcs r9, r9, #0 - vmov.32 \q\()l[1], r8 + vmov \q\()l, r7, r8 adcs r8, r8, #0 - vmov.32 \q\()l[0], r7 adc r7, r7, #0 vrev32.8 \q, \q .endm /* * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], - * int rounds, int blocks, u8 ctr[], u8 final[]) + * int rounds, int bytes, u8 ctr[]) */ ENTRY(aesbs_ctr_encrypt) mov ip, sp push {r4-r10, lr} - ldm ip, {r5-r7} // load args 4-6 - teq r7, #0 - addne r5, r5, #1 // one extra block if final != 0 - + ldm ip, {r5, r6} // load args 4-5 vld1.8 {q0}, [r6] // load counter vrev32.8 q1, q0 vmov r9, r10, d3 @@ -792,20 +787,19 @@ ENTRY(aesbs_ctr_encrypt) adc r7, r7, #0 99: vmov q1, q0 + sub lr, r5, #1 vmov q2, q0 + adr ip, 0f vmov q3, q0 + and lr, lr, #112 vmov q4, q0 + cmp r5, #112 vmov q5, q0 + sub ip, ip, lr, lsl #1 vmov q6, q0 + add ip, ip, lr, lsr #2 vmov q7, q0 - - adr ip, 0f - sub lr, r5, #1 - and lr, lr, #7 - cmp r5, #8 - sub ip, ip, lr, lsl #5 - sub ip, ip, lr, lsl #2 - movlt pc, ip // computed goto if blocks < 8 + movle pc, ip // computed goto if bytes < 112 next_ctr q1 next_ctr q2 @@ -820,12 +814,14 @@ ENTRY(aesbs_ctr_encrypt) bl aesbs_encrypt8 adr ip, 1f - and lr, r5, #7 - cmp r5, #8 - movgt r4, #0 - ldrle r4, [sp, #40] // load final in the last round - sub ip, ip, lr, lsl #2 - movlt pc, ip // computed goto if blocks < 8 + sub lr, r5, #1 + cmp r5, #128 + bic lr, lr, #15 + ands r4, r5, #15 // preserves C flag + teqcs r5, r5 // set Z flag if not last iteration + sub ip, ip, lr, lsr #2 + rsb r4, r4, #16 + movcc pc, ip // computed goto if bytes < 128 vld1.8 {q8}, [r1]! vld1.8 {q9}, [r1]! @@ -834,46 +830,70 @@ ENTRY(aesbs_ctr_encrypt) vld1.8 {q12}, [r1]! vld1.8 {q13}, [r1]! vld1.8 {q14}, [r1]! - teq r4, #0 // skip last block if 'final' -1: bne 2f +1: subne r1, r1, r4 vld1.8 {q15}, [r1]! -2: adr ip, 3f - cmp r5, #8 - sub ip, ip, lr, lsl #3 - movlt pc, ip // computed goto if blocks < 8 + add ip, ip, #2f - 1b veor q0, q0, q8 - vst1.8 {q0}, [r0]! veor q1, q1, q9 - vst1.8 {q1}, [r0]! veor q4, q4, q10 - vst1.8 {q4}, [r0]! veor q6, q6, q11 - vst1.8 {q6}, [r0]! veor q3, q3, q12 - vst1.8 {q3}, [r0]! veor q7, q7, q13 - vst1.8 {q7}, [r0]! veor q2, q2, q14 + bne 3f + veor q5, q5, q15 + + movcc pc, ip // computed goto if bytes < 128 + + vst1.8 {q0}, [r0]! + vst1.8 {q1}, [r0]! + vst1.8 {q4}, [r0]! + vst1.8 {q6}, [r0]! + vst1.8 {q3}, [r0]! + vst1.8 {q7}, [r0]! vst1.8 {q2}, [r0]! - teq r4, #0 // skip last block if 'final' - W(bne) 5f -3: veor q5, q5, q15 +2: subne r0, r0, r4 vst1.8 {q5}, [r0]! -4: next_ctr q0 + next_ctr q0 - subs r5, r5, #8 + subs r5, r5, #128 bgt 99b vst1.8 {q0}, [r6] pop {r4-r10, pc} -5: vst1.8 {q5}, [r4] - b 4b +3: adr lr, .Lpermute_table + 16 + cmp r5, #16 // Z flag remains cleared + sub lr, lr, r4 + vld1.8 {q8-q9}, [lr] + vtbl.8 d16, {q5}, d16 + vtbl.8 d17, {q5}, d17 + veor q5, q8, q15 + bcc 4f // have to reload prev if R5 < 16 + vtbx.8 d10, {q2}, d18 + vtbx.8 d11, {q2}, d19 + mov pc, ip // branch back to VST sequence + +4: sub r0, r0, r4 + vshr.s8 q9, q9, #7 // create mask for VBIF + vld1.8 {q8}, [r0] // reload + vbif q5, q8, q9 + vst1.8 {q5}, [r0] + pop {r4-r10, pc} ENDPROC(aesbs_ctr_encrypt) + .align 6 +.Lpermute_table: + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .macro next_tweak, out, in, const, tmp vshr.s64 \tmp, \in, #63 vand \tmp, \tmp, \const @@ -888,6 +908,7 @@ ENDPROC(aesbs_ctr_encrypt) * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, * int blocks, u8 iv[], int reorder_last_tweak) */ + .align 6 __xts_prepare8: vld1.8 {q14}, [r7] // load iv vmov.i32 d30, #0x87 // compose tweak mask vector diff --git a/arch/arm/crypto/aes-neonbs-glue.c b/arch/arm/crypto/aes-neonbs-glue.c index 5c6cd3c63cbc..f00f042ef357 100644 --- a/arch/arm/crypto/aes-neonbs-glue.c +++ b/arch/arm/crypto/aes-neonbs-glue.c @@ -37,7 +37,7 @@ asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 iv[]); asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], - int rounds, int blocks, u8 ctr[], u8 final[]); + int rounds, int blocks, u8 ctr[]); asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 iv[], int); @@ -243,32 +243,25 @@ static int ctr_encrypt(struct skcipher_request *req) err = skcipher_walk_virt(&walk, req, false); while (walk.nbytes > 0) { - unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; - u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL; + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; + int bytes = walk.nbytes; - if (walk.nbytes < walk.total) { - blocks = round_down(blocks, - walk.stride / AES_BLOCK_SIZE); - final = NULL; - } + if (unlikely(bytes < AES_BLOCK_SIZE)) + src = dst = memcpy(buf + sizeof(buf) - bytes, + src, bytes); + else if (walk.nbytes < walk.total) + bytes &= ~(8 * AES_BLOCK_SIZE - 1); kernel_neon_begin(); - aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->rk, ctx->rounds, blocks, walk.iv, final); + aesbs_ctr_encrypt(dst, src, ctx->rk, ctx->rounds, bytes, walk.iv); kernel_neon_end(); - if (final) { - u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; - u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; + if (unlikely(bytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, + buf + sizeof(buf) - bytes, bytes); - crypto_xor_cpy(dst, src, final, - walk.total % AES_BLOCK_SIZE); - - err = skcipher_walk_done(&walk, 0); - break; - } - err = skcipher_walk_done(&walk, - walk.nbytes - blocks * AES_BLOCK_SIZE); + err = skcipher_walk_done(&walk, walk.nbytes - bytes); } return err; diff --git a/arch/arm/include/asm/paravirt_api_clock.h b/arch/arm/include/asm/paravirt_api_clock.h new file mode 100644 index 000000000000..65ac7cee0dad --- /dev/null +++ b/arch/arm/include/asm/paravirt_api_clock.h @@ -0,0 +1 @@ +#include <asm/paravirt.h> diff --git a/arch/arm/include/asm/xor.h b/arch/arm/include/asm/xor.h index aefddec79286..669cad5194d3 100644 --- a/arch/arm/include/asm/xor.h +++ b/arch/arm/include/asm/xor.h @@ -44,7 +44,8 @@ : "0" (dst), "r" (a1), "r" (a2), "r" (a3), "r" (a4)) static void -xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_arm4regs_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned int lines = bytes / sizeof(unsigned long) / 4; register unsigned int a1 __asm__("r4"); @@ -64,8 +65,9 @@ xor_arm4regs_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_arm4regs_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned int lines = bytes / sizeof(unsigned long) / 4; register unsigned int a1 __asm__("r4"); @@ -86,8 +88,10 @@ xor_arm4regs_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_arm4regs_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned int lines = bytes / sizeof(unsigned long) / 2; register unsigned int a1 __asm__("r8"); @@ -105,8 +109,11 @@ xor_arm4regs_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_arm4regs_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_arm4regs_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned int lines = bytes / sizeof(unsigned long) / 2; register unsigned int a1 __asm__("r8"); @@ -146,7 +153,8 @@ static struct xor_block_template xor_block_arm4regs = { extern struct xor_block_template const xor_block_neon_inner; static void -xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { if (in_interrupt()) { xor_arm4regs_2(bytes, p1, p2); @@ -158,8 +166,9 @@ xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { if (in_interrupt()) { xor_arm4regs_3(bytes, p1, p2, p3); @@ -171,8 +180,10 @@ xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { if (in_interrupt()) { xor_arm4regs_4(bytes, p1, p2, p3, p4); @@ -184,8 +195,11 @@ xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { if (in_interrupt()) { xor_arm4regs_5(bytes, p1, p2, p3, p4, p5); diff --git a/arch/arm/lib/xor-neon.c b/arch/arm/lib/xor-neon.c index b99dd8e1c93f..522510baed49 100644 --- a/arch/arm/lib/xor-neon.c +++ b/arch/arm/lib/xor-neon.c @@ -17,17 +17,11 @@ MODULE_LICENSE("GPL"); /* * Pull in the reference implementations while instructing GCC (through * -ftree-vectorize) to attempt to exploit implicit parallelism and emit - * NEON instructions. + * NEON instructions. Clang does this by default at O2 so no pragma is + * needed. */ -#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 6) +#ifdef CONFIG_CC_IS_GCC #pragma GCC optimize "tree-vectorize" -#else -/* - * While older versions of GCC do not generate incorrect code, they fail to - * recognize the parallel nature of these functions, and emit plain ARM code, - * which is known to be slower than the optimized ARM code in asm-arm/xor.h. - */ -#warning This code requires at least version 4.6 of GCC #endif #pragma GCC diagnostic ignored "-Wunused-variable" diff --git a/arch/arm/mach-pxa/corgi.c b/arch/arm/mach-pxa/corgi.c index 593c7f793da5..44659fbc37ba 100644 --- a/arch/arm/mach-pxa/corgi.c +++ b/arch/arm/mach-pxa/corgi.c @@ -530,6 +530,16 @@ static struct pxa2xx_spi_controller corgi_spi_info = { .num_chipselect = 3, }; +static struct gpiod_lookup_table corgi_spi_gpio_table = { + .dev_id = "pxa2xx-spi.1", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-pxa", CORGI_GPIO_MAX1111_CS, "cs", 2, GPIO_ACTIVE_LOW), + { }, + }, +}; + static void corgi_wait_for_hsync(void) { while (gpio_get_value(CORGI_GPIO_HSYNC)) @@ -548,10 +558,6 @@ static struct ads7846_platform_data corgi_ads7846_info = { .wait_for_sync = corgi_wait_for_hsync, }; -static struct pxa2xx_spi_chip corgi_ads7846_chip = { - .gpio_cs = CORGI_GPIO_ADS7846_CS, -}; - static void corgi_bl_kick_battery(void) { void (*kick_batt)(void); @@ -580,14 +586,6 @@ static struct corgi_lcd_platform_data corgi_lcdcon_info = { .kick_battery = corgi_bl_kick_battery, }; -static struct pxa2xx_spi_chip corgi_lcdcon_chip = { - .gpio_cs = CORGI_GPIO_LCDCON_CS, -}; - -static struct pxa2xx_spi_chip corgi_max1111_chip = { - .gpio_cs = CORGI_GPIO_MAX1111_CS, -}; - static struct spi_board_info corgi_spi_devices[] = { { .modalias = "ads7846", @@ -595,7 +593,6 @@ static struct spi_board_info corgi_spi_devices[] = { .bus_num = 1, .chip_select = 0, .platform_data = &corgi_ads7846_info, - .controller_data= &corgi_ads7846_chip, .irq = PXA_GPIO_TO_IRQ(CORGI_GPIO_TP_INT), }, { .modalias = "corgi-lcd", @@ -603,18 +600,17 @@ static struct spi_board_info corgi_spi_devices[] = { .bus_num = 1, .chip_select = 1, .platform_data = &corgi_lcdcon_info, - .controller_data= &corgi_lcdcon_chip, }, { .modalias = "max1111", .max_speed_hz = 450000, .bus_num = 1, .chip_select = 2, - .controller_data= &corgi_max1111_chip, }, }; static void __init corgi_init_spi(void) { + gpiod_add_lookup_table(&corgi_spi_gpio_table); pxa2xx_set_spi_info(1, &corgi_spi_info); gpiod_add_lookup_table(&corgi_lcdcon_gpio_table); spi_register_board_info(ARRAY_AND_SIZE(corgi_spi_devices)); diff --git a/arch/arm/mach-pxa/hx4700.c b/arch/arm/mach-pxa/hx4700.c index 1d4c5db54be2..e1870fbb19e7 100644 --- a/arch/arm/mach-pxa/hx4700.c +++ b/arch/arm/mach-pxa/hx4700.c @@ -616,7 +616,6 @@ static struct pxa2xx_spi_chip tsc2046_chip = { .tx_threshold = 1, .rx_threshold = 2, .timeout = 64, - .gpio_cs = GPIO88_HX4700_TSC2046_CS, }; static struct spi_board_info tsc2046_board_info[] __initdata = { @@ -635,6 +634,14 @@ static struct pxa2xx_spi_controller pxa_ssp2_master_info = { .enable_dma = 1, }; +static struct gpiod_lookup_table pxa_ssp2_gpio_table = { + .dev_id = "pxa2xx-spi.2", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_HX4700_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW), + { }, + }, +}; + /* * External power */ @@ -896,6 +903,7 @@ static void __init hx4700_init(void) pxa_set_i2c_info(NULL); i2c_register_board_info(0, ARRAY_AND_SIZE(i2c_board_info)); i2c_register_board_info(1, ARRAY_AND_SIZE(pi2c_board_info)); + gpiod_add_lookup_table(&pxa_ssp2_gpio_table); pxa2xx_set_spi_info(2, &pxa_ssp2_master_info); spi_register_board_info(ARRAY_AND_SIZE(tsc2046_board_info)); diff --git a/arch/arm/mach-pxa/icontrol.c b/arch/arm/mach-pxa/icontrol.c index 04a12523cdee..753fe166ab68 100644 --- a/arch/arm/mach-pxa/icontrol.c +++ b/arch/arm/mach-pxa/icontrol.c @@ -13,7 +13,7 @@ #include <linux/irq.h> #include <linux/platform_device.h> #include <linux/property.h> -#include <linux/gpio.h> +#include <linux/gpio/machine.h> #include <asm/mach-types.h> #include <asm/mach/arch.h> @@ -42,7 +42,6 @@ static struct pxa2xx_spi_chip mcp251x_chip_info1 = { .rx_threshold = 128, .dma_burst_size = 8, .timeout = 235, - .gpio_cs = ICONTROL_MCP251x_nCS1 }; static struct pxa2xx_spi_chip mcp251x_chip_info2 = { @@ -50,7 +49,6 @@ static struct pxa2xx_spi_chip mcp251x_chip_info2 = { .rx_threshold = 128, .dma_burst_size = 8, .timeout = 235, - .gpio_cs = ICONTROL_MCP251x_nCS2 }; static struct pxa2xx_spi_chip mcp251x_chip_info3 = { @@ -58,7 +56,6 @@ static struct pxa2xx_spi_chip mcp251x_chip_info3 = { .rx_threshold = 128, .dma_burst_size = 8, .timeout = 235, - .gpio_cs = ICONTROL_MCP251x_nCS3 }; static struct pxa2xx_spi_chip mcp251x_chip_info4 = { @@ -66,7 +63,6 @@ static struct pxa2xx_spi_chip mcp251x_chip_info4 = { .rx_threshold = 128, .dma_burst_size = 8, .timeout = 235, - .gpio_cs = ICONTROL_MCP251x_nCS4 }; static const struct property_entry mcp251x_properties[] = { @@ -143,6 +139,24 @@ struct platform_device pxa_spi_ssp4 = { } }; +static struct gpiod_lookup_table pxa_ssp3_gpio_table = { + .dev_id = "pxa2xx-spi.3", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS1, "cs", 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS2, "cs", 1, GPIO_ACTIVE_LOW), + { }, + }, +}; + +static struct gpiod_lookup_table pxa_ssp4_gpio_table = { + .dev_id = "pxa2xx-spi.4", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS3, "cs", 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-pxa", ICONTROL_MCP251x_nCS4, "cs", 1, GPIO_ACTIVE_LOW), + { }, + }, +}; + static struct platform_device *icontrol_spi_devices[] __initdata = { &pxa_spi_ssp3, &pxa_spi_ssp4, @@ -175,6 +189,8 @@ static mfp_cfg_t mfp_can_cfg[] __initdata = { static void __init icontrol_can_init(void) { pxa3xx_mfp_config(ARRAY_AND_SIZE(mfp_can_cfg)); + gpiod_add_lookup_table(&pxa_ssp3_gpio_table); + gpiod_add_lookup_table(&pxa_ssp4_gpio_table); platform_add_devices(ARRAY_AND_SIZE(icontrol_spi_devices)); spi_register_board_info(ARRAY_AND_SIZE(mcp251x_board_info)); } diff --git a/arch/arm/mach-pxa/littleton.c b/arch/arm/mach-pxa/littleton.c index 793f61375ee8..73f5953b3bb6 100644 --- a/arch/arm/mach-pxa/littleton.c +++ b/arch/arm/mach-pxa/littleton.c @@ -195,7 +195,6 @@ static struct pxa2xx_spi_controller littleton_spi_info = { static struct pxa2xx_spi_chip littleton_tdo24m_chip = { .rx_threshold = 1, .tx_threshold = 1, - .gpio_cs = LITTLETON_GPIO_LCD_CS, }; static struct spi_board_info littleton_spi_devices[] __initdata = { @@ -208,8 +207,17 @@ static struct spi_board_info littleton_spi_devices[] __initdata = { }, }; +static struct gpiod_lookup_table littleton_spi_gpio_table = { + .dev_id = "pxa2xx-spi.2", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", LITTLETON_GPIO_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW), + { }, + }, +}; + static void __init littleton_init_spi(void) { + gpiod_add_lookup_table(&littleton_spi_gpio_table); pxa2xx_set_spi_info(2, &littleton_spi_info); spi_register_board_info(ARRAY_AND_SIZE(littleton_spi_devices)); } diff --git a/arch/arm/mach-pxa/magician.c b/arch/arm/mach-pxa/magician.c index cd9fa465b9b2..200fd35168e0 100644 --- a/arch/arm/mach-pxa/magician.c +++ b/arch/arm/mach-pxa/magician.c @@ -938,8 +938,6 @@ struct pxa2xx_spi_chip tsc2046_chip_info = { .tx_threshold = 1, .rx_threshold = 2, .timeout = 64, - /* NOTICE must be GPIO, incompatibility with hw PXA SPI framing */ - .gpio_cs = GPIO14_MAGICIAN_TSC2046_CS, }; static struct pxa2xx_spi_controller magician_spi_info = { @@ -947,6 +945,15 @@ static struct pxa2xx_spi_controller magician_spi_info = { .enable_dma = 1, }; +static struct gpiod_lookup_table magician_spi_gpio_table = { + .dev_id = "pxa2xx-spi.2", + .table = { + /* NOTICE must be GPIO, incompatibility with hw PXA SPI framing */ + GPIO_LOOKUP_IDX("gpio-pxa", GPIO14_MAGICIAN_TSC2046_CS, "cs", 0, GPIO_ACTIVE_LOW), + { }, + }, +}; + static struct spi_board_info ads7846_spi_board_info[] __initdata = { { .modalias = "ads7846", @@ -1031,6 +1038,7 @@ static void __init magician_init(void) } else pr_err("LCD detection: CPLD mapping failed\n"); + gpiod_add_lookup_table(&magician_spi_gpio_table); pxa2xx_set_spi_info(2, &magician_spi_info); spi_register_board_info(ARRAY_AND_SIZE(ads7846_spi_board_info)); diff --git a/arch/arm/mach-pxa/poodle.c b/arch/arm/mach-pxa/poodle.c index 3a4ecc3c8f8b..58cfa434afde 100644 --- a/arch/arm/mach-pxa/poodle.c +++ b/arch/arm/mach-pxa/poodle.c @@ -197,6 +197,14 @@ static struct pxa2xx_spi_controller poodle_spi_info = { .num_chipselect = 1, }; +static struct gpiod_lookup_table poodle_spi_gpio_table = { + .dev_id = "pxa2xx-spi.1", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", POODLE_GPIO_TP_CS, "cs", 0, GPIO_ACTIVE_LOW), + { }, + }, +}; + static struct ads7846_platform_data poodle_ads7846_info = { .model = 7846, .vref_delay_usecs = 100, @@ -205,23 +213,19 @@ static struct ads7846_platform_data poodle_ads7846_info = { .gpio_pendown = POODLE_GPIO_TP_INT, }; -static struct pxa2xx_spi_chip poodle_ads7846_chip = { - .gpio_cs = POODLE_GPIO_TP_CS, -}; - static struct spi_board_info poodle_spi_devices[] = { { .modalias = "ads7846", .max_speed_hz = 10000, .bus_num = 1, .platform_data = &poodle_ads7846_info, - .controller_data= &poodle_ads7846_chip, .irq = PXA_GPIO_TO_IRQ(POODLE_GPIO_TP_INT), }, }; static void __init poodle_init_spi(void) { + gpiod_add_lookup_table(&poodle_spi_gpio_table); pxa2xx_set_spi_info(1, &poodle_spi_info); spi_register_board_info(ARRAY_AND_SIZE(poodle_spi_devices)); } diff --git a/arch/arm/mach-pxa/spitz.c b/arch/arm/mach-pxa/spitz.c index 371008e9bb02..a648e7094e84 100644 --- a/arch/arm/mach-pxa/spitz.c +++ b/arch/arm/mach-pxa/spitz.c @@ -510,10 +510,6 @@ static struct ads7846_platform_data spitz_ads7846_info = { .wait_for_sync = spitz_ads7846_wait_for_hsync, }; -static struct pxa2xx_spi_chip spitz_ads7846_chip = { - .gpio_cs = SPITZ_GPIO_ADS7846_CS, -}; - static void spitz_bl_kick_battery(void) { void (*kick_batt)(void); @@ -555,14 +551,6 @@ static struct corgi_lcd_platform_data spitz_lcdcon_info = { .kick_battery = spitz_bl_kick_battery, }; -static struct pxa2xx_spi_chip spitz_lcdcon_chip = { - .gpio_cs = SPITZ_GPIO_LCDCON_CS, -}; - -static struct pxa2xx_spi_chip spitz_max1111_chip = { - .gpio_cs = SPITZ_GPIO_MAX1111_CS, -}; - static struct spi_board_info spitz_spi_devices[] = { { .modalias = "ads7846", @@ -570,7 +558,6 @@ static struct spi_board_info spitz_spi_devices[] = { .bus_num = 2, .chip_select = 0, .platform_data = &spitz_ads7846_info, - .controller_data = &spitz_ads7846_chip, .irq = PXA_GPIO_TO_IRQ(SPITZ_GPIO_TP_INT), }, { .modalias = "corgi-lcd", @@ -578,13 +565,11 @@ static struct spi_board_info spitz_spi_devices[] = { .bus_num = 2, .chip_select = 1, .platform_data = &spitz_lcdcon_info, - .controller_data = &spitz_lcdcon_chip, }, { .modalias = "max1111", .max_speed_hz = 450000, .bus_num = 2, .chip_select = 2, - .controller_data = &spitz_max1111_chip, }, }; @@ -592,6 +577,16 @@ static struct pxa2xx_spi_controller spitz_spi_info = { .num_chipselect = 3, }; +static struct gpiod_lookup_table spitz_spi_gpio_table = { + .dev_id = "pxa2xx-spi.2", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_ADS7846_CS, "cs", 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_LCDCON_CS, "cs", 1, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("gpio-pxa", SPITZ_GPIO_MAX1111_CS, "cs", 2, GPIO_ACTIVE_LOW), + { }, + }, +}; + static void __init spitz_spi_init(void) { if (machine_is_akita()) @@ -599,6 +594,7 @@ static void __init spitz_spi_init(void) else gpiod_add_lookup_table(&spitz_lcdcon_gpio_table); + gpiod_add_lookup_table(&spitz_spi_gpio_table); pxa2xx_set_spi_info(2, &spitz_spi_info); spi_register_board_info(ARRAY_AND_SIZE(spitz_spi_devices)); } diff --git a/arch/arm/mach-pxa/stargate2.c b/arch/arm/mach-pxa/stargate2.c index 8ca02ec1d44c..b43e2f4536a5 100644 --- a/arch/arm/mach-pxa/stargate2.c +++ b/arch/arm/mach-pxa/stargate2.c @@ -346,6 +346,22 @@ static struct pxa2xx_spi_controller pxa_ssp_master_2_info = { .num_chipselect = 1, }; +static struct gpiod_lookup_table pxa_ssp1_gpio_table = { + .dev_id = "pxa2xx-spi.1", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", 24, "cs", 0, GPIO_ACTIVE_LOW), + { }, + }, +}; + +static struct gpiod_lookup_table pxa_ssp3_gpio_table = { + .dev_id = "pxa2xx-spi.3", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", 39, "cs", 0, GPIO_ACTIVE_LOW), + { }, + }, +}; + /* An upcoming kernel change will scrap SFRM usage so these * drivers have been moved to use GPIOs */ static struct pxa2xx_spi_chip staccel_chip_info = { @@ -353,7 +369,6 @@ static struct pxa2xx_spi_chip staccel_chip_info = { .rx_threshold = 8, .dma_burst_size = 8, .timeout = 235, - .gpio_cs = 24, }; static struct pxa2xx_spi_chip cc2420_info = { @@ -361,7 +376,6 @@ static struct pxa2xx_spi_chip cc2420_info = { .rx_threshold = 8, .dma_burst_size = 8, .timeout = 235, - .gpio_cs = 39, }; static struct spi_board_info spi_board_info[] __initdata = { @@ -410,6 +424,8 @@ static void __init imote2_stargate2_init(void) pxa_set_btuart_info(NULL); pxa_set_stuart_info(NULL); + gpiod_add_lookup_table(&pxa_ssp1_gpio_table); + gpiod_add_lookup_table(&pxa_ssp3_gpio_table); pxa2xx_set_spi_info(1, &pxa_ssp_master_0_info); pxa2xx_set_spi_info(2, &pxa_ssp_master_1_info); pxa2xx_set_spi_info(3, &pxa_ssp_master_2_info); diff --git a/arch/arm/mach-pxa/z2.c b/arch/arm/mach-pxa/z2.c index 8e74fbb0a96e..7eaeda269927 100644 --- a/arch/arm/mach-pxa/z2.c +++ b/arch/arm/mach-pxa/z2.c @@ -570,7 +570,6 @@ static struct pxa2xx_spi_chip z2_lbs_chip_info = { .rx_threshold = 8, .tx_threshold = 8, .timeout = 1000, - .gpio_cs = GPIO24_ZIPITZ2_WIFI_CS, }; static struct libertas_spi_platform_data z2_lbs_pdata = { @@ -584,7 +583,6 @@ static struct pxa2xx_spi_chip lms283_chip_info = { .rx_threshold = 1, .tx_threshold = 1, .timeout = 64, - .gpio_cs = GPIO88_ZIPITZ2_LCD_CS, }; static struct gpiod_lookup_table lms283_gpio_table = { @@ -624,8 +622,26 @@ static struct pxa2xx_spi_controller pxa_ssp2_master_info = { .num_chipselect = 1, }; +static struct gpiod_lookup_table pxa_ssp1_gpio_table = { + .dev_id = "pxa2xx-spi.1", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", GPIO24_ZIPITZ2_WIFI_CS, "cs", 0, GPIO_ACTIVE_LOW), + { }, + }, +}; + +static struct gpiod_lookup_table pxa_ssp2_gpio_table = { + .dev_id = "pxa2xx-spi.2", + .table = { + GPIO_LOOKUP_IDX("gpio-pxa", GPIO88_ZIPITZ2_LCD_CS, "cs", 0, GPIO_ACTIVE_LOW), + { }, + }, +}; + static void __init z2_spi_init(void) { + gpiod_add_lookup_table(&pxa_ssp1_gpio_table); + gpiod_add_lookup_table(&pxa_ssp2_gpio_table); pxa2xx_set_spi_info(1, &pxa_ssp1_master_info); pxa2xx_set_spi_info(2, &pxa_ssp2_master_info); gpiod_add_lookup_table(&lms283_gpio_table); diff --git a/arch/arm/mach-s3c/Kconfig b/arch/arm/mach-s3c/Kconfig index 25606e668cf9..1899fc3f44fd 100644 --- a/arch/arm/mach-s3c/Kconfig +++ b/arch/arm/mach-s3c/Kconfig @@ -191,18 +191,6 @@ config S3C64XX_DEV_SPI0 Compile in platform device definitions for S3C64XX's type SPI controller 0 -config S3C64XX_DEV_SPI1 - bool - help - Compile in platform device definitions for S3C64XX's type - SPI controller 1 - -config S3C64XX_DEV_SPI2 - bool - help - Compile in platform device definitions for S3C64XX's type - SPI controller 2 - config SAMSUNG_DEV_TS bool help diff --git a/arch/arm/mach-s3c/devs.c b/arch/arm/mach-s3c/devs.c index 06dec64848f9..1e266fc24f9b 100644 --- a/arch/arm/mach-s3c/devs.c +++ b/arch/arm/mach-s3c/devs.c @@ -1107,8 +1107,7 @@ struct platform_device s3c64xx_device_spi0 = { }, }; -void __init s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, - int num_cs) +void __init s3c64xx_spi0_set_platdata(int src_clk_nr, int num_cs) { struct s3c64xx_spi_info pd; @@ -1120,80 +1119,8 @@ void __init s3c64xx_spi0_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, pd.num_cs = num_cs; pd.src_clk_nr = src_clk_nr; - pd.cfg_gpio = (cfg_gpio) ? cfg_gpio : s3c64xx_spi0_cfg_gpio; + pd.cfg_gpio = s3c64xx_spi0_cfg_gpio; s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi0); } #endif /* CONFIG_S3C64XX_DEV_SPI0 */ - -#ifdef CONFIG_S3C64XX_DEV_SPI1 -static struct resource s3c64xx_spi1_resource[] = { - [0] = DEFINE_RES_MEM(S3C_PA_SPI1, SZ_256), - [1] = DEFINE_RES_IRQ(IRQ_SPI1), -}; - -struct platform_device s3c64xx_device_spi1 = { - .name = "s3c6410-spi", - .id = 1, - .num_resources = ARRAY_SIZE(s3c64xx_spi1_resource), - .resource = s3c64xx_spi1_resource, - .dev = { - .dma_mask = &samsung_device_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - }, -}; - -void __init s3c64xx_spi1_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, - int num_cs) -{ - struct s3c64xx_spi_info pd; - - /* Reject invalid configuration */ - if (!num_cs || src_clk_nr < 0) { - pr_err("%s: Invalid SPI configuration\n", __func__); - return; - } - - pd.num_cs = num_cs; - pd.src_clk_nr = src_clk_nr; - pd.cfg_gpio = (cfg_gpio) ? cfg_gpio : s3c64xx_spi1_cfg_gpio; - - s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi1); -} -#endif /* CONFIG_S3C64XX_DEV_SPI1 */ - -#ifdef CONFIG_S3C64XX_DEV_SPI2 -static struct resource s3c64xx_spi2_resource[] = { - [0] = DEFINE_RES_MEM(S3C_PA_SPI2, SZ_256), - [1] = DEFINE_RES_IRQ(IRQ_SPI2), -}; - -struct platform_device s3c64xx_device_spi2 = { - .name = "s3c6410-spi", - .id = 2, - .num_resources = ARRAY_SIZE(s3c64xx_spi2_resource), - .resource = s3c64xx_spi2_resource, - .dev = { - .dma_mask = &samsung_device_dma_mask, - .coherent_dma_mask = DMA_BIT_MASK(32), - }, -}; - -void __init s3c64xx_spi2_set_platdata(int (*cfg_gpio)(void), int src_clk_nr, - int num_cs) -{ - struct s3c64xx_spi_info pd; - - /* Reject invalid configuration */ - if (!num_cs || src_clk_nr < 0) { - pr_err("%s: Invalid SPI configuration\n", __func__); - return; - } - - pd.num_cs = num_cs; - pd.src_clk_nr = src_clk_nr; - pd.cfg_gpio = (cfg_gpio) ? cfg_gpio : s3c64xx_spi2_cfg_gpio; - - s3c_set_platdata(&pd, sizeof(pd), &s3c64xx_device_spi2); -} -#endif /* CONFIG_S3C64XX_DEV_SPI2 */ diff --git a/arch/arm/mach-s3c/mach-crag6410-module.c b/arch/arm/mach-s3c/mach-crag6410-module.c index 407ad493493e..5d1d4b67a4b7 100644 --- a/arch/arm/mach-s3c/mach-crag6410-module.c +++ b/arch/arm/mach-s3c/mach-crag6410-module.c @@ -32,10 +32,6 @@ #include "crag6410.h" -static struct s3c64xx_spi_csinfo wm0010_spi_csinfo = { - .line = S3C64XX_GPC(3), -}; - static struct wm0010_pdata wm0010_pdata = { .gpio_reset = S3C64XX_GPN(6), .reset_active_high = 1, /* Active high for Glenfarclas Rev 2 */ @@ -49,7 +45,6 @@ static struct spi_board_info wm1253_devs[] = { .chip_select = 0, .mode = SPI_MODE_0, .irq = S3C_EINT(4), - .controller_data = &wm0010_spi_csinfo, .platform_data = &wm0010_pdata, }, }; @@ -62,7 +57,6 @@ static struct spi_board_info balblair_devs[] = { .chip_select = 0, .mode = SPI_MODE_0, .irq = S3C_EINT(4), - .controller_data = &wm0010_spi_csinfo, .platform_data = &wm0010_pdata, }, }; @@ -229,10 +223,6 @@ static struct arizona_pdata wm5102_reva_pdata = { }, }; -static struct s3c64xx_spi_csinfo codec_spi_csinfo = { - .line = S3C64XX_GPN(5), -}; - static struct spi_board_info wm5102_reva_spi_devs[] = { [0] = { .modalias = "wm5102", @@ -242,7 +232,6 @@ static struct spi_board_info wm5102_reva_spi_devs[] = { .mode = SPI_MODE_0, .irq = GLENFARCLAS_PMIC_IRQ_BASE + WM831X_IRQ_GPIO_2, - .controller_data = &codec_spi_csinfo, .platform_data = &wm5102_reva_pdata, }, }; @@ -275,7 +264,6 @@ static struct spi_board_info wm5102_spi_devs[] = { .mode = SPI_MODE_0, .irq = GLENFARCLAS_PMIC_IRQ_BASE + WM831X_IRQ_GPIO_2, - .controller_data = &codec_spi_csinfo, .platform_data = &wm5102_pdata, }, }; @@ -298,7 +286,6 @@ static struct spi_board_info wm5110_spi_devs[] = { .mode = SPI_MODE_0, .irq = GLENFARCLAS_PMIC_IRQ_BASE + WM831X_IRQ_GPIO_2, - .controller_data = &codec_spi_csinfo, .platform_data = &wm5102_reva_pdata, }, }; diff --git a/arch/arm/mach-s3c/mach-crag6410.c b/arch/arm/mach-s3c/mach-crag6410.c index 4a12c75d407f..e3e0fe897bcc 100644 --- a/arch/arm/mach-s3c/mach-crag6410.c +++ b/arch/arm/mach-s3c/mach-crag6410.c @@ -825,6 +825,15 @@ static const struct gpio_led_platform_data gpio_leds_pdata = { static struct dwc2_hsotg_plat crag6410_hsotg_pdata; +static struct gpiod_lookup_table crag_spi0_gpiod_table = { + .dev_id = "s3c6410-spi.0", + .table = { + GPIO_LOOKUP_IDX("GPIOC", 3, "cs", 0, GPIO_ACTIVE_LOW), + GPIO_LOOKUP_IDX("GPION", 5, "cs", 1, GPIO_ACTIVE_LOW), + { }, + }, +}; + static void __init crag6410_machine_init(void) { /* Open drain IRQs need pullups */ @@ -856,7 +865,9 @@ static void __init crag6410_machine_init(void) i2c_register_board_info(1, i2c_devs1, ARRAY_SIZE(i2c_devs1)); samsung_keypad_set_platdata(&crag6410_keypad_data); - s3c64xx_spi0_set_platdata(NULL, 0, 2); + + gpiod_add_lookup_table(&crag_spi0_gpiod_table); + s3c64xx_spi0_set_platdata(0, 2); pwm_add_table(crag6410_pwm_lookup, ARRAY_SIZE(crag6410_pwm_lookup)); platform_add_devices(crag6410_devices, ARRAY_SIZE(crag6410_devices)); diff --git a/arch/arm/mach-s3c/setup-spi-s3c64xx.c b/arch/arm/mach-s3c/setup-spi-s3c64xx.c index efcf78d41585..497aff71c29c 100644 --- a/arch/arm/mach-s3c/setup-spi-s3c64xx.c +++ b/arch/arm/mach-s3c/setup-spi-s3c64xx.c @@ -16,12 +16,3 @@ int s3c64xx_spi0_cfg_gpio(void) return 0; } #endif - -#ifdef CONFIG_S3C64XX_DEV_SPI1 -int s3c64xx_spi1_cfg_gpio(void) -{ - s3c_gpio_cfgall_range(S3C64XX_GPC(4), 3, - S3C_GPIO_SFN(2), S3C_GPIO_PULL_UP); - return 0; -} -#endif diff --git a/arch/arm/mach-s3c/spi-core-s3c24xx.h b/arch/arm/mach-s3c/spi-core-s3c24xx.h index 057667469cc3..919c5fd0c9af 100644 --- a/arch/arm/mach-s3c/spi-core-s3c24xx.h +++ b/arch/arm/mach-s3c/spi-core-s3c24xx.h @@ -16,12 +16,6 @@ static inline void s3c24xx_spi_setname(char *name) #ifdef CONFIG_S3C64XX_DEV_SPI0 s3c64xx_device_spi0.name = name; #endif -#ifdef CONFIG_S3C64XX_DEV_SPI1 - s3c64xx_device_spi1.name = name; -#endif -#ifdef CONFIG_S3C64XX_DEV_SPI2 - s3c64xx_device_spi2.name = name; -#endif } #endif /* __PLAT_S3C_SPI_CORE_S3C24XX_H */ diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 6d0cb0f7bc54..fe249ea91908 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -164,47 +164,6 @@ phys_addr_t __init arm_memblock_steal(phys_addr_t size, phys_addr_t align) return phys; } -static void __init arm_initrd_init(void) -{ -#ifdef CONFIG_BLK_DEV_INITRD - phys_addr_t start; - unsigned long size; - - initrd_start = initrd_end = 0; - - if (!phys_initrd_size) - return; - - /* - * Round the memory region to page boundaries as per free_initrd_mem() - * This allows us to detect whether the pages overlapping the initrd - * are in use, but more importantly, reserves the entire set of pages - * as we don't want these pages allocated for other purposes. - */ - start = round_down(phys_initrd_start, PAGE_SIZE); - size = phys_initrd_size + (phys_initrd_start - start); - size = round_up(size, PAGE_SIZE); - - if (!memblock_is_region_memory(start, size)) { - pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region - disabling initrd\n", - (u64)start, size); - return; - } - - if (memblock_is_region_reserved(start, size)) { - pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region - disabling initrd\n", - (u64)start, size); - return; - } - - memblock_reserve(start, size); - - /* Now convert initrd to virtual addresses */ - initrd_start = __phys_to_virt(phys_initrd_start); - initrd_end = initrd_start + phys_initrd_size; -#endif -} - #ifdef CONFIG_CPU_ICACHE_MISMATCH_WORKAROUND void check_cpu_icache_size(int cpuid) { @@ -226,7 +185,7 @@ void __init arm_memblock_init(const struct machine_desc *mdesc) /* Register the kernel text, kernel data and initrd with memblock. */ memblock_reserve(__pa(KERNEL_START), KERNEL_END - KERNEL_START); - arm_initrd_init(); + reserve_initrd_mem(); arm_mm_memblock_reserve(); diff --git a/arch/arm/vdso/Makefile b/arch/arm/vdso/Makefile index 7c9e395b77f7..ec52b776f926 100644 --- a/arch/arm/vdso/Makefile +++ b/arch/arm/vdso/Makefile @@ -18,7 +18,7 @@ ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO32 ldflags-$(CONFIG_CPU_ENDIAN_BE8) := --be8 ldflags-y := -Bsymbolic --no-undefined -soname=linux-vdso.so.1 \ - -z max-page-size=4096 -nostdlib -shared $(ldflags-y) \ + -z max-page-size=4096 -shared $(ldflags-y) \ --hash-style=sysv --build-id=sha1 \ -T diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index c842878f8133..13aa8d955c46 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -10,6 +10,7 @@ config ARM64 select ACPI_SPCR_TABLE if ACPI select ACPI_PPTT if ACPI select ARCH_HAS_DEBUG_WX + select ARCH_BINFMT_ELF_EXTRA_PHDRS select ARCH_BINFMT_ELF_STATE select ARCH_CORRECT_STACKTRACE_ON_KRETPROBE select ARCH_ENABLE_HUGEPAGE_MIGRATION if HUGETLB_PAGE && MIGRATION @@ -18,6 +19,7 @@ config ARM64 select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 select ARCH_ENABLE_THP_MIGRATION if TRANSPARENT_HUGEPAGE select ARCH_HAS_CACHE_LINE_SIZE + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DMA_PREP_COHERENT @@ -192,6 +194,7 @@ config ARM64 select HAVE_PERF_EVENTS select HAVE_PERF_REGS select HAVE_PERF_USER_STACK_DUMP + select HAVE_PREEMPT_DYNAMIC_KEY select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_POSIX_CPU_TIMERS_TASK_WORK select HAVE_FUNCTION_ARG_ACCESS_API @@ -891,13 +894,17 @@ config CAVIUM_ERRATUM_23144 If unsure, say Y. config CAVIUM_ERRATUM_23154 - bool "Cavium erratum 23154: Access to ICC_IAR1_EL1 is not sync'ed" + bool "Cavium errata 23154 and 38545: GICv3 lacks HW synchronisation" default y help - The gicv3 of ThunderX requires a modified version for + The ThunderX GICv3 implementation requires a modified version for reading the IAR status to ensure data synchronization (access to icc_iar1_el1 is not sync'ed before and after). + It also suffers from erratum 38545 (also present on Marvell's + OcteonTX and OcteonTX2), resulting in deactivated interrupts being + spuriously presented to the CPU interface. + If unsure, say Y. config CAVIUM_ERRATUM_27456 @@ -1252,7 +1259,7 @@ config HW_PERF_EVENTS def_bool y depends on ARM_PMU -# Supported by clang >= 7.0 +# Supported by clang >= 7.0 or GCC >= 12.0.0 config CC_HAVE_SHADOW_CALL_STACK def_bool $(cc-option, -fsanitize=shadow-call-stack -ffixed-x18) diff --git a/arch/arm64/boot/dts/apple/t8103.dtsi b/arch/arm64/boot/dts/apple/t8103.dtsi index 19afbc91020a..9f8f4145db88 100644 --- a/arch/arm64/boot/dts/apple/t8103.dtsi +++ b/arch/arm64/boot/dts/apple/t8103.dtsi @@ -97,6 +97,18 @@ <AIC_FIQ AIC_TMR_HV_VIRT IRQ_TYPE_LEVEL_HIGH>; }; + pmu-e { + compatible = "apple,icestorm-pmu"; + interrupt-parent = <&aic>; + interrupts = <AIC_FIQ AIC_CPU_PMU_E IRQ_TYPE_LEVEL_HIGH>; + }; + + pmu-p { + compatible = "apple,firestorm-pmu"; + interrupt-parent = <&aic>; + interrupts = <AIC_FIQ AIC_CPU_PMU_P IRQ_TYPE_LEVEL_HIGH>; + }; + clkref: clock-ref { compatible = "fixed-clock"; #clock-cells = <0>; @@ -213,6 +225,18 @@ interrupt-controller; reg = <0x2 0x3b100000 0x0 0x8000>; power-domains = <&ps_aic>; + + affinities { + e-core-pmu-affinity { + apple,fiq-index = <AIC_CPU_PMU_E>; + cpus = <&cpu0 &cpu1 &cpu2 &cpu3>; + }; + + p-core-pmu-affinity { + apple,fiq-index = <AIC_CPU_PMU_P>; + cpus = <&cpu4 &cpu5 &cpu6 &cpu7>; + }; + }; }; pmgr: power-management@23b700000 { diff --git a/arch/arm64/crypto/Kconfig b/arch/arm64/crypto/Kconfig index addfa413650b..2a965aa0188d 100644 --- a/arch/arm64/crypto/Kconfig +++ b/arch/arm64/crypto/Kconfig @@ -45,7 +45,7 @@ config CRYPTO_SM3_ARM64_CE tristate "SM3 digest algorithm (ARMv8.2 Crypto Extensions)" depends on KERNEL_MODE_NEON select CRYPTO_HASH - select CRYPTO_SM3 + select CRYPTO_LIB_SM3 config CRYPTO_SM4_ARM64_CE tristate "SM4 symmetric cipher (ARMv8.2 Crypto Extensions)" diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 30b7cc6a7079..561dd2332571 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -24,7 +24,6 @@ #ifdef USE_V8_CRYPTO_EXTENSIONS #define MODE "ce" #define PRIO 300 -#define STRIDE 5 #define aes_expandkey ce_aes_expandkey #define aes_ecb_encrypt ce_aes_ecb_encrypt #define aes_ecb_decrypt ce_aes_ecb_decrypt @@ -42,7 +41,6 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); #else #define MODE "neon" #define PRIO 200 -#define STRIDE 4 #define aes_ecb_encrypt neon_aes_ecb_encrypt #define aes_ecb_decrypt neon_aes_ecb_decrypt #define aes_cbc_encrypt neon_aes_cbc_encrypt @@ -89,7 +87,7 @@ asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int bytes, u8 const iv[]); asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], - int rounds, int bytes, u8 ctr[], u8 finalbuf[]); + int rounds, int bytes, u8 ctr[]); asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, int bytes, u32 const rk2[], u8 iv[], @@ -458,26 +456,21 @@ static int __maybe_unused ctr_encrypt(struct skcipher_request *req) unsigned int nbytes = walk.nbytes; u8 *dst = walk.dst.virt.addr; u8 buf[AES_BLOCK_SIZE]; - unsigned int tail; if (unlikely(nbytes < AES_BLOCK_SIZE)) - src = memcpy(buf, src, nbytes); + src = dst = memcpy(buf + sizeof(buf) - nbytes, + src, nbytes); else if (nbytes < walk.total) nbytes &= ~(AES_BLOCK_SIZE - 1); kernel_neon_begin(); aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, - walk.iv, buf); + walk.iv); kernel_neon_end(); - tail = nbytes % (STRIDE * AES_BLOCK_SIZE); - if (tail > 0 && tail < AES_BLOCK_SIZE) - /* - * The final partial block could not be returned using - * an overlapping store, so it was passed via buf[] - * instead. - */ - memcpy(dst + nbytes - tail, buf, tail); + if (unlikely(nbytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, + buf + sizeof(buf) - nbytes, nbytes); err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } @@ -983,6 +976,7 @@ module_cpu_feature_match(AES, aes_init); module_init(aes_init); EXPORT_SYMBOL(neon_aes_ecb_encrypt); EXPORT_SYMBOL(neon_aes_cbc_encrypt); +EXPORT_SYMBOL(neon_aes_ctr_encrypt); EXPORT_SYMBOL(neon_aes_xts_encrypt); EXPORT_SYMBOL(neon_aes_xts_decrypt); #endif diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index ff01f0167ba2..dc35eb0245c5 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -321,7 +321,7 @@ AES_FUNC_END(aes_cbc_cts_decrypt) /* * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int bytes, u8 ctr[], u8 finalbuf[]) + * int bytes, u8 ctr[]) */ AES_FUNC_START(aes_ctr_encrypt) @@ -414,8 +414,8 @@ ST5( st1 {v4.16b}, [x0], #16 ) .Lctrtail: /* XOR up to MAX_STRIDE * 16 - 1 bytes of in/output with v0 ... v3/v4 */ mov x16, #16 - ands x13, x4, #0xf - csel x13, x13, x16, ne + ands x6, x4, #0xf + csel x13, x6, x16, ne ST5( cmp w4, #64 - (MAX_STRIDE << 4) ) ST5( csel x14, x16, xzr, gt ) @@ -424,10 +424,10 @@ ST5( csel x14, x16, xzr, gt ) cmp w4, #32 - (MAX_STRIDE << 4) csel x16, x16, xzr, gt cmp w4, #16 - (MAX_STRIDE << 4) - ble .Lctrtail1x adr_l x12, .Lcts_permute_table add x12, x12, x13 + ble .Lctrtail1x ST5( ld1 {v5.16b}, [x1], x14 ) ld1 {v6.16b}, [x1], x15 @@ -462,11 +462,19 @@ ST5( st1 {v5.16b}, [x0], x14 ) b .Lctrout .Lctrtail1x: - csel x0, x0, x6, eq // use finalbuf if less than a full block + sub x7, x6, #16 + csel x6, x6, x7, eq + add x1, x1, x6 + add x0, x0, x6 ld1 {v5.16b}, [x1] + ld1 {v6.16b}, [x0] ST5( mov v3.16b, v4.16b ) encrypt_block v3, w3, x2, x8, w7 + ld1 {v10.16b-v11.16b}, [x12] + tbl v3.16b, {v3.16b}, v10.16b + sshr v11.16b, v11.16b, #7 eor v5.16b, v5.16b, v3.16b + bif v5.16b, v6.16b, v11.16b st1 {v5.16b}, [x0] b .Lctrout AES_FUNC_END(aes_ctr_encrypt) diff --git a/arch/arm64/crypto/aes-neonbs-core.S b/arch/arm64/crypto/aes-neonbs-core.S index a3405b8c344b..d427f4556b6e 100644 --- a/arch/arm64/crypto/aes-neonbs-core.S +++ b/arch/arm64/crypto/aes-neonbs-core.S @@ -735,119 +735,67 @@ SYM_FUNC_END(aesbs_cbc_decrypt) * int blocks, u8 iv[]) */ SYM_FUNC_START_LOCAL(__xts_crypt8) - mov x6, #1 - lsl x6, x6, x23 - subs w23, w23, #8 - csel x23, x23, xzr, pl - csel x6, x6, xzr, mi + movi v18.2s, #0x1 + movi v19.2s, #0x87 + uzp1 v18.4s, v18.4s, v19.4s + + ld1 {v0.16b-v3.16b}, [x1], #64 + ld1 {v4.16b-v7.16b}, [x1], #64 + + next_tweak v26, v25, v18, v19 + next_tweak v27, v26, v18, v19 + next_tweak v28, v27, v18, v19 + next_tweak v29, v28, v18, v19 + next_tweak v30, v29, v18, v19 + next_tweak v31, v30, v18, v19 + next_tweak v16, v31, v18, v19 + next_tweak v17, v16, v18, v19 - ld1 {v0.16b}, [x20], #16 - next_tweak v26, v25, v30, v31 eor v0.16b, v0.16b, v25.16b - tbnz x6, #1, 0f - - ld1 {v1.16b}, [x20], #16 - next_tweak v27, v26, v30, v31 eor v1.16b, v1.16b, v26.16b - tbnz x6, #2, 0f - - ld1 {v2.16b}, [x20], #16 - next_tweak v28, v27, v30, v31 eor v2.16b, v2.16b, v27.16b - tbnz x6, #3, 0f - - ld1 {v3.16b}, [x20], #16 - next_tweak v29, v28, v30, v31 eor v3.16b, v3.16b, v28.16b - tbnz x6, #4, 0f - - ld1 {v4.16b}, [x20], #16 - str q29, [sp, #.Lframe_local_offset] eor v4.16b, v4.16b, v29.16b - next_tweak v29, v29, v30, v31 - tbnz x6, #5, 0f - - ld1 {v5.16b}, [x20], #16 - str q29, [sp, #.Lframe_local_offset + 16] - eor v5.16b, v5.16b, v29.16b - next_tweak v29, v29, v30, v31 - tbnz x6, #6, 0f - - ld1 {v6.16b}, [x20], #16 - str q29, [sp, #.Lframe_local_offset + 32] - eor v6.16b, v6.16b, v29.16b - next_tweak v29, v29, v30, v31 - tbnz x6, #7, 0f + eor v5.16b, v5.16b, v30.16b + eor v6.16b, v6.16b, v31.16b + eor v7.16b, v7.16b, v16.16b - ld1 {v7.16b}, [x20], #16 - str q29, [sp, #.Lframe_local_offset + 48] - eor v7.16b, v7.16b, v29.16b - next_tweak v29, v29, v30, v31 + stp q16, q17, [sp, #16] -0: mov bskey, x21 - mov rounds, x22 + mov bskey, x2 + mov rounds, x3 br x16 SYM_FUNC_END(__xts_crypt8) .macro __xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7 - frame_push 6, 64 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - mov x24, x5 + stp x29, x30, [sp, #-48]! + mov x29, sp - movi v30.2s, #0x1 - movi v25.2s, #0x87 - uzp1 v30.4s, v30.4s, v25.4s - ld1 {v25.16b}, [x24] + ld1 {v25.16b}, [x5] -99: adr x16, \do8 +0: adr x16, \do8 bl __xts_crypt8 - ldp q16, q17, [sp, #.Lframe_local_offset] - ldp q18, q19, [sp, #.Lframe_local_offset + 32] + eor v16.16b, \o0\().16b, v25.16b + eor v17.16b, \o1\().16b, v26.16b + eor v18.16b, \o2\().16b, v27.16b + eor v19.16b, \o3\().16b, v28.16b - eor \o0\().16b, \o0\().16b, v25.16b - eor \o1\().16b, \o1\().16b, v26.16b - eor \o2\().16b, \o2\().16b, v27.16b - eor \o3\().16b, \o3\().16b, v28.16b + ldp q24, q25, [sp, #16] - st1 {\o0\().16b}, [x19], #16 - mov v25.16b, v26.16b - tbnz x6, #1, 1f - st1 {\o1\().16b}, [x19], #16 - mov v25.16b, v27.16b - tbnz x6, #2, 1f - st1 {\o2\().16b}, [x19], #16 - mov v25.16b, v28.16b - tbnz x6, #3, 1f - st1 {\o3\().16b}, [x19], #16 - mov v25.16b, v29.16b - tbnz x6, #4, 1f + eor v20.16b, \o4\().16b, v29.16b + eor v21.16b, \o5\().16b, v30.16b + eor v22.16b, \o6\().16b, v31.16b + eor v23.16b, \o7\().16b, v24.16b - eor \o4\().16b, \o4\().16b, v16.16b - eor \o5\().16b, \o5\().16b, v17.16b - eor \o6\().16b, \o6\().16b, v18.16b - eor \o7\().16b, \o7\().16b, v19.16b + st1 {v16.16b-v19.16b}, [x0], #64 + st1 {v20.16b-v23.16b}, [x0], #64 - st1 {\o4\().16b}, [x19], #16 - tbnz x6, #5, 1f - st1 {\o5\().16b}, [x19], #16 - tbnz x6, #6, 1f - st1 {\o6\().16b}, [x19], #16 - tbnz x6, #7, 1f - st1 {\o7\().16b}, [x19], #16 + subs x4, x4, #8 + b.gt 0b - cbz x23, 1f - st1 {v25.16b}, [x24] - - b 99b - -1: st1 {v25.16b}, [x24] - frame_pop + st1 {v25.16b}, [x5] + ldp x29, x30, [sp], #48 ret .endm @@ -869,133 +817,51 @@ SYM_FUNC_END(aesbs_xts_decrypt) /* * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], - * int rounds, int blocks, u8 iv[], u8 final[]) + * int rounds, int blocks, u8 iv[]) */ SYM_FUNC_START(aesbs_ctr_encrypt) - frame_push 8 - - mov x19, x0 - mov x20, x1 - mov x21, x2 - mov x22, x3 - mov x23, x4 - mov x24, x5 - mov x25, x6 + stp x29, x30, [sp, #-16]! + mov x29, sp - cmp x25, #0 - cset x26, ne - add x23, x23, x26 // do one extra block if final - - ldp x7, x8, [x24] - ld1 {v0.16b}, [x24] + ldp x7, x8, [x5] + ld1 {v0.16b}, [x5] CPU_LE( rev x7, x7 ) CPU_LE( rev x8, x8 ) adds x8, x8, #1 adc x7, x7, xzr -99: mov x9, #1 - lsl x9, x9, x23 - subs w23, w23, #8 - csel x23, x23, xzr, pl - csel x9, x9, xzr, le - - tbnz x9, #1, 0f - next_ctr v1 - tbnz x9, #2, 0f +0: next_ctr v1 next_ctr v2 - tbnz x9, #3, 0f next_ctr v3 - tbnz x9, #4, 0f next_ctr v4 - tbnz x9, #5, 0f next_ctr v5 - tbnz x9, #6, 0f next_ctr v6 - tbnz x9, #7, 0f next_ctr v7 -0: mov bskey, x21 - mov rounds, x22 + mov bskey, x2 + mov rounds, x3 bl aesbs_encrypt8 - lsr x9, x9, x26 // disregard the extra block - tbnz x9, #0, 0f - - ld1 {v8.16b}, [x20], #16 - eor v0.16b, v0.16b, v8.16b - st1 {v0.16b}, [x19], #16 - tbnz x9, #1, 1f + ld1 { v8.16b-v11.16b}, [x1], #64 + ld1 {v12.16b-v15.16b}, [x1], #64 - ld1 {v9.16b}, [x20], #16 - eor v1.16b, v1.16b, v9.16b - st1 {v1.16b}, [x19], #16 - tbnz x9, #2, 2f + eor v8.16b, v0.16b, v8.16b + eor v9.16b, v1.16b, v9.16b + eor v10.16b, v4.16b, v10.16b + eor v11.16b, v6.16b, v11.16b + eor v12.16b, v3.16b, v12.16b + eor v13.16b, v7.16b, v13.16b + eor v14.16b, v2.16b, v14.16b + eor v15.16b, v5.16b, v15.16b - ld1 {v10.16b}, [x20], #16 - eor v4.16b, v4.16b, v10.16b - st1 {v4.16b}, [x19], #16 - tbnz x9, #3, 3f + st1 { v8.16b-v11.16b}, [x0], #64 + st1 {v12.16b-v15.16b}, [x0], #64 - ld1 {v11.16b}, [x20], #16 - eor v6.16b, v6.16b, v11.16b - st1 {v6.16b}, [x19], #16 - tbnz x9, #4, 4f - - ld1 {v12.16b}, [x20], #16 - eor v3.16b, v3.16b, v12.16b - st1 {v3.16b}, [x19], #16 - tbnz x9, #5, 5f - - ld1 {v13.16b}, [x20], #16 - eor v7.16b, v7.16b, v13.16b - st1 {v7.16b}, [x19], #16 - tbnz x9, #6, 6f + next_ctr v0 + subs x4, x4, #8 + b.gt 0b - ld1 {v14.16b}, [x20], #16 - eor v2.16b, v2.16b, v14.16b - st1 {v2.16b}, [x19], #16 - tbnz x9, #7, 7f - - ld1 {v15.16b}, [x20], #16 - eor v5.16b, v5.16b, v15.16b - st1 {v5.16b}, [x19], #16 - -8: next_ctr v0 - st1 {v0.16b}, [x24] - cbz x23, .Lctr_done - - b 99b - -.Lctr_done: - frame_pop + st1 {v0.16b}, [x5] + ldp x29, x30, [sp], #16 ret - - /* - * If we are handling the tail of the input (x6 != NULL), return the - * final keystream block back to the caller. - */ -0: cbz x25, 8b - st1 {v0.16b}, [x25] - b 8b -1: cbz x25, 8b - st1 {v1.16b}, [x25] - b 8b -2: cbz x25, 8b - st1 {v4.16b}, [x25] - b 8b -3: cbz x25, 8b - st1 {v6.16b}, [x25] - b 8b -4: cbz x25, 8b - st1 {v3.16b}, [x25] - b 8b -5: cbz x25, 8b - st1 {v7.16b}, [x25] - b 8b -6: cbz x25, 8b - st1 {v2.16b}, [x25] - b 8b -7: cbz x25, 8b - st1 {v5.16b}, [x25] - b 8b SYM_FUNC_END(aesbs_ctr_encrypt) diff --git a/arch/arm64/crypto/aes-neonbs-glue.c b/arch/arm64/crypto/aes-neonbs-glue.c index 8df6ad8cb09d..bac4cabef607 100644 --- a/arch/arm64/crypto/aes-neonbs-glue.c +++ b/arch/arm64/crypto/aes-neonbs-glue.c @@ -34,7 +34,7 @@ asmlinkage void aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 iv[]); asmlinkage void aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], - int rounds, int blocks, u8 iv[], u8 final[]); + int rounds, int blocks, u8 iv[]); asmlinkage void aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, int blocks, u8 iv[]); @@ -46,6 +46,8 @@ asmlinkage void neon_aes_ecb_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks); asmlinkage void neon_aes_cbc_encrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int blocks, u8 iv[]); +asmlinkage void neon_aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], + int rounds, int bytes, u8 ctr[]); asmlinkage void neon_aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, int bytes, u32 const rk2[], u8 iv[], int first); @@ -58,7 +60,7 @@ struct aesbs_ctx { int rounds; } __aligned(AES_BLOCK_SIZE); -struct aesbs_cbc_ctx { +struct aesbs_cbc_ctr_ctx { struct aesbs_ctx key; u32 enc[AES_MAX_KEYLENGTH_U32]; }; @@ -128,10 +130,10 @@ static int ecb_decrypt(struct skcipher_request *req) return __ecb_crypt(req, aesbs_ecb_decrypt); } -static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, +static int aesbs_cbc_ctr_setkey(struct crypto_skcipher *tfm, const u8 *in_key, unsigned int key_len) { - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); struct crypto_aes_ctx rk; int err; @@ -154,7 +156,7 @@ static int aesbs_cbc_setkey(struct crypto_skcipher *tfm, const u8 *in_key, static int cbc_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; int err; @@ -177,7 +179,7 @@ static int cbc_encrypt(struct skcipher_request *req) static int cbc_decrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_cbc_ctx *ctx = crypto_skcipher_ctx(tfm); + struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; int err; @@ -205,40 +207,32 @@ static int cbc_decrypt(struct skcipher_request *req) static int ctr_encrypt(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); - struct aesbs_ctx *ctx = crypto_skcipher_ctx(tfm); + struct aesbs_cbc_ctr_ctx *ctx = crypto_skcipher_ctx(tfm); struct skcipher_walk walk; - u8 buf[AES_BLOCK_SIZE]; int err; err = skcipher_walk_virt(&walk, req, false); while (walk.nbytes > 0) { - unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; - u8 *final = (walk.total % AES_BLOCK_SIZE) ? buf : NULL; - - if (walk.nbytes < walk.total) { - blocks = round_down(blocks, - walk.stride / AES_BLOCK_SIZE); - final = NULL; - } + int blocks = (walk.nbytes / AES_BLOCK_SIZE) & ~7; + int nbytes = walk.nbytes % (8 * AES_BLOCK_SIZE); + const u8 *src = walk.src.virt.addr; + u8 *dst = walk.dst.virt.addr; kernel_neon_begin(); - aesbs_ctr_encrypt(walk.dst.virt.addr, walk.src.virt.addr, - ctx->rk, ctx->rounds, blocks, walk.iv, final); - kernel_neon_end(); - - if (final) { - u8 *dst = walk.dst.virt.addr + blocks * AES_BLOCK_SIZE; - u8 *src = walk.src.virt.addr + blocks * AES_BLOCK_SIZE; - - crypto_xor_cpy(dst, src, final, - walk.total % AES_BLOCK_SIZE); - - err = skcipher_walk_done(&walk, 0); - break; + if (blocks >= 8) { + aesbs_ctr_encrypt(dst, src, ctx->key.rk, ctx->key.rounds, + blocks, walk.iv); + dst += blocks * AES_BLOCK_SIZE; + src += blocks * AES_BLOCK_SIZE; } - err = skcipher_walk_done(&walk, - walk.nbytes - blocks * AES_BLOCK_SIZE); + if (nbytes && walk.nbytes == walk.total) { + neon_aes_ctr_encrypt(dst, src, ctx->enc, ctx->key.rounds, + nbytes, walk.iv); + nbytes = 0; + } + kernel_neon_end(); + err = skcipher_walk_done(&walk, nbytes); } return err; } @@ -308,23 +302,18 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, return err; while (walk.nbytes >= AES_BLOCK_SIZE) { - unsigned int blocks = walk.nbytes / AES_BLOCK_SIZE; - - if (walk.nbytes < walk.total || walk.nbytes % AES_BLOCK_SIZE) - blocks = round_down(blocks, - walk.stride / AES_BLOCK_SIZE); - + int blocks = (walk.nbytes / AES_BLOCK_SIZE) & ~7; out = walk.dst.virt.addr; in = walk.src.virt.addr; nbytes = walk.nbytes; kernel_neon_begin(); - if (likely(blocks > 6)) { /* plain NEON is faster otherwise */ - if (first) + if (blocks >= 8) { + if (first == 1) neon_aes_ecb_encrypt(walk.iv, walk.iv, ctx->twkey, ctx->key.rounds, 1); - first = 0; + first = 2; fn(out, in, ctx->key.rk, ctx->key.rounds, blocks, walk.iv); @@ -333,10 +322,17 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, in += blocks * AES_BLOCK_SIZE; nbytes -= blocks * AES_BLOCK_SIZE; } - - if (walk.nbytes == walk.total && nbytes > 0) - goto xts_tail; - + if (walk.nbytes == walk.total && nbytes > 0) { + if (encrypt) + neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, + ctx->key.rounds, nbytes, + ctx->twkey, walk.iv, first); + else + neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, + ctx->key.rounds, nbytes, + ctx->twkey, walk.iv, first); + nbytes = first = 0; + } kernel_neon_end(); err = skcipher_walk_done(&walk, nbytes); } @@ -361,13 +357,12 @@ static int __xts_crypt(struct skcipher_request *req, bool encrypt, nbytes = walk.nbytes; kernel_neon_begin(); -xts_tail: if (encrypt) neon_aes_xts_encrypt(out, in, ctx->cts.key_enc, ctx->key.rounds, - nbytes, ctx->twkey, walk.iv, first ?: 2); + nbytes, ctx->twkey, walk.iv, first); else neon_aes_xts_decrypt(out, in, ctx->cts.key_dec, ctx->key.rounds, - nbytes, ctx->twkey, walk.iv, first ?: 2); + nbytes, ctx->twkey, walk.iv, first); kernel_neon_end(); return skcipher_walk_done(&walk, 0); @@ -402,14 +397,14 @@ static struct skcipher_alg aes_algs[] = { { .base.cra_driver_name = "cbc-aes-neonbs", .base.cra_priority = 250, .base.cra_blocksize = AES_BLOCK_SIZE, - .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctx), + .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctr_ctx), .base.cra_module = THIS_MODULE, .min_keysize = AES_MIN_KEY_SIZE, .max_keysize = AES_MAX_KEY_SIZE, .walksize = 8 * AES_BLOCK_SIZE, .ivsize = AES_BLOCK_SIZE, - .setkey = aesbs_cbc_setkey, + .setkey = aesbs_cbc_ctr_setkey, .encrypt = cbc_encrypt, .decrypt = cbc_decrypt, }, { @@ -417,7 +412,7 @@ static struct skcipher_alg aes_algs[] = { { .base.cra_driver_name = "ctr-aes-neonbs", .base.cra_priority = 250, .base.cra_blocksize = 1, - .base.cra_ctxsize = sizeof(struct aesbs_ctx), + .base.cra_ctxsize = sizeof(struct aesbs_cbc_ctr_ctx), .base.cra_module = THIS_MODULE, .min_keysize = AES_MIN_KEY_SIZE, @@ -425,7 +420,7 @@ static struct skcipher_alg aes_algs[] = { { .chunksize = AES_BLOCK_SIZE, .walksize = 8 * AES_BLOCK_SIZE, .ivsize = AES_BLOCK_SIZE, - .setkey = aesbs_setkey, + .setkey = aesbs_cbc_ctr_setkey, .encrypt = ctr_encrypt, .decrypt = ctr_encrypt, }, { diff --git a/arch/arm64/crypto/sha3-ce-glue.c b/arch/arm64/crypto/sha3-ce-glue.c index 8c65cecf560a..250e1377c481 100644 --- a/arch/arm64/crypto/sha3-ce-glue.c +++ b/arch/arm64/crypto/sha3-ce-glue.c @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* * sha3-ce-glue.c - core SHA-3 transform using v8.2 Crypto Extensions * diff --git a/arch/arm64/crypto/sha512-armv8.pl b/arch/arm64/crypto/sha512-armv8.pl index 2d8655d5b1af..35ec9ae99fe1 100644 --- a/arch/arm64/crypto/sha512-armv8.pl +++ b/arch/arm64/crypto/sha512-armv8.pl @@ -43,7 +43,7 @@ # on Cortex-A53 (or by 4 cycles per round). # (***) Super-impressive coefficients over gcc-generated code are # indication of some compiler "pathology", most notably code -# generated with -mgeneral-regs-only is significanty faster +# generated with -mgeneral-regs-only is significantly faster # and the gap is only 40-90%. # # October 2016. diff --git a/arch/arm64/crypto/sha512-ce-glue.c b/arch/arm64/crypto/sha512-ce-glue.c index e62a094a9d52..94cb7580deb7 100644 --- a/arch/arm64/crypto/sha512-ce-glue.c +++ b/arch/arm64/crypto/sha512-ce-glue.c @@ -1,4 +1,4 @@ -/* SPDX-License-Identifier: GPL-2.0 */ +// SPDX-License-Identifier: GPL-2.0 /* * sha512-ce-glue.c - SHA-384/SHA-512 using ARMv8 Crypto Extensions * diff --git a/arch/arm64/crypto/sm3-ce-glue.c b/arch/arm64/crypto/sm3-ce-glue.c index d71faca322f2..ee98954ae8ca 100644 --- a/arch/arm64/crypto/sm3-ce-glue.c +++ b/arch/arm64/crypto/sm3-ce-glue.c @@ -26,8 +26,10 @@ asmlinkage void sm3_ce_transform(struct sm3_state *sst, u8 const *src, static int sm3_ce_update(struct shash_desc *desc, const u8 *data, unsigned int len) { - if (!crypto_simd_usable()) - return crypto_sm3_update(desc, data, len); + if (!crypto_simd_usable()) { + sm3_update(shash_desc_ctx(desc), data, len); + return 0; + } kernel_neon_begin(); sm3_base_do_update(desc, data, len, sm3_ce_transform); @@ -38,8 +40,10 @@ static int sm3_ce_update(struct shash_desc *desc, const u8 *data, static int sm3_ce_final(struct shash_desc *desc, u8 *out) { - if (!crypto_simd_usable()) - return crypto_sm3_finup(desc, NULL, 0, out); + if (!crypto_simd_usable()) { + sm3_final(shash_desc_ctx(desc), out); + return 0; + } kernel_neon_begin(); sm3_base_do_finalize(desc, sm3_ce_transform); @@ -51,14 +55,22 @@ static int sm3_ce_final(struct shash_desc *desc, u8 *out) static int sm3_ce_finup(struct shash_desc *desc, const u8 *data, unsigned int len, u8 *out) { - if (!crypto_simd_usable()) - return crypto_sm3_finup(desc, data, len, out); + if (!crypto_simd_usable()) { + struct sm3_state *sctx = shash_desc_ctx(desc); + + if (len) + sm3_update(sctx, data, len); + sm3_final(sctx, out); + return 0; + } kernel_neon_begin(); - sm3_base_do_update(desc, data, len, sm3_ce_transform); + if (len) + sm3_base_do_update(desc, data, len, sm3_ce_transform); + sm3_base_do_finalize(desc, sm3_ce_transform); kernel_neon_end(); - return sm3_ce_final(desc, out); + return sm3_base_finish(desc, out); } static struct shash_alg sm3_alg = { diff --git a/arch/arm64/include/asm/apple_m1_pmu.h b/arch/arm64/include/asm/apple_m1_pmu.h new file mode 100644 index 000000000000..99483b19b99f --- /dev/null +++ b/arch/arm64/include/asm/apple_m1_pmu.h @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 + +#ifndef __ASM_APPLE_M1_PMU_h +#define __ASM_APPLE_M1_PMU_h + +#include <linux/bits.h> +#include <asm/sysreg.h> + +/* Counters */ +#define SYS_IMP_APL_PMC0_EL1 sys_reg(3, 2, 15, 0, 0) +#define SYS_IMP_APL_PMC1_EL1 sys_reg(3, 2, 15, 1, 0) +#define SYS_IMP_APL_PMC2_EL1 sys_reg(3, 2, 15, 2, 0) +#define SYS_IMP_APL_PMC3_EL1 sys_reg(3, 2, 15, 3, 0) +#define SYS_IMP_APL_PMC4_EL1 sys_reg(3, 2, 15, 4, 0) +#define SYS_IMP_APL_PMC5_EL1 sys_reg(3, 2, 15, 5, 0) +#define SYS_IMP_APL_PMC6_EL1 sys_reg(3, 2, 15, 6, 0) +#define SYS_IMP_APL_PMC7_EL1 sys_reg(3, 2, 15, 7, 0) +#define SYS_IMP_APL_PMC8_EL1 sys_reg(3, 2, 15, 9, 0) +#define SYS_IMP_APL_PMC9_EL1 sys_reg(3, 2, 15, 10, 0) + +/* Core PMC control register */ +#define SYS_IMP_APL_PMCR0_EL1 sys_reg(3, 1, 15, 0, 0) +#define PMCR0_CNT_ENABLE_0_7 GENMASK(7, 0) +#define PMCR0_IMODE GENMASK(10, 8) +#define PMCR0_IMODE_OFF 0 +#define PMCR0_IMODE_PMI 1 +#define PMCR0_IMODE_AIC 2 +#define PMCR0_IMODE_HALT 3 +#define PMCR0_IMODE_FIQ 4 +#define PMCR0_IACT BIT(11) +#define PMCR0_PMI_ENABLE_0_7 GENMASK(19, 12) +#define PMCR0_STOP_CNT_ON_PMI BIT(20) +#define PMCR0_CNT_GLOB_L2C_EVT BIT(21) +#define PMCR0_DEFER_PMI_TO_ERET BIT(22) +#define PMCR0_ALLOW_CNT_EN_EL0 BIT(30) +#define PMCR0_CNT_ENABLE_8_9 GENMASK(33, 32) +#define PMCR0_PMI_ENABLE_8_9 GENMASK(45, 44) + +#define SYS_IMP_APL_PMCR1_EL1 sys_reg(3, 1, 15, 1, 0) +#define PMCR1_COUNT_A64_EL0_0_7 GENMASK(15, 8) +#define PMCR1_COUNT_A64_EL1_0_7 GENMASK(23, 16) +#define PMCR1_COUNT_A64_EL0_8_9 GENMASK(41, 40) +#define PMCR1_COUNT_A64_EL1_8_9 GENMASK(49, 48) + +#define SYS_IMP_APL_PMCR2_EL1 sys_reg(3, 1, 15, 2, 0) +#define SYS_IMP_APL_PMCR3_EL1 sys_reg(3, 1, 15, 3, 0) +#define SYS_IMP_APL_PMCR4_EL1 sys_reg(3, 1, 15, 4, 0) + +#define SYS_IMP_APL_PMESR0_EL1 sys_reg(3, 1, 15, 5, 0) +#define PMESR0_EVT_CNT_2 GENMASK(7, 0) +#define PMESR0_EVT_CNT_3 GENMASK(15, 8) +#define PMESR0_EVT_CNT_4 GENMASK(23, 16) +#define PMESR0_EVT_CNT_5 GENMASK(31, 24) + +#define SYS_IMP_APL_PMESR1_EL1 sys_reg(3, 1, 15, 6, 0) +#define PMESR1_EVT_CNT_6 GENMASK(7, 0) +#define PMESR1_EVT_CNT_7 GENMASK(15, 8) +#define PMESR1_EVT_CNT_8 GENMASK(23, 16) +#define PMESR1_EVT_CNT_9 GENMASK(31, 24) + +#define SYS_IMP_APL_PMSR_EL1 sys_reg(3, 1, 15, 13, 0) +#define PMSR_OVERFLOW GENMASK(9, 0) + +#endif /* __ASM_APPLE_M1_PMU_h */ diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 4ad22c3135db..8bd5afc7b692 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -53,17 +53,36 @@ static inline u64 gic_read_iar_common(void) * The gicv3 of ThunderX requires a modified version for reading the * IAR status to ensure data synchronization (access to icc_iar1_el1 * is not sync'ed before and after). + * + * Erratum 38545 + * + * When a IAR register read races with a GIC interrupt RELEASE event, + * GIC-CPU interface could wrongly return a valid INTID to the CPU + * for an interrupt that is already released(non activated) instead of 0x3ff. + * + * To workaround this, return a valid interrupt ID only if there is a change + * in the active priority list after the IAR read. + * + * Common function used for both the workarounds since, + * 1. On Thunderx 88xx 1.x both erratas are applicable. + * 2. Having extra nops doesn't add any side effects for Silicons where + * erratum 23154 is not applicable. */ static inline u64 gic_read_iar_cavium_thunderx(void) { - u64 irqstat; + u64 irqstat, apr; + apr = read_sysreg_s(SYS_ICC_AP1R0_EL1); nops(8); irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1); nops(4); mb(); - return irqstat; + /* Max priority groups implemented is only 32 */ + if (likely(apr != read_sysreg_s(SYS_ICC_AP1R0_EL1))) + return irqstat; + + return 0x3ff; } static inline void gic_write_ctlr(u32 val) diff --git a/arch/arm64/include/asm/archrandom.h b/arch/arm64/include/asm/archrandom.h index 09e43272ccb0..d1bb5e71df25 100644 --- a/arch/arm64/include/asm/archrandom.h +++ b/arch/arm64/include/asm/archrandom.h @@ -42,13 +42,47 @@ static inline bool __arm64_rndr(unsigned long *v) return ok; } +static inline bool __arm64_rndrrs(unsigned long *v) +{ + bool ok; + + /* + * Reads of RNDRRS set PSTATE.NZCV to 0b0000 on success, + * and set PSTATE.NZCV to 0b0100 otherwise. + */ + asm volatile( + __mrs_s("%0", SYS_RNDRRS_EL0) "\n" + " cset %w1, ne\n" + : "=r" (*v), "=r" (ok) + : + : "cc"); + + return ok; +} + static inline bool __must_check arch_get_random_long(unsigned long *v) { + /* + * Only support the generic interface after we have detected + * the system wide capability, avoiding complexity with the + * cpufeature code and with potential scheduling between CPUs + * with and without the feature. + */ + if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v)) + return true; return false; } static inline bool __must_check arch_get_random_int(unsigned int *v) { + if (cpus_have_const_cap(ARM64_HAS_RNG)) { + unsigned long val; + + if (__arm64_rndr(&val)) { + *v = val; + return true; + } + } return false; } @@ -71,12 +105,11 @@ static inline bool __must_check arch_get_random_seed_long(unsigned long *v) } /* - * Only support the generic interface after we have detected - * the system wide capability, avoiding complexity with the - * cpufeature code and with potential scheduling between CPUs - * with and without the feature. + * RNDRRS is not backed by an entropy source but by a DRBG that is + * reseeded after each invocation. This is not a 100% fit but good + * enough to implement this API if no other entropy source exists. */ - if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndr(v)) + if (cpus_have_const_cap(ARM64_HAS_RNG) && __arm64_rndrrs(v)) return true; return false; @@ -96,7 +129,7 @@ static inline bool __must_check arch_get_random_seed_int(unsigned int *v) } if (cpus_have_const_cap(ARM64_HAS_RNG)) { - if (__arm64_rndr(&val)) { + if (__arm64_rndrrs(&val)) { *v = val; return true; } diff --git a/arch/arm64/include/asm/asm_pointer_auth.h b/arch/arm64/include/asm/asm_pointer_auth.h index f1bba5fc61c4..ead62f7dd269 100644 --- a/arch/arm64/include/asm/asm_pointer_auth.h +++ b/arch/arm64/include/asm/asm_pointer_auth.h @@ -60,6 +60,9 @@ alternative_else_nop_endif .macro __ptrauth_keys_init_cpu tsk, tmp1, tmp2, tmp3 mrs \tmp1, id_aa64isar1_el1 ubfx \tmp1, \tmp1, #ID_AA64ISAR1_APA_SHIFT, #8 + mrs_s \tmp2, SYS_ID_AA64ISAR2_EL1 + ubfx \tmp2, \tmp2, #ID_AA64ISAR2_APA3_SHIFT, #4 + orr \tmp1, \tmp1, \tmp2 cbz \tmp1, .Lno_addr_auth\@ mov_q \tmp1, (SCTLR_ELx_ENIA | SCTLR_ELx_ENIB | \ SCTLR_ELx_ENDA | SCTLR_ELx_ENDB) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 6ebdc0f834a7..8c5a61aeaf8e 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -542,11 +542,6 @@ alternative_endif #define EXPORT_SYMBOL_NOKASAN(name) EXPORT_SYMBOL(name) #endif -#ifdef CONFIG_KASAN_HW_TAGS -#define EXPORT_SYMBOL_NOHWKASAN(name) -#else -#define EXPORT_SYMBOL_NOHWKASAN(name) EXPORT_SYMBOL_NOKASAN(name) -#endif /* * Emit a 64-bit absolute little endian symbol reference in a way that * ensures that it will be resolved at build time, even when building a diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index a77b5f49b3a6..c62e7e5e2f0c 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -356,6 +356,7 @@ struct arm64_cpu_capabilities { struct { /* Feature register checking */ u32 sys_reg; u8 field_pos; + u8 field_width; u8 min_field_value; u8 hwcap_type; bool sign; @@ -576,6 +577,8 @@ static inline u64 arm64_ftr_reg_user_value(const struct arm64_ftr_reg *reg) static inline int __attribute_const__ cpuid_feature_extract_field_width(u64 features, int field, int width, bool sign) { + if (WARN_ON_ONCE(!width)) + width = 4; return (sign) ? cpuid_feature_extract_signed_field_width(features, field, width) : cpuid_feature_extract_unsigned_field_width(features, field, width); @@ -883,6 +886,7 @@ static inline unsigned int get_vmid_bits(u64 mmfr1) extern struct arm64_ftr_override id_aa64mmfr1_override; extern struct arm64_ftr_override id_aa64pfr1_override; extern struct arm64_ftr_override id_aa64isar1_override; +extern struct arm64_ftr_override id_aa64isar2_override; u32 get_kvm_ipa_limit(void); void dump_cpu_features(void); diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index bfbf0c4c7c5e..232b439cbaf3 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -88,6 +88,13 @@ #define CAVIUM_CPU_PART_THUNDERX_81XX 0x0A2 #define CAVIUM_CPU_PART_THUNDERX_83XX 0x0A3 #define CAVIUM_CPU_PART_THUNDERX2 0x0AF +/* OcteonTx2 series */ +#define CAVIUM_CPU_PART_OCTX2_98XX 0x0B1 +#define CAVIUM_CPU_PART_OCTX2_96XX 0x0B2 +#define CAVIUM_CPU_PART_OCTX2_95XX 0x0B3 +#define CAVIUM_CPU_PART_OCTX2_95XXN 0x0B4 +#define CAVIUM_CPU_PART_OCTX2_95XXMM 0x0B5 +#define CAVIUM_CPU_PART_OCTX2_95XXO 0x0B6 #define BRCM_CPU_PART_BRAHMA_B53 0x100 #define BRCM_CPU_PART_VULCAN 0x516 @@ -132,6 +139,12 @@ #define MIDR_THUNDERX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX) #define MIDR_THUNDERX_81XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_81XX) #define MIDR_THUNDERX_83XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX_83XX) +#define MIDR_OCTX2_98XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_98XX) +#define MIDR_OCTX2_96XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_96XX) +#define MIDR_OCTX2_95XX MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XX) +#define MIDR_OCTX2_95XXN MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXN) +#define MIDR_OCTX2_95XXMM MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXMM) +#define MIDR_OCTX2_95XXO MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_OCTX2_95XXO) #define MIDR_CAVIUM_THUNDERX2 MIDR_CPU_MODEL(ARM_CPU_IMP_CAVIUM, CAVIUM_CPU_PART_THUNDERX2) #define MIDR_BRAHMA_B53 MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_BRAHMA_B53) #define MIDR_BRCM_VULCAN MIDR_CPU_MODEL(ARM_CPU_IMP_BRCM, BRCM_CPU_PART_VULCAN) diff --git a/arch/arm64/include/asm/debug-monitors.h b/arch/arm64/include/asm/debug-monitors.h index 657c921fd784..00c291067e57 100644 --- a/arch/arm64/include/asm/debug-monitors.h +++ b/arch/arm64/include/asm/debug-monitors.h @@ -34,18 +34,6 @@ */ #define BREAK_INSTR_SIZE AARCH64_INSN_SIZE -/* - * BRK instruction encoding - * The #imm16 value should be placed at bits[20:5] within BRK ins - */ -#define AARCH64_BREAK_MON 0xd4200000 - -/* - * BRK instruction for provoking a fault on purpose - * Unlike kgdb, #imm16 value with unallocated handler is used for faulting. - */ -#define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5)) - #define AARCH64_BREAK_KGDB_DYN_DBG \ (AARCH64_BREAK_MON | (KGDB_DYN_DBG_BRK_IMM << 5)) diff --git a/arch/arm64/include/asm/hwcap.h b/arch/arm64/include/asm/hwcap.h index f68fbb207473..8db5ec0089db 100644 --- a/arch/arm64/include/asm/hwcap.h +++ b/arch/arm64/include/asm/hwcap.h @@ -108,6 +108,7 @@ #define KERNEL_HWCAP_ECV __khwcap2_feature(ECV) #define KERNEL_HWCAP_AFP __khwcap2_feature(AFP) #define KERNEL_HWCAP_RPRES __khwcap2_feature(RPRES) +#define KERNEL_HWCAP_MTE3 __khwcap2_feature(MTE3) /* * This yields a mask that user programs can use to figure out what diff --git a/arch/arm64/include/asm/insn-def.h b/arch/arm64/include/asm/insn-def.h index 2c075f615c6a..1a7d0d483698 100644 --- a/arch/arm64/include/asm/insn-def.h +++ b/arch/arm64/include/asm/insn-def.h @@ -3,7 +3,21 @@ #ifndef __ASM_INSN_DEF_H #define __ASM_INSN_DEF_H +#include <asm/brk-imm.h> + /* A64 instructions are always 32 bits. */ #define AARCH64_INSN_SIZE 4 +/* + * BRK instruction encoding + * The #imm16 value should be placed at bits[20:5] within BRK ins + */ +#define AARCH64_BREAK_MON 0xd4200000 + +/* + * BRK instruction for provoking a fault on purpose + * Unlike kgdb, #imm16 value with unallocated handler is used for faulting. + */ +#define AARCH64_BREAK_FAULT (AARCH64_BREAK_MON | (FAULT_BRK_IMM << 5)) + #endif /* __ASM_INSN_DEF_H */ diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index b02f0c328c8e..1e5760d567ae 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -206,7 +206,9 @@ enum aarch64_insn_ldst_type { AARCH64_INSN_LDST_LOAD_PAIR_POST_INDEX, AARCH64_INSN_LDST_STORE_PAIR_POST_INDEX, AARCH64_INSN_LDST_LOAD_EX, + AARCH64_INSN_LDST_LOAD_ACQ_EX, AARCH64_INSN_LDST_STORE_EX, + AARCH64_INSN_LDST_STORE_REL_EX, }; enum aarch64_insn_adsb_type { @@ -281,6 +283,36 @@ enum aarch64_insn_adr_type { AARCH64_INSN_ADR_TYPE_ADR, }; +enum aarch64_insn_mem_atomic_op { + AARCH64_INSN_MEM_ATOMIC_ADD, + AARCH64_INSN_MEM_ATOMIC_CLR, + AARCH64_INSN_MEM_ATOMIC_EOR, + AARCH64_INSN_MEM_ATOMIC_SET, + AARCH64_INSN_MEM_ATOMIC_SWP, +}; + +enum aarch64_insn_mem_order_type { + AARCH64_INSN_MEM_ORDER_NONE, + AARCH64_INSN_MEM_ORDER_ACQ, + AARCH64_INSN_MEM_ORDER_REL, + AARCH64_INSN_MEM_ORDER_ACQREL, +}; + +enum aarch64_insn_mb_type { + AARCH64_INSN_MB_SY, + AARCH64_INSN_MB_ST, + AARCH64_INSN_MB_LD, + AARCH64_INSN_MB_ISH, + AARCH64_INSN_MB_ISHST, + AARCH64_INSN_MB_ISHLD, + AARCH64_INSN_MB_NSH, + AARCH64_INSN_MB_NSHST, + AARCH64_INSN_MB_NSHLD, + AARCH64_INSN_MB_OSH, + AARCH64_INSN_MB_OSHST, + AARCH64_INSN_MB_OSHLD, +}; + #define __AARCH64_INSN_FUNCS(abbr, mask, val) \ static __always_inline bool aarch64_insn_is_##abbr(u32 code) \ { \ @@ -304,6 +336,11 @@ __AARCH64_INSN_FUNCS(store_post, 0x3FE00C00, 0x38000400) __AARCH64_INSN_FUNCS(load_post, 0x3FE00C00, 0x38400400) __AARCH64_INSN_FUNCS(str_reg, 0x3FE0EC00, 0x38206800) __AARCH64_INSN_FUNCS(ldadd, 0x3F20FC00, 0x38200000) +__AARCH64_INSN_FUNCS(ldclr, 0x3F20FC00, 0x38201000) +__AARCH64_INSN_FUNCS(ldeor, 0x3F20FC00, 0x38202000) +__AARCH64_INSN_FUNCS(ldset, 0x3F20FC00, 0x38203000) +__AARCH64_INSN_FUNCS(swp, 0x3F20FC00, 0x38208000) +__AARCH64_INSN_FUNCS(cas, 0x3FA07C00, 0x08A07C00) __AARCH64_INSN_FUNCS(ldr_reg, 0x3FE0EC00, 0x38606800) __AARCH64_INSN_FUNCS(ldr_lit, 0xBF000000, 0x18000000) __AARCH64_INSN_FUNCS(ldrsw_lit, 0xFF000000, 0x98000000) @@ -475,13 +512,6 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, enum aarch64_insn_register state, enum aarch64_insn_size_type size, enum aarch64_insn_ldst_type type); -u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, - enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size); -u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size); u32 aarch64_insn_gen_add_sub_imm(enum aarch64_insn_register dst, enum aarch64_insn_register src, int imm, enum aarch64_insn_variant variant, @@ -542,6 +572,42 @@ u32 aarch64_insn_gen_prefetch(enum aarch64_insn_register base, enum aarch64_insn_prfm_type type, enum aarch64_insn_prfm_target target, enum aarch64_insn_prfm_policy policy); +#ifdef CONFIG_ARM64_LSE_ATOMICS +u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_atomic_op op, + enum aarch64_insn_mem_order_type order); +u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_order_type order); +#else +static inline +u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_atomic_op op, + enum aarch64_insn_mem_order_type order) +{ + return AARCH64_BREAK_FAULT; +} + +static inline +u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_order_type order) +{ + return AARCH64_BREAK_FAULT; +} +#endif +u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type); + s32 aarch64_get_branch_offset(u32 insn); u32 aarch64_set_branch_offset(u32 insn, s32 offset); diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 01d47c5886dc..1767ded83888 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -355,8 +355,8 @@ ECN(SOFTSTP_CUR), ECN(WATCHPT_LOW), ECN(WATCHPT_CUR), \ ECN(BKPT32), ECN(VECTOR32), ECN(BRK64) -#define CPACR_EL1_FPEN (3 << 20) #define CPACR_EL1_TTA (1 << 28) -#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN | CPACR_EL1_ZEN_EL1EN) +#define CPACR_EL1_DEFAULT (CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN |\ + CPACR_EL1_ZEN_EL1EN) #endif /* __ARM64_KVM_ARM_H__ */ diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index 462882f356c7..aa7fa2a08f06 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -118,6 +118,7 @@ extern u64 kvm_nvhe_sym(id_aa64pfr0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64pfr1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64isar0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64isar1_el1_sys_val); +extern u64 kvm_nvhe_sym(id_aa64isar2_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val); extern u64 kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val); diff --git a/arch/arm64/include/asm/linkage.h b/arch/arm64/include/asm/linkage.h index b77e9b3f5371..43f8c25b3fda 100644 --- a/arch/arm64/include/asm/linkage.h +++ b/arch/arm64/include/asm/linkage.h @@ -39,28 +39,4 @@ SYM_START(name, SYM_L_WEAK, SYM_A_NONE) \ bti c ; -/* - * Annotate a function as position independent, i.e., safe to be called before - * the kernel virtual mapping is activated. - */ -#define SYM_FUNC_START_PI(x) \ - SYM_FUNC_START_ALIAS(__pi_##x); \ - SYM_FUNC_START(x) - -#define SYM_FUNC_START_WEAK_PI(x) \ - SYM_FUNC_START_ALIAS(__pi_##x); \ - SYM_FUNC_START_WEAK(x) - -#define SYM_FUNC_START_WEAK_ALIAS_PI(x) \ - SYM_FUNC_START_ALIAS(__pi_##x); \ - SYM_START(x, SYM_L_WEAK, SYM_A_ALIGN) - -#define SYM_FUNC_END_PI(x) \ - SYM_FUNC_END(x); \ - SYM_FUNC_END_ALIAS(__pi_##x) - -#define SYM_FUNC_END_ALIAS_PI(x) \ - SYM_FUNC_END_ALIAS(x); \ - SYM_FUNC_END_ALIAS(__pi_##x) - #endif diff --git a/arch/arm64/include/asm/lse.h b/arch/arm64/include/asm/lse.h index 5d10051c3e62..29c85810ae69 100644 --- a/arch/arm64/include/asm/lse.h +++ b/arch/arm64/include/asm/lse.h @@ -17,12 +17,10 @@ #include <asm/cpucaps.h> extern struct static_key_false cpu_hwcap_keys[ARM64_NCAPS]; -extern struct static_key_false arm64_const_caps_ready; -static inline bool system_uses_lse_atomics(void) +static __always_inline bool system_uses_lse_atomics(void) { - return (static_branch_likely(&arm64_const_caps_ready)) && - static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]); + return static_branch_likely(&cpu_hwcap_keys[ARM64_HAS_LSE_ATOMICS]); } #define __lse_ll_sc_body(op, ...) \ diff --git a/arch/arm64/include/asm/module.lds.h b/arch/arm64/include/asm/module.lds.h index a11ccadd47d2..094701ec5500 100644 --- a/arch/arm64/include/asm/module.lds.h +++ b/arch/arm64/include/asm/module.lds.h @@ -1,8 +1,8 @@ SECTIONS { #ifdef CONFIG_ARM64_MODULE_PLTS - .plt 0 (NOLOAD) : { BYTE(0) } - .init.plt 0 (NOLOAD) : { BYTE(0) } - .text.ftrace_trampoline 0 (NOLOAD) : { BYTE(0) } + .plt 0 : { BYTE(0) } + .init.plt 0 : { BYTE(0) } + .text.ftrace_trampoline 0 : { BYTE(0) } #endif #ifdef CONFIG_KASAN_SW_TAGS diff --git a/arch/arm64/include/asm/mte-def.h b/arch/arm64/include/asm/mte-def.h index 626d359b396e..14ee86b019c2 100644 --- a/arch/arm64/include/asm/mte-def.h +++ b/arch/arm64/include/asm/mte-def.h @@ -11,6 +11,7 @@ #define MTE_TAG_SHIFT 56 #define MTE_TAG_SIZE 4 #define MTE_TAG_MASK GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT) +#define MTE_PAGE_TAG_STORAGE (MTE_GRANULES_PER_PAGE * MTE_TAG_SIZE / 8) #define __MTE_PREAMBLE ARM64_ASM_PREAMBLE ".arch_extension memtag\n" diff --git a/arch/arm64/include/asm/mte.h b/arch/arm64/include/asm/mte.h index 075539f5f1c8..adcb937342f1 100644 --- a/arch/arm64/include/asm/mte.h +++ b/arch/arm64/include/asm/mte.h @@ -11,7 +11,9 @@ #ifndef __ASSEMBLY__ #include <linux/bitfield.h> +#include <linux/kasan-enabled.h> #include <linux/page-flags.h> +#include <linux/sched.h> #include <linux/types.h> #include <asm/pgtable-types.h> @@ -86,6 +88,26 @@ static inline int mte_ptrace_copy_tags(struct task_struct *child, #endif /* CONFIG_ARM64_MTE */ +static inline void mte_disable_tco_entry(struct task_struct *task) +{ + if (!system_supports_mte()) + return; + + /* + * Re-enable tag checking (TCO set on exception entry). This is only + * necessary if MTE is enabled in either the kernel or the userspace + * task in synchronous or asymmetric mode (SCTLR_EL1.TCF0 bit 0 is set + * for both). With MTE disabled in the kernel and disabled or + * asynchronous in userspace, tag check faults (including in uaccesses) + * are not reported, therefore there is no need to re-enable checking. + * This is beneficial on microarchitectures where re-enabling TCO is + * expensive. + */ + if (kasan_hw_tags_enabled() || + (task->thread.sctlr_user & (1UL << SCTLR_EL1_TCF0_SHIFT))) + asm volatile(SET_PSTATE_TCO(0)); +} + #ifdef CONFIG_KASAN_HW_TAGS /* Whether the MTE asynchronous mode is enabled. */ DECLARE_STATIC_KEY_FALSE(mte_async_or_asymm_mode); diff --git a/arch/arm64/include/asm/paravirt_api_clock.h b/arch/arm64/include/asm/paravirt_api_clock.h new file mode 100644 index 000000000000..65ac7cee0dad --- /dev/null +++ b/arch/arm64/include/asm/paravirt_api_clock.h @@ -0,0 +1 @@ +#include <asm/paravirt.h> diff --git a/arch/arm64/include/asm/perf_event.h b/arch/arm64/include/asm/perf_event.h index 4ef6f19331f9..3eaf462f5752 100644 --- a/arch/arm64/include/asm/perf_event.h +++ b/arch/arm64/include/asm/perf_event.h @@ -15,70 +15,70 @@ /* * Common architectural and microarchitectural event numbers. */ -#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x00 -#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x01 -#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x02 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x03 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x04 -#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x05 -#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x06 -#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x07 -#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x08 -#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x09 -#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x0A -#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x0B -#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x0C -#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x0D -#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x0E -#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x0F -#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x10 -#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x11 -#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x12 -#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x13 -#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x14 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x15 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x16 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x17 -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x18 -#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x19 -#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x1A -#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x1B -#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x1C -#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x1D -#define ARMV8_PMUV3_PERFCTR_CHAIN 0x1E -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x1F -#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x20 -#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x21 -#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x22 -#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x23 -#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x24 -#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x25 -#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x26 -#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x27 -#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x28 -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x29 -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x2A -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x2B -#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x2C -#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x2D -#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x2E -#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x2F -#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x30 -#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x31 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x32 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x33 -#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x34 -#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x35 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x36 -#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x37 -#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x38 -#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x39 -#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x3A -#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x3B -#define ARMV8_PMUV3_PERFCTR_STALL 0x3C -#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x3D -#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x3E -#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x3F +#define ARMV8_PMUV3_PERFCTR_SW_INCR 0x0000 +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL 0x0001 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL 0x0002 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL 0x0003 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE 0x0004 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL 0x0005 +#define ARMV8_PMUV3_PERFCTR_LD_RETIRED 0x0006 +#define ARMV8_PMUV3_PERFCTR_ST_RETIRED 0x0007 +#define ARMV8_PMUV3_PERFCTR_INST_RETIRED 0x0008 +#define ARMV8_PMUV3_PERFCTR_EXC_TAKEN 0x0009 +#define ARMV8_PMUV3_PERFCTR_EXC_RETURN 0x000A +#define ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED 0x000B +#define ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED 0x000C +#define ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED 0x000D +#define ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED 0x000E +#define ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED 0x000F +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED 0x0010 +#define ARMV8_PMUV3_PERFCTR_CPU_CYCLES 0x0011 +#define ARMV8_PMUV3_PERFCTR_BR_PRED 0x0012 +#define ARMV8_PMUV3_PERFCTR_MEM_ACCESS 0x0013 +#define ARMV8_PMUV3_PERFCTR_L1I_CACHE 0x0014 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB 0x0015 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE 0x0016 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL 0x0017 +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB 0x0018 +#define ARMV8_PMUV3_PERFCTR_BUS_ACCESS 0x0019 +#define ARMV8_PMUV3_PERFCTR_MEMORY_ERROR 0x001A +#define ARMV8_PMUV3_PERFCTR_INST_SPEC 0x001B +#define ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED 0x001C +#define ARMV8_PMUV3_PERFCTR_BUS_CYCLES 0x001D +#define ARMV8_PMUV3_PERFCTR_CHAIN 0x001E +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE 0x001F +#define ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE 0x0020 +#define ARMV8_PMUV3_PERFCTR_BR_RETIRED 0x0021 +#define ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED 0x0022 +#define ARMV8_PMUV3_PERFCTR_STALL_FRONTEND 0x0023 +#define ARMV8_PMUV3_PERFCTR_STALL_BACKEND 0x0024 +#define ARMV8_PMUV3_PERFCTR_L1D_TLB 0x0025 +#define ARMV8_PMUV3_PERFCTR_L1I_TLB 0x0026 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE 0x0027 +#define ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL 0x0028 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE 0x0029 +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL 0x002A +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE 0x002B +#define ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB 0x002C +#define ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL 0x002D +#define ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL 0x002E +#define ARMV8_PMUV3_PERFCTR_L2D_TLB 0x002F +#define ARMV8_PMUV3_PERFCTR_L2I_TLB 0x0030 +#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS 0x0031 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE 0x0032 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS 0x0033 +#define ARMV8_PMUV3_PERFCTR_DTLB_WALK 0x0034 +#define ARMV8_PMUV3_PERFCTR_ITLB_WALK 0x0035 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_RD 0x0036 +#define ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD 0x0037 +#define ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD 0x0038 +#define ARMV8_PMUV3_PERFCTR_L1D_CACHE_LMISS_RD 0x0039 +#define ARMV8_PMUV3_PERFCTR_OP_RETIRED 0x003A +#define ARMV8_PMUV3_PERFCTR_OP_SPEC 0x003B +#define ARMV8_PMUV3_PERFCTR_STALL 0x003C +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_BACKEND 0x003D +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT_FRONTEND 0x003E +#define ARMV8_PMUV3_PERFCTR_STALL_SLOT 0x003F /* Statistical profiling extension microarchitectural events */ #define ARMV8_SPE_PERFCTR_SAMPLE_POP 0x4000 @@ -96,6 +96,20 @@ #define ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS 0x400A #define ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD 0x400B +/* Trace buffer events */ +#define ARMV8_PMUV3_PERFCTR_TRB_WRAP 0x400C +#define ARMV8_PMUV3_PERFCTR_TRB_TRIG 0x400E + +/* Trace unit events */ +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT0 0x4010 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT1 0x4011 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT2 0x4012 +#define ARMV8_PMUV3_PERFCTR_TRCEXTOUT3 0x4013 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4 0x4018 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5 0x4019 +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6 0x401A +#define ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7 0x401B + /* additional latency from alignment events */ #define ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT 0x4020 #define ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT 0x4021 @@ -107,91 +121,91 @@ #define ARMV8_MTE_PERFCTR_MEM_ACCESS_CHECKED_WR 0x4026 /* ARMv8 recommended implementation defined event types */ -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x40 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x41 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x42 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x43 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x44 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x45 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x46 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x47 -#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x48 - -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x4C -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x4D -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x4E -#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x4F -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x50 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x51 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x52 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x53 - -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x56 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x57 -#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x58 - -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x5C -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x5D -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x5E -#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x5F -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x60 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x61 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x62 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x63 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x64 -#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x65 -#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x66 -#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x67 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x68 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x69 -#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x6A - -#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x6C -#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x6D -#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x6E -#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x6F -#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x70 -#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x71 -#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x72 -#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x73 -#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x74 -#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x75 -#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x76 -#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x77 -#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x78 -#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x79 -#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x7A - -#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x7C -#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x7D -#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x7E - -#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x81 -#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x82 -#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x83 -#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x84 - -#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x86 -#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x87 -#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x88 - -#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x8A -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x8B -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x8C -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x8D -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x8E -#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x8F -#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x90 -#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x91 - -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0xA0 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0xA1 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0xA2 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0xA3 - -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0xA6 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0xA7 -#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0xA8 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_RD 0x0040 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WR 0x0041 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_RD 0x0042 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_WR 0x0043 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_INNER 0x0044 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_REFILL_OUTER 0x0045 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_VICTIM 0x0046 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_WB_CLEAN 0x0047 +#define ARMV8_IMPDEF_PERFCTR_L1D_CACHE_INVAL 0x0048 + +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_RD 0x004C +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_REFILL_WR 0x004D +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_RD 0x004E +#define ARMV8_IMPDEF_PERFCTR_L1D_TLB_WR 0x004F +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_RD 0x0050 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WR 0x0051 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_RD 0x0052 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_REFILL_WR 0x0053 + +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_VICTIM 0x0056 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_WB_CLEAN 0x0057 +#define ARMV8_IMPDEF_PERFCTR_L2D_CACHE_INVAL 0x0058 + +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_RD 0x005C +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_REFILL_WR 0x005D +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_RD 0x005E +#define ARMV8_IMPDEF_PERFCTR_L2D_TLB_WR 0x005F +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_RD 0x0060 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_WR 0x0061 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_SHARED 0x0062 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NOT_SHARED 0x0063 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_NORMAL 0x0064 +#define ARMV8_IMPDEF_PERFCTR_BUS_ACCESS_PERIPH 0x0065 +#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_RD 0x0066 +#define ARMV8_IMPDEF_PERFCTR_MEM_ACCESS_WR 0x0067 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LD_SPEC 0x0068 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_ST_SPEC 0x0069 +#define ARMV8_IMPDEF_PERFCTR_UNALIGNED_LDST_SPEC 0x006A + +#define ARMV8_IMPDEF_PERFCTR_LDREX_SPEC 0x006C +#define ARMV8_IMPDEF_PERFCTR_STREX_PASS_SPEC 0x006D +#define ARMV8_IMPDEF_PERFCTR_STREX_FAIL_SPEC 0x006E +#define ARMV8_IMPDEF_PERFCTR_STREX_SPEC 0x006F +#define ARMV8_IMPDEF_PERFCTR_LD_SPEC 0x0070 +#define ARMV8_IMPDEF_PERFCTR_ST_SPEC 0x0071 +#define ARMV8_IMPDEF_PERFCTR_LDST_SPEC 0x0072 +#define ARMV8_IMPDEF_PERFCTR_DP_SPEC 0x0073 +#define ARMV8_IMPDEF_PERFCTR_ASE_SPEC 0x0074 +#define ARMV8_IMPDEF_PERFCTR_VFP_SPEC 0x0075 +#define ARMV8_IMPDEF_PERFCTR_PC_WRITE_SPEC 0x0076 +#define ARMV8_IMPDEF_PERFCTR_CRYPTO_SPEC 0x0077 +#define ARMV8_IMPDEF_PERFCTR_BR_IMMED_SPEC 0x0078 +#define ARMV8_IMPDEF_PERFCTR_BR_RETURN_SPEC 0x0079 +#define ARMV8_IMPDEF_PERFCTR_BR_INDIRECT_SPEC 0x007A + +#define ARMV8_IMPDEF_PERFCTR_ISB_SPEC 0x007C +#define ARMV8_IMPDEF_PERFCTR_DSB_SPEC 0x007D +#define ARMV8_IMPDEF_PERFCTR_DMB_SPEC 0x007E + +#define ARMV8_IMPDEF_PERFCTR_EXC_UNDEF 0x0081 +#define ARMV8_IMPDEF_PERFCTR_EXC_SVC 0x0082 +#define ARMV8_IMPDEF_PERFCTR_EXC_PABORT 0x0083 +#define ARMV8_IMPDEF_PERFCTR_EXC_DABORT 0x0084 + +#define ARMV8_IMPDEF_PERFCTR_EXC_IRQ 0x0086 +#define ARMV8_IMPDEF_PERFCTR_EXC_FIQ 0x0087 +#define ARMV8_IMPDEF_PERFCTR_EXC_SMC 0x0088 + +#define ARMV8_IMPDEF_PERFCTR_EXC_HVC 0x008A +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_PABORT 0x008B +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_DABORT 0x008C +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_OTHER 0x008D +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_IRQ 0x008E +#define ARMV8_IMPDEF_PERFCTR_EXC_TRAP_FIQ 0x008F +#define ARMV8_IMPDEF_PERFCTR_RC_LD_SPEC 0x0090 +#define ARMV8_IMPDEF_PERFCTR_RC_ST_SPEC 0x0091 + +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_RD 0x00A0 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WR 0x00A1 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_RD 0x00A2 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_REFILL_WR 0x00A3 + +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_VICTIM 0x00A6 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_WB_CLEAN 0x00A7 +#define ARMV8_IMPDEF_PERFCTR_L3D_CACHE_INVAL 0x00A8 /* * Per-CPU PMCR: config reg diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 40085e53f573..66671ff05183 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -273,6 +273,8 @@ #define TCR_NFD1 (UL(1) << 54) #define TCR_E0PD0 (UL(1) << 55) #define TCR_E0PD1 (UL(1) << 56) +#define TCR_TCMA0 (UL(1) << 57) +#define TCR_TCMA1 (UL(1) << 58) /* * TTBR. diff --git a/arch/arm64/include/asm/preempt.h b/arch/arm64/include/asm/preempt.h index e83f0982b99c..0159b625cc7f 100644 --- a/arch/arm64/include/asm/preempt.h +++ b/arch/arm64/include/asm/preempt.h @@ -2,6 +2,7 @@ #ifndef __ASM_PREEMPT_H #define __ASM_PREEMPT_H +#include <linux/jump_label.h> #include <linux/thread_info.h> #define PREEMPT_NEED_RESCHED BIT(32) @@ -80,10 +81,24 @@ static inline bool should_resched(int preempt_offset) } #ifdef CONFIG_PREEMPTION + void preempt_schedule(void); -#define __preempt_schedule() preempt_schedule() void preempt_schedule_notrace(void); -#define __preempt_schedule_notrace() preempt_schedule_notrace() + +#ifdef CONFIG_PREEMPT_DYNAMIC + +DECLARE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); +void dynamic_preempt_schedule(void); +#define __preempt_schedule() dynamic_preempt_schedule() +void dynamic_preempt_schedule_notrace(void); +#define __preempt_schedule_notrace() dynamic_preempt_schedule_notrace() + +#else /* CONFIG_PREEMPT_DYNAMIC */ + +#define __preempt_schedule() preempt_schedule() +#define __preempt_schedule_notrace() preempt_schedule_notrace() + +#endif /* CONFIG_PREEMPT_DYNAMIC */ #endif /* CONFIG_PREEMPTION */ #endif /* __ASM_PREEMPT_H */ diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 6f41b65f9962..73e38d9a540c 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -21,6 +21,7 @@ #define MTE_CTRL_TCF_SYNC (1UL << 16) #define MTE_CTRL_TCF_ASYNC (1UL << 17) +#define MTE_CTRL_TCF_ASYMM (1UL << 18) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/include/asm/spectre.h b/arch/arm64/include/asm/spectre.h index 86e0cc9b9c68..aa3d3607d5c8 100644 --- a/arch/arm64/include/asm/spectre.h +++ b/arch/arm64/include/asm/spectre.h @@ -67,7 +67,8 @@ struct bp_hardening_data { DECLARE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); -static inline void arm64_apply_bp_hardening(void) +/* Called during entry so must be __always_inline */ +static __always_inline void arm64_apply_bp_hardening(void) { struct bp_hardening_data *d; diff --git a/arch/arm64/include/asm/string.h b/arch/arm64/include/asm/string.h index 95f7686b728d..3a3264ff47b9 100644 --- a/arch/arm64/include/asm/string.h +++ b/arch/arm64/include/asm/string.h @@ -12,13 +12,11 @@ extern char *strrchr(const char *, int c); #define __HAVE_ARCH_STRCHR extern char *strchr(const char *, int c); -#ifndef CONFIG_KASAN_HW_TAGS #define __HAVE_ARCH_STRCMP extern int strcmp(const char *, const char *); #define __HAVE_ARCH_STRNCMP extern int strncmp(const char *, const char *, __kernel_size_t); -#endif #define __HAVE_ARCH_STRLEN extern __kernel_size_t strlen(const char *); diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 932d45b17877..c7ca3a105528 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -774,6 +774,8 @@ /* id_aa64isar2 */ #define ID_AA64ISAR2_CLEARBHB_SHIFT 28 +#define ID_AA64ISAR2_APA3_SHIFT 12 +#define ID_AA64ISAR2_GPA3_SHIFT 8 #define ID_AA64ISAR2_RPRES_SHIFT 4 #define ID_AA64ISAR2_WFXT_SHIFT 0 @@ -787,6 +789,16 @@ #define ID_AA64ISAR2_WFXT_NI 0x0 #define ID_AA64ISAR2_WFXT_SUPPORTED 0x2 +#define ID_AA64ISAR2_APA3_NI 0x0 +#define ID_AA64ISAR2_APA3_ARCHITECTED 0x1 +#define ID_AA64ISAR2_APA3_ARCH_EPAC 0x2 +#define ID_AA64ISAR2_APA3_ARCH_EPAC2 0x3 +#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC 0x4 +#define ID_AA64ISAR2_APA3_ARCH_EPAC2_FPAC_CMB 0x5 + +#define ID_AA64ISAR2_GPA3_NI 0x0 +#define ID_AA64ISAR2_GPA3_ARCHITECTED 0x1 + /* id_aa64pfr0 */ #define ID_AA64PFR0_CSV3_SHIFT 60 #define ID_AA64PFR0_CSV2_SHIFT 56 @@ -1099,13 +1111,11 @@ #define ZCR_ELx_LEN_SIZE 9 #define ZCR_ELx_LEN_MASK 0x1ff +#define CPACR_EL1_FPEN_EL1EN (BIT(20)) /* enable EL1 access */ +#define CPACR_EL1_FPEN_EL0EN (BIT(21)) /* enable EL0 access, if EL1EN set */ + #define CPACR_EL1_ZEN_EL1EN (BIT(16)) /* enable EL1 access */ #define CPACR_EL1_ZEN_EL0EN (BIT(17)) /* enable EL0 access, if EL1EN set */ -#define CPACR_EL1_ZEN (CPACR_EL1_ZEN_EL1EN | CPACR_EL1_ZEN_EL0EN) - -/* TCR EL1 Bit Definitions */ -#define SYS_TCR_EL1_TCMA1 (BIT(58)) -#define SYS_TCR_EL1_TCMA0 (BIT(57)) /* GCR_EL1 Definitions */ #define SYS_GCR_EL1_RRND (BIT(16)) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index f386b90a79c8..9fab663dd2de 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -24,6 +24,10 @@ void update_freq_counters_refs(void); #define arch_scale_freq_capacity topology_get_freq_scale #define arch_scale_freq_invariant topology_scale_freq_invariant +#ifdef CONFIG_ACPI_CPPC_LIB +#define arch_init_invariance_cppc topology_init_cpu_capacity_cppc +#endif + /* Replace task scheduler's default cpu-invariant accounting */ #define arch_scale_cpu_capacity topology_get_cpu_scale diff --git a/arch/arm64/include/asm/xor.h b/arch/arm64/include/asm/xor.h index 947f6a4f1aa0..befcd8a7abc9 100644 --- a/arch/arm64/include/asm/xor.h +++ b/arch/arm64/include/asm/xor.h @@ -16,7 +16,8 @@ extern struct xor_block_template const xor_block_inner_neon; static void -xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_neon_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { kernel_neon_begin(); xor_block_inner_neon.do_2(bytes, p1, p2); @@ -24,8 +25,9 @@ xor_neon_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_neon_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { kernel_neon_begin(); xor_block_inner_neon.do_3(bytes, p1, p2, p3); @@ -33,8 +35,10 @@ xor_neon_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_neon_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { kernel_neon_begin(); xor_block_inner_neon.do_4(bytes, p1, p2, p3, p4); @@ -42,8 +46,11 @@ xor_neon_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_neon_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_neon_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { kernel_neon_begin(); xor_block_inner_neon.do_5(bytes, p1, p2, p3, p4, p5); diff --git a/arch/arm64/include/uapi/asm/hwcap.h b/arch/arm64/include/uapi/asm/hwcap.h index f03731847d9d..99cb5d383048 100644 --- a/arch/arm64/include/uapi/asm/hwcap.h +++ b/arch/arm64/include/uapi/asm/hwcap.h @@ -78,5 +78,6 @@ #define HWCAP2_ECV (1 << 19) #define HWCAP2_AFP (1 << 20) #define HWCAP2_RPRES (1 << 21) +#define HWCAP2_MTE3 (1 << 22) #endif /* _UAPI__ASM_HWCAP_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 88b3e2a21408..986837d7ec82 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -61,6 +61,7 @@ obj-$(CONFIG_ARM64_ACPI_PARKING_PROTOCOL) += acpi_parking_protocol.o obj-$(CONFIG_PARAVIRT) += paravirt.o obj-$(CONFIG_RANDOMIZE_BASE) += kaslr.o obj-$(CONFIG_HIBERNATION) += hibernate.o hibernate-asm.o +obj-$(CONFIG_ELF_CORE) += elfcore.o obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o relocate_kernel.o \ cpu-reset.o obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file.o kexec_image.o diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 146fa2e76834..4c9b5b4b7a0b 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -214,6 +214,21 @@ static const struct arm64_cpu_capabilities arm64_repeat_tlbi_list[] = { }; #endif +#ifdef CONFIG_CAVIUM_ERRATUM_23154 +const struct midr_range cavium_erratum_23154_cpus[] = { + MIDR_ALL_VERSIONS(MIDR_THUNDERX), + MIDR_ALL_VERSIONS(MIDR_THUNDERX_81XX), + MIDR_ALL_VERSIONS(MIDR_THUNDERX_83XX), + MIDR_ALL_VERSIONS(MIDR_OCTX2_98XX), + MIDR_ALL_VERSIONS(MIDR_OCTX2_96XX), + MIDR_ALL_VERSIONS(MIDR_OCTX2_95XX), + MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXN), + MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXMM), + MIDR_ALL_VERSIONS(MIDR_OCTX2_95XXO), + {}, +}; +#endif + #ifdef CONFIG_CAVIUM_ERRATUM_27456 const struct midr_range cavium_erratum_27456_cpus[] = { /* Cavium ThunderX, T88 pass 1.x - 2.1 */ @@ -425,10 +440,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_CAVIUM_ERRATUM_23154 { - /* Cavium ThunderX, pass 1.x */ - .desc = "Cavium erratum 23154", + .desc = "Cavium errata 23154 and 38545", .capability = ARM64_WORKAROUND_CAVIUM_23154, - ERRATA_MIDR_REV_RANGE(MIDR_THUNDERX, 0, 0, 1), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + ERRATA_MIDR_RANGE_LIST(cavium_erratum_23154_cpus), }, #endif #ifdef CONFIG_CAVIUM_ERRATUM_27456 diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index d33687673f6b..d72c4b4d389c 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -232,6 +232,10 @@ static const struct arm64_ftr_bits ftr_id_aa64isar1[] = { static const struct arm64_ftr_bits ftr_id_aa64isar2[] = { ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_HIGHER_SAFE, ID_AA64ISAR2_CLEARBHB_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), + FTR_STRICT, FTR_EXACT, ID_AA64ISAR2_APA3_SHIFT, 4, 0), + ARM64_FTR_BITS(FTR_VISIBLE_IF_IS_ENABLED(CONFIG_ARM64_PTR_AUTH), + FTR_STRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_GPA3_SHIFT, 4, 0), ARM64_FTR_BITS(FTR_VISIBLE, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64ISAR2_RPRES_SHIFT, 4, 0), ARM64_FTR_END, }; @@ -602,6 +606,7 @@ static const struct arm64_ftr_bits ftr_raz[] = { struct arm64_ftr_override __ro_after_init id_aa64mmfr1_override; struct arm64_ftr_override __ro_after_init id_aa64pfr1_override; struct arm64_ftr_override __ro_after_init id_aa64isar1_override; +struct arm64_ftr_override __ro_after_init id_aa64isar2_override; static const struct __ftr_reg_entry { u32 sys_id; @@ -650,6 +655,8 @@ static const struct __ftr_reg_entry { ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1, &id_aa64isar1_override), ARM64_FTR_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2), + ARM64_FTR_REG_OVERRIDE(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2, + &id_aa64isar2_override), /* Op1 = 0, CRn = 0, CRm = 7 */ ARM64_FTR_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0), @@ -1313,7 +1320,9 @@ u64 __read_sysreg_by_encoding(u32 sys_id) static bool feature_matches(u64 reg, const struct arm64_cpu_capabilities *entry) { - int val = cpuid_feature_extract_field(reg, entry->field_pos, entry->sign); + int val = cpuid_feature_extract_field_width(reg, entry->field_pos, + entry->field_width, + entry->sign); return val >= entry->min_field_value; } @@ -1787,14 +1796,6 @@ static void cpu_copy_el2regs(const struct arm64_cpu_capabilities *__unused) write_sysreg(read_sysreg(tpidr_el1), tpidr_el2); } -static void cpu_has_fwb(const struct arm64_cpu_capabilities *__unused) -{ - u64 val = read_sysreg_s(SYS_CLIDR_EL1); - - /* Check that CLIDR_EL1.LOU{U,IS} are both 0 */ - WARN_ON(CLIDR_LOUU(val) || CLIDR_LOUIS(val)); -} - #ifdef CONFIG_ARM64_PAN static void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused) { @@ -1841,21 +1842,27 @@ static bool has_address_auth_cpucap(const struct arm64_cpu_capabilities *entry, /* Now check for the secondary CPUs with SCOPE_LOCAL_CPU scope */ sec_val = cpuid_feature_extract_field(__read_sysreg_by_encoding(entry->sys_reg), entry->field_pos, entry->sign); - return sec_val == boot_val; + return (sec_val >= entry->min_field_value) && (sec_val == boot_val); } static bool has_address_auth_metacap(const struct arm64_cpu_capabilities *entry, int scope) { - return has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH], scope) || - has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); + bool api = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_IMP_DEF], scope); + bool apa = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5], scope); + bool apa3 = has_address_auth_cpucap(cpu_hwcaps_ptrs[ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3], scope); + + return apa || apa3 || api; } static bool has_generic_auth(const struct arm64_cpu_capabilities *entry, int __unused) { - return __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH) || - __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF); + bool gpi = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_IMP_DEF); + bool gpa = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5); + bool gpa3 = __system_matches_cap(ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3); + + return gpa || gpa3 || gpi; } #endif /* CONFIG_ARM64_PTR_AUTH */ @@ -1957,6 +1964,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_useable_gicv3_cpuif, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, @@ -1967,6 +1975,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR0_EL1, .field_pos = ID_AA64MMFR0_ECV_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, @@ -1978,6 +1987,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, .cpu_enable = cpu_enable_pan, @@ -1991,6 +2001,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64MMFR1_EL1, .field_pos = ID_AA64MMFR1_PAN_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 3, }, @@ -2003,6 +2014,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_ATOMICS_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 2, }, @@ -2027,6 +2039,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL0_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT, }, #ifdef CONFIG_KVM @@ -2038,6 +2051,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_EL1_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT, }, { @@ -2058,6 +2072,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { */ .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_CSV3_SHIFT, + .field_width = 4, .min_field_value = 1, .matches = unmap_kernel_at_el0, .cpu_enable = kpti_install_ng_mappings, @@ -2077,6 +2092,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .field_width = 4, .min_field_value = 1, }, { @@ -2087,6 +2103,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_DPB_SHIFT, + .field_width = 4, .min_field_value = 2, }, #endif @@ -2098,6 +2115,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_SVE_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_SVE, .matches = has_cpuid_feature, .cpu_enable = sve_kernel_enable, @@ -2112,6 +2130,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_RAS_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_RAS_V1, .cpu_enable = cpu_clear_disr, }, @@ -2130,6 +2149,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64PFR0_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64PFR0_AMU_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR0_AMU, .cpu_enable = cpu_amu_enable, }, @@ -2154,9 +2174,9 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR2_FWB_SHIFT, + .field_width = 4, .min_field_value = 1, .matches = has_cpuid_feature, - .cpu_enable = cpu_has_fwb, }, { .desc = "ARMv8.4 Translation Table Level", @@ -2165,6 +2185,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR2_TTL_SHIFT, + .field_width = 4, .min_field_value = 1, .matches = has_cpuid_feature, }, @@ -2175,6 +2196,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_TLB_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = ID_AA64ISAR0_TLB_RANGE, }, @@ -2193,6 +2215,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR1_HADBS_SHIFT, + .field_width = 4, .min_field_value = 2, .matches = has_hw_dbm, .cpu_enable = cpu_enable_hw_dbm, @@ -2205,6 +2228,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_CRC32_SHIFT, + .field_width = 4, .min_field_value = 1, }, { @@ -2214,6 +2238,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_SSBS_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = ID_AA64PFR1_SSBS_PSTATE_ONLY, }, @@ -2226,6 +2251,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64MMFR2_CNP_SHIFT, + .field_width = 4, .min_field_value = 1, .cpu_enable = cpu_enable_cnp, }, @@ -2237,27 +2263,41 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR1_EL1, .field_pos = ID_AA64ISAR1_SB_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, #ifdef CONFIG_ARM64_PTR_AUTH { - .desc = "Address authentication (architected algorithm)", - .capability = ARM64_HAS_ADDRESS_AUTH_ARCH, + .desc = "Address authentication (architected QARMA5 algorithm)", + .capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA5, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_APA_SHIFT, + .field_width = 4, .min_field_value = ID_AA64ISAR1_APA_ARCHITECTED, .matches = has_address_auth_cpucap, }, { + .desc = "Address authentication (architected QARMA3 algorithm)", + .capability = ARM64_HAS_ADDRESS_AUTH_ARCH_QARMA3, + .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, + .sys_reg = SYS_ID_AA64ISAR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64ISAR2_APA3_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64ISAR2_APA3_ARCHITECTED, + .matches = has_address_auth_cpucap, + }, + { .desc = "Address authentication (IMP DEF algorithm)", .capability = ARM64_HAS_ADDRESS_AUTH_IMP_DEF, .type = ARM64_CPUCAP_BOOT_CPU_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_API_SHIFT, + .field_width = 4, .min_field_value = ID_AA64ISAR1_API_IMP_DEF, .matches = has_address_auth_cpucap, }, @@ -2267,22 +2307,35 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_address_auth_metacap, }, { - .desc = "Generic authentication (architected algorithm)", - .capability = ARM64_HAS_GENERIC_AUTH_ARCH, + .desc = "Generic authentication (architected QARMA5 algorithm)", + .capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA5, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_GPA_SHIFT, + .field_width = 4, .min_field_value = ID_AA64ISAR1_GPA_ARCHITECTED, .matches = has_cpuid_feature, }, { + .desc = "Generic authentication (architected QARMA3 algorithm)", + .capability = ARM64_HAS_GENERIC_AUTH_ARCH_QARMA3, + .type = ARM64_CPUCAP_SYSTEM_FEATURE, + .sys_reg = SYS_ID_AA64ISAR2_EL1, + .sign = FTR_UNSIGNED, + .field_pos = ID_AA64ISAR2_GPA3_SHIFT, + .field_width = 4, + .min_field_value = ID_AA64ISAR2_GPA3_ARCHITECTED, + .matches = has_cpuid_feature, + }, + { .desc = "Generic authentication (IMP DEF algorithm)", .capability = ARM64_HAS_GENERIC_AUTH_IMP_DEF, .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_GPI_SHIFT, + .field_width = 4, .min_field_value = ID_AA64ISAR1_GPI_IMP_DEF, .matches = has_cpuid_feature, }, @@ -2303,6 +2356,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = can_use_gic_priorities, .sys_reg = SYS_ID_AA64PFR0_EL1, .field_pos = ID_AA64PFR0_GIC_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, @@ -2314,6 +2368,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .type = ARM64_CPUCAP_SYSTEM_FEATURE, .sys_reg = SYS_ID_AA64MMFR2_EL1, .sign = FTR_UNSIGNED, + .field_width = 4, .field_pos = ID_AA64MMFR2_E0PD_SHIFT, .matches = has_cpuid_feature, .min_field_value = 1, @@ -2328,6 +2383,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64ISAR0_EL1, .field_pos = ID_AA64ISAR0_RNDR_SHIFT, + .field_width = 4, .sign = FTR_UNSIGNED, .min_field_value = 1, }, @@ -2345,6 +2401,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .cpu_enable = bti_enable, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_BT_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR1_BT_BTI, .sign = FTR_UNSIGNED, }, @@ -2357,6 +2414,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_MTE_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR1_MTE, .sign = FTR_UNSIGNED, .cpu_enable = cpu_enable_mte, @@ -2368,6 +2426,7 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .matches = has_cpuid_feature, .sys_reg = SYS_ID_AA64PFR1_EL1, .field_pos = ID_AA64PFR1_MTE_SHIFT, + .field_width = 4, .min_field_value = ID_AA64PFR1_MTE_ASYMM, .sign = FTR_UNSIGNED, }, @@ -2379,16 +2438,18 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .sys_reg = SYS_ID_AA64ISAR1_EL1, .sign = FTR_UNSIGNED, .field_pos = ID_AA64ISAR1_LRCPC_SHIFT, + .field_width = 4, .matches = has_cpuid_feature, .min_field_value = 1, }, {}, }; -#define HWCAP_CPUID_MATCH(reg, field, s, min_value) \ +#define HWCAP_CPUID_MATCH(reg, field, width, s, min_value) \ .matches = has_cpuid_feature, \ .sys_reg = reg, \ .field_pos = field, \ + .field_width = width, \ .sign = s, \ .min_field_value = min_value, @@ -2398,10 +2459,10 @@ static const struct arm64_cpu_capabilities arm64_features[] = { .hwcap_type = cap_type, \ .hwcap = cap, \ -#define HWCAP_CAP(reg, field, s, min_value, cap_type, cap) \ +#define HWCAP_CAP(reg, field, width, s, min_value, cap_type, cap) \ { \ __HWCAP_CAP(#cap, cap_type, cap) \ - HWCAP_CPUID_MATCH(reg, field, s, min_value) \ + HWCAP_CPUID_MATCH(reg, field, width, s, min_value) \ } #define HWCAP_MULTI_CAP(list, cap_type, cap) \ @@ -2421,11 +2482,16 @@ static const struct arm64_cpu_capabilities arm64_features[] = { static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_APA_SHIFT, - FTR_UNSIGNED, ID_AA64ISAR1_APA_ARCHITECTED) + 4, FTR_UNSIGNED, + ID_AA64ISAR1_APA_ARCHITECTED) + }, + { + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_APA3_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR2_APA3_ARCHITECTED) }, { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_API_SHIFT, - FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF) + 4, FTR_UNSIGNED, ID_AA64ISAR1_API_IMP_DEF) }, {}, }; @@ -2433,77 +2499,82 @@ static const struct arm64_cpu_capabilities ptr_auth_hwcap_addr_matches[] = { static const struct arm64_cpu_capabilities ptr_auth_hwcap_gen_matches[] = { { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPA_SHIFT, - FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED) + 4, FTR_UNSIGNED, ID_AA64ISAR1_GPA_ARCHITECTED) + }, + { + HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_GPA3_SHIFT, + 4, FTR_UNSIGNED, ID_AA64ISAR2_GPA3_ARCHITECTED) }, { HWCAP_CPUID_MATCH(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_GPI_SHIFT, - FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF) + 4, FTR_UNSIGNED, ID_AA64ISAR1_GPI_IMP_DEF) }, {}, }; #endif static const struct arm64_cpu_capabilities arm64_elf_hwcaps[] = { - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), - HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH), - HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), - HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_PMULL), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AES), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA1), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA2_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_SHA512), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_CRC32), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_ATOMICS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ATOMICS), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RDM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDRDM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SHA3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SHA3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM3_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM3), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_SM4_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SM4), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_DP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDDP), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_FHM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDFHM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FLAGM), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_TS_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_FLAGM2), + HWCAP_CAP(SYS_ID_AA64ISAR0_EL1, ID_AA64ISAR0_RNDR_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RNG), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_FP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_FP_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FPHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 0, CAP_HWCAP, KERNEL_HWCAP_ASIMD), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_ASIMD_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ASIMDHP), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_DIT_SHIFT, 4, FTR_SIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DIT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DCPOP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DPB_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_DCPODP), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_JSCVT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_JSCVT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FCMA_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FCMA), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_LRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_LRCPC_SHIFT, 4, FTR_UNSIGNED, 2, CAP_HWCAP, KERNEL_HWCAP_ILRCPC), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_FRINTTS_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_FRINT), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_SB_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_SB), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_BF16_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_BF16), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_DGH_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_DGH), + HWCAP_CAP(SYS_ID_AA64ISAR1_EL1, ID_AA64ISAR1_I8MM_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_I8MM), + HWCAP_CAP(SYS_ID_AA64MMFR2_EL1, ID_AA64MMFR2_AT_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_USCAT), #ifdef CONFIG_ARM64_SVE - HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), - HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), + HWCAP_CAP(SYS_ID_AA64PFR0_EL1, ID_AA64PFR0_SVE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR0_SVE, CAP_HWCAP, KERNEL_HWCAP_SVE), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SVEVER_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SVEVER_SVE2, CAP_HWCAP, KERNEL_HWCAP_SVE2), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES, CAP_HWCAP, KERNEL_HWCAP_SVEAES), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_AES_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_AES_PMULL, CAP_HWCAP, KERNEL_HWCAP_SVEPMULL), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BITPERM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BITPERM, CAP_HWCAP, KERNEL_HWCAP_SVEBITPERM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_BF16_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_BF16, CAP_HWCAP, KERNEL_HWCAP_SVEBF16), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SHA3_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SHA3, CAP_HWCAP, KERNEL_HWCAP_SVESHA3), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_SM4_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_SM4, CAP_HWCAP, KERNEL_HWCAP_SVESM4), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_I8MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_I8MM, CAP_HWCAP, KERNEL_HWCAP_SVEI8MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F32MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F32MM, CAP_HWCAP, KERNEL_HWCAP_SVEF32MM), + HWCAP_CAP(SYS_ID_AA64ZFR0_EL1, ID_AA64ZFR0_F64MM_SHIFT, 4, FTR_UNSIGNED, ID_AA64ZFR0_F64MM, CAP_HWCAP, KERNEL_HWCAP_SVEF64MM), #endif - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_SSBS_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_SSBS_PSTATE_INSNS, CAP_HWCAP, KERNEL_HWCAP_SSBS), #ifdef CONFIG_ARM64_BTI - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_BT_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_BT_BTI, CAP_HWCAP, KERNEL_HWCAP_BTI), #endif #ifdef CONFIG_ARM64_PTR_AUTH HWCAP_MULTI_CAP(ptr_auth_hwcap_addr_matches, CAP_HWCAP, KERNEL_HWCAP_PACA), HWCAP_MULTI_CAP(ptr_auth_hwcap_gen_matches, CAP_HWCAP, KERNEL_HWCAP_PACG), #endif #ifdef CONFIG_ARM64_MTE - HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE, CAP_HWCAP, KERNEL_HWCAP_MTE), + HWCAP_CAP(SYS_ID_AA64PFR1_EL1, ID_AA64PFR1_MTE_SHIFT, 4, FTR_UNSIGNED, ID_AA64PFR1_MTE_ASYMM, CAP_HWCAP, KERNEL_HWCAP_MTE3), #endif /* CONFIG_ARM64_MTE */ - HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), - HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), - HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), + HWCAP_CAP(SYS_ID_AA64MMFR0_EL1, ID_AA64MMFR0_ECV_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_ECV), + HWCAP_CAP(SYS_ID_AA64MMFR1_EL1, ID_AA64MMFR1_AFP_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_AFP), + HWCAP_CAP(SYS_ID_AA64ISAR2_EL1, ID_AA64ISAR2_RPRES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_HWCAP, KERNEL_HWCAP_RPRES), {}, }; @@ -2532,15 +2603,15 @@ static bool compat_has_neon(const struct arm64_cpu_capabilities *cap, int scope) static const struct arm64_cpu_capabilities compat_elf_hwcaps[] = { #ifdef CONFIG_COMPAT HWCAP_CAP_MATCH(compat_has_neon, CAP_COMPAT_HWCAP, COMPAT_HWCAP_NEON), - HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), + HWCAP_CAP(SYS_MVFR1_EL1, MVFR1_SIMDFMAC_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv4), /* Arm v8 mandates MVFR0.FPDP == {0, 2}. So, piggy back on this for the presence of VFP support */ - HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), - HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), - HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFP), + HWCAP_CAP(SYS_MVFR0_EL1, MVFR0_FPDP_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP, COMPAT_HWCAP_VFPv3), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 2, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_PMULL), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_AES_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_AES), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA1_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA1), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_SHA2_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_SHA2), + HWCAP_CAP(SYS_ID_ISAR5_EL1, ID_ISAR5_CRC32_SHIFT, 4, FTR_UNSIGNED, 1, CAP_COMPAT_HWCAP2, COMPAT_HWCAP2_CRC32), #endif {}, }; diff --git a/arch/arm64/kernel/cpuidle.c b/arch/arm64/kernel/cpuidle.c index 03991eeff643..3006f4324808 100644 --- a/arch/arm64/kernel/cpuidle.c +++ b/arch/arm64/kernel/cpuidle.c @@ -54,6 +54,9 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu) struct acpi_lpi_state *lpi; struct acpi_processor *pr = per_cpu(processors, cpu); + if (unlikely(!pr || !pr->flags.has_lpi)) + return -EINVAL; + /* * If the PSCI cpu_suspend function hook has not been initialized * idle states must not be enabled, so bail out @@ -61,9 +64,6 @@ static int psci_acpi_cpu_init_idle(unsigned int cpu) if (!psci_ops.cpu_suspend) return -EOPNOTSUPP; - if (unlikely(!pr || !pr->flags.has_lpi)) - return -EINVAL; - count = pr->power.count - 1; if (count <= 0) return -ENODEV; diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 591c18a889a5..330b92ea863a 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -97,6 +97,7 @@ static const char *const hwcap_str[] = { [KERNEL_HWCAP_ECV] = "ecv", [KERNEL_HWCAP_AFP] = "afp", [KERNEL_HWCAP_RPRES] = "rpres", + [KERNEL_HWCAP_MTE3] = "mte3", }; #ifdef CONFIG_COMPAT diff --git a/arch/arm64/kernel/crash_core.c b/arch/arm64/kernel/crash_core.c index 314391a156ee..2b65aae332ce 100644 --- a/arch/arm64/kernel/crash_core.c +++ b/arch/arm64/kernel/crash_core.c @@ -20,6 +20,12 @@ void arch_crash_save_vmcoreinfo(void) { VMCOREINFO_NUMBER(VA_BITS); /* Please note VMCOREINFO_NUMBER() uses "%d", not "%x" */ + vmcoreinfo_append_str("NUMBER(MODULES_VADDR)=0x%lx\n", MODULES_VADDR); + vmcoreinfo_append_str("NUMBER(MODULES_END)=0x%lx\n", MODULES_END); + vmcoreinfo_append_str("NUMBER(VMALLOC_START)=0x%lx\n", VMALLOC_START); + vmcoreinfo_append_str("NUMBER(VMALLOC_END)=0x%lx\n", VMALLOC_END); + vmcoreinfo_append_str("NUMBER(VMEMMAP_START)=0x%lx\n", VMEMMAP_START); + vmcoreinfo_append_str("NUMBER(VMEMMAP_END)=0x%lx\n", VMEMMAP_END); vmcoreinfo_append_str("NUMBER(kimage_voffset)=0x%llx\n", kimage_voffset); vmcoreinfo_append_str("NUMBER(PHYS_OFFSET)=0x%llx\n", diff --git a/arch/arm64/kernel/elfcore.c b/arch/arm64/kernel/elfcore.c new file mode 100644 index 000000000000..3ed39c61a510 --- /dev/null +++ b/arch/arm64/kernel/elfcore.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/coredump.h> +#include <linux/elfcore.h> +#include <linux/kernel.h> +#include <linux/mm.h> + +#include <asm/cpufeature.h> +#include <asm/mte.h> + +#ifndef VMA_ITERATOR +#define VMA_ITERATOR(name, mm, addr) \ + struct mm_struct *name = mm +#define for_each_vma(vmi, vma) \ + for (vma = vmi->mmap; vma; vma = vma->vm_next) +#endif + +#define for_each_mte_vma(vmi, vma) \ + if (system_supports_mte()) \ + for_each_vma(vmi, vma) \ + if (vma->vm_flags & VM_MTE) + +static unsigned long mte_vma_tag_dump_size(struct vm_area_struct *vma) +{ + if (vma->vm_flags & VM_DONTDUMP) + return 0; + + return vma_pages(vma) * MTE_PAGE_TAG_STORAGE; +} + +/* Derived from dump_user_range(); start/end must be page-aligned */ +static int mte_dump_tag_range(struct coredump_params *cprm, + unsigned long start, unsigned long end) +{ + unsigned long addr; + + for (addr = start; addr < end; addr += PAGE_SIZE) { + char tags[MTE_PAGE_TAG_STORAGE]; + struct page *page = get_dump_page(addr); + + /* + * get_dump_page() returns NULL when encountering an empty + * page table entry that would otherwise have been filled with + * the zero page. Skip the equivalent tag dump which would + * have been all zeros. + */ + if (!page) { + dump_skip(cprm, MTE_PAGE_TAG_STORAGE); + continue; + } + + /* + * Pages mapped in user space as !pte_access_permitted() (e.g. + * PROT_EXEC only) may not have the PG_mte_tagged flag set. + */ + if (!test_bit(PG_mte_tagged, &page->flags)) { + put_page(page); + dump_skip(cprm, MTE_PAGE_TAG_STORAGE); + continue; + } + + mte_save_page_tags(page_address(page), tags); + put_page(page); + if (!dump_emit(cprm, tags, MTE_PAGE_TAG_STORAGE)) + return 0; + } + + return 1; +} + +Elf_Half elf_core_extra_phdrs(void) +{ + struct vm_area_struct *vma; + int vma_count = 0; + VMA_ITERATOR(vmi, current->mm, 0); + + for_each_mte_vma(vmi, vma) + vma_count++; + + return vma_count; +} + +int elf_core_write_extra_phdrs(struct coredump_params *cprm, loff_t offset) +{ + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, current->mm, 0); + + for_each_mte_vma(vmi, vma) { + struct elf_phdr phdr; + + phdr.p_type = PT_ARM_MEMTAG_MTE; + phdr.p_offset = offset; + phdr.p_vaddr = vma->vm_start; + phdr.p_paddr = 0; + phdr.p_filesz = mte_vma_tag_dump_size(vma); + phdr.p_memsz = vma->vm_end - vma->vm_start; + offset += phdr.p_filesz; + phdr.p_flags = 0; + phdr.p_align = 0; + + if (!dump_emit(cprm, &phdr, sizeof(phdr))) + return 0; + } + + return 1; +} + +size_t elf_core_extra_data_size(void) +{ + struct vm_area_struct *vma; + size_t data_size = 0; + VMA_ITERATOR(vmi, current->mm, 0); + + for_each_mte_vma(vmi, vma) + data_size += mte_vma_tag_dump_size(vma); + + return data_size; +} + +int elf_core_write_extra_data(struct coredump_params *cprm) +{ + struct vm_area_struct *vma; + VMA_ITERATOR(vmi, current->mm, 0); + + for_each_mte_vma(vmi, vma) { + if (vma->vm_flags & VM_DONTDUMP) + continue; + + if (!mte_dump_tag_range(cprm, vma->vm_start, vma->vm_end)) + return 0; + } + + return 1; +} diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index ef7fcefb96bd..878c65aa7206 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -6,6 +6,7 @@ */ #include <linux/context_tracking.h> +#include <linux/kasan.h> #include <linux/linkage.h> #include <linux/lockdep.h> #include <linux/ptrace.h> @@ -56,6 +57,7 @@ static void noinstr enter_from_kernel_mode(struct pt_regs *regs) { __enter_from_kernel_mode(regs); mte_check_tfsr_entry(); + mte_disable_tco_entry(current); } /* @@ -103,6 +105,7 @@ static __always_inline void __enter_from_user_mode(void) CT_WARN_ON(ct_state() != CONTEXT_USER); user_exit_irqoff(); trace_hardirqs_off_finish(); + mte_disable_tco_entry(current); } static __always_inline void enter_from_user_mode(struct pt_regs *regs) @@ -220,9 +223,26 @@ static void noinstr arm64_exit_el1_dbg(struct pt_regs *regs) lockdep_hardirqs_on(CALLER_ADDR0); } +#ifdef CONFIG_PREEMPT_DYNAMIC +DEFINE_STATIC_KEY_TRUE(sk_dynamic_irqentry_exit_cond_resched); +#define need_irq_preemption() \ + (static_branch_unlikely(&sk_dynamic_irqentry_exit_cond_resched)) +#else +#define need_irq_preemption() (IS_ENABLED(CONFIG_PREEMPTION)) +#endif + static void __sched arm64_preempt_schedule_irq(void) { - lockdep_assert_irqs_disabled(); + if (!need_irq_preemption()) + return; + + /* + * Note: thread_info::preempt_count includes both thread_info::count + * and thread_info::need_resched, and is not equivalent to + * preempt_count(). + */ + if (READ_ONCE(current_thread_info()->preempt_count) != 0) + return; /* * DAIF.DA are cleared at the start of IRQ/FIQ handling, and when GIC @@ -438,14 +458,7 @@ static __always_inline void __el1_irq(struct pt_regs *regs, do_interrupt_handler(regs, handler); irq_exit_rcu(); - /* - * Note: thread_info::preempt_count includes both thread_info::count - * and thread_info::need_resched, and is not equivalent to - * preempt_count(). - */ - if (IS_ENABLED(CONFIG_PREEMPTION) && - READ_ONCE(current_thread_info()->preempt_count) == 0) - arm64_preempt_schedule_irq(); + arm64_preempt_schedule_irq(); exit_to_kernel_mode(regs); } diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 4a3a653df07e..ede028dee81b 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -307,6 +307,7 @@ alternative_else_nop_endif str w21, [sp, #S_SYSCALLNO] .endif +#ifdef CONFIG_ARM64_PSEUDO_NMI /* Save pmr */ alternative_if ARM64_HAS_IRQ_PRIO_MASKING mrs_s x20, SYS_ICC_PMR_EL1 @@ -314,12 +315,6 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING mov x20, #GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET msr_s SYS_ICC_PMR_EL1, x20 alternative_else_nop_endif - - /* Re-enable tag checking (TCO set on exception entry) */ -#ifdef CONFIG_ARM64_MTE -alternative_if ARM64_MTE - SET_PSTATE_TCO(0) -alternative_else_nop_endif #endif /* @@ -337,6 +332,7 @@ alternative_else_nop_endif disable_daif .endif +#ifdef CONFIG_ARM64_PSEUDO_NMI /* Restore pmr */ alternative_if ARM64_HAS_IRQ_PRIO_MASKING ldr x20, [sp, #S_PMR_SAVE] @@ -346,6 +342,7 @@ alternative_if ARM64_HAS_IRQ_PRIO_MASKING dsb sy // Ensure priority change is seen by redistributor .L__skip_pmr_sync\@: alternative_else_nop_endif +#endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR diff --git a/arch/arm64/kernel/idreg-override.c b/arch/arm64/kernel/idreg-override.c index d8e606fe3c21..8a2ceb591686 100644 --- a/arch/arm64/kernel/idreg-override.c +++ b/arch/arm64/kernel/idreg-override.c @@ -17,7 +17,7 @@ #define FTR_DESC_NAME_LEN 20 #define FTR_DESC_FIELD_LEN 10 #define FTR_ALIAS_NAME_LEN 30 -#define FTR_ALIAS_OPTION_LEN 80 +#define FTR_ALIAS_OPTION_LEN 116 struct ftr_set_desc { char name[FTR_DESC_NAME_LEN]; @@ -71,6 +71,16 @@ static const struct ftr_set_desc isar1 __initconst = { }, }; +static const struct ftr_set_desc isar2 __initconst = { + .name = "id_aa64isar2", + .override = &id_aa64isar2_override, + .fields = { + { "gpa3", ID_AA64ISAR2_GPA3_SHIFT }, + { "apa3", ID_AA64ISAR2_APA3_SHIFT }, + {} + }, +}; + extern struct arm64_ftr_override kaslr_feature_override; static const struct ftr_set_desc kaslr __initconst = { @@ -88,6 +98,7 @@ static const struct ftr_set_desc * const regs[] __initconst = { &mmfr1, &pfr1, &isar1, + &isar2, &kaslr, }; @@ -100,7 +111,8 @@ static const struct { { "arm64.nobti", "id_aa64pfr1.bt=0" }, { "arm64.nopauth", "id_aa64isar1.gpi=0 id_aa64isar1.gpa=0 " - "id_aa64isar1.api=0 id_aa64isar1.apa=0" }, + "id_aa64isar1.api=0 id_aa64isar1.apa=0 " + "id_aa64isar2.gpa3=0 id_aa64isar2.apa3=0" }, { "arm64.nomte", "id_aa64pfr1.mte=0" }, { "nokaslr", "kaslr.disabled=1" }, }; diff --git a/arch/arm64/kernel/mte.c b/arch/arm64/kernel/mte.c index f418ebc65f95..78b3e0f8e997 100644 --- a/arch/arm64/kernel/mte.c +++ b/arch/arm64/kernel/mte.c @@ -186,6 +186,11 @@ void mte_check_tfsr_el1(void) } #endif +/* + * This is where we actually resolve the system and process MTE mode + * configuration into an actual value in SCTLR_EL1 that affects + * userspace. + */ static void mte_update_sctlr_user(struct task_struct *task) { /* @@ -199,9 +204,20 @@ static void mte_update_sctlr_user(struct task_struct *task) unsigned long pref, resolved_mte_tcf; pref = __this_cpu_read(mte_tcf_preferred); + /* + * If there is no overlap between the system preferred and + * program requested values go with what was requested. + */ resolved_mte_tcf = (mte_ctrl & pref) ? pref : mte_ctrl; sctlr &= ~SCTLR_EL1_TCF0_MASK; - if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC) + /* + * Pick an actual setting. The order in which we check for + * set bits and map into register values determines our + * default order. + */ + if (resolved_mte_tcf & MTE_CTRL_TCF_ASYMM) + sctlr |= SCTLR_EL1_TCF0_ASYMM; + else if (resolved_mte_tcf & MTE_CTRL_TCF_ASYNC) sctlr |= SCTLR_EL1_TCF0_ASYNC; else if (resolved_mte_tcf & MTE_CTRL_TCF_SYNC) sctlr |= SCTLR_EL1_TCF0_SYNC; @@ -253,6 +269,9 @@ void mte_thread_switch(struct task_struct *next) mte_update_sctlr_user(next); mte_update_gcr_excl(next); + /* TCO may not have been disabled on exception entry for the current task. */ + mte_disable_tco_entry(next); + /* * Check if an async tag exception occurred at EL1. * @@ -293,6 +312,17 @@ long set_mte_ctrl(struct task_struct *task, unsigned long arg) if (arg & PR_MTE_TCF_SYNC) mte_ctrl |= MTE_CTRL_TCF_SYNC; + /* + * If the system supports it and both sync and async modes are + * specified then implicitly enable asymmetric mode. + * Userspace could see a mix of both sync and async anyway due + * to differing or changing defaults on CPUs. + */ + if (cpus_have_cap(ARM64_MTE_ASYMM) && + (arg & PR_MTE_TCF_ASYNC) && + (arg & PR_MTE_TCF_SYNC)) + mte_ctrl |= MTE_CTRL_TCF_ASYMM; + task->thread.mte_ctrl = mte_ctrl; if (task == current) { preempt_disable(); @@ -467,6 +497,8 @@ static ssize_t mte_tcf_preferred_show(struct device *dev, return sysfs_emit(buf, "async\n"); case MTE_CTRL_TCF_SYNC: return sysfs_emit(buf, "sync\n"); + case MTE_CTRL_TCF_ASYMM: + return sysfs_emit(buf, "asymm\n"); default: return sysfs_emit(buf, "???\n"); } @@ -482,6 +514,8 @@ static ssize_t mte_tcf_preferred_store(struct device *dev, tcf = MTE_CTRL_TCF_ASYNC; else if (sysfs_streq(buf, "sync")) tcf = MTE_CTRL_TCF_SYNC; + else if (cpus_have_cap(ARM64_MTE_ASYMM) && sysfs_streq(buf, "asymm")) + tcf = MTE_CTRL_TCF_ASYMM; else return -EINVAL; diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index cab678ed6618..cb69ff1e6138 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -242,6 +242,16 @@ static struct attribute *armv8_pmuv3_event_attrs[] = { ARMV8_EVENT_ATTR(l2d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L2D_CACHE_LMISS_RD), ARMV8_EVENT_ATTR(l2i_cache_lmiss, ARMV8_PMUV3_PERFCTR_L2I_CACHE_LMISS), ARMV8_EVENT_ATTR(l3d_cache_lmiss_rd, ARMV8_PMUV3_PERFCTR_L3D_CACHE_LMISS_RD), + ARMV8_EVENT_ATTR(trb_wrap, ARMV8_PMUV3_PERFCTR_TRB_WRAP), + ARMV8_EVENT_ATTR(trb_trig, ARMV8_PMUV3_PERFCTR_TRB_TRIG), + ARMV8_EVENT_ATTR(trcextout0, ARMV8_PMUV3_PERFCTR_TRCEXTOUT0), + ARMV8_EVENT_ATTR(trcextout1, ARMV8_PMUV3_PERFCTR_TRCEXTOUT1), + ARMV8_EVENT_ATTR(trcextout2, ARMV8_PMUV3_PERFCTR_TRCEXTOUT2), + ARMV8_EVENT_ATTR(trcextout3, ARMV8_PMUV3_PERFCTR_TRCEXTOUT3), + ARMV8_EVENT_ATTR(cti_trigout4, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT4), + ARMV8_EVENT_ATTR(cti_trigout5, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT5), + ARMV8_EVENT_ATTR(cti_trigout6, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT6), + ARMV8_EVENT_ATTR(cti_trigout7, ARMV8_PMUV3_PERFCTR_CTI_TRIGOUT7), ARMV8_EVENT_ATTR(ldst_align_lat, ARMV8_PMUV3_PERFCTR_LDST_ALIGN_LAT), ARMV8_EVENT_ATTR(ld_align_lat, ARMV8_PMUV3_PERFCTR_LD_ALIGN_LAT), ARMV8_EVENT_ATTR(st_align_lat, ARMV8_PMUV3_PERFCTR_ST_ALIGN_LAT), diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 5369e649fa79..7fa97df55e3a 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -635,7 +635,8 @@ long set_tagged_addr_ctrl(struct task_struct *task, unsigned long arg) return -EINVAL; if (system_supports_mte()) - valid_mask |= PR_MTE_TCF_MASK | PR_MTE_TAG_MASK; + valid_mask |= PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC \ + | PR_MTE_TAG_MASK; if (arg & ~valid_mask) return -EINVAL; diff --git a/arch/arm64/kernel/proton-pack.c b/arch/arm64/kernel/proton-pack.c index 6d45c63c6454..5777929d35bf 100644 --- a/arch/arm64/kernel/proton-pack.c +++ b/arch/arm64/kernel/proton-pack.c @@ -233,17 +233,20 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn) __this_cpu_write(bp_hardening_data.slot, HYP_VECTOR_SPECTRE_DIRECT); } -static void call_smc_arch_workaround_1(void) +/* Called during entry so must be noinstr */ +static noinstr void call_smc_arch_workaround_1(void) { arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static void call_hvc_arch_workaround_1(void) +/* Called during entry so must be noinstr */ +static noinstr void call_hvc_arch_workaround_1(void) { arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } -static void qcom_link_stack_sanitisation(void) +/* Called during entry so must be noinstr */ +static noinstr void qcom_link_stack_sanitisation(void) { u64 tmp; diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index d8aaf4b6f432..50fe8eaf7df0 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -11,7 +11,6 @@ #include <linux/errno.h> #include <linux/kernel.h> #include <linux/signal.h> -#include <linux/personality.h> #include <linux/freezer.h> #include <linux/stddef.h> #include <linux/uaccess.h> @@ -577,10 +576,12 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, { int err; - err = sigframe_alloc(user, &user->fpsimd_offset, - sizeof(struct fpsimd_context)); - if (err) - return err; + if (system_supports_fpsimd()) { + err = sigframe_alloc(user, &user->fpsimd_offset, + sizeof(struct fpsimd_context)); + if (err) + return err; + } /* fault information, if valid */ if (add_all || current->thread.fault_code) { diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index db5159a3055f..12c6864e51e1 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -9,7 +9,6 @@ #include <linux/compat.h> #include <linux/cpufeature.h> -#include <linux/personality.h> #include <linux/sched.h> #include <linux/sched/signal.h> #include <linux/slab.h> diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 70fc42470f13..bb878f52ca0a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -9,7 +9,6 @@ #include <linux/bug.h> #include <linux/context_tracking.h> #include <linux/signal.h> -#include <linux/personality.h> #include <linux/kallsyms.h> #include <linux/kprobes.h> #include <linux/spinlock.h> diff --git a/arch/arm64/kvm/arm.c b/arch/arm64/kvm/arm.c index 4dca6ffd03d4..946e401ef6b0 100644 --- a/arch/arm64/kvm/arm.c +++ b/arch/arm64/kvm/arm.c @@ -1867,6 +1867,7 @@ static int kvm_hyp_init_protection(u32 hyp_va_bits) kvm_nvhe_sym(id_aa64pfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64PFR1_EL1); kvm_nvhe_sym(id_aa64isar0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR0_EL1); kvm_nvhe_sym(id_aa64isar1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR1_EL1); + kvm_nvhe_sym(id_aa64isar2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64ISAR2_EL1); kvm_nvhe_sym(id_aa64mmfr0_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1); kvm_nvhe_sym(id_aa64mmfr1_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR1_EL1); kvm_nvhe_sym(id_aa64mmfr2_el1_sys_val) = read_sanitised_ftr_reg(SYS_ID_AA64MMFR2_EL1); diff --git a/arch/arm64/kvm/hyp/include/hyp/switch.h b/arch/arm64/kvm/hyp/include/hyp/switch.h index 701cfb964905..6379a1e3e6e5 100644 --- a/arch/arm64/kvm/hyp/include/hyp/switch.h +++ b/arch/arm64/kvm/hyp/include/hyp/switch.h @@ -174,9 +174,9 @@ static bool kvm_hyp_handle_fpsimd(struct kvm_vcpu *vcpu, u64 *exit_code) /* Valid trap. Switch the context: */ if (has_vhe()) { - reg = CPACR_EL1_FPEN; + reg = CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN; if (sve_guest) - reg |= CPACR_EL1_ZEN; + reg |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; sysreg_clear_set(cpacr_el1, 0, reg); } else { diff --git a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h index eea1f6a53723..5ad626527d41 100644 --- a/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h +++ b/arch/arm64/kvm/hyp/include/nvhe/fixed_config.h @@ -192,6 +192,11 @@ ARM64_FEATURE_MASK(ID_AA64ISAR1_I8MM) \ ) +#define PVM_ID_AA64ISAR2_ALLOW (\ + ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3) | \ + ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) \ + ) + u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id); bool kvm_handle_pvm_sysreg(struct kvm_vcpu *vcpu, u64 *exit_code); bool kvm_handle_pvm_restricted(struct kvm_vcpu *vcpu, u64 *exit_code); diff --git a/arch/arm64/kvm/hyp/nvhe/cache.S b/arch/arm64/kvm/hyp/nvhe/cache.S index 958734f4d6b0..0c367eb5f4e2 100644 --- a/arch/arm64/kvm/hyp/nvhe/cache.S +++ b/arch/arm64/kvm/hyp/nvhe/cache.S @@ -7,7 +7,8 @@ #include <asm/assembler.h> #include <asm/alternative.h> -SYM_FUNC_START_PI(dcache_clean_inval_poc) +SYM_FUNC_START(__pi_dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_inval_poc) +SYM_FUNC_END(__pi_dcache_clean_inval_poc) +SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc) diff --git a/arch/arm64/kvm/hyp/nvhe/sys_regs.c b/arch/arm64/kvm/hyp/nvhe/sys_regs.c index 792cf6e6ac92..33f5181af330 100644 --- a/arch/arm64/kvm/hyp/nvhe/sys_regs.c +++ b/arch/arm64/kvm/hyp/nvhe/sys_regs.c @@ -22,6 +22,7 @@ u64 id_aa64pfr0_el1_sys_val; u64 id_aa64pfr1_el1_sys_val; u64 id_aa64isar0_el1_sys_val; u64 id_aa64isar1_el1_sys_val; +u64 id_aa64isar2_el1_sys_val; u64 id_aa64mmfr0_el1_sys_val; u64 id_aa64mmfr1_el1_sys_val; u64 id_aa64mmfr2_el1_sys_val; @@ -183,6 +184,17 @@ static u64 get_pvm_id_aa64isar1(const struct kvm_vcpu *vcpu) return id_aa64isar1_el1_sys_val & allow_mask; } +static u64 get_pvm_id_aa64isar2(const struct kvm_vcpu *vcpu) +{ + u64 allow_mask = PVM_ID_AA64ISAR2_ALLOW; + + if (!vcpu_has_ptrauth(vcpu)) + allow_mask &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) | + ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3)); + + return id_aa64isar2_el1_sys_val & allow_mask; +} + static u64 get_pvm_id_aa64mmfr0(const struct kvm_vcpu *vcpu) { u64 set_mask; @@ -225,6 +237,8 @@ u64 pvm_read_id_reg(const struct kvm_vcpu *vcpu, u32 id) return get_pvm_id_aa64isar0(vcpu); case SYS_ID_AA64ISAR1_EL1: return get_pvm_id_aa64isar1(vcpu); + case SYS_ID_AA64ISAR2_EL1: + return get_pvm_id_aa64isar2(vcpu); case SYS_ID_AA64MMFR0_EL1: return get_pvm_id_aa64mmfr0(vcpu); case SYS_ID_AA64MMFR1_EL1: diff --git a/arch/arm64/kvm/hyp/vhe/switch.c b/arch/arm64/kvm/hyp/vhe/switch.c index 54af47005e45..262dfe03134d 100644 --- a/arch/arm64/kvm/hyp/vhe/switch.c +++ b/arch/arm64/kvm/hyp/vhe/switch.c @@ -41,7 +41,7 @@ static void __activate_traps(struct kvm_vcpu *vcpu) val = read_sysreg(cpacr_el1); val |= CPACR_EL1_TTA; - val &= ~CPACR_EL1_ZEN; + val &= ~(CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN); /* * With VHE (HCR.E2H == 1), accesses to CPACR_EL1 are routed to @@ -56,9 +56,9 @@ static void __activate_traps(struct kvm_vcpu *vcpu) if (update_fp_enabled(vcpu)) { if (vcpu_has_sve(vcpu)) - val |= CPACR_EL1_ZEN; + val |= CPACR_EL1_ZEN_EL0EN | CPACR_EL1_ZEN_EL1EN; } else { - val &= ~CPACR_EL1_FPEN; + val &= ~(CPACR_EL1_FPEN_EL0EN | CPACR_EL1_FPEN_EL1EN); __activate_traps_fpsimd32(vcpu); } diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 4dc2fba316ff..baa65292bbc2 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -1097,6 +1097,11 @@ static u64 read_id_reg(const struct kvm_vcpu *vcpu, ARM64_FEATURE_MASK(ID_AA64ISAR1_GPA) | ARM64_FEATURE_MASK(ID_AA64ISAR1_GPI)); break; + case SYS_ID_AA64ISAR2_EL1: + if (!vcpu_has_ptrauth(vcpu)) + val &= ~(ARM64_FEATURE_MASK(ID_AA64ISAR2_APA3) | + ARM64_FEATURE_MASK(ID_AA64ISAR2_GPA3)); + break; case SYS_ID_AA64DFR0_EL1: /* Limit debug to ARMv8.0 */ val &= ~ARM64_FEATURE_MASK(ID_AA64DFR0_DEBUGVER); diff --git a/arch/arm64/lib/clear_page.S b/arch/arm64/lib/clear_page.S index 1fd5d790ab80..ebde40e7fa2b 100644 --- a/arch/arm64/lib/clear_page.S +++ b/arch/arm64/lib/clear_page.S @@ -14,7 +14,7 @@ * Parameters: * x0 - dest */ -SYM_FUNC_START_PI(clear_page) +SYM_FUNC_START(__pi_clear_page) mrs x1, dczid_el0 tbnz x1, #4, 2f /* Branch if DC ZVA is prohibited */ and w1, w1, #0xf @@ -35,5 +35,6 @@ SYM_FUNC_START_PI(clear_page) tst x0, #(PAGE_SIZE - 1) b.ne 2b ret -SYM_FUNC_END_PI(clear_page) +SYM_FUNC_END(__pi_clear_page) +SYM_FUNC_ALIAS(clear_page, __pi_clear_page) EXPORT_SYMBOL(clear_page) diff --git a/arch/arm64/lib/copy_page.S b/arch/arm64/lib/copy_page.S index 29144f4cd449..c336d2ffdec5 100644 --- a/arch/arm64/lib/copy_page.S +++ b/arch/arm64/lib/copy_page.S @@ -17,7 +17,7 @@ * x0 - dest * x1 - src */ -SYM_FUNC_START_PI(copy_page) +SYM_FUNC_START(__pi_copy_page) alternative_if ARM64_HAS_NO_HW_PREFETCH // Prefetch three cache lines ahead. prfm pldl1strm, [x1, #128] @@ -75,5 +75,6 @@ alternative_else_nop_endif stnp x16, x17, [x0, #112 - 256] ret -SYM_FUNC_END_PI(copy_page) +SYM_FUNC_END(__pi_copy_page) +SYM_FUNC_ALIAS(copy_page, __pi_copy_page) EXPORT_SYMBOL(copy_page) diff --git a/arch/arm64/lib/crc32.S b/arch/arm64/lib/crc32.S index 0f9e10ecda23..8340dccff46f 100644 --- a/arch/arm64/lib/crc32.S +++ b/arch/arm64/lib/crc32.S @@ -11,7 +11,44 @@ .arch armv8-a+crc - .macro __crc32, c + .macro byteorder, reg, be + .if \be +CPU_LE( rev \reg, \reg ) + .else +CPU_BE( rev \reg, \reg ) + .endif + .endm + + .macro byteorder16, reg, be + .if \be +CPU_LE( rev16 \reg, \reg ) + .else +CPU_BE( rev16 \reg, \reg ) + .endif + .endm + + .macro bitorder, reg, be + .if \be + rbit \reg, \reg + .endif + .endm + + .macro bitorder16, reg, be + .if \be + rbit \reg, \reg + lsr \reg, \reg, #16 + .endif + .endm + + .macro bitorder8, reg, be + .if \be + rbit \reg, \reg + lsr \reg, \reg, #24 + .endif + .endm + + .macro __crc32, c, be=0 + bitorder w0, \be cmp x2, #16 b.lt 8f // less than 16 bytes @@ -24,10 +61,14 @@ add x8, x8, x1 add x1, x1, x7 ldp x5, x6, [x8] -CPU_BE( rev x3, x3 ) -CPU_BE( rev x4, x4 ) -CPU_BE( rev x5, x5 ) -CPU_BE( rev x6, x6 ) + byteorder x3, \be + byteorder x4, \be + byteorder x5, \be + byteorder x6, \be + bitorder x3, \be + bitorder x4, \be + bitorder x5, \be + bitorder x6, \be tst x7, #8 crc32\c\()x w8, w0, x3 @@ -55,33 +96,43 @@ CPU_BE( rev x6, x6 ) 32: ldp x3, x4, [x1], #32 sub x2, x2, #32 ldp x5, x6, [x1, #-16] -CPU_BE( rev x3, x3 ) -CPU_BE( rev x4, x4 ) -CPU_BE( rev x5, x5 ) -CPU_BE( rev x6, x6 ) + byteorder x3, \be + byteorder x4, \be + byteorder x5, \be + byteorder x6, \be + bitorder x3, \be + bitorder x4, \be + bitorder x5, \be + bitorder x6, \be crc32\c\()x w0, w0, x3 crc32\c\()x w0, w0, x4 crc32\c\()x w0, w0, x5 crc32\c\()x w0, w0, x6 cbnz x2, 32b -0: ret +0: bitorder w0, \be + ret 8: tbz x2, #3, 4f ldr x3, [x1], #8 -CPU_BE( rev x3, x3 ) + byteorder x3, \be + bitorder x3, \be crc32\c\()x w0, w0, x3 4: tbz x2, #2, 2f ldr w3, [x1], #4 -CPU_BE( rev w3, w3 ) + byteorder w3, \be + bitorder w3, \be crc32\c\()w w0, w0, w3 2: tbz x2, #1, 1f ldrh w3, [x1], #2 -CPU_BE( rev16 w3, w3 ) + byteorder16 w3, \be + bitorder16 w3, \be crc32\c\()h w0, w0, w3 1: tbz x2, #0, 0f ldrb w3, [x1] + bitorder8 w3, \be crc32\c\()b w0, w0, w3 -0: ret +0: bitorder w0, \be + ret .endm .align 5 @@ -99,3 +150,11 @@ alternative_if_not ARM64_HAS_CRC32 alternative_else_nop_endif __crc32 c SYM_FUNC_END(__crc32c_le) + + .align 5 +SYM_FUNC_START(crc32_be) +alternative_if_not ARM64_HAS_CRC32 + b crc32_be_base +alternative_else_nop_endif + __crc32 be=1 +SYM_FUNC_END(crc32_be) diff --git a/arch/arm64/lib/insn.c b/arch/arm64/lib/insn.c index fccfe363e567..5e90887deec4 100644 --- a/arch/arm64/lib/insn.c +++ b/arch/arm64/lib/insn.c @@ -578,10 +578,16 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, switch (type) { case AARCH64_INSN_LDST_LOAD_EX: + case AARCH64_INSN_LDST_LOAD_ACQ_EX: insn = aarch64_insn_get_load_ex_value(); + if (type == AARCH64_INSN_LDST_LOAD_ACQ_EX) + insn |= BIT(15); break; case AARCH64_INSN_LDST_STORE_EX: + case AARCH64_INSN_LDST_STORE_REL_EX: insn = aarch64_insn_get_store_ex_value(); + if (type == AARCH64_INSN_LDST_STORE_REL_EX) + insn |= BIT(15); break; default: pr_err("%s: unknown load/store exclusive encoding %d\n", __func__, type); @@ -603,12 +609,65 @@ u32 aarch64_insn_gen_load_store_ex(enum aarch64_insn_register reg, state); } -u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, - enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size) +#ifdef CONFIG_ARM64_LSE_ATOMICS +static u32 aarch64_insn_encode_ldst_order(enum aarch64_insn_mem_order_type type, + u32 insn) { - u32 insn = aarch64_insn_get_ldadd_value(); + u32 order; + + switch (type) { + case AARCH64_INSN_MEM_ORDER_NONE: + order = 0; + break; + case AARCH64_INSN_MEM_ORDER_ACQ: + order = 2; + break; + case AARCH64_INSN_MEM_ORDER_REL: + order = 1; + break; + case AARCH64_INSN_MEM_ORDER_ACQREL: + order = 3; + break; + default: + pr_err("%s: unknown mem order %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn &= ~GENMASK(23, 22); + insn |= order << 22; + + return insn; +} + +u32 aarch64_insn_gen_atomic_ld_op(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_atomic_op op, + enum aarch64_insn_mem_order_type order) +{ + u32 insn; + + switch (op) { + case AARCH64_INSN_MEM_ATOMIC_ADD: + insn = aarch64_insn_get_ldadd_value(); + break; + case AARCH64_INSN_MEM_ATOMIC_CLR: + insn = aarch64_insn_get_ldclr_value(); + break; + case AARCH64_INSN_MEM_ATOMIC_EOR: + insn = aarch64_insn_get_ldeor_value(); + break; + case AARCH64_INSN_MEM_ATOMIC_SET: + insn = aarch64_insn_get_ldset_value(); + break; + case AARCH64_INSN_MEM_ATOMIC_SWP: + insn = aarch64_insn_get_swp_value(); + break; + default: + pr_err("%s: unimplemented mem atomic op %d\n", __func__, op); + return AARCH64_BREAK_FAULT; + } switch (size) { case AARCH64_INSN_SIZE_32: @@ -621,6 +680,8 @@ u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, insn = aarch64_insn_encode_ldst_size(size, insn); + insn = aarch64_insn_encode_ldst_order(order, insn); + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, result); @@ -631,17 +692,68 @@ u32 aarch64_insn_gen_ldadd(enum aarch64_insn_register result, value); } -u32 aarch64_insn_gen_stadd(enum aarch64_insn_register address, - enum aarch64_insn_register value, - enum aarch64_insn_size_type size) +static u32 aarch64_insn_encode_cas_order(enum aarch64_insn_mem_order_type type, + u32 insn) { - /* - * STADD is simply encoded as an alias for LDADD with XZR as - * the destination register. - */ - return aarch64_insn_gen_ldadd(AARCH64_INSN_REG_ZR, address, - value, size); + u32 order; + + switch (type) { + case AARCH64_INSN_MEM_ORDER_NONE: + order = 0; + break; + case AARCH64_INSN_MEM_ORDER_ACQ: + order = BIT(22); + break; + case AARCH64_INSN_MEM_ORDER_REL: + order = BIT(15); + break; + case AARCH64_INSN_MEM_ORDER_ACQREL: + order = BIT(15) | BIT(22); + break; + default: + pr_err("%s: unknown mem order %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn &= ~(BIT(15) | BIT(22)); + insn |= order; + + return insn; +} + +u32 aarch64_insn_gen_cas(enum aarch64_insn_register result, + enum aarch64_insn_register address, + enum aarch64_insn_register value, + enum aarch64_insn_size_type size, + enum aarch64_insn_mem_order_type order) +{ + u32 insn; + + switch (size) { + case AARCH64_INSN_SIZE_32: + case AARCH64_INSN_SIZE_64: + break; + default: + pr_err("%s: unimplemented size encoding %d\n", __func__, size); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_get_cas_value(); + + insn = aarch64_insn_encode_ldst_size(size, insn); + + insn = aarch64_insn_encode_cas_order(order, insn); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RT, insn, + result); + + insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, + address); + + return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RS, insn, + value); } +#endif static u32 aarch64_insn_encode_prfm_imm(enum aarch64_insn_prfm_type type, enum aarch64_insn_prfm_target target, @@ -1379,7 +1491,7 @@ static u32 aarch64_encode_immediate(u64 imm, * Compute the rotation to get a continuous set of * ones, with the first bit set at position 0 */ - ror = fls(~imm); + ror = fls64(~imm); } /* @@ -1456,3 +1568,48 @@ u32 aarch64_insn_gen_extr(enum aarch64_insn_variant variant, insn = aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RN, insn, Rn); return aarch64_insn_encode_register(AARCH64_INSN_REGTYPE_RM, insn, Rm); } + +u32 aarch64_insn_gen_dmb(enum aarch64_insn_mb_type type) +{ + u32 opt; + u32 insn; + + switch (type) { + case AARCH64_INSN_MB_SY: + opt = 0xf; + break; + case AARCH64_INSN_MB_ST: + opt = 0xe; + break; + case AARCH64_INSN_MB_LD: + opt = 0xd; + break; + case AARCH64_INSN_MB_ISH: + opt = 0xb; + break; + case AARCH64_INSN_MB_ISHST: + opt = 0xa; + break; + case AARCH64_INSN_MB_ISHLD: + opt = 0x9; + break; + case AARCH64_INSN_MB_NSH: + opt = 0x7; + break; + case AARCH64_INSN_MB_NSHST: + opt = 0x6; + break; + case AARCH64_INSN_MB_NSHLD: + opt = 0x5; + break; + default: + pr_err("%s: unknown dmb type %d\n", __func__, type); + return AARCH64_BREAK_FAULT; + } + + insn = aarch64_insn_get_dmb_value(); + insn &= ~GENMASK(11, 8); + insn |= (opt << 8); + + return insn; +} diff --git a/arch/arm64/lib/memchr.S b/arch/arm64/lib/memchr.S index 7c2276fdab54..37a9f2a4f7f4 100644 --- a/arch/arm64/lib/memchr.S +++ b/arch/arm64/lib/memchr.S @@ -38,7 +38,7 @@ .p2align 4 nop -SYM_FUNC_START_WEAK_PI(memchr) +SYM_FUNC_START(__pi_memchr) and chrin, chrin, #0xff lsr wordcnt, cntin, #3 cbz wordcnt, L(byte_loop) @@ -71,5 +71,6 @@ CPU_LE( rev tmp, tmp) L(not_found): mov result, #0 ret -SYM_FUNC_END_PI(memchr) +SYM_FUNC_END(__pi_memchr) +SYM_FUNC_ALIAS_WEAK(memchr, __pi_memchr) EXPORT_SYMBOL_NOKASAN(memchr) diff --git a/arch/arm64/lib/memcmp.S b/arch/arm64/lib/memcmp.S index 7d956384222f..a5ccf2c55f91 100644 --- a/arch/arm64/lib/memcmp.S +++ b/arch/arm64/lib/memcmp.S @@ -32,7 +32,7 @@ #define tmp1 x7 #define tmp2 x8 -SYM_FUNC_START_WEAK_PI(memcmp) +SYM_FUNC_START(__pi_memcmp) subs limit, limit, 8 b.lo L(less8) @@ -134,6 +134,6 @@ L(byte_loop): b.eq L(byte_loop) sub result, data1w, data2w ret - -SYM_FUNC_END_PI(memcmp) +SYM_FUNC_END(__pi_memcmp) +SYM_FUNC_ALIAS_WEAK(memcmp, __pi_memcmp) EXPORT_SYMBOL_NOKASAN(memcmp) diff --git a/arch/arm64/lib/memcpy.S b/arch/arm64/lib/memcpy.S index b82fd64ee1e1..4ab48d49c451 100644 --- a/arch/arm64/lib/memcpy.S +++ b/arch/arm64/lib/memcpy.S @@ -57,10 +57,7 @@ The loop tail is handled by always copying 64 bytes from the end. */ -SYM_FUNC_START_ALIAS(__memmove) -SYM_FUNC_START_WEAK_ALIAS_PI(memmove) -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_WEAK_PI(memcpy) +SYM_FUNC_START(__pi_memcpy) add srcend, src, count add dstend, dstin, count cmp count, 128 @@ -241,12 +238,16 @@ L(copy64_from_start): stp B_l, B_h, [dstin, 16] stp C_l, C_h, [dstin] ret +SYM_FUNC_END(__pi_memcpy) -SYM_FUNC_END_PI(memcpy) -EXPORT_SYMBOL(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) +SYM_FUNC_ALIAS(__memcpy, __pi_memcpy) EXPORT_SYMBOL(__memcpy) -SYM_FUNC_END_ALIAS_PI(memmove) -EXPORT_SYMBOL(memmove) -SYM_FUNC_END_ALIAS(__memmove) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) +EXPORT_SYMBOL(memcpy) + +SYM_FUNC_ALIAS(__pi_memmove, __pi_memcpy) + +SYM_FUNC_ALIAS(__memmove, __pi_memmove) EXPORT_SYMBOL(__memmove) +SYM_FUNC_ALIAS_WEAK(memmove, __memmove) +EXPORT_SYMBOL(memmove) diff --git a/arch/arm64/lib/memset.S b/arch/arm64/lib/memset.S index a9c1c9a01ea9..a5aebe82ad73 100644 --- a/arch/arm64/lib/memset.S +++ b/arch/arm64/lib/memset.S @@ -42,8 +42,7 @@ dst .req x8 tmp3w .req w9 tmp3 .req x9 -SYM_FUNC_START_ALIAS(__memset) -SYM_FUNC_START_WEAK_PI(memset) +SYM_FUNC_START(__pi_memset) mov dst, dstin /* Preserve return value. */ and A_lw, val, #255 orr A_lw, A_lw, A_lw, lsl #8 @@ -202,7 +201,10 @@ SYM_FUNC_START_WEAK_PI(memset) ands count, count, zva_bits_x b.ne .Ltail_maybe_long ret -SYM_FUNC_END_PI(memset) -EXPORT_SYMBOL(memset) -SYM_FUNC_END_ALIAS(__memset) +SYM_FUNC_END(__pi_memset) + +SYM_FUNC_ALIAS(__memset, __pi_memset) EXPORT_SYMBOL(__memset) + +SYM_FUNC_ALIAS_WEAK(memset, __pi_memset) +EXPORT_SYMBOL(memset) diff --git a/arch/arm64/lib/mte.S b/arch/arm64/lib/mte.S index f531dcb95174..8590af3c98c0 100644 --- a/arch/arm64/lib/mte.S +++ b/arch/arm64/lib/mte.S @@ -134,7 +134,7 @@ SYM_FUNC_END(mte_copy_tags_to_user) /* * Save the tags in a page * x0 - page address - * x1 - tag storage + * x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes */ SYM_FUNC_START(mte_save_page_tags) multitag_transfer_size x7, x5 @@ -158,7 +158,7 @@ SYM_FUNC_END(mte_save_page_tags) /* * Restore the tags in a page * x0 - page address - * x1 - tag storage + * x1 - tag storage, MTE_PAGE_TAG_STORAGE bytes */ SYM_FUNC_START(mte_restore_page_tags) multitag_transfer_size x7, x5 diff --git a/arch/arm64/lib/strchr.S b/arch/arm64/lib/strchr.S index 1f47eae3b0d6..94ee67a6b212 100644 --- a/arch/arm64/lib/strchr.S +++ b/arch/arm64/lib/strchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of first occurrence of 'c' or 0 */ -SYM_FUNC_START_WEAK(strchr) +SYM_FUNC_START(__pi_strchr) and w1, w1, #0xff 1: ldrb w2, [x0], #1 cmp w2, w1 @@ -28,5 +28,7 @@ SYM_FUNC_START_WEAK(strchr) cmp w2, w1 csel x0, x0, xzr, eq ret -SYM_FUNC_END(strchr) +SYM_FUNC_END(__pi_strchr) + +SYM_FUNC_ALIAS_WEAK(strchr, __pi_strchr) EXPORT_SYMBOL_NOKASAN(strchr) diff --git a/arch/arm64/lib/strcmp.S b/arch/arm64/lib/strcmp.S index 83bcad72ec97..9b89b4533607 100644 --- a/arch/arm64/lib/strcmp.S +++ b/arch/arm64/lib/strcmp.S @@ -1,9 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2012-2021, Arm Limited. + * Copyright (c) 2012-2022, Arm Limited. * * Adapted from the original at: - * https://github.com/ARM-software/optimized-routines/blob/afd6244a1f8d9229/string/aarch64/strcmp.S + * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strcmp.S */ #include <linux/linkage.h> @@ -11,166 +11,180 @@ /* Assumptions: * - * ARMv8-a, AArch64 + * ARMv8-a, AArch64. + * MTE compatible. */ #define L(label) .L ## label #define REP8_01 0x0101010101010101 #define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 -/* Parameters and result. */ #define src1 x0 #define src2 x1 #define result x0 -/* Internal variables. */ #define data1 x2 #define data1w w2 #define data2 x3 #define data2w w3 #define has_nul x4 #define diff x5 +#define off1 x5 #define syndrome x6 -#define tmp1 x7 -#define tmp2 x8 -#define tmp3 x9 -#define zeroones x10 -#define pos x11 - - /* Start of performance-critical section -- one 64B cache line. */ - .align 6 -SYM_FUNC_START_WEAK_PI(strcmp) - eor tmp1, src1, src2 - mov zeroones, #REP8_01 - tst tmp1, #7 +#define tmp x6 +#define data3 x7 +#define zeroones x8 +#define shift x9 +#define off2 x10 + +/* On big-endian early bytes are at MSB and on little-endian LSB. + LS_FW means shifting towards early bytes. */ +#ifdef __AARCH64EB__ +# define LS_FW lsl +#else +# define LS_FW lsr +#endif + +/* NUL detection works on the principle that (X - 1) & (~X) & 0x80 + (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and + can be done in parallel across the entire word. + Since carry propagation makes 0x1 bytes before a NUL byte appear + NUL too in big-endian, byte-reverse the data before the NUL check. */ + + +SYM_FUNC_START(__pi_strcmp) + sub off2, src2, src1 + mov zeroones, REP8_01 + and tmp, src1, 7 + tst off2, 7 b.ne L(misaligned8) - ands tmp1, src1, #7 - b.ne L(mutual_align) - /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 - (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and - can be done in parallel across the entire word. */ + cbnz tmp, L(mutual_align) + + .p2align 4 + L(loop_aligned): - ldr data1, [src1], #8 - ldr data2, [src2], #8 + ldr data2, [src1, off2] + ldr data1, [src1], 8 L(start_realigned): - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ +#ifdef __AARCH64EB__ + rev tmp, data1 + sub has_nul, tmp, zeroones + orr tmp, tmp, REP8_7f +#else + sub has_nul, data1, zeroones + orr tmp, data1, REP8_7f +#endif + bics has_nul, has_nul, tmp /* Non-zero if NUL terminator. */ + ccmp data1, data2, 0, eq + b.eq L(loop_aligned) +#ifdef __AARCH64EB__ + rev has_nul, has_nul +#endif + eor diff, data1, data2 orr syndrome, diff, has_nul - cbz syndrome, L(loop_aligned) - /* End of performance-critical section -- one 64B cache line. */ - L(end): -#ifndef __AARCH64EB__ +#ifndef __AARCH64EB__ rev syndrome, syndrome rev data1, data1 - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. - Shifting left now will bring the critical information into the - top bits. */ - clz pos, syndrome rev data2, data2 - lsl data1, data1, pos - lsl data2, data2, pos - /* But we need to zero-extend (char is unsigned) the value and then - perform a signed 32-bit subtraction. */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 - ret -#else - /* For big-endian we cannot use the trick with the syndrome value - as carry-propagation can corrupt the upper bits if the trailing - bytes in the string contain 0x01. */ - /* However, if there is no NUL byte in the dword, we can generate - the result directly. We can't just subtract the bytes as the - MSB might be significant. */ - cbnz has_nul, 1f - cmp data1, data2 - cset result, ne - cneg result, result, lo - ret -1: - /* Re-compute the NUL-byte detection, using a byte-reversed value. */ - rev tmp3, data1 - sub tmp1, tmp3, zeroones - orr tmp2, tmp3, #REP8_7f - bic has_nul, tmp1, tmp2 - rev has_nul, has_nul - orr syndrome, diff, has_nul - clz pos, syndrome - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. +#endif + clz shift, syndrome + /* The most-significant-non-zero bit of the syndrome marks either the + first bit that is different, or the top bit of the first zero byte. Shifting left now will bring the critical information into the top bits. */ - lsl data1, data1, pos - lsl data2, data2, pos + lsl data1, data1, shift + lsl data2, data2, shift /* But we need to zero-extend (char is unsigned) the value and then perform a signed 32-bit subtraction. */ - lsr data1, data1, #56 - sub result, data1, data2, lsr #56 + lsr data1, data1, 56 + sub result, data1, data2, lsr 56 ret -#endif + + .p2align 4 L(mutual_align): /* Sources are mutually aligned, but are not currently at an alignment boundary. Round down the addresses and then mask off - the bytes that preceed the start point. */ - bic src1, src1, #7 - bic src2, src2, #7 - lsl tmp1, tmp1, #3 /* Bytes beyond alignment -> bits. */ - ldr data1, [src1], #8 - neg tmp1, tmp1 /* Bits to alignment -64. */ - ldr data2, [src2], #8 - mov tmp2, #~0 -#ifdef __AARCH64EB__ - /* Big-endian. Early bytes are at MSB. */ - lsl tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ -#else - /* Little-endian. Early bytes are at LSB. */ - lsr tmp2, tmp2, tmp1 /* Shift (tmp1 & 63). */ -#endif - orr data1, data1, tmp2 - orr data2, data2, tmp2 + the bytes that precede the start point. */ + bic src1, src1, 7 + ldr data2, [src1, off2] + ldr data1, [src1], 8 + neg shift, src2, lsl 3 /* Bits to alignment -64. */ + mov tmp, -1 + LS_FW tmp, tmp, shift + orr data1, data1, tmp + orr data2, data2, tmp b L(start_realigned) L(misaligned8): /* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always - checking to make sure that we don't access beyond page boundary in - SRC2. */ - tst src1, #7 - b.eq L(loop_misaligned) + checking to make sure that we don't access beyond the end of SRC2. */ + cbz tmp, L(src1_aligned) L(do_misaligned): - ldrb data1w, [src1], #1 - ldrb data2w, [src2], #1 - cmp data1w, #1 - ccmp data1w, data2w, #0, cs /* NZCV = 0b0000. */ + ldrb data1w, [src1], 1 + ldrb data2w, [src2], 1 + cmp data1w, 0 + ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */ b.ne L(done) - tst src1, #7 + tst src1, 7 b.ne L(do_misaligned) -L(loop_misaligned): - /* Test if we are within the last dword of the end of a 4K page. If - yes then jump back to the misaligned loop to copy a byte at a time. */ - and tmp1, src2, #0xff8 - eor tmp1, tmp1, #0xff8 - cbz tmp1, L(do_misaligned) - ldr data1, [src1], #8 - ldr data2, [src2], #8 - - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - bic has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ +L(src1_aligned): + neg shift, src2, lsl 3 + bic src2, src2, 7 + ldr data3, [src2], 8 +#ifdef __AARCH64EB__ + rev data3, data3 +#endif + lsr tmp, zeroones, shift + orr data3, data3, tmp + sub has_nul, data3, zeroones + orr tmp, data3, REP8_7f + bics has_nul, has_nul, tmp + b.ne L(tail) + + sub off1, src2, src1 + + .p2align 4 + +L(loop_unaligned): + ldr data3, [src1, off1] + ldr data2, [src1, off2] +#ifdef __AARCH64EB__ + rev data3, data3 +#endif + sub has_nul, data3, zeroones + orr tmp, data3, REP8_7f + ldr data1, [src1], 8 + bics has_nul, has_nul, tmp + ccmp data1, data2, 0, eq + b.eq L(loop_unaligned) + + lsl tmp, has_nul, shift +#ifdef __AARCH64EB__ + rev tmp, tmp +#endif + eor diff, data1, data2 + orr syndrome, diff, tmp + cbnz syndrome, L(end) +L(tail): + ldr data1, [src1] + neg shift, shift + lsr data2, data3, shift + lsr has_nul, has_nul, shift +#ifdef __AARCH64EB__ + rev data2, data2 + rev has_nul, has_nul +#endif + eor diff, data1, data2 orr syndrome, diff, has_nul - cbz syndrome, L(loop_misaligned) b L(end) L(done): sub result, data1, data2 ret - -SYM_FUNC_END_PI(strcmp) -EXPORT_SYMBOL_NOHWKASAN(strcmp) +SYM_FUNC_END(__pi_strcmp) +SYM_FUNC_ALIAS_WEAK(strcmp, __pi_strcmp) +EXPORT_SYMBOL_NOKASAN(strcmp) diff --git a/arch/arm64/lib/strlen.S b/arch/arm64/lib/strlen.S index 1648790e91b3..4919fe81ae54 100644 --- a/arch/arm64/lib/strlen.S +++ b/arch/arm64/lib/strlen.S @@ -79,7 +79,7 @@ whether the first fetch, which may be misaligned, crosses a page boundary. */ -SYM_FUNC_START_WEAK_PI(strlen) +SYM_FUNC_START(__pi_strlen) and tmp1, srcin, MIN_PAGE_SIZE - 1 mov zeroones, REP8_01 cmp tmp1, MIN_PAGE_SIZE - 16 @@ -208,6 +208,6 @@ L(page_cross): csel data1, data1, tmp4, eq csel data2, data2, tmp2, eq b L(page_cross_entry) - -SYM_FUNC_END_PI(strlen) +SYM_FUNC_END(__pi_strlen) +SYM_FUNC_ALIAS_WEAK(strlen, __pi_strlen) EXPORT_SYMBOL_NOKASAN(strlen) diff --git a/arch/arm64/lib/strncmp.S b/arch/arm64/lib/strncmp.S index e42bcfcd37e6..fe7bbc0b42a7 100644 --- a/arch/arm64/lib/strncmp.S +++ b/arch/arm64/lib/strncmp.S @@ -1,9 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0-only */ /* - * Copyright (c) 2013-2021, Arm Limited. + * Copyright (c) 2013-2022, Arm Limited. * * Adapted from the original at: - * https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/strncmp.S + * https://github.com/ARM-software/optimized-routines/blob/189dfefe37d54c5b/string/aarch64/strncmp.S */ #include <linux/linkage.h> @@ -11,14 +11,14 @@ /* Assumptions: * - * ARMv8-a, AArch64 + * ARMv8-a, AArch64. + * MTE compatible. */ #define L(label) .L ## label #define REP8_01 0x0101010101010101 #define REP8_7f 0x7f7f7f7f7f7f7f7f -#define REP8_80 0x8080808080808080 /* Parameters and result. */ #define src1 x0 @@ -39,12 +39,26 @@ #define tmp3 x10 #define zeroones x11 #define pos x12 -#define limit_wd x13 -#define mask x14 -#define endloop x15 +#define mask x13 +#define endloop x14 #define count mask +#define offset pos +#define neg_offset x15 -SYM_FUNC_START_WEAK_PI(strncmp) +/* Define endian dependent shift operations. + On big-endian early bytes are at MSB and on little-endian LSB. + LS_FW means shifting towards early bytes. + LS_BK means shifting towards later bytes. + */ +#ifdef __AARCH64EB__ +#define LS_FW lsl +#define LS_BK lsr +#else +#define LS_FW lsr +#define LS_BK lsl +#endif + +SYM_FUNC_START(__pi_strncmp) cbz limit, L(ret0) eor tmp1, src1, src2 mov zeroones, #REP8_01 @@ -52,9 +66,6 @@ SYM_FUNC_START_WEAK_PI(strncmp) and count, src1, #7 b.ne L(misaligned8) cbnz count, L(mutual_align) - /* Calculate the number of full and partial words -1. */ - sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ - lsr limit_wd, limit_wd, #3 /* Convert to Dwords. */ /* NUL detection works on the principle that (X - 1) & (~X) & 0x80 (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and @@ -64,56 +75,52 @@ L(loop_aligned): ldr data1, [src1], #8 ldr data2, [src2], #8 L(start_realigned): - subs limit_wd, limit_wd, #1 + subs limit, limit, #8 sub tmp1, data1, zeroones orr tmp2, data1, #REP8_7f eor diff, data1, data2 /* Non-zero if differences found. */ - csinv endloop, diff, xzr, pl /* Last Dword or differences. */ + csinv endloop, diff, xzr, hi /* Last Dword or differences. */ bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ ccmp endloop, #0, #0, eq b.eq L(loop_aligned) /* End of main loop */ - /* Not reached the limit, must have found the end or a diff. */ - tbz limit_wd, #63, L(not_limit) - - /* Limit % 8 == 0 => all bytes significant. */ - ands limit, limit, #7 - b.eq L(not_limit) - - lsl limit, limit, #3 /* Bits -> bytes. */ - mov mask, #~0 -#ifdef __AARCH64EB__ - lsr mask, mask, limit -#else - lsl mask, mask, limit -#endif - bic data1, data1, mask - bic data2, data2, mask - - /* Make sure that the NUL byte is marked in the syndrome. */ - orr has_nul, has_nul, mask - -L(not_limit): +L(full_check): +#ifndef __AARCH64EB__ orr syndrome, diff, has_nul - -#ifndef __AARCH64EB__ + add limit, limit, 8 /* Rewind limit to before last subs. */ +L(syndrome_check): + /* Limit was reached. Check if the NUL byte or the difference + is before the limit. */ rev syndrome, syndrome rev data1, data1 - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. - Shifting left now will bring the critical information into the - top bits. */ clz pos, syndrome rev data2, data2 lsl data1, data1, pos + cmp limit, pos, lsr #3 lsl data2, data2, pos /* But we need to zero-extend (char is unsigned) the value and then perform a signed 32-bit subtraction. */ lsr data1, data1, #56 sub result, data1, data2, lsr #56 + csel result, result, xzr, hi ret #else + /* Not reached the limit, must have found the end or a diff. */ + tbz limit, #63, L(not_limit) + add tmp1, limit, 8 + cbz limit, L(not_limit) + + lsl limit, tmp1, #3 /* Bits -> bytes. */ + mov mask, #~0 + lsr mask, mask, limit + bic data1, data1, mask + bic data2, data2, mask + + /* Make sure that the NUL byte is marked in the syndrome. */ + orr has_nul, has_nul, mask + +L(not_limit): /* For big-endian we cannot use the trick with the syndrome value as carry-propagation can corrupt the upper bits if the trailing bytes in the string contain 0x01. */ @@ -134,10 +141,11 @@ L(not_limit): rev has_nul, has_nul orr syndrome, diff, has_nul clz pos, syndrome - /* The MS-non-zero bit of the syndrome marks either the first bit - that is different, or the top bit of the first zero byte. + /* The most-significant-non-zero bit of the syndrome marks either the + first bit that is different, or the top bit of the first zero byte. Shifting left now will bring the critical information into the top bits. */ +L(end_quick): lsl data1, data1, pos lsl data2, data2, pos /* But we need to zero-extend (char is unsigned) the value and then @@ -159,22 +167,12 @@ L(mutual_align): neg tmp3, count, lsl #3 /* 64 - bits(bytes beyond align). */ ldr data2, [src2], #8 mov tmp2, #~0 - sub limit_wd, limit, #1 /* limit != 0, so no underflow. */ -#ifdef __AARCH64EB__ - /* Big-endian. Early bytes are at MSB. */ - lsl tmp2, tmp2, tmp3 /* Shift (count & 63). */ -#else - /* Little-endian. Early bytes are at LSB. */ - lsr tmp2, tmp2, tmp3 /* Shift (count & 63). */ -#endif - and tmp3, limit_wd, #7 - lsr limit_wd, limit_wd, #3 - /* Adjust the limit. Only low 3 bits used, so overflow irrelevant. */ - add limit, limit, count - add tmp3, tmp3, count + LS_FW tmp2, tmp2, tmp3 /* Shift (count & 63). */ + /* Adjust the limit and ensure it doesn't overflow. */ + adds limit, limit, count + csinv limit, limit, xzr, lo orr data1, data1, tmp2 orr data2, data2, tmp2 - add limit_wd, limit_wd, tmp3, lsr #3 b L(start_realigned) .p2align 4 @@ -197,13 +195,11 @@ L(done): /* Align the SRC1 to a dword by doing a bytewise compare and then do the dword loop. */ L(try_misaligned_words): - lsr limit_wd, limit, #3 - cbz count, L(do_misaligned) + cbz count, L(src1_aligned) neg count, count and count, count, #7 sub limit, limit, count - lsr limit_wd, limit, #3 L(page_end_loop): ldrb data1w, [src1], #1 @@ -214,48 +210,101 @@ L(page_end_loop): subs count, count, #1 b.hi L(page_end_loop) -L(do_misaligned): - /* Prepare ourselves for the next page crossing. Unlike the aligned - loop, we fetch 1 less dword because we risk crossing bounds on - SRC2. */ - mov count, #8 - subs limit_wd, limit_wd, #1 - b.lo L(done_loop) -L(loop_misaligned): - and tmp2, src2, #0xff8 - eor tmp2, tmp2, #0xff8 - cbz tmp2, L(page_end_loop) + /* The following diagram explains the comparison of misaligned strings. + The bytes are shown in natural order. For little-endian, it is + reversed in the registers. The "x" bytes are before the string. + The "|" separates data that is loaded at one time. + src1 | a a a a a a a a | b b b c c c c c | . . . + src2 | x x x x x a a a a a a a a b b b | c c c c c . . . + + After shifting in each step, the data looks like this: + STEP_A STEP_B STEP_C + data1 a a a a a a a a b b b c c c c c b b b c c c c c + data2 a a a a a a a a b b b 0 0 0 0 0 0 0 0 c c c c c + The bytes with "0" are eliminated from the syndrome via mask. + + Align SRC2 down to 16 bytes. This way we can read 16 bytes at a + time from SRC2. The comparison happens in 3 steps. After each step + the loop can exit, or read from SRC1 or SRC2. */ +L(src1_aligned): + /* Calculate offset from 8 byte alignment to string start in bits. No + need to mask offset since shifts are ignoring upper bits. */ + lsl offset, src2, #3 + bic src2, src2, #0xf + mov mask, -1 + neg neg_offset, offset ldr data1, [src1], #8 - ldr data2, [src2], #8 - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f - eor diff, data1, data2 /* Non-zero if differences found. */ - bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ - ccmp diff, #0, #0, eq - b.ne L(not_limit) - subs limit_wd, limit_wd, #1 - b.pl L(loop_misaligned) + ldp tmp1, tmp2, [src2], #16 + LS_BK mask, mask, neg_offset + and neg_offset, neg_offset, #63 /* Need actual value for cmp later. */ + /* Skip the first compare if data in tmp1 is irrelevant. */ + tbnz offset, 6, L(misaligned_mid_loop) -L(done_loop): - /* We found a difference or a NULL before the limit was reached. */ - and limit, limit, #7 - cbz limit, L(not_limit) - /* Read the last word. */ - sub src1, src1, 8 - sub src2, src2, 8 - ldr data1, [src1, limit] - ldr data2, [src2, limit] - sub tmp1, data1, zeroones - orr tmp2, data1, #REP8_7f +L(loop_misaligned): + /* STEP_A: Compare full 8 bytes when there is enough data from SRC2.*/ + LS_FW data2, tmp1, offset + LS_BK tmp1, tmp2, neg_offset + subs limit, limit, #8 + orr data2, data2, tmp1 /* 8 bytes from SRC2 combined from two regs.*/ + sub has_nul, data1, zeroones eor diff, data1, data2 /* Non-zero if differences found. */ - bics has_nul, tmp1, tmp2 /* Non-zero if NUL terminator. */ - ccmp diff, #0, #0, eq - b.ne L(not_limit) + orr tmp3, data1, #REP8_7f + csinv endloop, diff, xzr, hi /* If limit, set to all ones. */ + bic has_nul, has_nul, tmp3 /* Non-zero if NUL byte found in SRC1. */ + orr tmp3, endloop, has_nul + cbnz tmp3, L(full_check) + + ldr data1, [src1], #8 +L(misaligned_mid_loop): + /* STEP_B: Compare first part of data1 to second part of tmp2. */ + LS_FW data2, tmp2, offset +#ifdef __AARCH64EB__ + /* For big-endian we do a byte reverse to avoid carry-propagation + problem described above. This way we can reuse the has_nul in the + next step and also use syndrome value trick at the end. */ + rev tmp3, data1 + #define data1_fixed tmp3 +#else + #define data1_fixed data1 +#endif + sub has_nul, data1_fixed, zeroones + orr tmp3, data1_fixed, #REP8_7f + eor diff, data2, data1 /* Non-zero if differences found. */ + bic has_nul, has_nul, tmp3 /* Non-zero if NUL terminator. */ +#ifdef __AARCH64EB__ + rev has_nul, has_nul +#endif + cmp limit, neg_offset, lsr #3 + orr syndrome, diff, has_nul + bic syndrome, syndrome, mask /* Ignore later bytes. */ + csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */ + cbnz tmp3, L(syndrome_check) + + /* STEP_C: Compare second part of data1 to first part of tmp1. */ + ldp tmp1, tmp2, [src2], #16 + cmp limit, #8 + LS_BK data2, tmp1, neg_offset + eor diff, data2, data1 /* Non-zero if differences found. */ + orr syndrome, diff, has_nul + and syndrome, syndrome, mask /* Ignore earlier bytes. */ + csinv tmp3, syndrome, xzr, hi /* If limit, set to all ones. */ + cbnz tmp3, L(syndrome_check) + + ldr data1, [src1], #8 + sub limit, limit, #8 + b L(loop_misaligned) + +#ifdef __AARCH64EB__ +L(syndrome_check): + clz pos, syndrome + cmp pos, limit, lsl #3 + b.lo L(end_quick) +#endif L(ret0): mov result, #0 ret - -SYM_FUNC_END_PI(strncmp) -EXPORT_SYMBOL_NOHWKASAN(strncmp) +SYM_FUNC_END(__pi_strncmp) +SYM_FUNC_ALIAS_WEAK(strncmp, __pi_strncmp) +EXPORT_SYMBOL_NOKASAN(strncmp) diff --git a/arch/arm64/lib/strnlen.S b/arch/arm64/lib/strnlen.S index b72913a99038..d5ac0e10a01d 100644 --- a/arch/arm64/lib/strnlen.S +++ b/arch/arm64/lib/strnlen.S @@ -47,7 +47,7 @@ limit_wd .req x14 #define REP8_7f 0x7f7f7f7f7f7f7f7f #define REP8_80 0x8080808080808080 -SYM_FUNC_START_WEAK_PI(strnlen) +SYM_FUNC_START(__pi_strnlen) cbz limit, .Lhit_limit mov zeroones, #REP8_01 bic src, srcin, #15 @@ -156,5 +156,7 @@ CPU_LE( lsr tmp2, tmp2, tmp4 ) /* Shift (tmp1 & 63). */ .Lhit_limit: mov len, limit ret -SYM_FUNC_END_PI(strnlen) +SYM_FUNC_END(__pi_strnlen) + +SYM_FUNC_ALIAS_WEAK(strnlen, __pi_strnlen) EXPORT_SYMBOL_NOKASAN(strnlen) diff --git a/arch/arm64/lib/strrchr.S b/arch/arm64/lib/strrchr.S index 13132d1ed6d1..a5123cf0ce12 100644 --- a/arch/arm64/lib/strrchr.S +++ b/arch/arm64/lib/strrchr.S @@ -18,7 +18,7 @@ * Returns: * x0 - address of last occurrence of 'c' or 0 */ -SYM_FUNC_START_WEAK_PI(strrchr) +SYM_FUNC_START(__pi_strrchr) mov x3, #0 and w1, w1, #0xff 1: ldrb w2, [x0], #1 @@ -29,5 +29,6 @@ SYM_FUNC_START_WEAK_PI(strrchr) b 1b 2: mov x0, x3 ret -SYM_FUNC_END_PI(strrchr) +SYM_FUNC_END(__pi_strrchr) +SYM_FUNC_ALIAS_WEAK(strrchr, __pi_strrchr) EXPORT_SYMBOL_NOKASAN(strrchr) diff --git a/arch/arm64/lib/xor-neon.c b/arch/arm64/lib/xor-neon.c index d189cf4e70ea..96b171995d19 100644 --- a/arch/arm64/lib/xor-neon.c +++ b/arch/arm64/lib/xor-neon.c @@ -10,8 +10,8 @@ #include <linux/module.h> #include <asm/neon-intrinsics.h> -void xor_arm64_neon_2(unsigned long bytes, unsigned long *p1, - unsigned long *p2) +void xor_arm64_neon_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -37,8 +37,9 @@ void xor_arm64_neon_2(unsigned long bytes, unsigned long *p1, } while (--lines > 0); } -void xor_arm64_neon_3(unsigned long bytes, unsigned long *p1, - unsigned long *p2, unsigned long *p3) +void xor_arm64_neon_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -72,8 +73,10 @@ void xor_arm64_neon_3(unsigned long bytes, unsigned long *p1, } while (--lines > 0); } -void xor_arm64_neon_4(unsigned long bytes, unsigned long *p1, - unsigned long *p2, unsigned long *p3, unsigned long *p4) +void xor_arm64_neon_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -115,9 +118,11 @@ void xor_arm64_neon_4(unsigned long bytes, unsigned long *p1, } while (--lines > 0); } -void xor_arm64_neon_5(unsigned long bytes, unsigned long *p1, - unsigned long *p2, unsigned long *p3, - unsigned long *p4, unsigned long *p5) +void xor_arm64_neon_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -186,8 +191,10 @@ static inline uint64x2_t eor3(uint64x2_t p, uint64x2_t q, uint64x2_t r) return res; } -static void xor_arm64_eor3_3(unsigned long bytes, unsigned long *p1, - unsigned long *p2, unsigned long *p3) +static void xor_arm64_eor3_3(unsigned long bytes, + unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -219,9 +226,11 @@ static void xor_arm64_eor3_3(unsigned long bytes, unsigned long *p1, } while (--lines > 0); } -static void xor_arm64_eor3_4(unsigned long bytes, unsigned long *p1, - unsigned long *p2, unsigned long *p3, - unsigned long *p4) +static void xor_arm64_eor3_4(unsigned long bytes, + unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; @@ -261,9 +270,12 @@ static void xor_arm64_eor3_4(unsigned long bytes, unsigned long *p1, } while (--lines > 0); } -static void xor_arm64_eor3_5(unsigned long bytes, unsigned long *p1, - unsigned long *p2, unsigned long *p3, - unsigned long *p4, unsigned long *p5) +static void xor_arm64_eor3_5(unsigned long bytes, + unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { uint64_t *dp1 = (uint64_t *)p1; uint64_t *dp2 = (uint64_t *)p2; diff --git a/arch/arm64/mm/cache.S b/arch/arm64/mm/cache.S index 7d0563db4201..0ea6cc25dc66 100644 --- a/arch/arm64/mm/cache.S +++ b/arch/arm64/mm/cache.S @@ -107,10 +107,11 @@ SYM_FUNC_END(icache_inval_pou) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(dcache_clean_inval_poc) +SYM_FUNC_START(__pi_dcache_clean_inval_poc) dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_inval_poc) +SYM_FUNC_END(__pi_dcache_clean_inval_poc) +SYM_FUNC_ALIAS(dcache_clean_inval_poc, __pi_dcache_clean_inval_poc) /* * dcache_clean_pou(start, end) @@ -140,7 +141,7 @@ SYM_FUNC_END(dcache_clean_pou) * - start - kernel start address of region * - end - kernel end address of region */ -SYM_FUNC_START_PI(dcache_inval_poc) +SYM_FUNC_START(__pi_dcache_inval_poc) dcache_line_size x2, x3 sub x3, x2, #1 tst x1, x3 // end cache line aligned? @@ -158,7 +159,8 @@ SYM_FUNC_START_PI(dcache_inval_poc) b.lo 2b dsb sy ret -SYM_FUNC_END_PI(dcache_inval_poc) +SYM_FUNC_END(__pi_dcache_inval_poc) +SYM_FUNC_ALIAS(dcache_inval_poc, __pi_dcache_inval_poc) /* * dcache_clean_poc(start, end) @@ -169,10 +171,11 @@ SYM_FUNC_END_PI(dcache_inval_poc) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(dcache_clean_poc) +SYM_FUNC_START(__pi_dcache_clean_poc) dcache_by_line_op cvac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_poc) +SYM_FUNC_END(__pi_dcache_clean_poc) +SYM_FUNC_ALIAS(dcache_clean_poc, __pi_dcache_clean_poc) /* * dcache_clean_pop(start, end) @@ -183,13 +186,14 @@ SYM_FUNC_END_PI(dcache_clean_poc) * - start - virtual start address of region * - end - virtual end address of region */ -SYM_FUNC_START_PI(dcache_clean_pop) +SYM_FUNC_START(__pi_dcache_clean_pop) alternative_if_not ARM64_HAS_DCPOP b dcache_clean_poc alternative_else_nop_endif dcache_by_line_op cvap, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(dcache_clean_pop) +SYM_FUNC_END(__pi_dcache_clean_pop) +SYM_FUNC_ALIAS(dcache_clean_pop, __pi_dcache_clean_pop) /* * __dma_flush_area(start, size) @@ -199,11 +203,12 @@ SYM_FUNC_END_PI(dcache_clean_pop) * - start - virtual start address of region * - size - size in question */ -SYM_FUNC_START_PI(__dma_flush_area) +SYM_FUNC_START(__pi___dma_flush_area) add x1, x0, x1 dcache_by_line_op civac, sy, x0, x1, x2, x3 ret -SYM_FUNC_END_PI(__dma_flush_area) +SYM_FUNC_END(__pi___dma_flush_area) +SYM_FUNC_ALIAS(__dma_flush_area, __pi___dma_flush_area) /* * __dma_map_area(start, size, dir) @@ -211,12 +216,13 @@ SYM_FUNC_END_PI(__dma_flush_area) * - size - size of region * - dir - DMA direction */ -SYM_FUNC_START_PI(__dma_map_area) +SYM_FUNC_START(__pi___dma_map_area) add x1, x0, x1 cmp w2, #DMA_FROM_DEVICE b.eq __pi_dcache_inval_poc b __pi_dcache_clean_poc -SYM_FUNC_END_PI(__dma_map_area) +SYM_FUNC_END(__pi___dma_map_area) +SYM_FUNC_ALIAS(__dma_map_area, __pi___dma_map_area) /* * __dma_unmap_area(start, size, dir) @@ -224,9 +230,10 @@ SYM_FUNC_END_PI(__dma_map_area) * - size - size of region * - dir - DMA direction */ -SYM_FUNC_START_PI(__dma_unmap_area) +SYM_FUNC_START(__pi___dma_unmap_area) add x1, x0, x1 cmp w2, #DMA_TO_DEVICE b.ne __pi_dcache_inval_poc ret -SYM_FUNC_END_PI(__dma_unmap_area) +SYM_FUNC_END(__pi___dma_unmap_area) +SYM_FUNC_ALIAS(__dma_unmap_area, __pi___dma_unmap_area) diff --git a/arch/arm64/mm/flush.c b/arch/arm64/mm/flush.c index 2aaf950b906c..a06c6ac770d4 100644 --- a/arch/arm64/mm/flush.c +++ b/arch/arm64/mm/flush.c @@ -52,6 +52,13 @@ void __sync_icache_dcache(pte_t pte) { struct page *page = pte_page(pte); + /* + * HugeTLB pages are always fully mapped, so only setting head page's + * PG_dcache_clean flag is enough. + */ + if (PageHuge(page)) + page = compound_head(page); + if (!test_bit(PG_dcache_clean, &page->flags)) { sync_icache_aliases((unsigned long)page_address(page), (unsigned long)page_address(page) + diff --git a/arch/arm64/mm/hugetlbpage.c b/arch/arm64/mm/hugetlbpage.c index 228226c5fa80..cbace1c9e137 100644 --- a/arch/arm64/mm/hugetlbpage.c +++ b/arch/arm64/mm/hugetlbpage.c @@ -56,25 +56,34 @@ void __init arm64_hugetlb_cma_reserve(void) } #endif /* CONFIG_CMA */ -#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION -bool arch_hugetlb_migration_supported(struct hstate *h) +static bool __hugetlb_valid_size(unsigned long size) { - size_t pagesize = huge_page_size(h); - - switch (pagesize) { + switch (size) { #ifndef __PAGETABLE_PMD_FOLDED case PUD_SIZE: return pud_sect_supported(); #endif - case PMD_SIZE: case CONT_PMD_SIZE: + case PMD_SIZE: case CONT_PTE_SIZE: return true; } - pr_warn("%s: unrecognized huge page size 0x%lx\n", - __func__, pagesize); + return false; } + +#ifdef CONFIG_ARCH_ENABLE_HUGEPAGE_MIGRATION +bool arch_hugetlb_migration_supported(struct hstate *h) +{ + size_t pagesize = huge_page_size(h); + + if (!__hugetlb_valid_size(pagesize)) { + pr_warn("%s: unrecognized huge page size 0x%lx\n", + __func__, pagesize); + return false; + } + return true; +} #endif int pmd_huge(pmd_t pmd) @@ -507,16 +516,5 @@ arch_initcall(hugetlbpage_init); bool __init arch_hugetlb_valid_size(unsigned long size) { - switch (size) { -#ifndef __PAGETABLE_PMD_FOLDED - case PUD_SIZE: - return pud_sect_supported(); -#endif - case CONT_PMD_SIZE: - case PMD_SIZE: - case CONT_PTE_SIZE: - return true; - } - - return false; + return __hugetlb_valid_size(size); } diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index db63cc885771..9e26ec80d317 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -61,8 +61,34 @@ EXPORT_SYMBOL(memstart_addr); * unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4). * In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory, * otherwise it is empty. + * + * Memory reservation for crash kernel either done early or deferred + * depending on DMA memory zones configs (ZONE_DMA) -- + * + * In absence of ZONE_DMA configs arm64_dma_phys_limit initialized + * here instead of max_zone_phys(). This lets early reservation of + * crash kernel memory which has a dependency on arm64_dma_phys_limit. + * Reserving memory early for crash kernel allows linear creation of block + * mappings (greater than page-granularity) for all the memory bank rangs. + * In this scheme a comparatively quicker boot is observed. + * + * If ZONE_DMA configs are defined, crash kernel memory reservation + * is delayed until DMA zone memory range size initilazation performed in + * zone_sizes_init(). The defer is necessary to steer clear of DMA zone + * memory range to avoid overlap allocation. So crash kernel memory boundaries + * are not known when mapping all bank memory ranges, which otherwise means + * not possible to exclude crash kernel range from creating block mappings + * so page-granularity mappings are created for the entire memory range. + * Hence a slightly slower boot is observed. + * + * Note: Page-granularity mapppings are necessary for crash kernel memory + * range for shrinking its size via /sys/kernel/kexec_crash_size interface. */ -phys_addr_t arm64_dma_phys_limit __ro_after_init; +#if IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32) +phys_addr_t __ro_after_init arm64_dma_phys_limit; +#else +phys_addr_t __ro_after_init arm64_dma_phys_limit = PHYS_MASK + 1; +#endif #ifdef CONFIG_KEXEC_CORE /* @@ -153,8 +179,6 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) if (!arm64_dma_phys_limit) arm64_dma_phys_limit = dma32_phys_limit; #endif - if (!arm64_dma_phys_limit) - arm64_dma_phys_limit = PHYS_MASK + 1; max_zone_pfns[ZONE_NORMAL] = max; free_area_init(max_zone_pfns); @@ -315,6 +339,9 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); + if (!IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) + reserve_crashkernel(); + high_memory = __va(memblock_end_of_DRAM() - 1) + 1; } @@ -361,7 +388,8 @@ void __init bootmem_init(void) * request_standard_resources() depends on crashkernel's memory being * reserved, so do it here. */ - reserve_crashkernel(); + if (IS_ENABLED(CONFIG_ZONE_DMA) || IS_ENABLED(CONFIG_ZONE_DMA32)) + reserve_crashkernel(); memblock_dump_all(); } diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 49abbf43bf35..0b7d25887ec3 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -63,6 +63,7 @@ static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused; static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused; static DEFINE_SPINLOCK(swapper_pgdir_lock); +static DEFINE_MUTEX(fixmap_lock); void set_swapper_pgd(pgd_t *pgdp, pgd_t pgd) { @@ -294,18 +295,6 @@ static void alloc_init_cont_pmd(pud_t *pudp, unsigned long addr, } while (addr = next, addr != end); } -static inline bool use_1G_block(unsigned long addr, unsigned long next, - unsigned long phys) -{ - if (PAGE_SHIFT != 12) - return false; - - if (((addr | next | phys) & ~PUD_MASK) != 0) - return false; - - return true; -} - static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, phys_addr_t phys, pgprot_t prot, phys_addr_t (*pgtable_alloc)(int), @@ -329,6 +318,12 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, } BUG_ON(p4d_bad(p4d)); + /* + * No need for locking during early boot. And it doesn't work as + * expected with KASLR enabled. + */ + if (system_state != SYSTEM_BOOTING) + mutex_lock(&fixmap_lock); pudp = pud_set_fixmap_offset(p4dp, addr); do { pud_t old_pud = READ_ONCE(*pudp); @@ -338,7 +333,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, /* * For 4K granule only, attempt to put down a 1GB block */ - if (use_1G_block(addr, next, phys) && + if (pud_sect_supported() && + ((addr | next | phys) & ~PUD_MASK) == 0 && (flags & NO_BLOCK_MAPPINGS) == 0) { pud_set_huge(pudp, phys, prot); @@ -359,6 +355,8 @@ static void alloc_init_pud(pgd_t *pgdp, unsigned long addr, unsigned long end, } while (pudp++, addr = next, addr != end); pud_clear_fixmap(); + if (system_state != SYSTEM_BOOTING) + mutex_unlock(&fixmap_lock); } static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, @@ -517,7 +515,7 @@ static void __init map_mem(pgd_t *pgdp) */ BUILD_BUG_ON(pgd_index(direct_map_end - 1) == pgd_index(direct_map_end)); - if (can_set_direct_map() || crash_mem_map || IS_ENABLED(CONFIG_KFENCE)) + if (can_set_direct_map() || IS_ENABLED(CONFIG_KFENCE)) flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; /* @@ -528,6 +526,17 @@ static void __init map_mem(pgd_t *pgdp) */ memblock_mark_nomap(kernel_start, kernel_end - kernel_start); +#ifdef CONFIG_KEXEC_CORE + if (crash_mem_map) { + if (IS_ENABLED(CONFIG_ZONE_DMA) || + IS_ENABLED(CONFIG_ZONE_DMA32)) + flags |= NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS; + else if (crashk_res.end) + memblock_mark_nomap(crashk_res.start, + resource_size(&crashk_res)); + } +#endif + /* map all the memory banks */ for_each_mem_range(i, &start, &end) { if (start >= end) @@ -554,6 +563,25 @@ static void __init map_mem(pgd_t *pgdp) __map_memblock(pgdp, kernel_start, kernel_end, PAGE_KERNEL, NO_CONT_MAPPINGS); memblock_clear_nomap(kernel_start, kernel_end - kernel_start); + + /* + * Use page-level mappings here so that we can shrink the region + * in page granularity and put back unused memory to buddy system + * through /sys/kernel/kexec_crash_size interface. + */ +#ifdef CONFIG_KEXEC_CORE + if (crash_mem_map && + !IS_ENABLED(CONFIG_ZONE_DMA) && !IS_ENABLED(CONFIG_ZONE_DMA32)) { + if (crashk_res.end) { + __map_memblock(pgdp, crashk_res.start, + crashk_res.end + 1, + PAGE_KERNEL, + NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS); + memblock_clear_nomap(crashk_res.start, + resource_size(&crashk_res)); + } + } +#endif } void mark_rodata_ro(void) diff --git a/arch/arm64/mm/mteswap.c b/arch/arm64/mm/mteswap.c index 7c4ef56265ee..a9e50e930484 100644 --- a/arch/arm64/mm/mteswap.c +++ b/arch/arm64/mm/mteswap.c @@ -12,7 +12,7 @@ static DEFINE_XARRAY(mte_pages); void *mte_allocate_tag_storage(void) { /* tags granule is 16 bytes, 2 tags stored per byte */ - return kmalloc(PAGE_SIZE / 16 / 2, GFP_KERNEL); + return kmalloc(MTE_PAGE_TAG_STORAGE, GFP_KERNEL); } void mte_free_tag_storage(char *storage) diff --git a/arch/arm64/mm/proc.S b/arch/arm64/mm/proc.S index d35c90d2e47a..50bbed947bec 100644 --- a/arch/arm64/mm/proc.S +++ b/arch/arm64/mm/proc.S @@ -46,7 +46,7 @@ #endif #ifdef CONFIG_KASAN_HW_TAGS -#define TCR_MTE_FLAGS SYS_TCR_EL1_TCMA1 | TCR_TBI1 | TCR_TBID1 +#define TCR_MTE_FLAGS TCR_TCMA1 | TCR_TBI1 | TCR_TBID1 #else /* * The mte_zero_clear_page_tags() implementation uses DC GZVA, which relies on diff --git a/arch/arm64/net/bpf_jit.h b/arch/arm64/net/bpf_jit.h index cc0cf0f5c7c3..9d9250c7cc72 100644 --- a/arch/arm64/net/bpf_jit.h +++ b/arch/arm64/net/bpf_jit.h @@ -89,9 +89,16 @@ #define A64_STXR(sf, Rt, Rn, Rs) \ A64_LSX(sf, Rt, Rn, Rs, STORE_EX) -/* LSE atomics */ +/* + * LSE atomics + * + * STADD is simply encoded as an alias for LDADD with XZR as + * the destination register. + */ #define A64_STADD(sf, Rn, Rs) \ - aarch64_insn_gen_stadd(Rn, Rs, A64_SIZE(sf)) + aarch64_insn_gen_atomic_ld_op(A64_ZR, Rn, Rs, \ + A64_SIZE(sf), AARCH64_INSN_MEM_ATOMIC_ADD, \ + AARCH64_INSN_MEM_ORDER_NONE) /* Add/subtract (immediate) */ #define A64_ADDSUB_IMM(sf, Rd, Rn, imm12, type) \ diff --git a/arch/arm64/tools/Makefile b/arch/arm64/tools/Makefile index 932b4fe5c768..cf1307188150 100644 --- a/arch/arm64/tools/Makefile +++ b/arch/arm64/tools/Makefile @@ -5,18 +5,14 @@ kapi := $(gen)/asm kapi-hdrs-y := $(kapi)/cpucaps.h -targets += $(addprefix ../../../,$(gen-y) $(kapi-hdrs-y)) +targets += $(addprefix ../../../, $(kapi-hdrs-y)) PHONY += kapi -kapi: $(kapi-hdrs-y) $(gen-y) - -# Create output directory if not already present -_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)') +kapi: $(kapi-hdrs-y) quiet_cmd_gen_cpucaps = GEN $@ - cmd_gen_cpucaps = mkdir -p $(dir $@) && \ - $(AWK) -f $(filter-out $(PHONY),$^) > $@ + cmd_gen_cpucaps = mkdir -p $(dir $@); $(AWK) -f $(real-prereqs) > $@ $(kapi)/cpucaps.h: $(src)/gen-cpucaps.awk $(src)/cpucaps FORCE $(call if_changed,gen_cpucaps) diff --git a/arch/arm64/tools/cpucaps b/arch/arm64/tools/cpucaps index cea7533cb304..3ed418f70e3b 100644 --- a/arch/arm64/tools/cpucaps +++ b/arch/arm64/tools/cpucaps @@ -7,7 +7,8 @@ BTI HAS_32BIT_EL0_DO_NOT_USE HAS_32BIT_EL1 HAS_ADDRESS_AUTH -HAS_ADDRESS_AUTH_ARCH +HAS_ADDRESS_AUTH_ARCH_QARMA3 +HAS_ADDRESS_AUTH_ARCH_QARMA5 HAS_ADDRESS_AUTH_IMP_DEF HAS_AMU_EXTN HAS_ARMv8_4_TTL @@ -21,7 +22,8 @@ HAS_E0PD HAS_ECV HAS_EPAN HAS_GENERIC_AUTH -HAS_GENERIC_AUTH_ARCH +HAS_GENERIC_AUTH_ARCH_QARMA3 +HAS_GENERIC_AUTH_ARCH_QARMA5 HAS_GENERIC_AUTH_IMP_DEF HAS_IRQ_PRIO_MASKING HAS_LDAPR diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index a7e01573abd8..e003b2473c64 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -8,6 +8,7 @@ menu "Processor type and features" config IA64 bool + select ARCH_BINFMT_ELF_EXTRA_PHDRS select ARCH_HAS_DMA_MARK_CLEAN select ARCH_HAS_STRNCPY_FROM_USER select ARCH_HAS_STRNLEN_USER diff --git a/arch/ia64/configs/zx1_defconfig b/arch/ia64/configs/zx1_defconfig index 629cb9cdf723..851d8594cdb8 100644 --- a/arch/ia64/configs/zx1_defconfig +++ b/arch/ia64/configs/zx1_defconfig @@ -106,7 +106,6 @@ CONFIG_HUGETLBFS=y CONFIG_NFS_FS=y CONFIG_NFS_V4=y CONFIG_NFSD=y -CONFIG_NFSD_V3=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_737=y CONFIG_NLS_CODEPAGE_775=y diff --git a/arch/ia64/include/asm/thread_info.h b/arch/ia64/include/asm/thread_info.h index 51d20cb37706..1684716f0820 100644 --- a/arch/ia64/include/asm/thread_info.h +++ b/arch/ia64/include/asm/thread_info.h @@ -55,15 +55,15 @@ struct thread_info { #ifndef ASM_OFFSETS_C /* how to get the thread information struct from C */ #define current_thread_info() ((struct thread_info *) ((char *) current + IA64_TASK_SIZE)) -#define alloc_thread_stack_node(tsk, node) \ +#define arch_alloc_thread_stack_node(tsk, node) \ ((unsigned long *) ((char *) (tsk) + IA64_TASK_SIZE)) #define task_thread_info(tsk) ((struct thread_info *) ((char *) (tsk) + IA64_TASK_SIZE)) #else #define current_thread_info() ((struct thread_info *) 0) -#define alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) +#define arch_alloc_thread_stack_node(tsk, node) ((unsigned long *) 0) #define task_thread_info(tsk) ((struct thread_info *) 0) #endif -#define free_thread_stack(tsk) /* nothing */ +#define arch_free_thread_stack(tsk) /* nothing */ #define task_stack_page(tsk) ((void *)(tsk)) #define __HAVE_THREAD_FUNCTIONS diff --git a/arch/ia64/include/asm/xor.h b/arch/ia64/include/asm/xor.h index 673051bf9d7d..6785f70d3208 100644 --- a/arch/ia64/include/asm/xor.h +++ b/arch/ia64/include/asm/xor.h @@ -4,13 +4,20 @@ */ -extern void xor_ia64_2(unsigned long, unsigned long *, unsigned long *); -extern void xor_ia64_3(unsigned long, unsigned long *, unsigned long *, - unsigned long *); -extern void xor_ia64_4(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *); -extern void xor_ia64_5(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *, unsigned long *); +extern void xor_ia64_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +extern void xor_ia64_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +extern void xor_ia64_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +extern void xor_ia64_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); static struct xor_block_template xor_block_ia64 = { .name = "ia64", diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 936e1803c7c7..936cce42ae9a 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -4,6 +4,7 @@ config M68K default y select ARCH_32BIT_OFF_T select ARCH_HAS_BINFMT_FLAT + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DMA_PREP_COHERENT if HAS_DMA && MMU && !COLDFIRE select ARCH_HAS_SYNC_DMA_FOR_DEVICE if HAS_DMA select ARCH_HAVE_NMI_SAFE_CMPXCHG if RMW_INSNS @@ -17,7 +18,6 @@ config M68K select GENERIC_CPU_DEVICES select GENERIC_IOMAP select GENERIC_IRQ_SHOW - select HAVE_AOUT if MMU select HAVE_ASM_MODVERSIONS select HAVE_DEBUG_BUGVERBOSE select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_HAS_NO_UNALIGNED diff --git a/arch/m68k/amiga/config.c b/arch/m68k/amiga/config.c index be2dfab48fd4..3137b45750df 100644 --- a/arch/m68k/amiga/config.c +++ b/arch/m68k/amiga/config.c @@ -37,6 +37,7 @@ #include <asm/irq.h> #include <asm/machdep.h> #include <asm/io.h> +#include <asm/config.h> static unsigned long amiga_model; diff --git a/arch/m68k/apollo/config.c b/arch/m68k/apollo/config.c index 581a5f68d102..42a8b8e2b664 100644 --- a/arch/m68k/apollo/config.c +++ b/arch/m68k/apollo/config.c @@ -16,6 +16,7 @@ #include <asm/apollohw.h> #include <asm/irq.h> #include <asm/machdep.h> +#include <asm/config.h> u_long sio01_physaddr; u_long sio23_physaddr; diff --git a/arch/m68k/atari/config.c b/arch/m68k/atari/config.c index 261a0f57cc9a..38a7c0578105 100644 --- a/arch/m68k/atari/config.c +++ b/arch/m68k/atari/config.c @@ -46,6 +46,7 @@ #include <asm/machdep.h> #include <asm/hwtest.h> #include <asm/io.h> +#include <asm/config.h> u_long atari_mch_cookie; EXPORT_SYMBOL(atari_mch_cookie); diff --git a/arch/m68k/atari/stdma.c b/arch/m68k/atari/stdma.c index ba65f942d0c7..ce6818eff75e 100644 --- a/arch/m68k/atari/stdma.c +++ b/arch/m68k/atari/stdma.c @@ -30,7 +30,6 @@ #include <linux/types.h> #include <linux/kdev_t.h> -#include <linux/genhd.h> #include <linux/sched.h> #include <linux/init.h> #include <linux/interrupt.h> diff --git a/arch/m68k/bvme6000/config.c b/arch/m68k/bvme6000/config.c index 0c6feafbbd11..3a1d90e399e0 100644 --- a/arch/m68k/bvme6000/config.c +++ b/arch/m68k/bvme6000/config.c @@ -23,7 +23,6 @@ #include <linux/linkage.h> #include <linux/init.h> #include <linux/major.h> -#include <linux/genhd.h> #include <linux/rtc.h> #include <linux/interrupt.h> #include <linux/bcd.h> @@ -36,6 +35,7 @@ #include <asm/traps.h> #include <asm/machdep.h> #include <asm/bvme6000hw.h> +#include <asm/config.h> static void bvme6000_get_model(char *model); extern void bvme6000_sched_init(void); diff --git a/arch/m68k/configs/amiga_defconfig b/arch/m68k/configs/amiga_defconfig index bc9952f8be66..114aaa3f955a 100644 --- a/arch/m68k/configs/amiga_defconfig +++ b/arch/m68k/configs/amiga_defconfig @@ -104,7 +104,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -204,7 +203,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -229,7 +227,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -435,6 +432,7 @@ CONFIG_FB_AMIGA_ECS=y CONFIG_FB_AMIGA_AGA=y CONFIG_FB_FM2=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y CONFIG_LOGO=y CONFIG_SOUND=m CONFIG_DMASOUND_PAULA=m @@ -500,7 +498,6 @@ CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m # CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set @@ -643,7 +640,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/apollo_defconfig b/arch/m68k/configs/apollo_defconfig index a77269c6e5ba..30b9d932b930 100644 --- a/arch/m68k/configs/apollo_defconfig +++ b/arch/m68k/configs/apollo_defconfig @@ -100,7 +100,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -200,7 +199,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -225,7 +223,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -393,6 +390,7 @@ CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_VGA16 is not set # CONFIG_LOGO_LINUX_CLUT224 is not set @@ -457,7 +455,6 @@ CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m # CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set @@ -599,7 +596,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/atari_defconfig b/arch/m68k/configs/atari_defconfig index 7a74efa6b9a1..51ff3180e69d 100644 --- a/arch/m68k/configs/atari_defconfig +++ b/arch/m68k/configs/atari_defconfig @@ -107,7 +107,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -207,7 +206,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -232,7 +230,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -478,7 +475,6 @@ CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m # CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set @@ -621,7 +617,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/bvme6000_defconfig b/arch/m68k/configs/bvme6000_defconfig index a5323bf2eb33..7d95ca4366e4 100644 --- a/arch/m68k/configs/bvme6000_defconfig +++ b/arch/m68k/configs/bvme6000_defconfig @@ -97,7 +97,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -197,7 +196,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -222,7 +220,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -450,7 +447,6 @@ CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m # CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set @@ -592,7 +588,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/hp300_defconfig b/arch/m68k/configs/hp300_defconfig index 5e80aa0869d5..e306e3813607 100644 --- a/arch/m68k/configs/hp300_defconfig +++ b/arch/m68k/configs/hp300_defconfig @@ -99,7 +99,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -199,7 +198,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -224,7 +222,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -395,6 +392,7 @@ CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set CONFIG_FB=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set @@ -459,7 +457,6 @@ CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m # CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set @@ -601,7 +598,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/mac_defconfig b/arch/m68k/configs/mac_defconfig index e84326a3f62d..41316cf02441 100644 --- a/arch/m68k/configs/mac_defconfig +++ b/arch/m68k/configs/mac_defconfig @@ -98,7 +98,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -198,7 +197,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -223,7 +221,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -480,7 +477,6 @@ CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m # CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set @@ -623,7 +619,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/multi_defconfig b/arch/m68k/configs/multi_defconfig index 337552f43339..2fc3f0df6d43 100644 --- a/arch/m68k/configs/multi_defconfig +++ b/arch/m68k/configs/multi_defconfig @@ -118,7 +118,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -218,7 +217,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -243,7 +241,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -497,6 +494,7 @@ CONFIG_FB_ATARI=y CONFIG_FB_VALKYRIE=y CONFIG_FB_MAC=y CONFIG_FRAMEBUFFER_CONSOLE=y +CONFIG_FRAMEBUFFER_CONSOLE_LEGACY_ACCELERATION=y CONFIG_LOGO=y CONFIG_SOUND=m CONFIG_DMASOUND_ATARI=m @@ -565,7 +563,6 @@ CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m # CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set @@ -708,7 +705,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/mvme147_defconfig b/arch/m68k/configs/mvme147_defconfig index 7b688f7d272a..9603f4396469 100644 --- a/arch/m68k/configs/mvme147_defconfig +++ b/arch/m68k/configs/mvme147_defconfig @@ -96,7 +96,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -196,7 +195,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -221,7 +219,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -449,7 +446,6 @@ CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m # CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set @@ -591,7 +587,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/mvme16x_defconfig b/arch/m68k/configs/mvme16x_defconfig index 7c2cb31d63dd..c9cabd3344df 100644 --- a/arch/m68k/configs/mvme16x_defconfig +++ b/arch/m68k/configs/mvme16x_defconfig @@ -97,7 +97,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -197,7 +196,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -222,7 +220,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -450,7 +447,6 @@ CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m # CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set @@ -592,7 +588,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/q40_defconfig b/arch/m68k/configs/q40_defconfig index ca43897af26d..5f994bf44fb8 100644 --- a/arch/m68k/configs/q40_defconfig +++ b/arch/m68k/configs/q40_defconfig @@ -98,7 +98,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -198,7 +197,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -223,7 +221,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -467,7 +464,6 @@ CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m # CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set @@ -610,7 +606,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/sun3_defconfig b/arch/m68k/configs/sun3_defconfig index e3d515f37144..183e33f7d4a0 100644 --- a/arch/m68k/configs/sun3_defconfig +++ b/arch/m68k/configs/sun3_defconfig @@ -94,7 +94,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -194,7 +193,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -219,7 +217,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -388,9 +385,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -CONFIG_FB=y -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_LOGO=y CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -452,7 +446,6 @@ CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m # CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set @@ -593,7 +586,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/configs/sun3x_defconfig b/arch/m68k/configs/sun3x_defconfig index d601606c969b..8214263b9ab8 100644 --- a/arch/m68k/configs/sun3x_defconfig +++ b/arch/m68k/configs/sun3x_defconfig @@ -94,7 +94,6 @@ CONFIG_NF_TABLES_NETDEV=y CONFIG_NFT_NUMGEN=m CONFIG_NFT_CT=m CONFIG_NFT_FLOW_OFFLOAD=m -CONFIG_NFT_COUNTER=m CONFIG_NFT_CONNLIMIT=m CONFIG_NFT_LOG=m CONFIG_NFT_LIMIT=m @@ -194,7 +193,6 @@ CONFIG_IP_SET_LIST_SET=m CONFIG_NFT_DUP_IPV4=m CONFIG_NFT_FIB_IPV4=m CONFIG_NF_TABLES_ARP=y -CONFIG_NF_FLOW_TABLE_IPV4=m CONFIG_NF_LOG_ARP=m CONFIG_NF_LOG_IPV4=m CONFIG_IP_NF_IPTABLES=m @@ -219,7 +217,6 @@ CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NFT_DUP_IPV6=m CONFIG_NFT_FIB_IPV6=m -CONFIG_NF_FLOW_TABLE_IPV6=m CONFIG_IP6_NF_IPTABLES=m CONFIG_IP6_NF_MATCH_AH=m CONFIG_IP6_NF_MATCH_EUI64=m @@ -387,9 +384,6 @@ CONFIG_NTP_PPS=y CONFIG_PPS_CLIENT_LDISC=m CONFIG_PTP_1588_CLOCK=m # CONFIG_HWMON is not set -CONFIG_FB=y -CONFIG_FRAMEBUFFER_CONSOLE=y -CONFIG_LOGO=y CONFIG_HID=m CONFIG_HIDRAW=y CONFIG_UHID=m @@ -451,7 +445,6 @@ CONFIG_NFS_V4=m CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m # CONFIG_CIFS_STATS2 is not set # CONFIG_CIFS_DEBUG is not set @@ -593,7 +586,7 @@ CONFIG_TEST_UUID=m CONFIG_TEST_XARRAY=m CONFIG_TEST_OVERFLOW=m CONFIG_TEST_RHASHTABLE=m -CONFIG_TEST_HASH=m +CONFIG_TEST_SIPHASH=m CONFIG_TEST_IDA=m CONFIG_TEST_BITOPS=m CONFIG_TEST_VMALLOC=m diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c index 9c57b245dc12..267b02cc5655 100644 --- a/arch/m68k/emu/nfblock.c +++ b/arch/m68k/emu/nfblock.c @@ -13,7 +13,6 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/types.h> -#include <linux/genhd.h> #include <linux/blkdev.h> #include <linux/hdreg.h> #include <linux/slab.h> diff --git a/arch/m68k/hp300/config.c b/arch/m68k/hp300/config.c index ce1eb3d3d55d..2c92843397c3 100644 --- a/arch/m68k/hp300/config.c +++ b/arch/m68k/hp300/config.c @@ -22,6 +22,7 @@ #include <asm/blinken.h> #include <asm/io.h> /* readb() and writeb() */ #include <asm/hp300hw.h> +#include <asm/config.h> #include "time.h" diff --git a/arch/m68k/include/asm/cmpxchg.h b/arch/m68k/include/asm/cmpxchg.h index e8ca4b0ccefa..6cf464cdab06 100644 --- a/arch/m68k/include/asm/cmpxchg.h +++ b/arch/m68k/include/asm/cmpxchg.h @@ -4,8 +4,7 @@ #include <linux/irqflags.h> -struct __xchg_dummy { unsigned long a[100]; }; -#define __xg(x) ((volatile struct __xchg_dummy *)(x)) +#define __xg(type, x) ((volatile type *)(x)) extern unsigned long __invalid_xchg_size(unsigned long, volatile void *, int); @@ -50,7 +49,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz "1:\n\t" "casb %0,%1,%2\n\t" "jne 1b" - : "=&d" (x) : "d" (x), "m" (*__xg(ptr)) : "memory"); + : "=&d" (x) : "d" (x), "m" (*__xg(u8, ptr)) : "memory"); break; case 2: __asm__ __volatile__ @@ -58,7 +57,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz "1:\n\t" "casw %0,%1,%2\n\t" "jne 1b" - : "=&d" (x) : "d" (x), "m" (*__xg(ptr)) : "memory"); + : "=&d" (x) : "d" (x), "m" (*__xg(u16, ptr)) : "memory"); break; case 4: __asm__ __volatile__ @@ -66,7 +65,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void * ptr, int siz "1:\n\t" "casl %0,%1,%2\n\t" "jne 1b" - : "=&d" (x) : "d" (x), "m" (*__xg(ptr)) : "memory"); + : "=&d" (x) : "d" (x), "m" (*__xg(u32, ptr)) : "memory"); break; default: x = __invalid_xchg_size(x, ptr, size); diff --git a/arch/m68k/include/asm/config.h b/arch/m68k/include/asm/config.h new file mode 100644 index 000000000000..e73ffa23c4f5 --- /dev/null +++ b/arch/m68k/include/asm/config.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* + * This file contains prototypes provided by each m68k machine + * to parse bootinfo data structures and to configure the machine + */ + +#ifndef _M68K_CONFIG_H +#define _M68K_CONFIG_H + +extern int amiga_parse_bootinfo(const struct bi_record *record); +extern int apollo_parse_bootinfo(const struct bi_record *record); +extern int atari_parse_bootinfo(const struct bi_record *record); +extern int bvme6000_parse_bootinfo(const struct bi_record *record); +extern int hp300_parse_bootinfo(const struct bi_record *record); +extern int mac_parse_bootinfo(const struct bi_record *record); +extern int mvme147_parse_bootinfo(const struct bi_record *record); +extern int mvme16x_parse_bootinfo(const struct bi_record *record); +extern int q40_parse_bootinfo(const struct bi_record *record); + +extern void config_amiga(void); +extern void config_apollo(void); +extern void config_atari(void); +extern void config_bvme6000(void); +extern void config_hp300(void); +extern void config_mac(void); +extern void config_mvme147(void); +extern void config_mvme16x(void); +extern void config_q40(void); +extern void config_sun3(void); +extern void config_sun3x(void); + +#endif /* _M68K_CONFIG_H */ diff --git a/arch/m68k/include/asm/current.h b/arch/m68k/include/asm/current.h index 6390ef2f7f86..c117907e1276 100644 --- a/arch/m68k/include/asm/current.h +++ b/arch/m68k/include/asm/current.h @@ -24,6 +24,8 @@ static inline struct task_struct *get_current(void) #define current get_current() -#endif /* CONFNIG_MMU */ +#endif /* CONFIG_MMU */ + +register unsigned long current_stack_pointer __asm__("sp"); #endif /* !(_M68K_CURRENT_H) */ diff --git a/arch/m68k/kernel/setup_mm.c b/arch/m68k/kernel/setup_mm.c index 49e573b94326..8f94feed969c 100644 --- a/arch/m68k/kernel/setup_mm.c +++ b/arch/m68k/kernel/setup_mm.c @@ -16,7 +16,6 @@ #include <linux/interrupt.h> #include <linux/fs.h> #include <linux/console.h> -#include <linux/genhd.h> #include <linux/errno.h> #include <linux/string.h> #include <linux/init.h> @@ -47,6 +46,7 @@ #endif #include <asm/macintosh.h> #include <asm/natfeat.h> +#include <asm/config.h> #if !FPSTATESIZE || !NR_IRQS #warning No CPU/platform type selected, your kernel will not work! @@ -113,28 +113,6 @@ EXPORT_SYMBOL(isa_type); EXPORT_SYMBOL(isa_sex); #endif -extern int amiga_parse_bootinfo(const struct bi_record *); -extern int atari_parse_bootinfo(const struct bi_record *); -extern int mac_parse_bootinfo(const struct bi_record *); -extern int q40_parse_bootinfo(const struct bi_record *); -extern int bvme6000_parse_bootinfo(const struct bi_record *); -extern int mvme16x_parse_bootinfo(const struct bi_record *); -extern int mvme147_parse_bootinfo(const struct bi_record *); -extern int hp300_parse_bootinfo(const struct bi_record *); -extern int apollo_parse_bootinfo(const struct bi_record *); - -extern void config_amiga(void); -extern void config_atari(void); -extern void config_mac(void); -extern void config_sun3(void); -extern void config_apollo(void); -extern void config_mvme147(void); -extern void config_mvme16x(void); -extern void config_bvme6000(void); -extern void config_hp300(void); -extern void config_q40(void); -extern void config_sun3x(void); - #define MASK_256K 0xfffc0000 extern void paging_init(void); diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c index 5d16f9b47aa9..65d124ec80bb 100644 --- a/arch/m68k/mac/config.c +++ b/arch/m68k/mac/config.c @@ -47,6 +47,7 @@ #include <asm/mac_via.h> #include <asm/mac_oss.h> #include <asm/mac_psc.h> +#include <asm/config.h> /* Mac bootinfo struct */ struct mac_booter_data mac_bi_data; diff --git a/arch/m68k/mm/fault.c b/arch/m68k/mm/fault.c index 1493cf5eac1e..71aa9f6315dc 100644 --- a/arch/m68k/mm/fault.c +++ b/arch/m68k/mm/fault.c @@ -93,8 +93,6 @@ retry: vma = find_vma(mm, address); if (!vma) goto map_err; - if (vma->vm_flags & VM_IO) - goto acc_err; if (vma->vm_start <= address) goto good_area; if (!(vma->vm_flags & VM_GROWSDOWN)) diff --git a/arch/m68k/mvme147/config.c b/arch/m68k/mvme147/config.c index dfd6202fd403..4e6218115f43 100644 --- a/arch/m68k/mvme147/config.c +++ b/arch/m68k/mvme147/config.c @@ -22,7 +22,6 @@ #include <linux/linkage.h> #include <linux/init.h> #include <linux/major.h> -#include <linux/genhd.h> #include <linux/rtc.h> #include <linux/interrupt.h> @@ -34,6 +33,7 @@ #include <asm/traps.h> #include <asm/machdep.h> #include <asm/mvme147hw.h> +#include <asm/config.h> static void mvme147_get_model(char *model); diff --git a/arch/m68k/mvme16x/config.c b/arch/m68k/mvme16x/config.c index b4422c2dfbbf..f00c7aa058de 100644 --- a/arch/m68k/mvme16x/config.c +++ b/arch/m68k/mvme16x/config.c @@ -24,7 +24,6 @@ #include <linux/linkage.h> #include <linux/init.h> #include <linux/major.h> -#include <linux/genhd.h> #include <linux/rtc.h> #include <linux/interrupt.h> #include <linux/module.h> @@ -37,6 +36,7 @@ #include <asm/traps.h> #include <asm/machdep.h> #include <asm/mvme16xhw.h> +#include <asm/config.h> extern t_bdid mvme_bdid; diff --git a/arch/m68k/q40/config.c b/arch/m68k/q40/config.c index 5caf1e5be1c2..9237243077ce 100644 --- a/arch/m68k/q40/config.c +++ b/arch/m68k/q40/config.c @@ -34,6 +34,7 @@ #include <asm/traps.h> #include <asm/machdep.h> #include <asm/q40_master.h> +#include <asm/config.h> extern void q40_init_IRQ(void); static void q40_get_model(char *model); diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig index 59798e43cdb0..da568e981604 100644 --- a/arch/microblaze/Kconfig +++ b/arch/microblaze/Kconfig @@ -45,6 +45,8 @@ config MICROBLAZE select SET_FS select ZONE_DMA select TRACE_IRQFLAGS_SUPPORT + select GENERIC_IRQ_MULTI_HANDLER + select HANDLE_DOMAIN_IRQ # Endianness selection choice diff --git a/arch/microblaze/include/asm/irq.h b/arch/microblaze/include/asm/irq.h index 0a28e80bbab0..cb6ab55d1d01 100644 --- a/arch/microblaze/include/asm/irq.h +++ b/arch/microblaze/include/asm/irq.h @@ -11,7 +11,4 @@ struct pt_regs; extern void do_IRQ(struct pt_regs *regs); -/* should be defined in each interrupt controller driver */ -extern unsigned int xintc_get_irq(void); - #endif /* _ASM_MICROBLAZE_IRQ_H */ diff --git a/arch/microblaze/kernel/irq.c b/arch/microblaze/kernel/irq.c index 903dad822fad..1f8cb4c4f74f 100644 --- a/arch/microblaze/kernel/irq.c +++ b/arch/microblaze/kernel/irq.c @@ -20,27 +20,13 @@ #include <linux/irqchip.h> #include <linux/of_irq.h> -static u32 concurrent_irq; - void __irq_entry do_IRQ(struct pt_regs *regs) { - unsigned int irq; struct pt_regs *old_regs = set_irq_regs(regs); trace_hardirqs_off(); irq_enter(); - irq = xintc_get_irq(); -next_irq: - BUG_ON(!irq); - generic_handle_irq(irq); - - irq = xintc_get_irq(); - if (irq != -1U) { - pr_debug("next irq: %d\n", irq); - ++concurrent_irq; - goto next_irq; - } - + handle_arch_irq(regs); irq_exit(); set_irq_regs(old_regs); trace_hardirqs_on(); diff --git a/arch/mips/configs/cobalt_defconfig b/arch/mips/configs/cobalt_defconfig index c6a652ad34f7..e835730ea7fa 100644 --- a/arch/mips/configs/cobalt_defconfig +++ b/arch/mips/configs/cobalt_defconfig @@ -69,6 +69,5 @@ CONFIG_CONFIGFS_FS=y CONFIG_NFS_FS=y CONFIG_NFS_V3_ACL=y CONFIG_NFSD=y -CONFIG_NFSD_V3=y CONFIG_NFSD_V3_ACL=y CONFIG_LIBCRC32C=y diff --git a/arch/mips/configs/decstation_64_defconfig b/arch/mips/configs/decstation_64_defconfig index e2ed105f8c97..0021427a1bbe 100644 --- a/arch/mips/configs/decstation_64_defconfig +++ b/arch/mips/configs/decstation_64_defconfig @@ -159,7 +159,6 @@ CONFIG_NFS_V3_ACL=y CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_NFSD_V3_ACL=y # CONFIG_RPCSEC_GSS_KRB5 is not set CONFIG_NLS_ISO8859_8=m diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig index 7e987d6f5e34..7a97a0818ce4 100644 --- a/arch/mips/configs/decstation_defconfig +++ b/arch/mips/configs/decstation_defconfig @@ -154,7 +154,6 @@ CONFIG_NFS_V3_ACL=y CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_NFSD_V3_ACL=y # CONFIG_RPCSEC_GSS_KRB5 is not set CONFIG_NLS_ISO8859_8=m diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig index 6df5f6f2ac8e..a0643363526d 100644 --- a/arch/mips/configs/decstation_r4k_defconfig +++ b/arch/mips/configs/decstation_r4k_defconfig @@ -154,7 +154,6 @@ CONFIG_NFS_V3_ACL=y CONFIG_NFS_SWAP=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_NFSD_V3_ACL=y # CONFIG_RPCSEC_GSS_KRB5 is not set CONFIG_NLS_ISO8859_8=m diff --git a/arch/mips/configs/ip22_defconfig b/arch/mips/configs/ip22_defconfig index 21a1168ae301..70a4ba90f491 100644 --- a/arch/mips/configs/ip22_defconfig +++ b/arch/mips/configs/ip22_defconfig @@ -269,7 +269,6 @@ CONFIG_UFS_FS=m CONFIG_NFS_FS=m CONFIG_NFS_V3_ACL=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_NFSD_V3_ACL=y CONFIG_CIFS=m CONFIG_CIFS_UPCALL=y diff --git a/arch/mips/configs/ip32_defconfig b/arch/mips/configs/ip32_defconfig index 1ae48f7d9ddd..74020aa3440b 100644 --- a/arch/mips/configs/ip32_defconfig +++ b/arch/mips/configs/ip32_defconfig @@ -112,7 +112,6 @@ CONFIG_CONFIGFS_FS=y CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m CONFIG_NLS=y CONFIG_NLS_CODEPAGE_437=m diff --git a/arch/mips/configs/jazz_defconfig b/arch/mips/configs/jazz_defconfig index 8c223035921f..843f360da5f2 100644 --- a/arch/mips/configs/jazz_defconfig +++ b/arch/mips/configs/jazz_defconfig @@ -92,5 +92,4 @@ CONFIG_TMPFS=y CONFIG_UFS_FS=m CONFIG_NFS_FS=m CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m diff --git a/arch/mips/configs/malta_defconfig b/arch/mips/configs/malta_defconfig index 3321bb576944..1c220d152a50 100644 --- a/arch/mips/configs/malta_defconfig +++ b/arch/mips/configs/malta_defconfig @@ -363,7 +363,6 @@ CONFIG_UFS_FS=m CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_NFSD_V3=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_737=m CONFIG_NLS_CODEPAGE_775=m diff --git a/arch/mips/configs/malta_kvm_defconfig b/arch/mips/configs/malta_kvm_defconfig index 009b30372226..b5ba08d7ab57 100644 --- a/arch/mips/configs/malta_kvm_defconfig +++ b/arch/mips/configs/malta_kvm_defconfig @@ -371,7 +371,6 @@ CONFIG_UFS_FS=m CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_NFSD_V3=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_737=m CONFIG_NLS_CODEPAGE_775=m diff --git a/arch/mips/configs/maltaup_xpa_defconfig b/arch/mips/configs/maltaup_xpa_defconfig index e214e136101c..8d58653f1b4e 100644 --- a/arch/mips/configs/maltaup_xpa_defconfig +++ b/arch/mips/configs/maltaup_xpa_defconfig @@ -370,7 +370,6 @@ CONFIG_UFS_FS=m CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_NFSD_V3=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_737=m CONFIG_NLS_CODEPAGE_775=m diff --git a/arch/mips/configs/rm200_defconfig b/arch/mips/configs/rm200_defconfig index 3dc2da2bee0d..7d6f235e8ccb 100644 --- a/arch/mips/configs/rm200_defconfig +++ b/arch/mips/configs/rm200_defconfig @@ -354,7 +354,6 @@ CONFIG_SYSV_FS=m CONFIG_UFS_FS=m CONFIG_NFS_FS=m CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m CONFIG_CODA_FS=m CONFIG_AFS_FS=m diff --git a/arch/mips/configs/tb0219_defconfig b/arch/mips/configs/tb0219_defconfig index 6547f84750b5..c56d8ab14ba6 100644 --- a/arch/mips/configs/tb0219_defconfig +++ b/arch/mips/configs/tb0219_defconfig @@ -72,6 +72,5 @@ CONFIG_ROMFS_FS=m CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_NFSD_V3=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="cca=3 mem=64M console=ttyVR0,115200 ip=any root=/dev/nfs" diff --git a/arch/mips/configs/tb0226_defconfig b/arch/mips/configs/tb0226_defconfig index 7e099f7c2286..6e1423428f02 100644 --- a/arch/mips/configs/tb0226_defconfig +++ b/arch/mips/configs/tb0226_defconfig @@ -67,6 +67,5 @@ CONFIG_ROMFS_FS=m CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="cca=3 mem=32M console=ttyVR0,115200" diff --git a/arch/mips/configs/tb0287_defconfig b/arch/mips/configs/tb0287_defconfig index 0d881dd862c0..cf65a0879ece 100644 --- a/arch/mips/configs/tb0287_defconfig +++ b/arch/mips/configs/tb0287_defconfig @@ -77,7 +77,6 @@ CONFIG_ROMFS_FS=m CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_FONTS=y CONFIG_FONT_8x8=y CONFIG_FONT_8x16=y diff --git a/arch/mips/configs/workpad_defconfig b/arch/mips/configs/workpad_defconfig index 4798dc86c9ce..7e16da0bde8c 100644 --- a/arch/mips/configs/workpad_defconfig +++ b/arch/mips/configs/workpad_defconfig @@ -63,6 +63,5 @@ CONFIG_TMPFS=y CONFIG_TMPFS_POSIX_ACL=y CONFIG_NFS_FS=m CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CMDLINE_BOOL=y CONFIG_CMDLINE="console=ttyVR0,19200 ide0=0x170,0x376,49 mem=16M" diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 43c1c880def6..864b4756dcdf 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -10,10 +10,12 @@ config PARISC select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAS_PTE_SPECIAL select ARCH_NO_SG_CHAIN select ARCH_SUPPORTS_HUGETLBFS if PA20 select ARCH_SUPPORTS_MEMORY_FAILURE select ARCH_STACKWALK + select ARCH_HAS_DEBUG_VM_PGTABLE select HAVE_RELIABLE_STACKTRACE select DMA_OPS select RTC_CLASS @@ -259,18 +261,6 @@ config PARISC_PAGE_SIZE_64KB endchoice -config PARISC_SELF_EXTRACT - bool "Build kernel as self-extracting executable" - default y - help - Say Y if you want to build the parisc kernel as a kind of - self-extracting executable. - - If you say N here, the kernel will be compressed with gzip - which can be loaded by the palo bootloader directly too. - - If you don't know what to do here, say Y. - config SMP bool "Symmetric multi-processing support" help diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 82d77f4b0d08..2a9387a93592 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -15,12 +15,8 @@ # Mike Shaver, Helge Deller and Martin K. Petersen # -ifdef CONFIG_PARISC_SELF_EXTRACT boot := arch/parisc/boot KBUILD_IMAGE := $(boot)/bzImage -else -KBUILD_IMAGE := vmlinuz -endif NM = sh $(srctree)/arch/parisc/nm CHECKFLAGS += -D__hppa__=1 @@ -44,6 +40,16 @@ endif export LD_BFD +# Set default 32 bits cross compilers for vdso +CC_ARCHES_32 = hppa hppa2.0 hppa1.1 +CC_SUFFIXES = linux linux-gnu unknown-linux-gnu +CROSS32_COMPILE := $(call cc-cross-prefix, \ + $(foreach a,$(CC_ARCHES_32), \ + $(foreach s,$(CC_SUFFIXES),$(a)-$(s)-))) +CROSS32CC := $(CROSS32_COMPILE)gcc +export CROSS32CC + +# Set default cross compiler for kernel build ifdef cross_compiling ifeq ($(CROSS_COMPILE),) CC_SUFFIXES = linux linux-gnu unknown-linux-gnu @@ -155,14 +161,29 @@ Image: vmlinux bzImage: vmlinux $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@ -ifdef CONFIG_PARISC_SELF_EXTRACT vmlinuz: bzImage $(OBJCOPY) $(boot)/bzImage $@ -else -vmlinuz: vmlinux - @$(KGZIP) -cf -9 $< > $@ + +ifeq ($(KBUILD_EXTMOD),) +# We need to generate vdso-offsets.h before compiling certain files in kernel/. +# In order to do that, we should use the archprepare target, but we can't since +# asm-offsets.h is included in some files used to generate vdso-offsets.h, and +# asm-offsets.h is built in prepare0, for which archprepare is a dependency. +# Therefore we need to generate the header after prepare0 has been made, hence +# this hack. +prepare: vdso_prepare +vdso_prepare: prepare0 + $(if $(CONFIG_64BIT),$(Q)$(MAKE) \ + $(build)=arch/parisc/kernel/vdso64 include/generated/vdso64-offsets.h) + $(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso32 include/generated/vdso32-offsets.h endif +PHONY += vdso_install + +vdso_install: + $(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso $@ + $(if $(CONFIG_COMPAT_VDSO), \ + $(Q)$(MAKE) $(build)=arch/parisc/kernel/vdso32 $@) install: $(CONFIG_SHELL) $(srctree)/arch/parisc/install.sh \ $(KERNELRELEASE) vmlinux System.map "$(INSTALL_PATH)" diff --git a/arch/parisc/configs/generic-32bit_defconfig b/arch/parisc/configs/generic-32bit_defconfig index 53061cb2cf7f..a5fee10d76ee 100644 --- a/arch/parisc/configs/generic-32bit_defconfig +++ b/arch/parisc/configs/generic-32bit_defconfig @@ -210,7 +210,6 @@ CONFIG_TMPFS_XATTR=y CONFIG_NFS_FS=m # CONFIG_NFS_V2 is not set CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m CONFIG_CIFS_XATTR=y CONFIG_CIFS_POSIX=y diff --git a/arch/parisc/include/asm/assembly.h b/arch/parisc/include/asm/assembly.h index 6369082c6c74..ea0cb318b13d 100644 --- a/arch/parisc/include/asm/assembly.h +++ b/arch/parisc/include/asm/assembly.h @@ -47,6 +47,12 @@ #define PRIV_USER 3 #define PRIV_KERNEL 0 +/* Space register used inside kernel */ +#define SR_KERNEL 0 +#define SR_TEMP1 1 +#define SR_TEMP2 2 +#define SR_USER 3 + #ifdef __ASSEMBLY__ #ifdef CONFIG_64BIT diff --git a/arch/parisc/include/asm/cache.h b/arch/parisc/include/asm/cache.h index d53e9e27dba0..5032e758594e 100644 --- a/arch/parisc/include/asm/cache.h +++ b/arch/parisc/include/asm/cache.h @@ -39,16 +39,13 @@ extern int icache_stride; extern struct pdc_cache_info cache_info; void parisc_setup_cache_timing(void); -#define pdtlb(addr) asm volatile("pdtlb 0(%%sr1,%0)" \ +#define pdtlb(sr, addr) asm volatile("pdtlb 0(%%sr%0,%1)" \ ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ - : : "r" (addr) : "memory") -#define pitlb(addr) asm volatile("pitlb 0(%%sr1,%0)" \ + : : "i"(sr), "r" (addr) : "memory") +#define pitlb(sr, addr) asm volatile("pitlb 0(%%sr%0,%1)" \ ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ ALTERNATIVE(ALT_COND_NO_SPLIT_TLB, INSN_NOP) \ - : : "r" (addr) : "memory") -#define pdtlb_kernel(addr) asm volatile("pdtlb 0(%0)" \ - ALTERNATIVE(ALT_COND_NO_SMP, INSN_PxTLB) \ - : : "r" (addr) : "memory") + : : "i"(sr), "r" (addr) : "memory") #define asm_io_fdc(addr) asm volatile("fdc %%r0(%0)" \ ALTERNATIVE(ALT_COND_NO_DCACHE, INSN_NOP) \ diff --git a/arch/parisc/include/asm/cacheflush.h b/arch/parisc/include/asm/cacheflush.h index 859b8a34adcf..e8b4a03343d3 100644 --- a/arch/parisc/include/asm/cacheflush.h +++ b/arch/parisc/include/asm/cacheflush.h @@ -9,16 +9,11 @@ /* The usual comment is "Caches aren't brain-dead on the <architecture>". * Unfortunately, that doesn't apply to PA-RISC. */ -/* Internal implementation */ -void flush_data_cache_local(void *); /* flushes local data-cache only */ -void flush_instruction_cache_local(void *); /* flushes local code-cache only */ -#ifdef CONFIG_SMP -void flush_data_cache(void); /* flushes data-cache only (all processors) */ -void flush_instruction_cache(void); /* flushes i-cache only (all processors) */ -#else -#define flush_data_cache() flush_data_cache_local(NULL) -#define flush_instruction_cache() flush_instruction_cache_local(NULL) -#endif +#include <linux/jump_label.h> + +DECLARE_STATIC_KEY_TRUE(parisc_has_cache); +DECLARE_STATIC_KEY_TRUE(parisc_has_dcache); +DECLARE_STATIC_KEY_TRUE(parisc_has_icache); #define flush_cache_dup_mm(mm) flush_cache_mm(mm) diff --git a/arch/parisc/include/asm/current.h b/arch/parisc/include/asm/current.h index 568b739e42af..dc7aea07c3f3 100644 --- a/arch/parisc/include/asm/current.h +++ b/arch/parisc/include/asm/current.h @@ -2,14 +2,16 @@ #ifndef _ASM_PARISC_CURRENT_H #define _ASM_PARISC_CURRENT_H -#include <asm/special_insns.h> - #ifndef __ASSEMBLY__ struct task_struct; static __always_inline struct task_struct *get_current(void) { - return (struct task_struct *) mfctl(30); + struct task_struct *ts; + + /* do not use mfctl() macro as it is marked volatile */ + asm( "mfctl %%cr30,%0" : "=r" (ts) ); + return ts; } #define current get_current() diff --git a/arch/parisc/include/asm/elf.h b/arch/parisc/include/asm/elf.h index 3bd465a27791..cc426d365892 100644 --- a/arch/parisc/include/asm/elf.h +++ b/arch/parisc/include/asm/elf.h @@ -359,4 +359,19 @@ struct mm_struct; extern unsigned long arch_randomize_brk(struct mm_struct *); #define arch_randomize_brk arch_randomize_brk + +#define ARCH_HAS_SETUP_ADDITIONAL_PAGES 1 +struct linux_binprm; +extern int arch_setup_additional_pages(struct linux_binprm *bprm, + int executable_stack); +#define VDSO_AUX_ENT(a, b) NEW_AUX_ENT(a, b) +#define VDSO_CURRENT_BASE current->mm->context.vdso_base + +#define ARCH_DLINFO \ +do { \ + if (VDSO_CURRENT_BASE) { \ + NEW_AUX_ENT(AT_SYSINFO_EHDR, VDSO_CURRENT_BASE);\ + } \ +} while (0) + #endif diff --git a/arch/parisc/include/asm/kprobes.h b/arch/parisc/include/asm/kprobes.h index 904034da4974..0a175ac87698 100644 --- a/arch/parisc/include/asm/kprobes.h +++ b/arch/parisc/include/asm/kprobes.h @@ -18,8 +18,9 @@ #include <linux/notifier.h> #define PARISC_KPROBES_BREAK_INSN 0x3ff801f +#define PARISC_KPROBES_BREAK_INSN2 0x3ff801e #define __ARCH_WANT_KPROBES_INSN_SLOT -#define MAX_INSN_SIZE 1 +#define MAX_INSN_SIZE 2 typedef u32 kprobe_opcode_t; struct kprobe; @@ -29,7 +30,7 @@ void arch_remove_kprobe(struct kprobe *p); #define flush_insn_slot(p) \ flush_icache_range((unsigned long)&(p)->ainsn.insn[0], \ (unsigned long)&(p)->ainsn.insn[0] + \ - sizeof(kprobe_opcode_t)) + MAX_INSN_SIZE*sizeof(kprobe_opcode_t)) #define kretprobe_blacklist_size 0 diff --git a/arch/parisc/include/asm/mmu.h b/arch/parisc/include/asm/mmu.h index 3fb70a601d5c..44fd062b62ed 100644 --- a/arch/parisc/include/asm/mmu.h +++ b/arch/parisc/include/asm/mmu.h @@ -2,7 +2,9 @@ #ifndef _PARISC_MMU_H_ #define _PARISC_MMU_H_ -/* On parisc, we store the space id here */ -typedef unsigned long mm_context_t; +typedef struct { + unsigned long space_id; + unsigned long vdso_base; +} mm_context_t; #endif /* _PARISC_MMU_H_ */ diff --git a/arch/parisc/include/asm/mmu_context.h b/arch/parisc/include/asm/mmu_context.h index 726257648d9f..c9187fe836a3 100644 --- a/arch/parisc/include/asm/mmu_context.h +++ b/arch/parisc/include/asm/mmu_context.h @@ -20,7 +20,7 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) { BUG_ON(atomic_read(&mm->mm_users) != 1); - mm->context = alloc_sid(); + mm->context.space_id = alloc_sid(); return 0; } @@ -28,22 +28,22 @@ init_new_context(struct task_struct *tsk, struct mm_struct *mm) static inline void destroy_context(struct mm_struct *mm) { - free_sid(mm->context); - mm->context = 0; + free_sid(mm->context.space_id); + mm->context.space_id = 0; } static inline unsigned long __space_to_prot(mm_context_t context) { #if SPACEID_SHIFT == 0 - return context << 1; + return context.space_id << 1; #else - return context >> (SPACEID_SHIFT - 1); + return context.space_id >> (SPACEID_SHIFT - 1); #endif } static inline void load_context(mm_context_t context) { - mtsp(context, 3); + mtsp(context.space_id, SR_USER); mtctl(__space_to_prot(context), 8); } @@ -89,8 +89,8 @@ static inline void activate_mm(struct mm_struct *prev, struct mm_struct *next) BUG_ON(next == &init_mm); /* Should never happen */ - if (next->context == 0) - next->context = alloc_sid(); + if (next->context.space_id == 0) + next->context.space_id = alloc_sid(); switch_mm(prev,next,current); } diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h index 3e7cf882639f..7dff736936d0 100644 --- a/arch/parisc/include/asm/pgtable.h +++ b/arch/parisc/include/asm/pgtable.h @@ -70,9 +70,9 @@ static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr) unsigned long flags; purge_tlb_start(flags); - mtsp(mm->context, 1); - pdtlb(addr); - pitlb(addr); + mtsp(mm->context.space_id, SR_TEMP1); + pdtlb(SR_TEMP1, addr); + pitlb(SR_TEMP1, addr); purge_tlb_end(flags); } @@ -219,9 +219,10 @@ extern void __update_cache(pte_t pte); #define _PAGE_PRESENT (1 << xlate_pabit(_PAGE_PRESENT_BIT)) #define _PAGE_HUGE (1 << xlate_pabit(_PAGE_HPAGE_BIT)) #define _PAGE_USER (1 << xlate_pabit(_PAGE_USER_BIT)) +#define _PAGE_SPECIAL (_PAGE_DMB) #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | _PAGE_DIRTY | _PAGE_ACCESSED) -#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY) +#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_SPECIAL) #define _PAGE_KERNEL_RO (_PAGE_PRESENT | _PAGE_READ | _PAGE_DIRTY | _PAGE_ACCESSED) #define _PAGE_KERNEL_EXEC (_PAGE_KERNEL_RO | _PAGE_EXEC) #define _PAGE_KERNEL_RWX (_PAGE_KERNEL_EXEC | _PAGE_WRITE) @@ -348,6 +349,7 @@ static inline void pud_clear(pud_t *pud) { static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } static inline int pte_write(pte_t pte) { return pte_val(pte) & _PAGE_WRITE; } +static inline int pte_special(pte_t pte) { return pte_val(pte) & _PAGE_SPECIAL; } static inline pte_t pte_mkclean(pte_t pte) { pte_val(pte) &= ~_PAGE_DIRTY; return pte; } static inline pte_t pte_mkold(pte_t pte) { pte_val(pte) &= ~_PAGE_ACCESSED; return pte; } @@ -355,6 +357,7 @@ static inline pte_t pte_wrprotect(pte_t pte) { pte_val(pte) &= ~_PAGE_WRITE; ret static inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= _PAGE_DIRTY; return pte; } static inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= _PAGE_ACCESSED; return pte; } static inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_WRITE; return pte; } +static inline pte_t pte_mkspecial(pte_t pte) { pte_val(pte) |= _PAGE_SPECIAL; return pte; } /* * Huge pte definitions. diff --git a/arch/parisc/include/asm/processor.h b/arch/parisc/include/asm/processor.h index 3a3d05438408..006364212795 100644 --- a/arch/parisc/include/asm/processor.h +++ b/arch/parisc/include/asm/processor.h @@ -236,7 +236,7 @@ on downward growing arches, it looks like this: #define start_thread(regs, new_pc, new_sp) do { \ elf_addr_t *sp = (elf_addr_t *)new_sp; \ - __u32 spaceid = (__u32)current->mm->context; \ + __u32 spaceid = (__u32)current->mm->context.space_id; \ elf_addr_t pc = (elf_addr_t)new_pc | 3; \ elf_caddr_t *argv = (elf_caddr_t *)bprm->exec + 1; \ \ diff --git a/arch/parisc/include/asm/rt_sigframe.h b/arch/parisc/include/asm/rt_sigframe.h index 2b3010ade00e..bb7fb4153327 100644 --- a/arch/parisc/include/asm/rt_sigframe.h +++ b/arch/parisc/include/asm/rt_sigframe.h @@ -2,16 +2,8 @@ #ifndef _ASM_PARISC_RT_SIGFRAME_H #define _ASM_PARISC_RT_SIGFRAME_H -#define SIGRETURN_TRAMP 4 -#define SIGRESTARTBLOCK_TRAMP 5 -#define TRAMP_SIZE (SIGRETURN_TRAMP + SIGRESTARTBLOCK_TRAMP) - struct rt_sigframe { - /* XXX: Must match trampoline size in arch/parisc/kernel/signal.c - Secondary to that it must protect the ERESTART_RESTARTBLOCK - trampoline we left on the stack (we were bad and didn't - change sp so we could run really fast.) */ - unsigned int tramp[TRAMP_SIZE]; + unsigned int tramp[2]; /* holds original return address */ struct siginfo info; struct ucontext uc; }; diff --git a/arch/parisc/include/asm/special_insns.h b/arch/parisc/include/asm/special_insns.h index 16ee41e77174..41b3ddbd344c 100644 --- a/arch/parisc/include/asm/special_insns.h +++ b/arch/parisc/include/asm/special_insns.h @@ -55,8 +55,8 @@ static inline void set_eiem(unsigned long val) #define mfsp(reg) ({ \ unsigned long cr; \ __asm__ __volatile__( \ - "mfsp " #reg ",%0" : \ - "=r" (cr) \ + "mfsp %%sr%1,%0" \ + : "=r" (cr) : "i"(reg) \ ); \ cr; \ }) diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h index c5ded01d45be..5ffd7c17f593 100644 --- a/arch/parisc/include/asm/tlbflush.h +++ b/arch/parisc/include/asm/tlbflush.h @@ -17,7 +17,7 @@ int __flush_tlb_range(unsigned long sid, unsigned long start, unsigned long end); #define flush_tlb_range(vma, start, end) \ - __flush_tlb_range((vma)->vm_mm->context, start, end) + __flush_tlb_range((vma)->vm_mm->context.space_id, start, end) #define flush_tlb_kernel_range(start, end) \ __flush_tlb_range(0, start, end) diff --git a/arch/parisc/include/asm/traps.h b/arch/parisc/include/asm/traps.h index 34619f010c63..0ccdb738a9a3 100644 --- a/arch/parisc/include/asm/traps.h +++ b/arch/parisc/include/asm/traps.h @@ -18,6 +18,7 @@ unsigned long parisc_acctyp(unsigned long code, unsigned int inst); const char *trap_name(unsigned long code); void do_page_fault(struct pt_regs *regs, unsigned long code, unsigned long address); +int handle_nadtlb_fault(struct pt_regs *regs); #endif #endif diff --git a/arch/parisc/include/asm/uaccess.h b/arch/parisc/include/asm/uaccess.h index 123d5f16cd9d..b3eb4541e441 100644 --- a/arch/parisc/include/asm/uaccess.h +++ b/arch/parisc/include/asm/uaccess.h @@ -79,18 +79,18 @@ struct exception_table_entry { #define __get_user(val, ptr) \ ({ \ - __get_user_internal("%%sr3,", val, ptr); \ + __get_user_internal(SR_USER, val, ptr); \ }) #define __get_user_asm(sr, val, ldx, ptr) \ { \ register long __gu_val; \ \ - __asm__("1: " ldx " 0(" sr "%2),%0\n" \ + __asm__("1: " ldx " 0(%%sr%2,%3),%0\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ : "=r"(__gu_val), "+r"(__gu_err) \ - : "r"(ptr)); \ + : "i"(sr), "r"(ptr)); \ \ (val) = (__force __typeof__(*(ptr))) __gu_val; \ } @@ -100,7 +100,7 @@ struct exception_table_entry { { \ type __z; \ long __err; \ - __err = __get_user_internal("%%sr0,", __z, (type *)(src)); \ + __err = __get_user_internal(SR_KERNEL, __z, (type *)(src)); \ if (unlikely(__err)) \ goto err_label; \ else \ @@ -118,13 +118,13 @@ struct exception_table_entry { } __gu_tmp; \ \ __asm__(" copy %%r0,%R0\n" \ - "1: ldw 0(" sr "%2),%0\n" \ - "2: ldw 4(" sr "%2),%R0\n" \ + "1: ldw 0(%%sr%2,%3),%0\n" \ + "2: ldw 4(%%sr%2,%3),%R0\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ : "=&r"(__gu_tmp.l), "+r"(__gu_err) \ - : "r"(ptr)); \ + : "i"(sr), "r"(ptr)); \ \ (val) = __gu_tmp.t; \ } @@ -151,14 +151,14 @@ struct exception_table_entry { ({ \ __typeof__(&*(ptr)) __ptr = ptr; \ __typeof__(*(__ptr)) __x = (__typeof__(*(__ptr)))(x); \ - __put_user_internal("%%sr3,", __x, __ptr); \ + __put_user_internal(SR_USER, __x, __ptr); \ }) #define __put_kernel_nofault(dst, src, type, err_label) \ { \ type __z = *(type *)(src); \ long __err; \ - __err = __put_user_internal("%%sr0,", __z, (type *)(dst)); \ + __err = __put_user_internal(SR_KERNEL, __z, (type *)(dst)); \ if (unlikely(__err)) \ goto err_label; \ } @@ -178,24 +178,24 @@ struct exception_table_entry { #define __put_user_asm(sr, stx, x, ptr) \ __asm__ __volatile__ ( \ - "1: " stx " %2,0(" sr "%1)\n" \ + "1: " stx " %1,0(%%sr%2,%3)\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ : "+r"(__pu_err) \ - : "r"(ptr), "r"(x)) + : "r"(x), "i"(sr), "r"(ptr)) #if !defined(CONFIG_64BIT) #define __put_user_asm64(sr, __val, ptr) do { \ __asm__ __volatile__ ( \ - "1: stw %2,0(" sr "%1)\n" \ - "2: stw %R2,4(" sr "%1)\n" \ + "1: stw %1,0(%%sr%2,%3)\n" \ + "2: stw %R1,4(%%sr%2,%3)\n" \ "9:\n" \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 9b) \ ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 9b) \ : "+r"(__pu_err) \ - : "r"(ptr), "r"(__val)); \ + : "r"(__val), "i"(sr), "r"(ptr)); \ } while (0) #endif /* !defined(CONFIG_64BIT) */ diff --git a/arch/parisc/include/asm/unistd.h b/arch/parisc/include/asm/unistd.h index cd438e4150f6..7708a5806f09 100644 --- a/arch/parisc/include/asm/unistd.h +++ b/arch/parisc/include/asm/unistd.h @@ -63,10 +63,6 @@ ); \ __sys_res = (long)__res; \ } \ - if ( (unsigned long)__sys_res >= (unsigned long)-4095 ){ \ - errno = -__sys_res; \ - __sys_res = -1; \ - } \ __sys_res; \ }) diff --git a/arch/parisc/include/asm/vdso.h b/arch/parisc/include/asm/vdso.h new file mode 100644 index 000000000000..ef8206193f82 --- /dev/null +++ b/arch/parisc/include/asm/vdso.h @@ -0,0 +1,24 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PARISC_VDSO_H__ +#define __PARISC_VDSO_H__ + +#ifndef __ASSEMBLY__ + +#ifdef CONFIG_64BIT +#include <generated/vdso64-offsets.h> +#endif +#include <generated/vdso32-offsets.h> + +#define VDSO64_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso64_offset_##name)) +#define VDSO32_SYMBOL(tsk, name) ((tsk)->mm->context.vdso_base + (vdso32_offset_##name)) + +extern struct vdso_data *vdso_data; + +#endif /* __ASSEMBLY __ */ + +/* Default link addresses for the vDSOs */ +#define VDSO_LBASE 0 + +#define VDSO_VERSION_STRING LINUX_5.18 + +#endif /* __PARISC_VDSO_H__ */ diff --git a/arch/parisc/include/uapi/asm/auxvec.h b/arch/parisc/include/uapi/asm/auxvec.h new file mode 100644 index 000000000000..90d2aa699cf3 --- /dev/null +++ b/arch/parisc/include/uapi/asm/auxvec.h @@ -0,0 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _UAPI_PARISC_AUXVEC_H +#define _UAPI_PARISC_AUXVEC_H + +/* The vDSO location. */ +#define AT_SYSINFO_EHDR 33 + +#endif /* _UAPI_PARISC_AUXVEC_H */ diff --git a/arch/parisc/kernel/Makefile b/arch/parisc/kernel/Makefile index 8fb819bbbb17..d579243edc2f 100644 --- a/arch/parisc/kernel/Makefile +++ b/arch/parisc/kernel/Makefile @@ -39,3 +39,8 @@ obj-$(CONFIG_KGDB) += kgdb.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_KEXEC_CORE) += kexec.o relocate_kernel.o obj-$(CONFIG_KEXEC_FILE) += kexec_file.o + +# vdso +obj-y += vdso.o +obj-$(CONFIG_64BIT) += vdso64/ +obj-y += vdso32/ diff --git a/arch/parisc/kernel/alternative.c b/arch/parisc/kernel/alternative.c index fa28c4c9f972..daa1e9047275 100644 --- a/arch/parisc/kernel/alternative.c +++ b/arch/parisc/kernel/alternative.c @@ -8,6 +8,7 @@ #include <asm/processor.h> #include <asm/sections.h> #include <asm/alternative.h> +#include <asm/cacheflush.h> #include <linux/module.h> @@ -107,5 +108,14 @@ void __init apply_alternatives_all(void) apply_alternatives((struct alt_instr *) &__alt_instructions, (struct alt_instr *) &__alt_instructions_end, NULL); + if (cache_info.dc_size == 0 && cache_info.ic_size == 0) { + pr_info("alternatives: optimizing cache-flushes.\n"); + static_branch_disable(&parisc_has_cache); + } + if (cache_info.dc_size == 0) + static_branch_disable(&parisc_has_dcache); + if (cache_info.ic_size == 0) + static_branch_disable(&parisc_has_icache); + set_kernel_text_rw(0); } diff --git a/arch/parisc/kernel/asm-offsets.c b/arch/parisc/kernel/asm-offsets.c index 2a83ef36d216..2673d57eeb00 100644 --- a/arch/parisc/kernel/asm-offsets.c +++ b/arch/parisc/kernel/asm-offsets.c @@ -26,7 +26,11 @@ #include <asm/ptrace.h> #include <asm/processor.h> #include <asm/pdc.h> +#include <uapi/asm/sigcontext.h> +#include <asm/ucontext.h> +#include <asm/rt_sigframe.h> #include <linux/uaccess.h> +#include "signal32.h" /* Add FRAME_SIZE to the size x and align it to y. All definitions * that use align_frame will include space for a frame. @@ -218,6 +222,11 @@ int main(void) DEFINE(TI_FLAGS, offsetof(struct thread_info, flags)); DEFINE(TI_PRE_COUNT, offsetof(struct task_struct, thread_info.preempt_count)); BLANK(); + DEFINE(ASM_SIGFRAME_SIZE, PARISC_RT_SIGFRAME_SIZE); + DEFINE(SIGFRAME_CONTEXT_REGS, offsetof(struct rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE); + DEFINE(ASM_SIGFRAME_SIZE32, PARISC_RT_SIGFRAME_SIZE32); + DEFINE(SIGFRAME_CONTEXT_REGS32, offsetof(struct compat_rt_sigframe, uc.uc_mcontext) - PARISC_RT_SIGFRAME_SIZE32); + BLANK(); DEFINE(ICACHE_BASE, offsetof(struct pdc_cache_info, ic_base)); DEFINE(ICACHE_STRIDE, offsetof(struct pdc_cache_info, ic_stride)); DEFINE(ICACHE_COUNT, offsetof(struct pdc_cache_info, ic_count)); diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 94150b91c96f..456e879d34a8 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -38,6 +38,9 @@ EXPORT_SYMBOL(flush_dcache_page_asm); void purge_dcache_page_asm(unsigned long phys_addr, unsigned long vaddr); void flush_icache_page_asm(unsigned long phys_addr, unsigned long vaddr); +/* Internal implementation in arch/parisc/kernel/pacache.S */ +void flush_data_cache_local(void *); /* flushes local data-cache only */ +void flush_instruction_cache_local(void); /* flushes local code-cache only */ /* On some machines (i.e., ones with the Merced bus), there can be * only a single PxTLB broadcast at a time; this must be guaranteed @@ -58,26 +61,35 @@ struct pdc_cache_info cache_info __ro_after_init; static struct pdc_btlb_info btlb_info __ro_after_init; #endif -#ifdef CONFIG_SMP -void -flush_data_cache(void) +DEFINE_STATIC_KEY_TRUE(parisc_has_cache); +DEFINE_STATIC_KEY_TRUE(parisc_has_dcache); +DEFINE_STATIC_KEY_TRUE(parisc_has_icache); + +static void cache_flush_local_cpu(void *dummy) { - on_each_cpu(flush_data_cache_local, NULL, 1); + if (static_branch_likely(&parisc_has_icache)) + flush_instruction_cache_local(); + if (static_branch_likely(&parisc_has_dcache)) + flush_data_cache_local(NULL); } -void -flush_instruction_cache(void) + +void flush_cache_all_local(void) { - on_each_cpu(flush_instruction_cache_local, NULL, 1); + cache_flush_local_cpu(NULL); } -#endif -void -flush_cache_all_local(void) +void flush_cache_all(void) { - flush_instruction_cache_local(NULL); - flush_data_cache_local(NULL); + if (static_branch_likely(&parisc_has_cache)) + on_each_cpu(cache_flush_local_cpu, NULL, 1); } -EXPORT_SYMBOL(flush_cache_all_local); + +static inline void flush_data_cache(void) +{ + if (static_branch_likely(&parisc_has_dcache)) + on_each_cpu(flush_data_cache_local, NULL, 1); +} + /* Virtual address of pfn. */ #define pfn_va(pfn) __va(PFN_PHYS(pfn)) @@ -303,6 +315,8 @@ static inline void __flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long physaddr) { + if (!static_branch_likely(&parisc_has_cache)) + return; preempt_disable(); flush_dcache_page_asm(physaddr, vmaddr); if (vma->vm_flags & VM_EXEC) @@ -314,6 +328,8 @@ static inline void __purge_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long physaddr) { + if (!static_branch_likely(&parisc_has_cache)) + return; preempt_disable(); purge_dcache_page_asm(physaddr, vmaddr); if (vma->vm_flags & VM_EXEC) @@ -375,7 +391,6 @@ EXPORT_SYMBOL(flush_dcache_page); /* Defined in arch/parisc/kernel/pacache.S */ EXPORT_SYMBOL(flush_kernel_dcache_range_asm); -EXPORT_SYMBOL(flush_data_cache_local); EXPORT_SYMBOL(flush_kernel_icache_range_asm); #define FLUSH_THRESHOLD 0x80000 /* 0.5MB */ @@ -388,7 +403,7 @@ void __init parisc_setup_cache_timing(void) { unsigned long rangetime, alltime; unsigned long size; - unsigned long threshold; + unsigned long threshold, threshold2; alltime = mfctl(16); flush_data_cache(); @@ -403,8 +418,20 @@ void __init parisc_setup_cache_timing(void) alltime, size, rangetime); threshold = L1_CACHE_ALIGN(size * alltime / rangetime); - if (threshold > cache_info.dc_size) - threshold = cache_info.dc_size; + + /* + * The threshold computed above isn't very reliable since the + * flush times depend greatly on the percentage of dirty lines + * in the flush range. Further, the whole cache time doesn't + * include the time to refill lines that aren't in the mm/vma + * being flushed. By timing glibc build and checks on mako cpus, + * the following formula seems to work reasonably well. The + * value from the timing calculation is too small, and increases + * build and check times by almost a factor two. + */ + threshold2 = cache_info.dc_size * num_online_cpus(); + if (threshold2 > threshold) + threshold = threshold2; if (threshold) parisc_cache_flush_threshold = threshold; printk(KERN_INFO "Cache flush threshold set to %lu KiB\n", @@ -457,7 +484,7 @@ void flush_kernel_dcache_page_addr(void *addr) flush_kernel_dcache_page_asm(addr); purge_tlb_start(flags); - pdtlb_kernel(addr); + pdtlb(SR_KERNEL, addr); purge_tlb_end(flags); } EXPORT_SYMBOL(flush_kernel_dcache_page_addr); @@ -496,25 +523,15 @@ int __flush_tlb_range(unsigned long sid, unsigned long start, but cause a purge request to be broadcast to other TLBs. */ while (start < end) { purge_tlb_start(flags); - mtsp(sid, 1); - pdtlb(start); - pitlb(start); + mtsp(sid, SR_TEMP1); + pdtlb(SR_TEMP1, start); + pitlb(SR_TEMP1, start); purge_tlb_end(flags); start += PAGE_SIZE; } return 0; } -static void cacheflush_h_tmp_function(void *dummy) -{ - flush_cache_all_local(); -} - -void flush_cache_all(void) -{ - on_each_cpu(cacheflush_h_tmp_function, NULL, 1); -} - static inline unsigned long mm_total_size(struct mm_struct *mm) { struct vm_area_struct *vma; @@ -558,15 +575,6 @@ static void flush_cache_pages(struct vm_area_struct *vma, struct mm_struct *mm, } } -static void flush_user_cache_tlb(struct vm_area_struct *vma, - unsigned long start, unsigned long end) -{ - flush_user_dcache_range_asm(start, end); - if (vma->vm_flags & VM_EXEC) - flush_user_icache_range_asm(start, end); - flush_tlb_range(vma, start, end); -} - void flush_cache_mm(struct mm_struct *mm) { struct vm_area_struct *vma; @@ -575,23 +583,14 @@ void flush_cache_mm(struct mm_struct *mm) rp3440, etc. So, avoid it if the mm isn't too big. */ if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && mm_total_size(mm) >= parisc_cache_flush_threshold) { - if (mm->context) + if (mm->context.space_id) flush_tlb_all(); flush_cache_all(); return; } - preempt_disable(); - if (mm->context == mfsp(3)) { - for (vma = mm->mmap; vma; vma = vma->vm_next) - flush_user_cache_tlb(vma, vma->vm_start, vma->vm_end); - preempt_enable(); - return; - } - for (vma = mm->mmap; vma; vma = vma->vm_next) flush_cache_pages(vma, mm, vma->vm_start, vma->vm_end); - preempt_enable(); } void flush_cache_range(struct vm_area_struct *vma, @@ -599,29 +598,21 @@ void flush_cache_range(struct vm_area_struct *vma, { if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && end - start >= parisc_cache_flush_threshold) { - if (vma->vm_mm->context) + if (vma->vm_mm->context.space_id) flush_tlb_range(vma, start, end); flush_cache_all(); return; } - preempt_disable(); - if (vma->vm_mm->context == mfsp(3)) { - flush_user_cache_tlb(vma, start, end); - preempt_enable(); - return; - } - - flush_cache_pages(vma, vma->vm_mm, vma->vm_start, vma->vm_end); - preempt_enable(); + flush_cache_pages(vma, vma->vm_mm, start, end); } void flush_cache_page(struct vm_area_struct *vma, unsigned long vmaddr, unsigned long pfn) { if (pfn_valid(pfn)) { - if (likely(vma->vm_mm->context)) { - flush_tlb_page(vma, vmaddr); + flush_tlb_page(vma, vmaddr); + if (likely(vma->vm_mm->context.space_id)) { __flush_cache_page(vma, vmaddr, PFN_PHYS(pfn)); } else { __purge_cache_page(vma, vmaddr, PFN_PHYS(pfn)); @@ -633,6 +624,7 @@ void flush_kernel_vmap_range(void *vaddr, int size) { unsigned long start = (unsigned long)vaddr; unsigned long end = start + size; + unsigned long flags, physaddr; if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && (unsigned long)size >= parisc_cache_flush_threshold) { @@ -641,8 +633,14 @@ void flush_kernel_vmap_range(void *vaddr, int size) return; } - flush_kernel_dcache_range_asm(start, end); - flush_tlb_kernel_range(start, end); + while (start < end) { + physaddr = lpa(start); + purge_tlb_start(flags); + pdtlb(SR_KERNEL, start); + purge_tlb_end(flags); + flush_dcache_page_asm(physaddr, start); + start += PAGE_SIZE; + } } EXPORT_SYMBOL(flush_kernel_vmap_range); @@ -650,6 +648,7 @@ void invalidate_kernel_vmap_range(void *vaddr, int size) { unsigned long start = (unsigned long)vaddr; unsigned long end = start + size; + unsigned long flags, physaddr; if ((!IS_ENABLED(CONFIG_SMP) || !arch_irqs_disabled()) && (unsigned long)size >= parisc_cache_flush_threshold) { @@ -658,7 +657,13 @@ void invalidate_kernel_vmap_range(void *vaddr, int size) return; } - purge_kernel_dcache_range_asm(start, end); - flush_tlb_kernel_range(start, end); + while (start < end) { + physaddr = lpa(start); + purge_tlb_start(flags); + pdtlb(SR_KERNEL, start); + purge_tlb_end(flags); + purge_dcache_page_asm(physaddr, start); + start += PAGE_SIZE; + } } EXPORT_SYMBOL(invalidate_kernel_vmap_range); diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S index 6e9cdb269862..ecf50159359e 100644 --- a/arch/parisc/kernel/entry.S +++ b/arch/parisc/kernel/entry.S @@ -1288,74 +1288,12 @@ nadtlb_check_alias_20: nadtlb_emulate: /* - * Non access misses can be caused by fdc,fic,pdc,lpa,probe and - * probei instructions. We don't want to fault for these - * instructions (not only does it not make sense, it can cause - * deadlocks, since some flushes are done with the mmap - * semaphore held). If the translation doesn't exist, we can't - * insert a translation, so have to emulate the side effects - * of the instruction. Since we don't insert a translation - * we can get a lot of faults during a flush loop, so it makes - * sense to try to do it here with minimum overhead. We only - * emulate fdc,fic,pdc,probew,prober instructions whose base - * and index registers are not shadowed. We defer everything - * else to the "slow" path. + * Non-access misses can be caused by fdc,fic,pdc,lpa,probe and + * probei instructions. The kernel no longer faults doing flushes. + * Use of lpa and probe instructions is rare. Given the issue + * with shadow registers, we defer everything to the "slow" path. */ - - mfctl %cr19,%r9 /* Get iir */ - - /* PA 2.0 Arch Ref. Book pg 382 has a good description of the insn bits. - Checks for fdc,fdce,pdc,"fic,4f",prober,probeir,probew, probeiw */ - - /* Checks for fdc,fdce,pdc,"fic,4f" only */ - ldi 0x280,%r16 - and %r9,%r16,%r17 - cmpb,<>,n %r16,%r17,nadtlb_probe_check - bb,>=,n %r9,26,nadtlb_nullify /* m bit not set, just nullify */ - BL get_register,%r25 - extrw,u %r9,15,5,%r8 /* Get index register # */ - cmpib,COND(=),n -1,%r1,nadtlb_fault /* have to use slow path */ - copy %r1,%r24 - BL get_register,%r25 - extrw,u %r9,10,5,%r8 /* Get base register # */ - cmpib,COND(=),n -1,%r1,nadtlb_fault /* have to use slow path */ - BL set_register,%r25 - add,l %r1,%r24,%r1 /* doesn't affect c/b bits */ - -nadtlb_nullify: - mfctl %ipsw,%r8 - ldil L%PSW_N,%r9 - or %r8,%r9,%r8 /* Set PSW_N */ - mtctl %r8,%ipsw - - rfir - nop - - /* - When there is no translation for the probe address then we - must nullify the insn and return zero in the target register. - This will indicate to the calling code that it does not have - write/read privileges to this address. - - This should technically work for prober and probew in PA 1.1, - and also probe,r and probe,w in PA 2.0 - - WARNING: USE ONLY NON-SHADOW REGISTERS WITH PROBE INSN! - THE SLOW-PATH EMULATION HAS NOT BEEN WRITTEN YET. - - */ -nadtlb_probe_check: - ldi 0x80,%r16 - and %r9,%r16,%r17 - cmpb,<>,n %r16,%r17,nadtlb_fault /* Must be probe,[rw]*/ - BL get_register,%r25 /* Find the target register */ - extrw,u %r9,31,5,%r8 /* Get target register */ - cmpib,COND(=),n -1,%r1,nadtlb_fault /* have to use slow path */ - BL set_register,%r25 - copy %r0,%r1 /* Write zero to target register */ - b nadtlb_nullify /* Nullify return insn */ - nop - + b,n nadtlb_fault #ifdef CONFIG_64BIT itlb_miss_20w: diff --git a/arch/parisc/kernel/kprobes.c b/arch/parisc/kernel/kprobes.c index e2bdb5a5f93e..3343d2fb7889 100644 --- a/arch/parisc/kernel/kprobes.c +++ b/arch/parisc/kernel/kprobes.c @@ -5,6 +5,7 @@ * PA-RISC kprobes implementation * * Copyright (c) 2019 Sven Schnelle <svens@stackframe.org> + * Copyright (c) 2022 Helge Deller <deller@gmx.de> */ #include <linux/types.h> @@ -25,9 +26,14 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) if (!p->ainsn.insn) return -ENOMEM; - memcpy(p->ainsn.insn, p->addr, - MAX_INSN_SIZE * sizeof(kprobe_opcode_t)); + /* + * Set up new instructions. Second break instruction will + * trigger call of parisc_kprobe_ss_handler(). + */ p->opcode = *p->addr; + p->ainsn.insn[0] = p->opcode; + p->ainsn.insn[1] = PARISC_KPROBES_BREAK_INSN2; + flush_insn_slot(p); return 0; } @@ -73,9 +79,7 @@ static void __kprobes setup_singlestep(struct kprobe *p, { kcb->iaoq[0] = regs->iaoq[0]; kcb->iaoq[1] = regs->iaoq[1]; - regs->iaoq[0] = (unsigned long)p->ainsn.insn; - mtctl(0, 0); - regs->gr[0] |= PSW_R; + instruction_pointer_set(regs, (unsigned long)p->ainsn.insn); } int __kprobes parisc_kprobe_break_handler(struct pt_regs *regs) @@ -165,9 +169,8 @@ int __kprobes parisc_kprobe_ss_handler(struct pt_regs *regs) regs->iaoq[0] = kcb->iaoq[1]; break; default: - regs->iaoq[1] = kcb->iaoq[0]; - regs->iaoq[1] += (regs->iaoq[1] - regs->iaoq[0]) + 4; regs->iaoq[0] = kcb->iaoq[1]; + regs->iaoq[1] = regs->iaoq[0] + 4; break; } kcb->kprobe_status = KPROBE_HIT_SSDONE; @@ -191,14 +194,17 @@ static struct kprobe trampoline_p = { static int __kprobes trampoline_probe_handler(struct kprobe *p, struct pt_regs *regs) { - unsigned long orig_ret_address; - - orig_ret_address = __kretprobe_trampoline_handler(regs, NULL); - instruction_pointer_set(regs, orig_ret_address); + __kretprobe_trampoline_handler(regs, NULL); return 1; } +void arch_kretprobe_fixup_return(struct pt_regs *regs, + kprobe_opcode_t *correct_ret_addr) +{ + regs->gr[2] = (unsigned long)correct_ret_addr; +} + void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, struct pt_regs *regs) { diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index 36a57aa38e87..160996f2198e 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -91,7 +91,7 @@ static inline int map_pte_uncached(pte_t * pte, printk(KERN_ERR "map_pte_uncached: page already exists\n"); purge_tlb_start(flags); set_pte(pte, __mk_pte(*paddr_ptr, PAGE_KERNEL_UNC)); - pdtlb_kernel(orig_vaddr); + pdtlb(SR_KERNEL, orig_vaddr); purge_tlb_end(flags); vaddr += PAGE_SIZE; orig_vaddr += PAGE_SIZE; @@ -175,7 +175,7 @@ static inline void unmap_uncached_pte(pmd_t * pmd, unsigned long vaddr, pte_clear(&init_mm, vaddr, pte); purge_tlb_start(flags); - pdtlb_kernel(orig_vaddr); + pdtlb(SR_KERNEL, orig_vaddr); purge_tlb_end(flags); vaddr += PAGE_SIZE; orig_vaddr += PAGE_SIZE; diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index 46b1050640b8..24443908f905 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -1,16 +1,13 @@ // SPDX-License-Identifier: GPL-2.0 /* - * linux/arch/parisc/kernel/signal.c: Architecture-specific signal - * handling support. + * PA-RISC architecture-specific signal handling support. * * Copyright (C) 2000 David Huggins-Daines <dhd@debian.org> * Copyright (C) 2000 Linuxcare, Inc. + * Copyright (C) 2000-2022 Helge Deller <deller@gmx.de> + * Copyright (C) 2022 John David Anglin <dave.anglin@bell.net> * * Based on the ia64, i386, and alpha versions. - * - * Like the IA-64, we are a recent enough port (we are *starting* - * with glibc2.2) that we do not need to support the old non-realtime - * Linux signals. Therefore we don't. */ #include <linux/sched.h> @@ -32,6 +29,7 @@ #include <linux/uaccess.h> #include <asm/cacheflush.h> #include <asm/asm-offsets.h> +#include <asm/vdso.h> #ifdef CONFIG_COMPAT #include "signal32.h" @@ -59,14 +57,6 @@ * Do a signal return - restore sigcontext. */ -/* Trampoline for calling rt_sigreturn() */ -#define INSN_LDI_R25_0 0x34190000 /* ldi 0,%r25 (in_syscall=0) */ -#define INSN_LDI_R25_1 0x34190002 /* ldi 1,%r25 (in_syscall=1) */ -#define INSN_LDI_R20 0x3414015a /* ldi __NR_rt_sigreturn,%r20 */ -#define INSN_BLE_SR2_R0 0xe4008200 /* be,l 0x100(%sr2,%r0),%sr0,%r31 */ -/* For debugging */ -#define INSN_DIE_HORRIBLY 0x68000ccc /* stw %r0,0x666(%sr0,%r0) */ - static long restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs) { @@ -77,9 +67,9 @@ restore_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs) err |= __copy_from_user(regs->iaoq, sc->sc_iaoq, sizeof(regs->iaoq)); err |= __copy_from_user(regs->iasq, sc->sc_iasq, sizeof(regs->iasq)); err |= __get_user(regs->sar, &sc->sc_sar); - DBG(2,"restore_sigcontext: iaoq is %#lx / %#lx\n", - regs->iaoq[0],regs->iaoq[1]); - DBG(2,"restore_sigcontext: r28 is %ld\n", regs->gr[28]); + DBG(2, "%s: iaoq is %#lx / %#lx\n", + __func__, regs->iaoq[0], regs->iaoq[1]); + DBG(2, "%s: r28 is %ld\n", __func__, regs->gr[28]); return err; } @@ -102,7 +92,7 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall) /* Unwind the user stack to get the rt_sigframe structure. */ frame = (struct rt_sigframe __user *) (usp - sigframe_size); - DBG(2,"sys_rt_sigreturn: frame is %p\n", frame); + DBG(2, "%s: frame is %p pid %d\n", __func__, frame, task_pid_nr(current)); regs->orig_r28 = 1; /* no restarts for sigreturn */ @@ -110,7 +100,6 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall) compat_frame = (struct compat_rt_sigframe __user *)frame; if (is_compat_task()) { - DBG(2,"sys_rt_sigreturn: ELF32 process.\n"); if (get_compat_sigset(&set, &compat_frame->uc.uc_sigmask)) goto give_sigsegv; } else @@ -125,25 +114,25 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall) /* Good thing we saved the old gr[30], eh? */ #ifdef CONFIG_64BIT if (is_compat_task()) { - DBG(1,"sys_rt_sigreturn: compat_frame->uc.uc_mcontext 0x%p\n", - &compat_frame->uc.uc_mcontext); + DBG(1, "%s: compat_frame->uc.uc_mcontext 0x%p\n", + __func__, &compat_frame->uc.uc_mcontext); // FIXME: Load upper half from register file if (restore_sigcontext32(&compat_frame->uc.uc_mcontext, &compat_frame->regs, regs)) goto give_sigsegv; - DBG(1,"sys_rt_sigreturn: usp %#08lx stack 0x%p\n", - usp, &compat_frame->uc.uc_stack); + DBG(1, "%s: usp %#08lx stack 0x%p\n", + __func__, usp, &compat_frame->uc.uc_stack); if (compat_restore_altstack(&compat_frame->uc.uc_stack)) goto give_sigsegv; } else #endif { - DBG(1,"sys_rt_sigreturn: frame->uc.uc_mcontext 0x%p\n", - &frame->uc.uc_mcontext); + DBG(1, "%s: frame->uc.uc_mcontext 0x%p\n", + __func__, &frame->uc.uc_mcontext); if (restore_sigcontext(&frame->uc.uc_mcontext, regs)) goto give_sigsegv; - DBG(1,"sys_rt_sigreturn: usp %#08lx stack 0x%p\n", - usp, &frame->uc.uc_stack); + DBG(1, "%s: usp %#08lx stack 0x%p\n", + __func__, usp, &frame->uc.uc_stack); if (restore_altstack(&frame->uc.uc_stack)) goto give_sigsegv; } @@ -155,14 +144,11 @@ sys_rt_sigreturn(struct pt_regs *regs, int in_syscall) */ if (in_syscall) regs->gr[31] = regs->iaoq[0]; -#if DEBUG_SIG - DBG(1,"sys_rt_sigreturn: returning to %#lx, DUMPING REGS:\n", regs->iaoq[0]); - show_regs(regs); -#endif + return; give_sigsegv: - DBG(1,"sys_rt_sigreturn: Sending SIGSEGV\n"); + DBG(1, "%s: Sending SIGSEGV\n", __func__); force_sig(SIGSEGV); return; } @@ -177,15 +163,15 @@ get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size) /*FIXME: ELF32 vs. ELF64 has different frame_size, but since we don't use the parameter it doesn't matter */ - DBG(1,"get_sigframe: ka = %#lx, sp = %#lx, frame_size = %#lx\n", - (unsigned long)ka, sp, frame_size); + DBG(1, "%s: ka = %#lx, sp = %#lx, frame_size = %zu\n", + __func__, (unsigned long)ka, sp, frame_size); /* Align alternate stack and reserve 64 bytes for the signal handler's frame marker. */ if ((ka->sa.sa_flags & SA_ONSTACK) != 0 && ! sas_ss_flags(sp)) sp = (current->sas_ss_sp + 0x7f) & ~0x3f; /* Stacks grow up! */ - DBG(1,"get_sigframe: Returning sp = %#lx\n", (unsigned long)sp); + DBG(1, "%s: Returning sp = %#lx\n", __func__, (unsigned long)sp); return (void __user *) sp; /* Stacks grow up. Fun. */ } @@ -205,20 +191,20 @@ setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, int in_sysc err |= __put_user(regs->gr[31]+4, &sc->sc_iaoq[1]); err |= __put_user(regs->sr[3], &sc->sc_iasq[0]); err |= __put_user(regs->sr[3], &sc->sc_iasq[1]); - DBG(1,"setup_sigcontext: iaoq %#lx / %#lx (in syscall)\n", - regs->gr[31], regs->gr[31]+4); + DBG(1, "%s: iaoq %#lx / %#lx (in syscall)\n", + __func__, regs->gr[31], regs->gr[31]+4); } else { err |= __copy_to_user(sc->sc_iaoq, regs->iaoq, sizeof(regs->iaoq)); err |= __copy_to_user(sc->sc_iasq, regs->iasq, sizeof(regs->iasq)); - DBG(1,"setup_sigcontext: iaoq %#lx / %#lx (not in syscall)\n", - regs->iaoq[0], regs->iaoq[1]); + DBG(1, "%s: iaoq %#lx / %#lx (not in syscall)\n", + __func__, regs->iaoq[0], regs->iaoq[1]); } err |= __put_user(flags, &sc->sc_flags); err |= __copy_to_user(sc->sc_gr, regs->gr, sizeof(regs->gr)); err |= __copy_to_user(sc->sc_fr, regs->fr, sizeof(regs->fr)); err |= __put_user(regs->sar, &sc->sc_sar); - DBG(1,"setup_sigcontext: r28 is %ld\n", regs->gr[28]); + DBG(1, "%s: r28 is %ld\n", __func__, regs->gr[28]); return err; } @@ -230,7 +216,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, struct rt_sigframe __user *frame; unsigned long rp, usp; unsigned long haddr, sigframe_size; - unsigned long start, end; + unsigned long start; int err = 0; #ifdef CONFIG_64BIT struct compat_rt_sigframe __user * compat_frame; @@ -247,8 +233,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, #endif frame = get_sigframe(&ksig->ka, usp, sigframe_size); - DBG(1,"SETUP_RT_FRAME: START\n"); - DBG(1,"setup_rt_frame: frame %p info %p\n", frame, ksig->info); + DBG(1, "%s: frame %p info %p\n", __func__, frame, &ksig->info); start = (unsigned long) frame; if (start >= user_addr_max() - sigframe_size) @@ -259,11 +244,12 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, compat_frame = (struct compat_rt_sigframe __user *)frame; if (is_compat_task()) { - DBG(1,"setup_rt_frame: frame->info = 0x%p\n", &compat_frame->info); + DBG(1, "%s: frame->info = 0x%p\n", __func__, &compat_frame->info); err |= copy_siginfo_to_user32(&compat_frame->info, &ksig->info); err |= __compat_save_altstack( &compat_frame->uc.uc_stack, regs->gr[30]); - DBG(1,"setup_rt_frame: frame->uc = 0x%p\n", &compat_frame->uc); - DBG(1,"setup_rt_frame: frame->uc.uc_mcontext = 0x%p\n", &compat_frame->uc.uc_mcontext); + DBG(1, "%s: frame->uc = 0x%p\n", __func__, &compat_frame->uc); + DBG(1, "%s: frame->uc.uc_mcontext = 0x%p\n", + __func__, &compat_frame->uc.uc_mcontext); err |= setup_sigcontext32(&compat_frame->uc.uc_mcontext, &compat_frame->regs, regs, in_syscall); err |= put_compat_sigset(&compat_frame->uc.uc_sigmask, set, @@ -271,11 +257,12 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, } else #endif { - DBG(1,"setup_rt_frame: frame->info = 0x%p\n", &frame->info); + DBG(1, "%s: frame->info = 0x%p\n", __func__, &frame->info); err |= copy_siginfo_to_user(&frame->info, &ksig->info); err |= __save_altstack(&frame->uc.uc_stack, regs->gr[30]); - DBG(1,"setup_rt_frame: frame->uc = 0x%p\n", &frame->uc); - DBG(1,"setup_rt_frame: frame->uc.uc_mcontext = 0x%p\n", &frame->uc.uc_mcontext); + DBG(1, "%s: frame->uc = 0x%p\n", __func__, &frame->uc); + DBG(1, "%s: frame->uc.uc_mcontext = 0x%p\n", + __func__, &frame->uc.uc_mcontext); err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, in_syscall); /* FIXME: Should probably be converted as well for the compat case */ err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); @@ -284,32 +271,15 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, if (err) return -EFAULT; - /* Set up to return from userspace. If provided, use a stub - already in userspace. The first words of tramp are used to - save the previous sigrestartblock trampoline that might be - on the stack. We start the sigreturn trampoline at - SIGRESTARTBLOCK_TRAMP+X. */ - err |= __put_user(in_syscall ? INSN_LDI_R25_1 : INSN_LDI_R25_0, - &frame->tramp[SIGRESTARTBLOCK_TRAMP+0]); - err |= __put_user(INSN_LDI_R20, - &frame->tramp[SIGRESTARTBLOCK_TRAMP+1]); - err |= __put_user(INSN_BLE_SR2_R0, - &frame->tramp[SIGRESTARTBLOCK_TRAMP+2]); - err |= __put_user(INSN_NOP, &frame->tramp[SIGRESTARTBLOCK_TRAMP+3]); - - start = (unsigned long) &frame->tramp[0]; - end = (unsigned long) &frame->tramp[TRAMP_SIZE]; - flush_user_dcache_range_asm(start, end); - flush_user_icache_range_asm(start, end); - - /* TRAMP Words 0-4, Length 5 = SIGRESTARTBLOCK_TRAMP - * TRAMP Words 5-9, Length 4 = SIGRETURN_TRAMP - * So the SIGRETURN_TRAMP is at the end of SIGRESTARTBLOCK_TRAMP - */ - rp = (unsigned long) &frame->tramp[SIGRESTARTBLOCK_TRAMP]; +#ifdef CONFIG_64BIT + if (!is_compat_task()) + rp = VDSO64_SYMBOL(current, sigtramp_rt); + else +#endif + rp = VDSO32_SYMBOL(current, sigtramp_rt); - if (err) - return -EFAULT; + if (in_syscall) + rp += 4*4; /* skip 4 instructions and start at ldi 1,%r25 */ haddr = A(ksig->ka.sa.sa_handler); /* The sa_handler may be a pointer to a function descriptor */ @@ -340,8 +310,8 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, haddr = fdesc.addr; regs->gr[19] = fdesc.gp; - DBG(1,"setup_rt_frame: 64 bit signal, exe=%#lx, r19=%#lx, in_syscall=%d\n", - haddr, regs->gr[19], in_syscall); + DBG(1, "%s: 64 bit signal, exe=%#lx, r19=%#lx, in_syscall=%d\n", + __func__, haddr, regs->gr[19], in_syscall); } #endif @@ -351,7 +321,7 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, regs->gr[31] = haddr; #ifdef CONFIG_64BIT if (!test_thread_flag(TIF_32BIT)) - sigframe_size |= 1; + sigframe_size |= 1; /* XXX ???? */ #endif } else { unsigned long psw = USER_PSW; @@ -373,11 +343,11 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, } regs->gr[0] = psw; - regs->iaoq[0] = haddr | 3; + regs->iaoq[0] = haddr | PRIV_USER; regs->iaoq[1] = regs->iaoq[0] + 4; } - regs->gr[2] = rp; /* userland return pointer */ + regs->gr[2] = rp; /* userland return pointer */ regs->gr[26] = ksig->sig; /* signal number */ #ifdef CONFIG_64BIT @@ -391,15 +361,15 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs, regs->gr[24] = A(&frame->uc); /* ucontext pointer */ } - DBG(1,"setup_rt_frame: making sigreturn frame: %#lx + %#lx = %#lx\n", + DBG(1, "%s: making sigreturn frame: %#lx + %#lx = %#lx\n", __func__, regs->gr[30], sigframe_size, regs->gr[30] + sigframe_size); /* Raise the user stack pointer to make a proper call frame. */ regs->gr[30] = (A(frame) + sigframe_size); - DBG(1,"setup_rt_frame: sig deliver (%s,%d) frame=0x%p sp=%#lx iaoq=%#lx/%#lx rp=%#lx\n", - current->comm, current->pid, frame, regs->gr[30], + DBG(1, "%s: sig deliver (%s,%d) frame=0x%p sp=%#lx iaoq=%#lx/%#lx rp=%#lx\n", + __func__, current->comm, current->pid, frame, regs->gr[30], regs->iaoq[0], regs->iaoq[1], rp); return 0; @@ -415,8 +385,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs, int in_syscall) int ret; sigset_t *oldset = sigmask_to_save(); - DBG(1,"handle_signal: sig=%ld, ka=%p, info=%p, oldset=%p, regs=%p\n", - ksig->sig, ksig->ka, ksig->info, oldset, regs); + DBG(1, "%s: sig=%d, ka=%p, info=%p, oldset=%p, regs=%p\n", + __func__, ksig->sig, &ksig->ka, &ksig->info, oldset, regs); /* Set up the stack frame */ ret = setup_rt_frame(ksig, oldset, regs, in_syscall); @@ -424,8 +394,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs, int in_syscall) signal_setup_done(ret, ksig, test_thread_flag(TIF_SINGLESTEP) || test_thread_flag(TIF_BLOCKSTEP)); - DBG(1,KERN_DEBUG "do_signal: Exit (success), regs->gr[28] = %ld\n", - regs->gr[28]); + DBG(1, "%s: Exit (success), regs->gr[28] = %ld\n", + __func__, regs->gr[28]); } /* @@ -483,21 +453,27 @@ syscall_restart(struct pt_regs *regs, struct k_sigaction *ka) if (regs->orig_r28) return; regs->orig_r28 = 1; /* no more restarts */ + + DBG(1, "%s: orig_r28 = %ld pid %d r20 %ld\n", + __func__, regs->orig_r28, task_pid_nr(current), regs->gr[20]); + /* Check the return code */ switch (regs->gr[28]) { case -ERESTART_RESTARTBLOCK: case -ERESTARTNOHAND: - DBG(1,"ERESTARTNOHAND: returning -EINTR\n"); + DBG(1, "%s: ERESTARTNOHAND: returning -EINTR\n", __func__); regs->gr[28] = -EINTR; break; case -ERESTARTSYS: if (!(ka->sa.sa_flags & SA_RESTART)) { - DBG(1,"ERESTARTSYS: putting -EINTR\n"); + DBG(1, "%s: ERESTARTSYS: putting -EINTR pid %d\n", + __func__, task_pid_nr(current)); regs->gr[28] = -EINTR; break; } fallthrough; case -ERESTARTNOINTR: + DBG(1, "%s: %ld\n", __func__, regs->gr[28]); check_syscallno_in_delay_branch(regs); break; } @@ -509,50 +485,52 @@ insert_restart_trampoline(struct pt_regs *regs) if (regs->orig_r28) return; regs->orig_r28 = 1; /* no more restarts */ - switch(regs->gr[28]) { + + DBG(2, "%s: gr28 = %ld pid %d\n", + __func__, regs->gr[28], task_pid_nr(current)); + + switch (regs->gr[28]) { case -ERESTART_RESTARTBLOCK: { /* Restart the system call - no handlers present */ unsigned int *usp = (unsigned int *)regs->gr[30]; - unsigned long start = (unsigned long) &usp[2]; - unsigned long end = (unsigned long) &usp[5]; + unsigned long rp; long err = 0; /* check that we don't exceed the stack */ if (A(&usp[0]) >= user_addr_max() - 5 * sizeof(int)) return; - /* Setup a trampoline to restart the syscall - * with __NR_restart_syscall + /* Call trampoline in vdso to restart the syscall + * with __NR_restart_syscall. + * Original return addresses are on stack like this: * * 0: <return address (orig r31)> * 4: <2nd half for 64-bit> - * 8: ldw 0(%sp), %r31 - * 12: be 0x100(%sr2, %r0) - * 16: ldi __NR_restart_syscall, %r20 */ #ifdef CONFIG_64BIT - err |= put_user(regs->gr[31] >> 32, &usp[0]); - err |= put_user(regs->gr[31] & 0xffffffff, &usp[1]); - err |= put_user(0x0fc010df, &usp[2]); -#else - err |= put_user(regs->gr[31], &usp[0]); - err |= put_user(0x0fc0109f, &usp[2]); + if (!is_compat_task()) { + err |= put_user(regs->gr[31] >> 32, &usp[0]); + err |= put_user(regs->gr[31] & 0xffffffff, &usp[1]); + rp = VDSO64_SYMBOL(current, restart_syscall); + } else #endif - err |= put_user(0xe0008200, &usp[3]); - err |= put_user(0x34140000, &usp[4]); - + { + err |= put_user(regs->gr[31], &usp[0]); + rp = VDSO32_SYMBOL(current, restart_syscall); + } WARN_ON(err); - /* flush data/instruction cache for new insns */ - flush_user_dcache_range_asm(start, end); - flush_user_icache_range_asm(start, end); - - regs->gr[31] = regs->gr[30] + 8; + regs->gr[31] = rp; + DBG(1, "%s: ERESTART_RESTARTBLOCK\n", __func__); return; } + case -EINTR: + /* ok, was handled before and should be returned. */ + break; case -ERESTARTNOHAND: case -ERESTARTSYS: case -ERESTARTNOINTR: + DBG(1, "%s: Type %ld\n", __func__, regs->gr[28]); check_syscallno_in_delay_branch(regs); return; default: @@ -567,30 +545,35 @@ insert_restart_trampoline(struct pt_regs *regs) * registers). As noted below, the syscall number gets restored for * us due to the magic of delayed branching. */ -asmlinkage void -do_signal(struct pt_regs *regs, long in_syscall) +static void do_signal(struct pt_regs *regs, long in_syscall) { struct ksignal ksig; + int restart_syscall; + bool has_handler; - DBG(1,"\ndo_signal: regs=0x%p, sr7 %#lx, in_syscall=%d\n", - regs, regs->sr[7], in_syscall); + has_handler = get_signal(&ksig); - if (get_signal(&ksig)) { - DBG(3,"do_signal: signr = %d, regs->gr[28] = %ld\n", signr, regs->gr[28]); + restart_syscall = 0; + if (in_syscall) + restart_syscall = 1; + + if (has_handler) { /* Restart a system call if necessary. */ - if (in_syscall) + if (restart_syscall) syscall_restart(regs, &ksig.ka); handle_signal(&ksig, regs, in_syscall); + DBG(1, "%s: Handled signal pid %d\n", + __func__, task_pid_nr(current)); return; } - /* Did we come from a system call? */ - if (in_syscall) + /* Do we need to restart the system call? */ + if (restart_syscall) insert_restart_trampoline(regs); - DBG(1,"do_signal: Exit (not delivered), regs->gr[28] = %ld\n", - regs->gr[28]); + DBG(1, "%s: Exit (not delivered), regs->gr[28] = %ld orig_r28 = %ld pid %d\n", + __func__, regs->gr[28], regs->orig_r28, task_pid_nr(current)); restore_saved_sigmask(); } diff --git a/arch/parisc/kernel/signal32.h b/arch/parisc/kernel/signal32.h index f166250f2d06..c03eb1ed4c53 100644 --- a/arch/parisc/kernel/signal32.h +++ b/arch/parisc/kernel/signal32.h @@ -36,21 +36,12 @@ struct compat_regfile { compat_int_t rf_sar; }; -#define COMPAT_SIGRETURN_TRAMP 4 -#define COMPAT_SIGRESTARTBLOCK_TRAMP 5 -#define COMPAT_TRAMP_SIZE (COMPAT_SIGRETURN_TRAMP + \ - COMPAT_SIGRESTARTBLOCK_TRAMP) - struct compat_rt_sigframe { - /* XXX: Must match trampoline size in arch/parisc/kernel/signal.c - Secondary to that it must protect the ERESTART_RESTARTBLOCK - trampoline we left on the stack (we were bad and didn't - change sp so we could run really fast.) */ - compat_uint_t tramp[COMPAT_TRAMP_SIZE]; - compat_siginfo_t info; - struct compat_ucontext uc; - /* Hidden location of truncated registers, *must* be last. */ - struct compat_regfile regs; + unsigned int tramp[2]; /* holds original return address */ + compat_siginfo_t info; + struct compat_ucontext uc; + /* Hidden location of truncated registers, *must* be last. */ + struct compat_regfile regs; }; /* diff --git a/arch/parisc/kernel/topology.c b/arch/parisc/kernel/topology.c index 0a10e4ddc528..e88a6ce7c96d 100644 --- a/arch/parisc/kernel/topology.c +++ b/arch/parisc/kernel/topology.c @@ -101,8 +101,8 @@ void __init store_cpu_topology(unsigned int cpuid) update_siblings_masks(cpuid); - pr_info("CPU%u: thread %d, cpu %d, socket %d\n", - cpuid, cpu_topology[cpuid].thread_id, + pr_info("CPU%u: cpu core %d of socket %d\n", + cpuid, cpu_topology[cpuid].core_id, cpu_topology[cpuid].socket_id); } diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index b6fdebddc8e9..a6e61cf2cad0 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -302,7 +302,10 @@ static void handle_break(struct pt_regs *regs) parisc_kprobe_break_handler(regs); return; } - + if (unlikely(iir == PARISC_KPROBES_BREAK_INSN2)) { + parisc_kprobe_ss_handler(regs); + return; + } #endif #ifdef CONFIG_KGDB @@ -539,11 +542,6 @@ void notrace handle_interruption(int code, struct pt_regs *regs) /* Recovery counter trap */ regs->gr[0] &= ~PSW_R; -#ifdef CONFIG_KPROBES - if (parisc_kprobe_ss_handler(regs)) - return; -#endif - #ifdef CONFIG_KGDB if (kgdb_single_step) { kgdb_handle_exception(0, SIGTRAP, 0, regs); @@ -662,6 +660,8 @@ void notrace handle_interruption(int code, struct pt_regs *regs) by hand. Technically we need to emulate: fdc,fdce,pdc,"fic,4f",prober,probeir,probew, probeiw */ + if (code == 17 && handle_nadtlb_fault(regs)) + return; fault_address = regs->ior; fault_space = regs->isr; break; diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 286cec4d86d7..ed1e88a74dc4 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -3,14 +3,11 @@ * Unaligned memory access handler * * Copyright (C) 2001 Randolph Chung <tausq@debian.org> + * Copyright (C) 2022 Helge Deller <deller@gmx.de> * Significantly tweaked by LaMont Jones <lamont@debian.org> */ -#include <linux/jiffies.h> -#include <linux/kernel.h> -#include <linux/module.h> #include <linux/sched/signal.h> -#include <linux/sched/debug.h> #include <linux/signal.h> #include <linux/ratelimit.h> #include <linux/uaccess.h> @@ -25,18 +22,7 @@ #define DPRINTF(fmt, args...) #endif -#ifdef CONFIG_64BIT -#define RFMT "%016lx" -#else -#define RFMT "%08lx" -#endif - -#define FIXUP_BRANCH(lbl) \ - "\tldil L%%" #lbl ", %%r1\n" \ - "\tldo R%%" #lbl "(%%r1), %%r1\n" \ - "\tbv,n %%r0(%%r1)\n" -/* If you use FIXUP_BRANCH, then you must list this clobber */ -#define FIXUP_BRANCH_CLOBBER "r1" +#define RFMT "%#08lx" /* 1111 1100 0000 0000 0001 0011 1100 0000 */ #define OPCODE1(a,b,c) ((a)<<26|(b)<<12|(c)<<6) @@ -114,37 +100,30 @@ #define IM14(i) IM((i),14) #define ERR_NOTHANDLED -1 -#define ERR_PAGEFAULT -2 int unaligned_enabled __read_mostly = 1; static int emulate_ldh(struct pt_regs *regs, int toreg) { unsigned long saddr = regs->ior; - unsigned long val = 0; - int ret; + unsigned long val = 0, temp1; + ASM_EXCEPTIONTABLE_VAR(ret); DPRINTF("load " RFMT ":" RFMT " to r%d for 2 bytes\n", regs->isr, regs->ior, toreg); __asm__ __volatile__ ( " mtsp %4, %%sr1\n" -"1: ldbs 0(%%sr1,%3), %%r20\n" +"1: ldbs 0(%%sr1,%3), %2\n" "2: ldbs 1(%%sr1,%3), %0\n" -" depw %%r20, 23, 24, %0\n" -" copy %%r0, %1\n" +" depw %2, 23, 24, %0\n" "3: \n" -" .section .fixup,\"ax\"\n" -"4: ldi -2, %1\n" - FIXUP_BRANCH(3b) -" .previous\n" - ASM_EXCEPTIONTABLE_ENTRY(1b, 4b) - ASM_EXCEPTIONTABLE_ENTRY(2b, 4b) - : "=r" (val), "=r" (ret) - : "0" (val), "r" (saddr), "r" (regs->isr) - : "r20", FIXUP_BRANCH_CLOBBER ); + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + : "+r" (val), "+r" (ret), "=&r" (temp1) + : "r" (saddr), "r" (regs->isr) ); - DPRINTF("val = 0x" RFMT "\n", val); + DPRINTF("val = " RFMT "\n", val); if (toreg) regs->gr[toreg] = val; @@ -155,34 +134,28 @@ static int emulate_ldh(struct pt_regs *regs, int toreg) static int emulate_ldw(struct pt_regs *regs, int toreg, int flop) { unsigned long saddr = regs->ior; - unsigned long val = 0; - int ret; + unsigned long val = 0, temp1, temp2; + ASM_EXCEPTIONTABLE_VAR(ret); DPRINTF("load " RFMT ":" RFMT " to r%d for 4 bytes\n", regs->isr, regs->ior, toreg); __asm__ __volatile__ ( -" zdep %3,28,2,%%r19\n" /* r19=(ofs&3)*8 */ -" mtsp %4, %%sr1\n" -" depw %%r0,31,2,%3\n" -"1: ldw 0(%%sr1,%3),%0\n" -"2: ldw 4(%%sr1,%3),%%r20\n" -" subi 32,%%r19,%%r19\n" -" mtctl %%r19,11\n" -" vshd %0,%%r20,%0\n" -" copy %%r0, %1\n" +" zdep %4,28,2,%2\n" /* r19=(ofs&3)*8 */ +" mtsp %5, %%sr1\n" +" depw %%r0,31,2,%4\n" +"1: ldw 0(%%sr1,%4),%0\n" +"2: ldw 4(%%sr1,%4),%3\n" +" subi 32,%4,%2\n" +" mtctl %2,11\n" +" vshd %0,%3,%0\n" "3: \n" -" .section .fixup,\"ax\"\n" -"4: ldi -2, %1\n" - FIXUP_BRANCH(3b) -" .previous\n" - ASM_EXCEPTIONTABLE_ENTRY(1b, 4b) - ASM_EXCEPTIONTABLE_ENTRY(2b, 4b) - : "=r" (val), "=r" (ret) - : "0" (val), "r" (saddr), "r" (regs->isr) - : "r19", "r20", FIXUP_BRANCH_CLOBBER ); + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + : "+r" (val), "+r" (ret), "=&r" (temp1), "=&r" (temp2) + : "r" (saddr), "r" (regs->isr) ); - DPRINTF("val = 0x" RFMT "\n", val); + DPRINTF("val = " RFMT "\n", val); if (flop) ((__u32*)(regs->fr))[toreg] = val; @@ -195,16 +168,15 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) { unsigned long saddr = regs->ior; __u64 val = 0; - int ret; + ASM_EXCEPTIONTABLE_VAR(ret); DPRINTF("load " RFMT ":" RFMT " to r%d for 8 bytes\n", regs->isr, regs->ior, toreg); -#ifdef CONFIG_PA20 -#ifndef CONFIG_64BIT - if (!flop) - return -1; -#endif + if (!IS_ENABLED(CONFIG_64BIT) && !flop) + return ERR_NOTHANDLED; + +#ifdef CONFIG_64BIT __asm__ __volatile__ ( " depd,z %3,60,3,%%r19\n" /* r19=(ofs&7)*8 */ " mtsp %4, %%sr1\n" @@ -214,44 +186,32 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) " subi 64,%%r19,%%r19\n" " mtsar %%r19\n" " shrpd %0,%%r20,%%sar,%0\n" -" copy %%r0, %1\n" "3: \n" -" .section .fixup,\"ax\"\n" -"4: ldi -2, %1\n" - FIXUP_BRANCH(3b) -" .previous\n" - ASM_EXCEPTIONTABLE_ENTRY(1b,4b) - ASM_EXCEPTIONTABLE_ENTRY(2b,4b) - : "=r" (val), "=r" (ret) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + : "=r" (val), "+r" (ret) : "0" (val), "r" (saddr), "r" (regs->isr) - : "r19", "r20", FIXUP_BRANCH_CLOBBER ); + : "r19", "r20" ); #else { - unsigned long valh=0,vall=0; + unsigned long shift, temp1; __asm__ __volatile__ ( -" zdep %5,29,2,%%r19\n" /* r19=(ofs&3)*8 */ -" mtsp %6, %%sr1\n" -" dep %%r0,31,2,%5\n" -"1: ldw 0(%%sr1,%5),%0\n" -"2: ldw 4(%%sr1,%5),%1\n" -"3: ldw 8(%%sr1,%5),%%r20\n" -" subi 32,%%r19,%%r19\n" -" mtsar %%r19\n" -" vshd %0,%1,%0\n" -" vshd %1,%%r20,%1\n" -" copy %%r0, %2\n" +" zdep %2,29,2,%3\n" /* r19=(ofs&3)*8 */ +" mtsp %5, %%sr1\n" +" dep %%r0,31,2,%2\n" +"1: ldw 0(%%sr1,%2),%0\n" +"2: ldw 4(%%sr1,%2),%R0\n" +"3: ldw 8(%%sr1,%2),%4\n" +" subi 32,%3,%3\n" +" mtsar %3\n" +" vshd %0,%R0,%0\n" +" vshd %R0,%4,%R0\n" "4: \n" -" .section .fixup,\"ax\"\n" -"5: ldi -2, %2\n" - FIXUP_BRANCH(4b) -" .previous\n" - ASM_EXCEPTIONTABLE_ENTRY(1b,5b) - ASM_EXCEPTIONTABLE_ENTRY(2b,5b) - ASM_EXCEPTIONTABLE_ENTRY(3b,5b) - : "=r" (valh), "=r" (vall), "=r" (ret) - : "0" (valh), "1" (vall), "r" (saddr), "r" (regs->isr) - : "r19", "r20", FIXUP_BRANCH_CLOBBER ); - val=((__u64)valh<<32)|(__u64)vall; + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 4b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 4b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 4b) + : "+r" (val), "+r" (ret), "+r" (saddr), "=&r" (shift), "=&r" (temp1) + : "r" (regs->isr) ); } #endif @@ -267,31 +227,25 @@ static int emulate_ldd(struct pt_regs *regs, int toreg, int flop) static int emulate_sth(struct pt_regs *regs, int frreg) { - unsigned long val = regs->gr[frreg]; - int ret; + unsigned long val = regs->gr[frreg], temp1; + ASM_EXCEPTIONTABLE_VAR(ret); if (!frreg) val = 0; - DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 2 bytes\n", frreg, + DPRINTF("store r%d (" RFMT ") to " RFMT ":" RFMT " for 2 bytes\n", frreg, val, regs->isr, regs->ior); __asm__ __volatile__ ( -" mtsp %3, %%sr1\n" -" extrw,u %1, 23, 8, %%r19\n" -"1: stb %1, 1(%%sr1, %2)\n" -"2: stb %%r19, 0(%%sr1, %2)\n" -" copy %%r0, %0\n" +" mtsp %4, %%sr1\n" +" extrw,u %2, 23, 8, %1\n" +"1: stb %1, 0(%%sr1, %3)\n" +"2: stb %2, 1(%%sr1, %3)\n" "3: \n" -" .section .fixup,\"ax\"\n" -"4: ldi -2, %0\n" - FIXUP_BRANCH(3b) -" .previous\n" - ASM_EXCEPTIONTABLE_ENTRY(1b,4b) - ASM_EXCEPTIONTABLE_ENTRY(2b,4b) - : "=r" (ret) - : "r" (val), "r" (regs->ior), "r" (regs->isr) - : "r19", FIXUP_BRANCH_CLOBBER ); + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + : "+r" (ret), "=&r" (temp1) + : "r" (val), "r" (regs->ior), "r" (regs->isr) ); return ret; } @@ -299,7 +253,7 @@ static int emulate_sth(struct pt_regs *regs, int frreg) static int emulate_stw(struct pt_regs *regs, int frreg, int flop) { unsigned long val; - int ret; + ASM_EXCEPTIONTABLE_VAR(ret); if (flop) val = ((__u32*)(regs->fr))[frreg]; @@ -308,7 +262,7 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) else val = 0; - DPRINTF("store r%d (0x" RFMT ") to " RFMT ":" RFMT " for 4 bytes\n", frreg, + DPRINTF("store r%d (" RFMT ") to " RFMT ":" RFMT " for 4 bytes\n", frreg, val, regs->isr, regs->ior); @@ -328,24 +282,19 @@ static int emulate_stw(struct pt_regs *regs, int frreg, int flop) " or %%r1, %%r21, %%r21\n" " stw %%r20,0(%%sr1,%2)\n" " stw %%r21,4(%%sr1,%2)\n" -" copy %%r0, %0\n" "3: \n" -" .section .fixup,\"ax\"\n" -"4: ldi -2, %0\n" - FIXUP_BRANCH(3b) -" .previous\n" - ASM_EXCEPTIONTABLE_ENTRY(1b,4b) - ASM_EXCEPTIONTABLE_ENTRY(2b,4b) - : "=r" (ret) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 3b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 3b) + : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) - : "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER ); + : "r19", "r20", "r21", "r22", "r1" ); return ret; } static int emulate_std(struct pt_regs *regs, int frreg, int flop) { __u64 val; - int ret; + ASM_EXCEPTIONTABLE_VAR(ret); if (flop) val = regs->fr[frreg]; @@ -357,11 +306,10 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) DPRINTF("store r%d (0x%016llx) to " RFMT ":" RFMT " for 8 bytes\n", frreg, val, regs->isr, regs->ior); -#ifdef CONFIG_PA20 -#ifndef CONFIG_64BIT - if (!flop) - return -1; -#endif + if (!IS_ENABLED(CONFIG_64BIT) && !flop) + return ERR_NOTHANDLED; + +#ifdef CONFIG_64BIT __asm__ __volatile__ ( " mtsp %3, %%sr1\n" " depd,z %2, 60, 3, %%r19\n" @@ -378,19 +326,14 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) " or %%r1, %%r21, %%r21\n" "3: std %%r20,0(%%sr1,%2)\n" "4: std %%r21,8(%%sr1,%2)\n" -" copy %%r0, %0\n" "5: \n" -" .section .fixup,\"ax\"\n" -"6: ldi -2, %0\n" - FIXUP_BRANCH(5b) -" .previous\n" - ASM_EXCEPTIONTABLE_ENTRY(1b,6b) - ASM_EXCEPTIONTABLE_ENTRY(2b,6b) - ASM_EXCEPTIONTABLE_ENTRY(3b,6b) - ASM_EXCEPTIONTABLE_ENTRY(4b,6b) - : "=r" (ret) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 5b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 5b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 5b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 5b) + : "+r" (ret) : "r" (val), "r" (regs->ior), "r" (regs->isr) - : "r19", "r20", "r21", "r22", "r1", FIXUP_BRANCH_CLOBBER ); + : "r19", "r20", "r21", "r22", "r1" ); #else { unsigned long valh=(val>>32),vall=(val&0xffffffffl); @@ -412,20 +355,15 @@ static int emulate_std(struct pt_regs *regs, int frreg, int flop) "3: stw %1,0(%%sr1,%3)\n" "4: stw %%r1,4(%%sr1,%3)\n" "5: stw %2,8(%%sr1,%3)\n" -" copy %%r0, %0\n" "6: \n" -" .section .fixup,\"ax\"\n" -"7: ldi -2, %0\n" - FIXUP_BRANCH(6b) -" .previous\n" - ASM_EXCEPTIONTABLE_ENTRY(1b,7b) - ASM_EXCEPTIONTABLE_ENTRY(2b,7b) - ASM_EXCEPTIONTABLE_ENTRY(3b,7b) - ASM_EXCEPTIONTABLE_ENTRY(4b,7b) - ASM_EXCEPTIONTABLE_ENTRY(5b,7b) - : "=r" (ret) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(1b, 6b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(2b, 6b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(3b, 6b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(4b, 6b) + ASM_EXCEPTIONTABLE_ENTRY_EFAULT(5b, 6b) + : "+r" (ret) : "r" (valh), "r" (vall), "r" (regs->ior), "r" (regs->isr) - : "r19", "r20", "r21", "r1", FIXUP_BRANCH_CLOBBER ); + : "r19", "r20", "r21", "r1" ); } #endif @@ -438,7 +376,6 @@ void handle_unaligned(struct pt_regs *regs) unsigned long newbase = R1(regs->iir)?regs->gr[R1(regs->iir)]:0; int modify = 0; int ret = ERR_NOTHANDLED; - register int flop=0; /* true if this is a flop */ __inc_irq_stat(irq_unaligned_count); @@ -450,10 +387,10 @@ void handle_unaligned(struct pt_regs *regs) if (!(current->thread.flags & PARISC_UAC_NOPRINT) && __ratelimit(&ratelimit)) { - char buf[256]; - sprintf(buf, "%s(%d): unaligned access to 0x" RFMT " at ip=0x" RFMT "\n", - current->comm, task_pid_nr(current), regs->ior, regs->iaoq[0]); - printk(KERN_WARNING "%s", buf); + printk(KERN_WARNING "%s(%d): unaligned access to " RFMT + " at ip " RFMT " (iir " RFMT ")\n", + current->comm, task_pid_nr(current), regs->ior, + regs->iaoq[0], regs->iir); #ifdef DEBUG_UNALIGNED show_regs(regs); #endif @@ -547,7 +484,7 @@ void handle_unaligned(struct pt_regs *regs) ret = emulate_stw(regs, R2(regs->iir),0); break; -#ifdef CONFIG_PA20 +#ifdef CONFIG_64BIT case OPCODE_LDD_I: case OPCODE_LDDA_I: case OPCODE_LDD_S: @@ -565,13 +502,11 @@ void handle_unaligned(struct pt_regs *regs) case OPCODE_FLDWS: case OPCODE_FLDWXR: case OPCODE_FLDWSR: - flop=1; ret = emulate_ldw(regs,FR3(regs->iir),1); break; case OPCODE_FLDDX: case OPCODE_FLDDS: - flop=1; ret = emulate_ldd(regs,R3(regs->iir),1); break; @@ -579,13 +514,11 @@ void handle_unaligned(struct pt_regs *regs) case OPCODE_FSTWS: case OPCODE_FSTWXR: case OPCODE_FSTWSR: - flop=1; ret = emulate_stw(regs,FR3(regs->iir),1); break; case OPCODE_FSTDX: case OPCODE_FSTDS: - flop=1; ret = emulate_std(regs,R3(regs->iir),1); break; @@ -599,14 +532,12 @@ void handle_unaligned(struct pt_regs *regs) switch (regs->iir & OPCODE2_MASK) { case OPCODE_FLDD_L: - flop=1; ret = emulate_ldd(regs,R2(regs->iir),1); break; case OPCODE_FSTD_L: - flop=1; ret = emulate_std(regs, R2(regs->iir),1); break; -#ifdef CONFIG_PA20 +#ifdef CONFIG_64BIT case OPCODE_LDD_L: ret = emulate_ldd(regs, R2(regs->iir),0); break; @@ -618,7 +549,6 @@ void handle_unaligned(struct pt_regs *regs) switch (regs->iir & OPCODE3_MASK) { case OPCODE_FLDW_L: - flop=1; ret = emulate_ldw(regs, R2(regs->iir), 1); break; case OPCODE_LDW_M: @@ -626,7 +556,6 @@ void handle_unaligned(struct pt_regs *regs) break; case OPCODE_FSTW_L: - flop=1; ret = emulate_stw(regs, R2(regs->iir),1); break; case OPCODE_STW_M: @@ -673,7 +602,7 @@ void handle_unaligned(struct pt_regs *regs) printk(KERN_CRIT "Unaligned handler failed, ret = %d\n", ret); die_if_kernel("Unaligned data reference", regs, 28); - if (ret == ERR_PAGEFAULT) + if (ret == -EFAULT) { force_sig_fault(SIGSEGV, SEGV_MAPERR, (void __user *)regs->ior); diff --git a/arch/parisc/kernel/vdso.c b/arch/parisc/kernel/vdso.c new file mode 100644 index 000000000000..63dc44c4c246 --- /dev/null +++ b/arch/parisc/kernel/vdso.c @@ -0,0 +1,122 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 Helge Deller <deller@gmx.de> + * + * based on arch/s390/kernel/vdso.c which is + * Copyright IBM Corp. 2008 + * Author(s): Martin Schwidefsky (schwidefsky@de.ibm.com) + */ + +#include <linux/kernel.h> +#include <linux/mm.h> +#include <linux/slab.h> +#include <linux/elf.h> +#include <linux/timekeeper_internal.h> +#include <linux/compat.h> +#include <linux/nsproxy.h> +#include <linux/time_namespace.h> +#include <linux/random.h> + +#include <asm/pgtable.h> +#include <asm/page.h> +#include <asm/sections.h> +#include <asm/vdso.h> +#include <asm/cacheflush.h> + +extern char vdso32_start, vdso32_end; +extern char vdso64_start, vdso64_end; + +static int vdso_mremap(const struct vm_special_mapping *sm, + struct vm_area_struct *vma) +{ + current->mm->context.vdso_base = vma->vm_start; + return 0; +} + +#ifdef CONFIG_64BIT +static struct vm_special_mapping vdso64_mapping = { + .name = "[vdso]", + .mremap = vdso_mremap, +}; +#endif + +static struct vm_special_mapping vdso32_mapping = { + .name = "[vdso]", + .mremap = vdso_mremap, +}; + +/* + * This is called from binfmt_elf, we create the special vma for the + * vDSO and insert it into the mm struct tree + */ +int arch_setup_additional_pages(struct linux_binprm *bprm, + int executable_stack) +{ + + unsigned long vdso_text_start, vdso_text_len, map_base; + struct vm_special_mapping *vdso_mapping; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc; + + if (mmap_write_lock_killable(mm)) + return -EINTR; + +#ifdef CONFIG_64BIT + if (!is_compat_task()) { + vdso_text_len = &vdso64_end - &vdso64_start; + vdso_mapping = &vdso64_mapping; + } else +#endif + { + vdso_text_len = &vdso32_end - &vdso32_start; + vdso_mapping = &vdso32_mapping; + } + + map_base = mm->mmap_base; + if (current->flags & PF_RANDOMIZE) + map_base -= (get_random_int() & 0x1f) * PAGE_SIZE; + + vdso_text_start = get_unmapped_area(NULL, map_base, vdso_text_len, 0, 0); + + /* VM_MAYWRITE for COW so gdb can set breakpoints */ + vma = _install_special_mapping(mm, vdso_text_start, vdso_text_len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + vdso_mapping); + if (IS_ERR(vma)) { + do_munmap(mm, vdso_text_start, PAGE_SIZE, NULL); + rc = PTR_ERR(vma); + } else { + current->mm->context.vdso_base = vdso_text_start; + rc = 0; + } + + mmap_write_unlock(mm); + return rc; +} + +static struct page ** __init vdso_setup_pages(void *start, void *end) +{ + int pages = (end - start) >> PAGE_SHIFT; + struct page **pagelist; + int i; + + pagelist = kcalloc(pages + 1, sizeof(struct page *), GFP_KERNEL); + if (!pagelist) + panic("%s: Cannot allocate page list for VDSO", __func__); + for (i = 0; i < pages; i++) + pagelist[i] = virt_to_page(start + i * PAGE_SIZE); + return pagelist; +} + +static int __init vdso_init(void) +{ +#ifdef CONFIG_64BIT + vdso64_mapping.pages = vdso_setup_pages(&vdso64_start, &vdso64_end); +#endif + if (IS_ENABLED(CONFIG_COMPAT) || !IS_ENABLED(CONFIG_64BIT)) + vdso32_mapping.pages = vdso_setup_pages(&vdso32_start, &vdso32_end); + return 0; +} +arch_initcall(vdso_init); diff --git a/arch/parisc/kernel/vdso32/Makefile b/arch/parisc/kernel/vdso32/Makefile new file mode 100644 index 000000000000..85b1c6d261d1 --- /dev/null +++ b/arch/parisc/kernel/vdso32/Makefile @@ -0,0 +1,53 @@ +# List of files in the vdso, has to be asm only for now + +obj-vdso32 = note.o sigtramp.o restart_syscall.o + +# Build rules + +targets := $(obj-vdso32) vdso32.so +obj-vdso32 := $(addprefix $(obj)/, $(obj-vdso32)) + +ccflags-y := -shared -fno-common -fbuiltin -mno-fast-indirect-calls -O2 -mno-long-calls +# -march=1.1 -mschedule=7100LC +ccflags-y += -nostdlib -Wl,-soname=linux-vdso32.so.1 \ + $(call ld-option, -Wl$(comma)--hash-style=sysv) +asflags-y := -D__VDSO32__ -s + +KBUILD_AFLAGS += -DBUILD_VDSO +KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING + +VDSO_LIBGCC := $(shell $(CROSS32CC) -print-libgcc-file-name) + +obj-y += vdso32_wrapper.o +extra-y += vdso32.lds +CPPFLAGS_vdso32.lds += -P -C # -U$(ARCH) + +$(obj)/vdso32_wrapper.o : $(obj)/vdso32.so FORCE + +# Force dependency (incbin is bad) +# link rule for the .so file, .lds has to be first +$(obj)/vdso32.so: $(src)/vdso32.lds $(obj-vdso32) $(obj-cvdso32) $(VDSO_LIBGCC) + $(call if_changed,vdso32ld) + +# assembly rules for the .S files +$(obj-vdso32): %.o: %.S FORCE + $(call if_changed_dep,vdso32as) + +$(obj-cvdso32): %.o: %.c FORCE + $(call if_changed_dep,vdso32cc) + +# actual build commands +quiet_cmd_vdso32ld = VDSO32L $@ + cmd_vdso32ld = $(CROSS32CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso32as = VDSO32A $@ + cmd_vdso32as = $(CROSS32CC) $(a_flags) -c -o $@ $< +quiet_cmd_vdso32cc = VDSO32C $@ + cmd_vdso32cc = $(CROSS32CC) $(c_flags) -c -fPIC -mno-fast-indirect-calls -o $@ $< + +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso32-offsets.h: $(obj)/vdso32.so FORCE + $(call if_changed,vdsosym) diff --git a/arch/parisc/kernel/vdso32/gen_vdso_offsets.sh b/arch/parisc/kernel/vdso32/gen_vdso_offsets.sh new file mode 100755 index 000000000000..da39d6cff7f1 --- /dev/null +++ b/arch/parisc/kernel/vdso32/gen_vdso_offsets.sh @@ -0,0 +1,15 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Inspired by arm64 version. +# + +LC_ALL=C +sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso32_offset_\2\t0x\1/p' diff --git a/arch/parisc/kernel/vdso32/note.S b/arch/parisc/kernel/vdso32/note.S new file mode 100644 index 000000000000..bb350918bebd --- /dev/null +++ b/arch/parisc/kernel/vdso32/note.S @@ -0,0 +1,26 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text. + * Here we can supply some information useful to userland. + */ + +#include <linux/uts.h> +#include <linux/version.h> + +#define ASM_ELF_NOTE_BEGIN(name, flags, vendor, type) \ + .section name, flags; \ + .balign 4; \ + .long 1f - 0f; /* name length */ \ + .long 3f - 2f; /* data length */ \ + .long type; /* note type */ \ +0: .asciz vendor; /* vendor name */ \ +1: .balign 4; \ +2: + +#define ASM_ELF_NOTE_END \ +3: .balign 4; /* pad out section */ \ + .previous + + ASM_ELF_NOTE_BEGIN(".note.kernel-version", "a", UTS_SYSNAME, 0) + .long LINUX_VERSION_CODE + ASM_ELF_NOTE_END diff --git a/arch/parisc/kernel/vdso32/restart_syscall.S b/arch/parisc/kernel/vdso32/restart_syscall.S new file mode 100644 index 000000000000..7e82008d7e40 --- /dev/null +++ b/arch/parisc/kernel/vdso32/restart_syscall.S @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Syscall restart trampoline for 32 and 64 bits processes. + * + * Copyright (C) 2018-2022 Helge Deller <deller@gmx.de> + * Copyright (C) 2022 John David Anglin <dave.anglin@bell.net> + */ + +#include <asm/unistd.h> +#include <asm/vdso.h> + +#include <linux/linkage.h> + + .text + +ENTRY_CFI(__kernel_restart_syscall) + /* + * Setup a trampoline to restart the syscall + * with __NR_restart_syscall + */ + + /* load return pointer */ +#if defined(__VDSO64__) + ldd 0(%sp), %r31 +#elif defined(__VDSO32__) + ldw 0(%sp), %r31 +#endif + + be 0x100(%sr2, %r0) + ldi __NR_restart_syscall, %r20 + +ENDPROC_CFI(__kernel_restart_syscall) diff --git a/arch/parisc/kernel/vdso32/sigtramp.S b/arch/parisc/kernel/vdso32/sigtramp.S new file mode 100644 index 000000000000..192da7077869 --- /dev/null +++ b/arch/parisc/kernel/vdso32/sigtramp.S @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Signal trampolines for 32 bit processes. + * + * Copyright (C) 2006 Randolph Chung <tausq@debian.org> + * Copyright (C) 2018-2022 Helge Deller <deller@gmx.de> + * Copyright (C) 2022 John David Anglin <dave.anglin@bell.net> + */ +#include <asm/unistd.h> +#include <linux/linkage.h> +#include <generated/asm-offsets.h> + + .text + +/* Gdb expects the trampoline is on the stack and the pc is offset from + a 64-byte boundary by 0, 4 or 5 instructions. Since the vdso trampoline + is not on the stack, we need a new variant with different offsets and + data to tell gdb where to find the signal context on the stack. + + Here we put the offset to the context data at the start of the trampoline + region and offset the first trampoline by 2 instructions. Please do + not change the trampoline as the code in gdb depends on the following + instruction sequence exactly. + */ + .align 64 + .word SIGFRAME_CONTEXT_REGS32 + +/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from + the return address to get an address in the middle of the presumed + call instruction. Since we don't have a call here, we artifically + extend the range covered by the unwind info by adding a nop before + the real start. + */ + nop + + .globl __kernel_sigtramp_rt + .type __kernel_sigtramp_rt, @function +__kernel_sigtramp_rt: + .proc + .callinfo FRAME=ASM_SIGFRAME_SIZE32,CALLS,SAVE_RP + .entry + +.Lsigrt_start = . - 4 +0: ldi 0, %r25 /* (in_syscall=0) */ + ldi __NR_rt_sigreturn, %r20 + ble 0x100(%sr2, %r0) + nop + +1: ldi 1, %r25 /* (in_syscall=1) */ + ldi __NR_rt_sigreturn, %r20 + ble 0x100(%sr2, %r0) + nop +.Lsigrt_end: + .exit + .procend + .size __kernel_sigtramp_rt,.-__kernel_sigtramp_rt + + + .section .eh_frame,"a",@progbits + +/* This is where the mcontext_t struct can be found on the stack. */ +#define PTREGS SIGFRAME_CONTEXT_REGS32 /* 32-bit process offset is -672 */ + +/* Register REGNO can be found at offset OFS of the mcontext_t structure. */ + .macro rsave regno,ofs + .byte 0x05 /* DW_CFA_offset_extended */ + .uleb128 \regno; /* regno */ + .uleb128 \ofs /* factored offset */ + .endm + +.Lcie: + .long .Lcie_end - .Lcie_start +.Lcie_start: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .stringz "zRS" /* NUL-terminated augmentation string */ + .uleb128 4 /* Code alignment factor */ + .sleb128 4 /* Data alignment factor */ + .byte 89 /* Return address register column, iaoq[0] */ + .uleb128 1 /* Augmentation value length */ + .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */ + .byte 0x0f /* DW_CFA_def_cfa_expresion */ + .uleb128 9f - 1f /* length */ +1: + .byte 0x8e /* DW_OP_breg30 */ + .sleb128 PTREGS +9: + .balign 4 +.Lcie_end: + + .long .Lfde0_end - .Lfde0_start +.Lfde0_start: + .long .Lfde0_start - .Lcie /* CIE pointer. */ + .long .Lsigrt_start - . /* PC start, length */ + .long .Lsigrt_end - .Lsigrt_start + .uleb128 0 /* Augmentation */ + + /* General registers */ + rsave 1, 2 + rsave 2, 3 + rsave 3, 4 + rsave 4, 5 + rsave 5, 6 + rsave 6, 7 + rsave 7, 8 + rsave 8, 9 + rsave 9, 10 + rsave 10, 11 + rsave 11, 12 + rsave 12, 13 + rsave 13, 14 + rsave 14, 15 + rsave 15, 16 + rsave 16, 17 + rsave 17, 18 + rsave 18, 19 + rsave 19, 20 + rsave 20, 21 + rsave 21, 22 + rsave 22, 23 + rsave 23, 24 + rsave 24, 25 + rsave 25, 26 + rsave 26, 27 + rsave 27, 28 + rsave 28, 29 + rsave 29, 30 + rsave 30, 31 + rsave 31, 32 + + /* Floating-point registers */ + rsave 32, 42 + rsave 33, 43 + rsave 34, 44 + rsave 35, 45 + rsave 36, 46 + rsave 37, 47 + rsave 38, 48 + rsave 39, 49 + rsave 40, 50 + rsave 41, 51 + rsave 42, 52 + rsave 43, 53 + rsave 44, 54 + rsave 45, 55 + rsave 46, 56 + rsave 47, 57 + rsave 48, 58 + rsave 49, 59 + rsave 50, 60 + rsave 51, 61 + rsave 52, 62 + rsave 53, 63 + rsave 54, 64 + rsave 55, 65 + rsave 56, 66 + rsave 57, 67 + rsave 58, 68 + rsave 59, 69 + rsave 60, 70 + rsave 61, 71 + rsave 62, 72 + rsave 63, 73 + rsave 64, 74 + rsave 65, 75 + rsave 66, 76 + rsave 67, 77 + rsave 68, 78 + rsave 69, 79 + rsave 70, 80 + rsave 71, 81 + rsave 72, 82 + rsave 73, 83 + rsave 74, 84 + rsave 75, 85 + rsave 76, 86 + rsave 77, 87 + rsave 78, 88 + rsave 79, 89 + rsave 80, 90 + rsave 81, 91 + rsave 82, 92 + rsave 83, 93 + rsave 84, 94 + rsave 85, 95 + rsave 86, 96 + rsave 87, 97 + + /* SAR register */ + rsave 88, 102 + + /* iaoq[0] return address register */ + rsave 89, 100 + .balign 4 +.Lfde0_end: diff --git a/arch/parisc/kernel/vdso32/vdso32.lds.S b/arch/parisc/kernel/vdso32/vdso32.lds.S new file mode 100644 index 000000000000..d4aff3af5262 --- /dev/null +++ b/arch/parisc/kernel/vdso32/vdso32.lds.S @@ -0,0 +1,111 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This is the infamous ld script for the 32 bits vdso library + */ +#include <asm/vdso.h> +#include <asm/page.h> + +/* Default link addresses for the vDSOs */ +OUTPUT_FORMAT("elf32-hppa-linux") +OUTPUT_ARCH(hppa) +ENTRY(_start) + +SECTIONS +{ + . = VDSO_LBASE + SIZEOF_HEADERS; + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN (16); + .text : + { + *(.text .stub .text.* .gnu.linkonce.t.*) + } + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + /* Other stuff is appended to the text segment: */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + .rodata2 : { *(.data.rel.ro) } + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table) } + .fixup : { *(.fixup) } + + .dynamic : { *(.dynamic) } :text :dynamic + .plt : { *(.plt) } + .got : { *(.got) } + + _end = .; + __end = .; + PROVIDE (end = .); + + + /* Stabs debugging sections are here too + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + + /DISCARD/ : { *(.note.GNU-stack) } + /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) } + /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) } +} + + +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + note PT_NOTE FLAGS(4); /* PF_R */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + __kernel_sigtramp_rt32; + __kernel_restart_syscall32; + local: *; + }; +} diff --git a/arch/parisc/kernel/vdso32/vdso32_wrapper.S b/arch/parisc/kernel/vdso32/vdso32_wrapper.S new file mode 100644 index 000000000000..5ac06093e8ec --- /dev/null +++ b/arch/parisc/kernel/vdso32/vdso32_wrapper.S @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso32_start, vdso32_end + .balign PAGE_SIZE +vdso32_start: + .incbin "arch/parisc/kernel/vdso32/vdso32.so" + .balign PAGE_SIZE +vdso32_end: + + .previous diff --git a/arch/parisc/kernel/vdso64/Makefile b/arch/parisc/kernel/vdso64/Makefile new file mode 100644 index 000000000000..a30f5ec5eb4b --- /dev/null +++ b/arch/parisc/kernel/vdso64/Makefile @@ -0,0 +1,48 @@ +# List of files in the vdso, has to be asm only for now + +obj-vdso64 = note.o sigtramp.o restart_syscall.o + +# Build rules + +targets := $(obj-vdso64) vdso64.so +obj-vdso64 := $(addprefix $(obj)/, $(obj-vdso64)) + + +ccflags-y := -shared -fno-common -fno-builtin +ccflags-y += -nostdlib -Wl,-soname=linux-vdso64.so.1 \ + $(call ld-option, -Wl$(comma)--hash-style=sysv) +asflags-y := -D__VDSO64__ -s + +KBUILD_AFLAGS += -DBUILD_VDSO +KBUILD_CFLAGS += -DBUILD_VDSO -DDISABLE_BRANCH_PROFILING + +VDSO_LIBGCC := $(shell $(CC) -print-libgcc-file-name) + +obj-y += vdso64_wrapper.o +extra-y += vdso64.lds +CPPFLAGS_vdso64.lds += -P -C -U$(ARCH) + +$(obj)/vdso64_wrapper.o : $(obj)/vdso64.so FORCE + +# Force dependency (incbin is bad) +# link rule for the .so file, .lds has to be first +$(obj)/vdso64.so: $(src)/vdso64.lds $(obj-vdso64) $(VDSO_LIBGCC) + $(call if_changed,vdso64ld) + +# assembly rules for the .S files +$(obj-vdso64): %.o: %.S FORCE + $(call if_changed_dep,vdso64as) + +# actual build commands +quiet_cmd_vdso64ld = VDSO64L $@ + cmd_vdso64ld = $(CC) $(c_flags) -Wl,-T $^ -o $@ +quiet_cmd_vdso64as = VDSO64A $@ + cmd_vdso64as = $(CC) $(a_flags) -c -o $@ $< + +# Generate VDSO offsets using helper script +gen-vdsosym := $(srctree)/$(src)/gen_vdso_offsets.sh +quiet_cmd_vdsosym = VDSOSYM $@ + cmd_vdsosym = $(NM) $< | $(gen-vdsosym) | LC_ALL=C sort > $@ + +include/generated/vdso64-offsets.h: $(obj)/vdso64.so FORCE + $(call if_changed,vdsosym) diff --git a/arch/parisc/kernel/vdso64/gen_vdso_offsets.sh b/arch/parisc/kernel/vdso64/gen_vdso_offsets.sh new file mode 100755 index 000000000000..37f05cb38dad --- /dev/null +++ b/arch/parisc/kernel/vdso64/gen_vdso_offsets.sh @@ -0,0 +1,15 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# +# Match symbols in the DSO that look like VDSO_*; produce a header file +# of constant offsets into the shared object. +# +# Doing this inside the Makefile will break the $(filter-out) function, +# causing Kbuild to rebuild the vdso-offsets header file every time. +# +# Inspired by arm64 version. +# + +LC_ALL=C +sed -n 's/\([0-9a-f]*\) . __kernel_\(.*\)/\#define vdso64_offset_\2\t0x\1/p' diff --git a/arch/parisc/kernel/vdso64/note.S b/arch/parisc/kernel/vdso64/note.S new file mode 100644 index 000000000000..bd1fa23597d6 --- /dev/null +++ b/arch/parisc/kernel/vdso64/note.S @@ -0,0 +1,2 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include "../vdso32/note.S" diff --git a/arch/parisc/kernel/vdso64/restart_syscall.S b/arch/parisc/kernel/vdso64/restart_syscall.S new file mode 100644 index 000000000000..83004451f6b1 --- /dev/null +++ b/arch/parisc/kernel/vdso64/restart_syscall.S @@ -0,0 +1,3 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#include "../vdso32/restart_syscall.S" diff --git a/arch/parisc/kernel/vdso64/sigtramp.S b/arch/parisc/kernel/vdso64/sigtramp.S new file mode 100644 index 000000000000..66a6d2b241e1 --- /dev/null +++ b/arch/parisc/kernel/vdso64/sigtramp.S @@ -0,0 +1,166 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Signal trampolines for 64 bit processes. + * + * Copyright (C) 2006 Randolph Chung <tausq@debian.org> + * Copyright (C) 2018-2022 Helge Deller <deller@gmx.de> + * Copyright (C) 2022 John David Anglin <dave.anglin@bell.net> + */ +#include <asm/unistd.h> +#include <linux/linkage.h> +#include <generated/asm-offsets.h> + + .text + +/* Gdb expects the trampoline is on the stack and the pc is offset from + a 64-byte boundary by 0, 4 or 5 instructions. Since the vdso trampoline + is not on the stack, we need a new variant with different offsets and + data to tell gdb where to find the signal context on the stack. + + Here we put the offset to the context data at the start of the trampoline + region and offset the first trampoline by 2 instructions. Please do + not change the trampoline as the code in gdb depends on the following + instruction sequence exactly. + */ + .align 64 + .word SIGFRAME_CONTEXT_REGS + +/* The nop here is a hack. The dwarf2 unwind routines subtract 1 from + the return address to get an address in the middle of the presumed + call instruction. Since we don't have a call here, we artifically + extend the range covered by the unwind info by adding a nop before + the real start. + */ + nop + + .globl __kernel_sigtramp_rt + .type __kernel_sigtramp_rt, @function +__kernel_sigtramp_rt: + .proc + .callinfo FRAME=ASM_SIGFRAME_SIZE,CALLS,SAVE_RP + .entry + +.Lsigrt_start = . - 4 +0: ldi 0, %r25 /* (in_syscall=0) */ + ldi __NR_rt_sigreturn, %r20 + ble 0x100(%sr2, %r0) + nop + +1: ldi 1, %r25 /* (in_syscall=1) */ + ldi __NR_rt_sigreturn, %r20 + ble 0x100(%sr2, %r0) + nop +.Lsigrt_end: + .exit + .procend + .size __kernel_sigtramp_rt,.-__kernel_sigtramp_rt + + .section .eh_frame,"a",@progbits + +/* This is where the mcontext_t struct can be found on the stack. */ +#define PTREGS SIGFRAME_CONTEXT_REGS /* 64-bit process offset is -720 */ + +/* Register REGNO can be found at offset OFS of the mcontext_t structure. */ + .macro rsave regno,ofs + .byte 0x05 /* DW_CFA_offset_extended */ + .uleb128 \regno; /* regno */ + .uleb128 \ofs /* factored offset */ + .endm + +.Lcie: + .long .Lcie_end - .Lcie_start +.Lcie_start: + .long 0 /* CIE ID */ + .byte 1 /* Version number */ + .stringz "zRS" /* NUL-terminated augmentation string */ + .uleb128 4 /* Code alignment factor */ + .sleb128 8 /* Data alignment factor */ + .byte 61 /* Return address register column, iaoq[0] */ + .uleb128 1 /* Augmentation value length */ + .byte 0x1b /* DW_EH_PE_pcrel | DW_EH_PE_sdata4. */ + .byte 0x0f /* DW_CFA_def_cfa_expresion */ + .uleb128 9f - 1f /* length */ +1: + .byte 0x8e /* DW_OP_breg30 */ + .sleb128 PTREGS +9: + .balign 8 +.Lcie_end: + + .long .Lfde0_end - .Lfde0_start +.Lfde0_start: + .long .Lfde0_start - .Lcie /* CIE pointer. */ + .long .Lsigrt_start - . /* PC start, length */ + .long .Lsigrt_end - .Lsigrt_start + .uleb128 0 /* Augmentation */ + + /* General registers */ + rsave 1, 2 + rsave 2, 3 + rsave 3, 4 + rsave 4, 5 + rsave 5, 6 + rsave 6, 7 + rsave 7, 8 + rsave 8, 9 + rsave 9, 10 + rsave 10, 11 + rsave 11, 12 + rsave 12, 13 + rsave 13, 14 + rsave 14, 15 + rsave 15, 16 + rsave 16, 17 + rsave 17, 18 + rsave 18, 19 + rsave 19, 20 + rsave 20, 21 + rsave 21, 22 + rsave 22, 23 + rsave 23, 24 + rsave 24, 25 + rsave 25, 26 + rsave 26, 27 + rsave 27, 28 + rsave 28, 29 + rsave 29, 30 + rsave 30, 31 + rsave 31, 32 + + /* Floating-point registers */ + rsave 32, 36 + rsave 33, 37 + rsave 34, 38 + rsave 35, 39 + rsave 36, 40 + rsave 37, 41 + rsave 38, 42 + rsave 39, 43 + rsave 40, 44 + rsave 41, 45 + rsave 42, 46 + rsave 43, 47 + rsave 44, 48 + rsave 45, 49 + rsave 46, 50 + rsave 47, 51 + rsave 48, 52 + rsave 49, 53 + rsave 50, 54 + rsave 51, 55 + rsave 52, 56 + rsave 53, 57 + rsave 54, 58 + rsave 55, 59 + rsave 56, 60 + rsave 57, 61 + rsave 58, 62 + rsave 59, 63 + + /* SAR register */ + rsave 60, 67 + + /* iaoq[0] return address register */ + rsave 61, 65 + .balign 8 +.Lfde0_end: diff --git a/arch/parisc/kernel/vdso64/vdso64.lds.S b/arch/parisc/kernel/vdso64/vdso64.lds.S new file mode 100644 index 000000000000..de1fb4b19286 --- /dev/null +++ b/arch/parisc/kernel/vdso64/vdso64.lds.S @@ -0,0 +1,109 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * This is the infamous ld script for the 64 bits vdso library + */ +#include <asm/vdso.h> + +/* Default link addresses for the vDSOs */ +OUTPUT_FORMAT("elf64-hppa-linux") +OUTPUT_ARCH(hppa:hppa2.0w) +ENTRY(_start) + +SECTIONS +{ + . = VDSO_LBASE + SIZEOF_HEADERS; + .hash : { *(.hash) } :text + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + + .note : { *(.note.*) } :text :note + + . = ALIGN (16); + .text : + { + *(.text .stub .text.* .gnu.linkonce.t.*) + } + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + + /* Other stuff is appended to the text segment: */ + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.*) } + .rodata1 : { *(.rodata1) } + + .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr + .eh_frame : { KEEP (*(.eh_frame)) } :text + .gcc_except_table : { *(.gcc_except_table) } + .fixup : { *(.fixup) } + + .dynamic : { *(.dynamic) } :text :dynamic + .plt : { *(.plt) } + .got : { *(.got) } + + _end = .; + __end = .; + PROVIDE (end = .); + + + /* Stabs debugging sections are here too + */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.*) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + + /DISCARD/ : { *(.note.GNU-stack) } + /DISCARD/ : { *(.data .data.* .gnu.linkonce.d.* .sdata*) } + /DISCARD/ : { *(.bss .sbss .dynbss .dynsbss) } +} + + +PHDRS +{ + text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ + note PT_NOTE FLAGS(4); /* PF_R */ + dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ + eh_frame_hdr PT_GNU_EH_FRAME; +} + + +/* + * This controls what symbols we export from the DSO. + */ +VERSION +{ + VDSO_VERSION_STRING { + global: + __kernel_sigtramp_rt64; + __kernel_restart_syscall64; + local: *; + }; +} diff --git a/arch/parisc/kernel/vdso64/vdso64_wrapper.S b/arch/parisc/kernel/vdso64/vdso64_wrapper.S new file mode 100644 index 000000000000..66f929482d3d --- /dev/null +++ b/arch/parisc/kernel/vdso64/vdso64_wrapper.S @@ -0,0 +1,14 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#include <linux/linkage.h> +#include <asm/page.h> + + __PAGE_ALIGNED_DATA + + .globl vdso64_start, vdso64_end + .balign PAGE_SIZE +vdso64_start: + .incbin "arch/parisc/kernel/vdso64/vdso64.so" + .balign PAGE_SIZE +vdso64_end: + + .previous diff --git a/arch/parisc/lib/memcpy.c b/arch/parisc/lib/memcpy.c index ea70a0e08321..5fc0c852c84c 100644 --- a/arch/parisc/lib/memcpy.c +++ b/arch/parisc/lib/memcpy.c @@ -13,8 +13,8 @@ #include <linux/compiler.h> #include <linux/uaccess.h> -#define get_user_space() (uaccess_kernel() ? 0 : mfsp(3)) -#define get_kernel_space() (0) +#define get_user_space() mfsp(SR_USER) +#define get_kernel_space() SR_KERNEL /* Returns 0 for success, otherwise, returns number of bytes not transferred. */ extern unsigned long pa_memcpy(void *dst, const void *src, @@ -23,8 +23,8 @@ extern unsigned long pa_memcpy(void *dst, const void *src, unsigned long raw_copy_to_user(void __user *dst, const void *src, unsigned long len) { - mtsp(get_kernel_space(), 1); - mtsp(get_user_space(), 2); + mtsp(get_kernel_space(), SR_TEMP1); + mtsp(get_user_space(), SR_TEMP2); return pa_memcpy((void __force *)dst, src, len); } EXPORT_SYMBOL(raw_copy_to_user); @@ -32,16 +32,16 @@ EXPORT_SYMBOL(raw_copy_to_user); unsigned long raw_copy_from_user(void *dst, const void __user *src, unsigned long len) { - mtsp(get_user_space(), 1); - mtsp(get_kernel_space(), 2); + mtsp(get_user_space(), SR_TEMP1); + mtsp(get_kernel_space(), SR_TEMP2); return pa_memcpy(dst, (void __force *)src, len); } EXPORT_SYMBOL(raw_copy_from_user); void * memcpy(void * dst,const void *src, size_t count) { - mtsp(get_kernel_space(), 1); - mtsp(get_kernel_space(), 2); + mtsp(get_kernel_space(), SR_TEMP1); + mtsp(get_kernel_space(), SR_TEMP2); pa_memcpy(dst, src, count); return dst; } diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index e9eabf8f14d7..f114e102aaf2 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -425,3 +425,92 @@ out_of_memory: } pagefault_out_of_memory(); } + +/* Handle non-access data TLB miss faults. + * + * For probe instructions, accesses to userspace are considered allowed + * if they lie in a valid VMA and the access type matches. We are not + * allowed to handle MM faults here so there may be situations where an + * actual access would fail even though a probe was successful. + */ +int +handle_nadtlb_fault(struct pt_regs *regs) +{ + unsigned long insn = regs->iir; + int breg, treg, xreg, val = 0; + struct vm_area_struct *vma, *prev_vma; + struct task_struct *tsk; + struct mm_struct *mm; + unsigned long address; + unsigned long acc_type; + + switch (insn & 0x380) { + case 0x280: + /* FDC instruction */ + fallthrough; + case 0x380: + /* PDC and FIC instructions */ + if (printk_ratelimit()) { + pr_warn("BUG: nullifying cache flush/purge instruction\n"); + show_regs(regs); + } + if (insn & 0x20) { + /* Base modification */ + breg = (insn >> 21) & 0x1f; + xreg = (insn >> 16) & 0x1f; + if (breg && xreg) + regs->gr[breg] += regs->gr[xreg]; + } + regs->gr[0] |= PSW_N; + return 1; + + case 0x180: + /* PROBE instruction */ + treg = insn & 0x1f; + if (regs->isr) { + tsk = current; + mm = tsk->mm; + if (mm) { + /* Search for VMA */ + address = regs->ior; + mmap_read_lock(mm); + vma = find_vma_prev(mm, address, &prev_vma); + mmap_read_unlock(mm); + + /* + * Check if access to the VMA is okay. + * We don't allow for stack expansion. + */ + acc_type = (insn & 0x40) ? VM_WRITE : VM_READ; + if (vma + && address >= vma->vm_start + && (vma->vm_flags & acc_type) == acc_type) + val = 1; + } + } + if (treg) + regs->gr[treg] = val; + regs->gr[0] |= PSW_N; + return 1; + + case 0x300: + /* LPA instruction */ + if (insn & 0x20) { + /* Base modification */ + breg = (insn >> 21) & 0x1f; + xreg = (insn >> 16) & 0x1f; + if (breg && xreg) + regs->gr[breg] += regs->gr[xreg]; + } + treg = insn & 0x1f; + if (treg) + regs->gr[treg] = 0; + regs->gr[0] |= PSW_N; + return 1; + + default: + break; + } + + return 0; +} diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index b779603978e1..7e7387bd7d53 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -108,6 +108,7 @@ config PPC select ARCH_ENABLE_MEMORY_HOTPLUG select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_HAS_COPY_MC if PPC64 + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX if STRICT_KERNEL_RWX diff --git a/arch/powerpc/configs/linkstation_defconfig b/arch/powerpc/configs/linkstation_defconfig index d4be64f190ff..fa707de761be 100644 --- a/arch/powerpc/configs/linkstation_defconfig +++ b/arch/powerpc/configs/linkstation_defconfig @@ -120,7 +120,6 @@ CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_932=m diff --git a/arch/powerpc/configs/mvme5100_defconfig b/arch/powerpc/configs/mvme5100_defconfig index 1fed6be95d53..d1c7fd5bf34b 100644 --- a/arch/powerpc/configs/mvme5100_defconfig +++ b/arch/powerpc/configs/mvme5100_defconfig @@ -101,7 +101,6 @@ CONFIG_NFS_V3_ACL=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_CIFS=m CONFIG_NLS=y CONFIG_NLS_CODEPAGE_437=m diff --git a/arch/powerpc/include/asm/xor_altivec.h b/arch/powerpc/include/asm/xor_altivec.h index 6ca923510b59..294620a25f80 100644 --- a/arch/powerpc/include/asm/xor_altivec.h +++ b/arch/powerpc/include/asm/xor_altivec.h @@ -3,17 +3,20 @@ #define _ASM_POWERPC_XOR_ALTIVEC_H #ifdef CONFIG_ALTIVEC - -void xor_altivec_2(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in); -void xor_altivec_3(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in); -void xor_altivec_4(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in); -void xor_altivec_5(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in, unsigned long *v5_in); +void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); #endif #endif /* _ASM_POWERPC_XOR_ALTIVEC_H */ diff --git a/arch/powerpc/lib/xor_vmx.c b/arch/powerpc/lib/xor_vmx.c index 54e61979e80e..aab49d056d18 100644 --- a/arch/powerpc/lib/xor_vmx.c +++ b/arch/powerpc/lib/xor_vmx.c @@ -49,8 +49,9 @@ typedef vector signed char unative_t; V1##_3 = vec_xor(V1##_3, V2##_3); \ } while (0) -void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in) +void __xor_altivec_2(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in) { DEFINE(v1); DEFINE(v2); @@ -67,8 +68,10 @@ void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in, } while (--lines > 0); } -void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in) +void __xor_altivec_3(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in) { DEFINE(v1); DEFINE(v2); @@ -89,9 +92,11 @@ void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in, } while (--lines > 0); } -void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in) +void __xor_altivec_4(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in, + const unsigned long * __restrict v4_in) { DEFINE(v1); DEFINE(v2); @@ -116,9 +121,12 @@ void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in, } while (--lines > 0); } -void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in, unsigned long *v5_in) +void __xor_altivec_5(unsigned long bytes, + unsigned long * __restrict v1_in, + const unsigned long * __restrict v2_in, + const unsigned long * __restrict v3_in, + const unsigned long * __restrict v4_in, + const unsigned long * __restrict v5_in) { DEFINE(v1); DEFINE(v2); diff --git a/arch/powerpc/lib/xor_vmx.h b/arch/powerpc/lib/xor_vmx.h index 5c2b0839b179..573c41d90dac 100644 --- a/arch/powerpc/lib/xor_vmx.h +++ b/arch/powerpc/lib/xor_vmx.h @@ -6,16 +6,17 @@ * outside of the enable/disable altivec block. */ -void __xor_altivec_2(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in); - -void __xor_altivec_3(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in); - -void __xor_altivec_4(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in); - -void __xor_altivec_5(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in, unsigned long *v5_in); +void __xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void __xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +void __xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void __xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); diff --git a/arch/powerpc/lib/xor_vmx_glue.c b/arch/powerpc/lib/xor_vmx_glue.c index 80dba916c367..35d917ece4d1 100644 --- a/arch/powerpc/lib/xor_vmx_glue.c +++ b/arch/powerpc/lib/xor_vmx_glue.c @@ -12,47 +12,51 @@ #include <asm/xor_altivec.h> #include "xor_vmx.h" -void xor_altivec_2(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in) +void xor_altivec_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { preempt_disable(); enable_kernel_altivec(); - __xor_altivec_2(bytes, v1_in, v2_in); + __xor_altivec_2(bytes, p1, p2); disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_2); -void xor_altivec_3(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in) +void xor_altivec_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { preempt_disable(); enable_kernel_altivec(); - __xor_altivec_3(bytes, v1_in, v2_in, v3_in); + __xor_altivec_3(bytes, p1, p2, p3); disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_3); -void xor_altivec_4(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in) +void xor_altivec_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { preempt_disable(); enable_kernel_altivec(); - __xor_altivec_4(bytes, v1_in, v2_in, v3_in, v4_in); + __xor_altivec_4(bytes, p1, p2, p3, p4); disable_kernel_altivec(); preempt_enable(); } EXPORT_SYMBOL(xor_altivec_4); -void xor_altivec_5(unsigned long bytes, unsigned long *v1_in, - unsigned long *v2_in, unsigned long *v3_in, - unsigned long *v4_in, unsigned long *v5_in) +void xor_altivec_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { preempt_disable(); enable_kernel_altivec(); - __xor_altivec_5(bytes, v1_in, v2_in, v3_in, v4_in, v5_in); + __xor_altivec_5(bytes, p1, p2, p3, p4, p5); disable_kernel_altivec(); preempt_enable(); } diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index be9f39fd06df..4845ab549dd1 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -60,6 +60,7 @@ config S390 select ARCH_ENABLE_MEMORY_HOTPLUG if SPARSEMEM select ARCH_ENABLE_MEMORY_HOTREMOVE select ARCH_ENABLE_SPLIT_PMD_PTLOCK if PGTABLE_LEVELS > 2 + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VM_PGTABLE select ARCH_HAS_DEBUG_WX select ARCH_HAS_DEVMEM_IS_ALLOWED diff --git a/arch/s390/lib/xor.c b/arch/s390/lib/xor.c index a963c3d8ad0d..fb924a8041dc 100644 --- a/arch/s390/lib/xor.c +++ b/arch/s390/lib/xor.c @@ -11,7 +11,8 @@ #include <linux/raid/xor.h> #include <asm/xor.h> -static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +static void xor_xc_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { asm volatile( " larl 1,2f\n" @@ -32,8 +33,9 @@ static void xor_xc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) : "0", "1", "cc", "memory"); } -static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +static void xor_xc_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { asm volatile( " larl 1,2f\n" @@ -58,8 +60,10 @@ static void xor_xc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, : : "0", "1", "cc", "memory"); } -static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +static void xor_xc_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { asm volatile( " larl 1,2f\n" @@ -88,8 +92,11 @@ static void xor_xc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, : : "0", "1", "cc", "memory"); } -static void xor_xc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +static void xor_xc_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { asm volatile( " larl 1,2f\n" diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 2474a04ceac4..1c2b53bf3093 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -7,6 +7,7 @@ config SUPERH select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAVE_NMI_SAFE_CMPXCHG if (GUSA_RB || CPU_SH4A) select ARCH_HAS_BINFMT_FLAT if !MMU + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_PTE_SPECIAL diff --git a/arch/sh/configs/ap325rxa_defconfig b/arch/sh/configs/ap325rxa_defconfig index 5193b3e099b9..4d83576b89c6 100644 --- a/arch/sh/configs/ap325rxa_defconfig +++ b/arch/sh/configs/ap325rxa_defconfig @@ -93,7 +93,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_NFSD_V3=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y CONFIG_NLS_ISO8859_1=y diff --git a/arch/sh/configs/ecovec24_defconfig b/arch/sh/configs/ecovec24_defconfig index 03cb916819fa..d90d29d44469 100644 --- a/arch/sh/configs/ecovec24_defconfig +++ b/arch/sh/configs/ecovec24_defconfig @@ -123,7 +123,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_NFSD_V3=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y CONFIG_NLS_ISO8859_1=y diff --git a/arch/sh/configs/landisk_defconfig b/arch/sh/configs/landisk_defconfig index e6c5ddf070c0..492a0a2e0e36 100644 --- a/arch/sh/configs/landisk_defconfig +++ b/arch/sh/configs/landisk_defconfig @@ -108,7 +108,6 @@ CONFIG_UFS_FS=m CONFIG_NFS_FS=m CONFIG_NFS_V3=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_SMB_FS=m CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y diff --git a/arch/sh/configs/sdk7780_defconfig b/arch/sh/configs/sdk7780_defconfig index 6c719ab4332a..7d6d32359848 100644 --- a/arch/sh/configs/sdk7780_defconfig +++ b/arch/sh/configs/sdk7780_defconfig @@ -120,7 +120,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_NFSD_V3=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ASCII=y CONFIG_NLS_ISO8859_1=y diff --git a/arch/sh/configs/se7724_defconfig b/arch/sh/configs/se7724_defconfig index a26f7f1841c7..d817df7cc4a7 100644 --- a/arch/sh/configs/se7724_defconfig +++ b/arch/sh/configs/se7724_defconfig @@ -122,7 +122,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_NFSD_V3=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y CONFIG_NLS_ISO8859_1=y diff --git a/arch/sh/configs/sh03_defconfig b/arch/sh/configs/sh03_defconfig index ff502683132e..9fcf68b22dba 100644 --- a/arch/sh/configs/sh03_defconfig +++ b/arch/sh/configs/sh03_defconfig @@ -75,7 +75,6 @@ CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_NFSD_V3=y CONFIG_PARTITION_ADVANCED=y CONFIG_NLS_CODEPAGE_437=m CONFIG_NLS_CODEPAGE_737=m diff --git a/arch/sh/configs/sh7785lcr_32bit_defconfig b/arch/sh/configs/sh7785lcr_32bit_defconfig index 5c725c75fcef..7eb3c10f28ad 100644 --- a/arch/sh/configs/sh7785lcr_32bit_defconfig +++ b/arch/sh/configs/sh7785lcr_32bit_defconfig @@ -129,7 +129,6 @@ CONFIG_NFS_V3=y CONFIG_NFS_V4=y CONFIG_ROOT_NFS=y CONFIG_NFSD=m -CONFIG_NFSD_V3=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_CODEPAGE_932=y CONFIG_NLS_ISO8859_1=y diff --git a/arch/sh/configs/titan_defconfig b/arch/sh/configs/titan_defconfig index cd5c58916c65..73a0d68b0de6 100644 --- a/arch/sh/configs/titan_defconfig +++ b/arch/sh/configs/titan_defconfig @@ -239,7 +239,6 @@ CONFIG_NFS_FS=y CONFIG_NFS_V3=y CONFIG_ROOT_NFS=y CONFIG_NFSD=y -CONFIG_NFSD_V3=y CONFIG_SMB_FS=m CONFIG_CIFS=m CONFIG_PARTITION_ADVANCED=y diff --git a/arch/sparc/include/asm/xor_32.h b/arch/sparc/include/asm/xor_32.h index 3e5af37e4b9c..0351813cf3af 100644 --- a/arch/sparc/include/asm/xor_32.h +++ b/arch/sparc/include/asm/xor_32.h @@ -13,7 +13,8 @@ */ static void -sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +sparc_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { int lines = bytes / (sizeof (long)) / 8; @@ -50,8 +51,9 @@ sparc_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -sparc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +sparc_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { int lines = bytes / (sizeof (long)) / 8; @@ -101,8 +103,10 @@ sparc_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -sparc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +sparc_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { int lines = bytes / (sizeof (long)) / 8; @@ -165,8 +169,11 @@ sparc_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -sparc_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +sparc_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { int lines = bytes / (sizeof (long)) / 8; diff --git a/arch/sparc/include/asm/xor_64.h b/arch/sparc/include/asm/xor_64.h index 16169f3edcd5..caaddea8ad79 100644 --- a/arch/sparc/include/asm/xor_64.h +++ b/arch/sparc/include/asm/xor_64.h @@ -12,13 +12,20 @@ #include <asm/spitfire.h> -void xor_vis_2(unsigned long, unsigned long *, unsigned long *); -void xor_vis_3(unsigned long, unsigned long *, unsigned long *, - unsigned long *); -void xor_vis_4(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *); -void xor_vis_5(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *, unsigned long *); +void xor_vis_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void xor_vis_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +void xor_vis_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void xor_vis_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); /* XXX Ugh, write cheetah versions... -DaveM */ @@ -30,13 +37,20 @@ static struct xor_block_template xor_block_VIS = { .do_5 = xor_vis_5, }; -void xor_niagara_2(unsigned long, unsigned long *, unsigned long *); -void xor_niagara_3(unsigned long, unsigned long *, unsigned long *, - unsigned long *); -void xor_niagara_4(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *); -void xor_niagara_5(unsigned long, unsigned long *, unsigned long *, - unsigned long *, unsigned long *, unsigned long *); +void xor_niagara_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2); +void xor_niagara_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3); +void xor_niagara_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4); +void xor_niagara_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5); static struct xor_block_template xor_block_niagara = { .name = "Niagara", diff --git a/arch/um/os-Linux/execvp.c b/arch/um/os-Linux/execvp.c index 84a0777c2a45..c09a5fd5e225 100644 --- a/arch/um/os-Linux/execvp.c +++ b/arch/um/os-Linux/execvp.c @@ -93,6 +93,7 @@ int execvp_noalloc(char *buf, const char *file, char *const argv[]) up finding no executable we can use, we want to diagnose that we did find one but were denied access. */ got_eacces = 1; + break; case ENOENT: case ESTALE: case ENOTDIR: diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index f384cb1a4f7a..5a83da703e87 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -1,4 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 +obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += coco/ + obj-y += entry/ obj-$(CONFIG_PERF_EVENTS) += events/ diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 37372cd5c9a7..61e511c51890 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -69,6 +69,7 @@ config X86 select ARCH_ENABLE_THP_MIGRATION if X86_64 && TRANSPARENT_HUGEPAGE select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_CACHE_LINE_SIZE + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEBUG_VM_PGTABLE if !X86_PAE select ARCH_HAS_DEVMEM_IS_ALLOWED @@ -121,6 +122,7 @@ config X86 select ARCH_WANT_GENERAL_HUGETLB select ARCH_WANT_HUGE_PMD_SHARE select ARCH_WANT_LD_ORPHAN_WARN + select ARCH_WANTS_RT_DELAYED_SIGNALS select ARCH_WANTS_THP_SWAP if X86_64 select ARCH_HAS_PARANOID_L1D_FLUSH select BUILDTIME_TABLE_SORT @@ -247,7 +249,7 @@ config X86 select HAVE_STACK_VALIDATION if X86_64 select HAVE_STATIC_CALL select HAVE_STATIC_CALL_INLINE if HAVE_STACK_VALIDATION - select HAVE_PREEMPT_DYNAMIC + select HAVE_PREEMPT_DYNAMIC_CALL select HAVE_RSEQ select HAVE_SYSCALL_TRACEPOINTS select HAVE_UNSTABLE_SCHED_CLOCK @@ -1273,23 +1275,6 @@ config TOSHIBA Say Y if you intend to run this kernel on a Toshiba portable. Say N otherwise. -config I8K - tristate "Dell i8k legacy laptop support" - depends on HWMON - depends on PROC_FS - select SENSORS_DELL_SMM - help - This option enables legacy /proc/i8k userspace interface in hwmon - dell-smm-hwmon driver. Character file /proc/i8k reports bios version, - temperature and allows controlling fan speeds of Dell laptops via - System Management Mode. For old Dell laptops (like Dell Inspiron 8000) - it reports also power and hotkey status. For fan speed control is - needed userspace package i8kutils. - - Say Y if you intend to run this kernel on old Dell laptops or want to - use userspace package i8kutils. - Say N otherwise. - config X86_REBOOTFIXUPS bool "Enable X86 board specific fixups for reboot" depends on X86_32 @@ -1636,7 +1621,7 @@ config ARCH_SPARSEMEM_DEFAULT config ARCH_SELECT_MEMORY_MODEL def_bool y - depends on ARCH_SPARSEMEM_ENABLE + depends on ARCH_SPARSEMEM_ENABLE && ARCH_FLATMEM_ENABLE config ARCH_MEMORY_PROBE bool "Enable sysfs memory/probe interface" diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 659fad53ca82..3b354eb9516d 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -152,14 +152,13 @@ SYM_FUNC_END(startup_32) #ifdef CONFIG_EFI_STUB SYM_FUNC_START(efi32_stub_entry) -SYM_FUNC_START_ALIAS(efi_stub_entry) add $0x4, %esp movl 8(%esp), %esi /* save boot_params pointer */ call efi_main /* efi_main returns the possibly relocated address of startup_32 */ jmp *%eax SYM_FUNC_END(efi32_stub_entry) -SYM_FUNC_END_ALIAS(efi_stub_entry) +SYM_FUNC_ALIAS(efi_stub_entry, efi32_stub_entry) #endif .text diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fd9441f40457..dea95301196b 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -535,7 +535,6 @@ SYM_CODE_END(startup_64) #ifdef CONFIG_EFI_STUB .org 0x390 SYM_FUNC_START(efi64_stub_entry) -SYM_FUNC_START_ALIAS(efi_stub_entry) and $~0xf, %rsp /* realign the stack */ movq %rdx, %rbx /* save boot_params pointer */ call efi_main @@ -543,7 +542,7 @@ SYM_FUNC_START_ALIAS(efi_stub_entry) leaq rva(startup_64)(%rax), %rax jmp *%rax SYM_FUNC_END(efi64_stub_entry) -SYM_FUNC_END_ALIAS(efi_stub_entry) +SYM_FUNC_ALIAS(efi_stub_entry, efi64_stub_entry) #endif .text diff --git a/arch/x86/coco/Makefile b/arch/x86/coco/Makefile new file mode 100644 index 000000000000..c1ead00017a7 --- /dev/null +++ b/arch/x86/coco/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 +CFLAGS_REMOVE_core.o = -pg +KASAN_SANITIZE_core.o := n +CFLAGS_core.o += -fno-stack-protector + +obj-y += core.o diff --git a/arch/x86/kernel/cc_platform.c b/arch/x86/coco/core.c index 6a6ffcd978f6..fc1365dd927e 100644 --- a/arch/x86/kernel/cc_platform.c +++ b/arch/x86/coco/core.c @@ -9,18 +9,16 @@ #include <linux/export.h> #include <linux/cc_platform.h> -#include <linux/mem_encrypt.h> -#include <asm/mshyperv.h> +#include <asm/coco.h> #include <asm/processor.h> -static bool __maybe_unused intel_cc_platform_has(enum cc_attr attr) +static enum cc_vendor vendor __ro_after_init; +static u64 cc_mask __ro_after_init; + +static bool intel_cc_platform_has(enum cc_attr attr) { -#ifdef CONFIG_INTEL_TDX_GUEST - return false; -#else return false; -#endif } /* @@ -74,12 +72,46 @@ static bool hyperv_cc_platform_has(enum cc_attr attr) bool cc_platform_has(enum cc_attr attr) { - if (sme_me_mask) + switch (vendor) { + case CC_VENDOR_AMD: return amd_cc_platform_has(attr); - - if (hv_is_isolation_supported()) + case CC_VENDOR_INTEL: + return intel_cc_platform_has(attr); + case CC_VENDOR_HYPERV: return hyperv_cc_platform_has(attr); - - return false; + default: + return false; + } } EXPORT_SYMBOL_GPL(cc_platform_has); + +u64 cc_mkenc(u64 val) +{ + switch (vendor) { + case CC_VENDOR_AMD: + return val | cc_mask; + default: + return val; + } +} + +u64 cc_mkdec(u64 val) +{ + switch (vendor) { + case CC_VENDOR_AMD: + return val & ~cc_mask; + default: + return val; + } +} +EXPORT_SYMBOL_GPL(cc_mkdec); + +__init void cc_set_vendor(enum cc_vendor v) +{ + vendor = v; +} + +__init void cc_set_mask(u64 mask) +{ + cc_mask = mask; +} diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index c3af959648e6..2831685adf6f 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -90,6 +90,9 @@ nhpoly1305-avx2-y := nh-avx2-x86_64.o nhpoly1305-avx2-glue.o obj-$(CONFIG_CRYPTO_CURVE25519_X86) += curve25519-x86_64.o +obj-$(CONFIG_CRYPTO_SM3_AVX_X86_64) += sm3-avx-x86_64.o +sm3-avx-x86_64-y := sm3-avx-asm_64.o sm3_avx_glue.o + obj-$(CONFIG_CRYPTO_SM4_AESNI_AVX_X86_64) += sm4-aesni-avx-x86_64.o sm4-aesni-avx-x86_64-y := sm4-aesni-avx-asm_64.o sm4_aesni_avx_glue.o diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S index c799838242a6..43852ba6e19c 100644 --- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S +++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S @@ -1,65 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0-only OR BSD-3-Clause */ /* - * Implement AES CTR mode by8 optimization with AVX instructions. (x86_64) - * - * This is AES128/192/256 CTR mode optimization implementation. It requires - * the support of Intel(R) AESNI and AVX instructions. - * - * This work was inspired by the AES CTR mode optimization published - * in Intel Optimized IPSEC Cryptograhpic library. - * Additional information on it can be found at: - * http://downloadcenter.intel.com/Detail_Desc.aspx?agr=Y&DwnldID=22972 - * - * This file is provided under a dual BSD/GPLv2 license. When using or - * redistributing this file, you may do so under either license. - * - * GPL LICENSE SUMMARY + * AES CTR mode by8 optimization with AVX instructions. (x86_64) * * Copyright(c) 2014 Intel Corporation. * - * This program is free software; you can redistribute it and/or modify - * it under the terms of version 2 of the GNU General Public License as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - * * Contact Information: * James Guilford <james.guilford@intel.com> * Sean Gulley <sean.m.gulley@intel.com> * Chandramouli Narayanan <mouli@linux.intel.com> + */ +/* + * This is AES128/192/256 CTR mode optimization implementation. It requires + * the support of Intel(R) AESNI and AVX instructions. * - * BSD LICENSE - * - * Copyright(c) 2014 Intel Corporation. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * - * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in - * the documentation and/or other materials provided with the - * distribution. - * Neither the name of Intel Corporation nor the names of its - * contributors may be used to endorse or promote products derived - * from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - * + * This work was inspired by the AES CTR mode optimization published + * in Intel Optimized IPSEC Cryptographic library. + * Additional information on it can be found at: + * https://github.com/intel/intel-ipsec-mb */ #include <linux/linkage.h> diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 363699dd7220..837c1e0aa021 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -1751,8 +1751,6 @@ SYM_FUNC_END(aesni_gcm_finalize) #endif - -SYM_FUNC_START_LOCAL_ALIAS(_key_expansion_128) SYM_FUNC_START_LOCAL(_key_expansion_256a) pshufd $0b11111111, %xmm1, %xmm1 shufps $0b00010000, %xmm0, %xmm4 @@ -1764,7 +1762,7 @@ SYM_FUNC_START_LOCAL(_key_expansion_256a) add $0x10, TKEYP RET SYM_FUNC_END(_key_expansion_256a) -SYM_FUNC_END_ALIAS(_key_expansion_128) +SYM_FUNC_ALIAS_LOCAL(_key_expansion_128, _key_expansion_256a) SYM_FUNC_START_LOCAL(_key_expansion_192a) pshufd $0b01010101, %xmm1, %xmm1 diff --git a/arch/x86/crypto/blowfish_glue.c b/arch/x86/crypto/blowfish_glue.c index a880e0b1c255..fda6066437aa 100644 --- a/arch/x86/crypto/blowfish_glue.c +++ b/arch/x86/crypto/blowfish_glue.c @@ -32,24 +32,12 @@ static inline void blowfish_enc_blk(struct bf_ctx *ctx, u8 *dst, const u8 *src) __blowfish_enc_blk(ctx, dst, src, false); } -static inline void blowfish_enc_blk_xor(struct bf_ctx *ctx, u8 *dst, - const u8 *src) -{ - __blowfish_enc_blk(ctx, dst, src, true); -} - static inline void blowfish_enc_blk_4way(struct bf_ctx *ctx, u8 *dst, const u8 *src) { __blowfish_enc_blk_4way(ctx, dst, src, false); } -static inline void blowfish_enc_blk_xor_4way(struct bf_ctx *ctx, u8 *dst, - const u8 *src) -{ - __blowfish_enc_blk_4way(ctx, dst, src, true); -} - static void blowfish_encrypt(struct crypto_tfm *tfm, u8 *dst, const u8 *src) { blowfish_enc_blk(crypto_tfm_ctx(tfm), dst, src); diff --git a/arch/x86/crypto/des3_ede_glue.c b/arch/x86/crypto/des3_ede_glue.c index 787c234d2469..abb8b1fe123b 100644 --- a/arch/x86/crypto/des3_ede_glue.c +++ b/arch/x86/crypto/des3_ede_glue.c @@ -45,14 +45,6 @@ static inline void des3_ede_dec_blk(struct des3_ede_x86_ctx *ctx, u8 *dst, des3_ede_x86_64_crypt_blk(dec_ctx, dst, src); } -static inline void des3_ede_enc_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst, - const u8 *src) -{ - u32 *enc_ctx = ctx->enc.expkey; - - des3_ede_x86_64_crypt_blk_3way(enc_ctx, dst, src); -} - static inline void des3_ede_dec_blk_3way(struct des3_ede_x86_ctx *ctx, u8 *dst, const u8 *src) { diff --git a/arch/x86/crypto/sm3-avx-asm_64.S b/arch/x86/crypto/sm3-avx-asm_64.S new file mode 100644 index 000000000000..71e6aae23e17 --- /dev/null +++ b/arch/x86/crypto/sm3-avx-asm_64.S @@ -0,0 +1,517 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM3 AVX accelerated transform. + * specified in: https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02 + * + * Copyright (C) 2021 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * Copyright (C) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> + */ + +/* Based on SM3 AES/BMI2 accelerated work by libgcrypt at: + * https://gnupg.org/software/libgcrypt/index.html + */ + +#include <linux/linkage.h> +#include <asm/frame.h> + +/* Context structure */ + +#define state_h0 0 +#define state_h1 4 +#define state_h2 8 +#define state_h3 12 +#define state_h4 16 +#define state_h5 20 +#define state_h6 24 +#define state_h7 28 + +/* Constants */ + +/* Round constant macros */ + +#define K0 2043430169 /* 0x79cc4519 */ +#define K1 -208106958 /* 0xf3988a32 */ +#define K2 -416213915 /* 0xe7311465 */ +#define K3 -832427829 /* 0xce6228cb */ +#define K4 -1664855657 /* 0x9cc45197 */ +#define K5 965255983 /* 0x3988a32f */ +#define K6 1930511966 /* 0x7311465e */ +#define K7 -433943364 /* 0xe6228cbc */ +#define K8 -867886727 /* 0xcc451979 */ +#define K9 -1735773453 /* 0x988a32f3 */ +#define K10 823420391 /* 0x311465e7 */ +#define K11 1646840782 /* 0x6228cbce */ +#define K12 -1001285732 /* 0xc451979c */ +#define K13 -2002571463 /* 0x88a32f39 */ +#define K14 289824371 /* 0x11465e73 */ +#define K15 579648742 /* 0x228cbce6 */ +#define K16 -1651869049 /* 0x9d8a7a87 */ +#define K17 991229199 /* 0x3b14f50f */ +#define K18 1982458398 /* 0x7629ea1e */ +#define K19 -330050500 /* 0xec53d43c */ +#define K20 -660100999 /* 0xd8a7a879 */ +#define K21 -1320201997 /* 0xb14f50f3 */ +#define K22 1654563303 /* 0x629ea1e7 */ +#define K23 -985840690 /* 0xc53d43ce */ +#define K24 -1971681379 /* 0x8a7a879d */ +#define K25 351604539 /* 0x14f50f3b */ +#define K26 703209078 /* 0x29ea1e76 */ +#define K27 1406418156 /* 0x53d43cec */ +#define K28 -1482130984 /* 0xa7a879d8 */ +#define K29 1330705329 /* 0x4f50f3b1 */ +#define K30 -1633556638 /* 0x9ea1e762 */ +#define K31 1027854021 /* 0x3d43cec5 */ +#define K32 2055708042 /* 0x7a879d8a */ +#define K33 -183551212 /* 0xf50f3b14 */ +#define K34 -367102423 /* 0xea1e7629 */ +#define K35 -734204845 /* 0xd43cec53 */ +#define K36 -1468409689 /* 0xa879d8a7 */ +#define K37 1358147919 /* 0x50f3b14f */ +#define K38 -1578671458 /* 0xa1e7629e */ +#define K39 1137624381 /* 0x43cec53d */ +#define K40 -2019718534 /* 0x879d8a7a */ +#define K41 255530229 /* 0x0f3b14f5 */ +#define K42 511060458 /* 0x1e7629ea */ +#define K43 1022120916 /* 0x3cec53d4 */ +#define K44 2044241832 /* 0x79d8a7a8 */ +#define K45 -206483632 /* 0xf3b14f50 */ +#define K46 -412967263 /* 0xe7629ea1 */ +#define K47 -825934525 /* 0xcec53d43 */ +#define K48 -1651869049 /* 0x9d8a7a87 */ +#define K49 991229199 /* 0x3b14f50f */ +#define K50 1982458398 /* 0x7629ea1e */ +#define K51 -330050500 /* 0xec53d43c */ +#define K52 -660100999 /* 0xd8a7a879 */ +#define K53 -1320201997 /* 0xb14f50f3 */ +#define K54 1654563303 /* 0x629ea1e7 */ +#define K55 -985840690 /* 0xc53d43ce */ +#define K56 -1971681379 /* 0x8a7a879d */ +#define K57 351604539 /* 0x14f50f3b */ +#define K58 703209078 /* 0x29ea1e76 */ +#define K59 1406418156 /* 0x53d43cec */ +#define K60 -1482130984 /* 0xa7a879d8 */ +#define K61 1330705329 /* 0x4f50f3b1 */ +#define K62 -1633556638 /* 0x9ea1e762 */ +#define K63 1027854021 /* 0x3d43cec5 */ + +/* Register macros */ + +#define RSTATE %rdi +#define RDATA %rsi +#define RNBLKS %rdx + +#define t0 %eax +#define t1 %ebx +#define t2 %ecx + +#define a %r8d +#define b %r9d +#define c %r10d +#define d %r11d +#define e %r12d +#define f %r13d +#define g %r14d +#define h %r15d + +#define W0 %xmm0 +#define W1 %xmm1 +#define W2 %xmm2 +#define W3 %xmm3 +#define W4 %xmm4 +#define W5 %xmm5 + +#define XTMP0 %xmm6 +#define XTMP1 %xmm7 +#define XTMP2 %xmm8 +#define XTMP3 %xmm9 +#define XTMP4 %xmm10 +#define XTMP5 %xmm11 +#define XTMP6 %xmm12 + +#define BSWAP_REG %xmm15 + +/* Stack structure */ + +#define STACK_W_SIZE (32 * 2 * 3) +#define STACK_REG_SAVE_SIZE (64) + +#define STACK_W (0) +#define STACK_REG_SAVE (STACK_W + STACK_W_SIZE) +#define STACK_SIZE (STACK_REG_SAVE + STACK_REG_SAVE_SIZE) + +/* Instruction helpers. */ + +#define roll2(v, reg) \ + roll $(v), reg; + +#define roll3mov(v, src, dst) \ + movl src, dst; \ + roll $(v), dst; + +#define roll3(v, src, dst) \ + rorxl $(32-(v)), src, dst; + +#define addl2(a, out) \ + leal (a, out), out; + +/* Round function macros. */ + +#define GG1(x, y, z, o, t) \ + movl x, o; \ + xorl y, o; \ + xorl z, o; + +#define FF1(x, y, z, o, t) GG1(x, y, z, o, t) + +#define GG2(x, y, z, o, t) \ + andnl z, x, o; \ + movl y, t; \ + andl x, t; \ + addl2(t, o); + +#define FF2(x, y, z, o, t) \ + movl y, o; \ + xorl x, o; \ + movl y, t; \ + andl x, t; \ + andl z, o; \ + xorl t, o; + +#define R(i, a, b, c, d, e, f, g, h, round, widx, wtype) \ + /* rol(a, 12) => t0 */ \ + roll3mov(12, a, t0); /* rorxl here would reduce perf by 6% on zen3 */ \ + /* rol (t0 + e + t), 7) => t1 */ \ + leal K##round(t0, e, 1), t1; \ + roll2(7, t1); \ + /* h + w1 => h */ \ + addl wtype##_W1_ADDR(round, widx), h; \ + /* h + t1 => h */ \ + addl2(t1, h); \ + /* t1 ^ t0 => t0 */ \ + xorl t1, t0; \ + /* w1w2 + d => d */ \ + addl wtype##_W1W2_ADDR(round, widx), d; \ + /* FF##i(a,b,c) => t1 */ \ + FF##i(a, b, c, t1, t2); \ + /* d + t1 => d */ \ + addl2(t1, d); \ + /* GG#i(e,f,g) => t2 */ \ + GG##i(e, f, g, t2, t1); \ + /* h + t2 => h */ \ + addl2(t2, h); \ + /* rol (f, 19) => f */ \ + roll2(19, f); \ + /* d + t0 => d */ \ + addl2(t0, d); \ + /* rol (b, 9) => b */ \ + roll2(9, b); \ + /* P0(h) => h */ \ + roll3(9, h, t2); \ + roll3(17, h, t1); \ + xorl t2, h; \ + xorl t1, h; + +#define R1(a, b, c, d, e, f, g, h, round, widx, wtype) \ + R(1, a, b, c, d, e, f, g, h, round, widx, wtype) + +#define R2(a, b, c, d, e, f, g, h, round, widx, wtype) \ + R(2, a, b, c, d, e, f, g, h, round, widx, wtype) + +/* Input expansion macros. */ + +/* Byte-swapped input address. */ +#define IW_W_ADDR(round, widx, offs) \ + (STACK_W + ((round) / 4) * 64 + (offs) + ((widx) * 4))(%rsp) + +/* Expanded input address. */ +#define XW_W_ADDR(round, widx, offs) \ + (STACK_W + ((((round) / 3) - 4) % 2) * 64 + (offs) + ((widx) * 4))(%rsp) + +/* Rounds 1-12, byte-swapped input block addresses. */ +#define IW_W1_ADDR(round, widx) IW_W_ADDR(round, widx, 0) +#define IW_W1W2_ADDR(round, widx) IW_W_ADDR(round, widx, 32) + +/* Rounds 1-12, expanded input block addresses. */ +#define XW_W1_ADDR(round, widx) XW_W_ADDR(round, widx, 0) +#define XW_W1W2_ADDR(round, widx) XW_W_ADDR(round, widx, 32) + +/* Input block loading. */ +#define LOAD_W_XMM_1() \ + vmovdqu 0*16(RDATA), XTMP0; /* XTMP0: w3, w2, w1, w0 */ \ + vmovdqu 1*16(RDATA), XTMP1; /* XTMP1: w7, w6, w5, w4 */ \ + vmovdqu 2*16(RDATA), XTMP2; /* XTMP2: w11, w10, w9, w8 */ \ + vmovdqu 3*16(RDATA), XTMP3; /* XTMP3: w15, w14, w13, w12 */ \ + vpshufb BSWAP_REG, XTMP0, XTMP0; \ + vpshufb BSWAP_REG, XTMP1, XTMP1; \ + vpshufb BSWAP_REG, XTMP2, XTMP2; \ + vpshufb BSWAP_REG, XTMP3, XTMP3; \ + vpxor XTMP0, XTMP1, XTMP4; \ + vpxor XTMP1, XTMP2, XTMP5; \ + vpxor XTMP2, XTMP3, XTMP6; \ + leaq 64(RDATA), RDATA; \ + vmovdqa XTMP0, IW_W1_ADDR(0, 0); \ + vmovdqa XTMP4, IW_W1W2_ADDR(0, 0); \ + vmovdqa XTMP1, IW_W1_ADDR(4, 0); \ + vmovdqa XTMP5, IW_W1W2_ADDR(4, 0); + +#define LOAD_W_XMM_2() \ + vmovdqa XTMP2, IW_W1_ADDR(8, 0); \ + vmovdqa XTMP6, IW_W1W2_ADDR(8, 0); + +#define LOAD_W_XMM_3() \ + vpshufd $0b00000000, XTMP0, W0; /* W0: xx, w0, xx, xx */ \ + vpshufd $0b11111001, XTMP0, W1; /* W1: xx, w3, w2, w1 */ \ + vmovdqa XTMP1, W2; /* W2: xx, w6, w5, w4 */ \ + vpalignr $12, XTMP1, XTMP2, W3; /* W3: xx, w9, w8, w7 */ \ + vpalignr $8, XTMP2, XTMP3, W4; /* W4: xx, w12, w11, w10 */ \ + vpshufd $0b11111001, XTMP3, W5; /* W5: xx, w15, w14, w13 */ + +/* Message scheduling. Note: 3 words per XMM register. */ +#define SCHED_W_0(round, w0, w1, w2, w3, w4, w5) \ + /* Load (w[i - 16]) => XTMP0 */ \ + vpshufd $0b10111111, w0, XTMP0; \ + vpalignr $12, XTMP0, w1, XTMP0; /* XTMP0: xx, w2, w1, w0 */ \ + /* Load (w[i - 13]) => XTMP1 */ \ + vpshufd $0b10111111, w1, XTMP1; \ + vpalignr $12, XTMP1, w2, XTMP1; \ + /* w[i - 9] == w3 */ \ + /* XMM3 ^ XTMP0 => XTMP0 */ \ + vpxor w3, XTMP0, XTMP0; + +#define SCHED_W_1(round, w0, w1, w2, w3, w4, w5) \ + /* w[i - 3] == w5 */ \ + /* rol(XMM5, 15) ^ XTMP0 => XTMP0 */ \ + vpslld $15, w5, XTMP2; \ + vpsrld $(32-15), w5, XTMP3; \ + vpxor XTMP2, XTMP3, XTMP3; \ + vpxor XTMP3, XTMP0, XTMP0; \ + /* rol(XTMP1, 7) => XTMP1 */ \ + vpslld $7, XTMP1, XTMP5; \ + vpsrld $(32-7), XTMP1, XTMP1; \ + vpxor XTMP5, XTMP1, XTMP1; \ + /* XMM4 ^ XTMP1 => XTMP1 */ \ + vpxor w4, XTMP1, XTMP1; \ + /* w[i - 6] == XMM4 */ \ + /* P1(XTMP0) ^ XTMP1 => XMM0 */ \ + vpslld $15, XTMP0, XTMP5; \ + vpsrld $(32-15), XTMP0, XTMP6; \ + vpslld $23, XTMP0, XTMP2; \ + vpsrld $(32-23), XTMP0, XTMP3; \ + vpxor XTMP0, XTMP1, XTMP1; \ + vpxor XTMP6, XTMP5, XTMP5; \ + vpxor XTMP3, XTMP2, XTMP2; \ + vpxor XTMP2, XTMP5, XTMP5; \ + vpxor XTMP5, XTMP1, w0; + +#define SCHED_W_2(round, w0, w1, w2, w3, w4, w5) \ + /* W1 in XMM12 */ \ + vpshufd $0b10111111, w4, XTMP4; \ + vpalignr $12, XTMP4, w5, XTMP4; \ + vmovdqa XTMP4, XW_W1_ADDR((round), 0); \ + /* W1 ^ W2 => XTMP1 */ \ + vpxor w0, XTMP4, XTMP1; \ + vmovdqa XTMP1, XW_W1W2_ADDR((round), 0); + + +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 + +.Lbe32mask: + .long 0x00010203, 0x04050607, 0x08090a0b, 0x0c0d0e0f + +.text + +/* + * Transform nblocks*64 bytes (nblocks*16 32-bit words) at DATA. + * + * void sm3_transform_avx(struct sm3_state *state, + * const u8 *data, int nblocks); + */ +.align 16 +SYM_FUNC_START(sm3_transform_avx) + /* input: + * %rdi: ctx, CTX + * %rsi: data (64*nblks bytes) + * %rdx: nblocks + */ + vzeroupper; + + pushq %rbp; + movq %rsp, %rbp; + + movq %rdx, RNBLKS; + + subq $STACK_SIZE, %rsp; + andq $(~63), %rsp; + + movq %rbx, (STACK_REG_SAVE + 0 * 8)(%rsp); + movq %r15, (STACK_REG_SAVE + 1 * 8)(%rsp); + movq %r14, (STACK_REG_SAVE + 2 * 8)(%rsp); + movq %r13, (STACK_REG_SAVE + 3 * 8)(%rsp); + movq %r12, (STACK_REG_SAVE + 4 * 8)(%rsp); + + vmovdqa .Lbe32mask (%rip), BSWAP_REG; + + /* Get the values of the chaining variables. */ + movl state_h0(RSTATE), a; + movl state_h1(RSTATE), b; + movl state_h2(RSTATE), c; + movl state_h3(RSTATE), d; + movl state_h4(RSTATE), e; + movl state_h5(RSTATE), f; + movl state_h6(RSTATE), g; + movl state_h7(RSTATE), h; + +.align 16 +.Loop: + /* Load data part1. */ + LOAD_W_XMM_1(); + + leaq -1(RNBLKS), RNBLKS; + + /* Transform 0-3 + Load data part2. */ + R1(a, b, c, d, e, f, g, h, 0, 0, IW); LOAD_W_XMM_2(); + R1(d, a, b, c, h, e, f, g, 1, 1, IW); + R1(c, d, a, b, g, h, e, f, 2, 2, IW); + R1(b, c, d, a, f, g, h, e, 3, 3, IW); LOAD_W_XMM_3(); + + /* Transform 4-7 + Precalc 12-14. */ + R1(a, b, c, d, e, f, g, h, 4, 0, IW); + R1(d, a, b, c, h, e, f, g, 5, 1, IW); + R1(c, d, a, b, g, h, e, f, 6, 2, IW); SCHED_W_0(12, W0, W1, W2, W3, W4, W5); + R1(b, c, d, a, f, g, h, e, 7, 3, IW); SCHED_W_1(12, W0, W1, W2, W3, W4, W5); + + /* Transform 8-11 + Precalc 12-17. */ + R1(a, b, c, d, e, f, g, h, 8, 0, IW); SCHED_W_2(12, W0, W1, W2, W3, W4, W5); + R1(d, a, b, c, h, e, f, g, 9, 1, IW); SCHED_W_0(15, W1, W2, W3, W4, W5, W0); + R1(c, d, a, b, g, h, e, f, 10, 2, IW); SCHED_W_1(15, W1, W2, W3, W4, W5, W0); + R1(b, c, d, a, f, g, h, e, 11, 3, IW); SCHED_W_2(15, W1, W2, W3, W4, W5, W0); + + /* Transform 12-14 + Precalc 18-20 */ + R1(a, b, c, d, e, f, g, h, 12, 0, XW); SCHED_W_0(18, W2, W3, W4, W5, W0, W1); + R1(d, a, b, c, h, e, f, g, 13, 1, XW); SCHED_W_1(18, W2, W3, W4, W5, W0, W1); + R1(c, d, a, b, g, h, e, f, 14, 2, XW); SCHED_W_2(18, W2, W3, W4, W5, W0, W1); + + /* Transform 15-17 + Precalc 21-23 */ + R1(b, c, d, a, f, g, h, e, 15, 0, XW); SCHED_W_0(21, W3, W4, W5, W0, W1, W2); + R2(a, b, c, d, e, f, g, h, 16, 1, XW); SCHED_W_1(21, W3, W4, W5, W0, W1, W2); + R2(d, a, b, c, h, e, f, g, 17, 2, XW); SCHED_W_2(21, W3, W4, W5, W0, W1, W2); + + /* Transform 18-20 + Precalc 24-26 */ + R2(c, d, a, b, g, h, e, f, 18, 0, XW); SCHED_W_0(24, W4, W5, W0, W1, W2, W3); + R2(b, c, d, a, f, g, h, e, 19, 1, XW); SCHED_W_1(24, W4, W5, W0, W1, W2, W3); + R2(a, b, c, d, e, f, g, h, 20, 2, XW); SCHED_W_2(24, W4, W5, W0, W1, W2, W3); + + /* Transform 21-23 + Precalc 27-29 */ + R2(d, a, b, c, h, e, f, g, 21, 0, XW); SCHED_W_0(27, W5, W0, W1, W2, W3, W4); + R2(c, d, a, b, g, h, e, f, 22, 1, XW); SCHED_W_1(27, W5, W0, W1, W2, W3, W4); + R2(b, c, d, a, f, g, h, e, 23, 2, XW); SCHED_W_2(27, W5, W0, W1, W2, W3, W4); + + /* Transform 24-26 + Precalc 30-32 */ + R2(a, b, c, d, e, f, g, h, 24, 0, XW); SCHED_W_0(30, W0, W1, W2, W3, W4, W5); + R2(d, a, b, c, h, e, f, g, 25, 1, XW); SCHED_W_1(30, W0, W1, W2, W3, W4, W5); + R2(c, d, a, b, g, h, e, f, 26, 2, XW); SCHED_W_2(30, W0, W1, W2, W3, W4, W5); + + /* Transform 27-29 + Precalc 33-35 */ + R2(b, c, d, a, f, g, h, e, 27, 0, XW); SCHED_W_0(33, W1, W2, W3, W4, W5, W0); + R2(a, b, c, d, e, f, g, h, 28, 1, XW); SCHED_W_1(33, W1, W2, W3, W4, W5, W0); + R2(d, a, b, c, h, e, f, g, 29, 2, XW); SCHED_W_2(33, W1, W2, W3, W4, W5, W0); + + /* Transform 30-32 + Precalc 36-38 */ + R2(c, d, a, b, g, h, e, f, 30, 0, XW); SCHED_W_0(36, W2, W3, W4, W5, W0, W1); + R2(b, c, d, a, f, g, h, e, 31, 1, XW); SCHED_W_1(36, W2, W3, W4, W5, W0, W1); + R2(a, b, c, d, e, f, g, h, 32, 2, XW); SCHED_W_2(36, W2, W3, W4, W5, W0, W1); + + /* Transform 33-35 + Precalc 39-41 */ + R2(d, a, b, c, h, e, f, g, 33, 0, XW); SCHED_W_0(39, W3, W4, W5, W0, W1, W2); + R2(c, d, a, b, g, h, e, f, 34, 1, XW); SCHED_W_1(39, W3, W4, W5, W0, W1, W2); + R2(b, c, d, a, f, g, h, e, 35, 2, XW); SCHED_W_2(39, W3, W4, W5, W0, W1, W2); + + /* Transform 36-38 + Precalc 42-44 */ + R2(a, b, c, d, e, f, g, h, 36, 0, XW); SCHED_W_0(42, W4, W5, W0, W1, W2, W3); + R2(d, a, b, c, h, e, f, g, 37, 1, XW); SCHED_W_1(42, W4, W5, W0, W1, W2, W3); + R2(c, d, a, b, g, h, e, f, 38, 2, XW); SCHED_W_2(42, W4, W5, W0, W1, W2, W3); + + /* Transform 39-41 + Precalc 45-47 */ + R2(b, c, d, a, f, g, h, e, 39, 0, XW); SCHED_W_0(45, W5, W0, W1, W2, W3, W4); + R2(a, b, c, d, e, f, g, h, 40, 1, XW); SCHED_W_1(45, W5, W0, W1, W2, W3, W4); + R2(d, a, b, c, h, e, f, g, 41, 2, XW); SCHED_W_2(45, W5, W0, W1, W2, W3, W4); + + /* Transform 42-44 + Precalc 48-50 */ + R2(c, d, a, b, g, h, e, f, 42, 0, XW); SCHED_W_0(48, W0, W1, W2, W3, W4, W5); + R2(b, c, d, a, f, g, h, e, 43, 1, XW); SCHED_W_1(48, W0, W1, W2, W3, W4, W5); + R2(a, b, c, d, e, f, g, h, 44, 2, XW); SCHED_W_2(48, W0, W1, W2, W3, W4, W5); + + /* Transform 45-47 + Precalc 51-53 */ + R2(d, a, b, c, h, e, f, g, 45, 0, XW); SCHED_W_0(51, W1, W2, W3, W4, W5, W0); + R2(c, d, a, b, g, h, e, f, 46, 1, XW); SCHED_W_1(51, W1, W2, W3, W4, W5, W0); + R2(b, c, d, a, f, g, h, e, 47, 2, XW); SCHED_W_2(51, W1, W2, W3, W4, W5, W0); + + /* Transform 48-50 + Precalc 54-56 */ + R2(a, b, c, d, e, f, g, h, 48, 0, XW); SCHED_W_0(54, W2, W3, W4, W5, W0, W1); + R2(d, a, b, c, h, e, f, g, 49, 1, XW); SCHED_W_1(54, W2, W3, W4, W5, W0, W1); + R2(c, d, a, b, g, h, e, f, 50, 2, XW); SCHED_W_2(54, W2, W3, W4, W5, W0, W1); + + /* Transform 51-53 + Precalc 57-59 */ + R2(b, c, d, a, f, g, h, e, 51, 0, XW); SCHED_W_0(57, W3, W4, W5, W0, W1, W2); + R2(a, b, c, d, e, f, g, h, 52, 1, XW); SCHED_W_1(57, W3, W4, W5, W0, W1, W2); + R2(d, a, b, c, h, e, f, g, 53, 2, XW); SCHED_W_2(57, W3, W4, W5, W0, W1, W2); + + /* Transform 54-56 + Precalc 60-62 */ + R2(c, d, a, b, g, h, e, f, 54, 0, XW); SCHED_W_0(60, W4, W5, W0, W1, W2, W3); + R2(b, c, d, a, f, g, h, e, 55, 1, XW); SCHED_W_1(60, W4, W5, W0, W1, W2, W3); + R2(a, b, c, d, e, f, g, h, 56, 2, XW); SCHED_W_2(60, W4, W5, W0, W1, W2, W3); + + /* Transform 57-59 + Precalc 63 */ + R2(d, a, b, c, h, e, f, g, 57, 0, XW); SCHED_W_0(63, W5, W0, W1, W2, W3, W4); + R2(c, d, a, b, g, h, e, f, 58, 1, XW); + R2(b, c, d, a, f, g, h, e, 59, 2, XW); SCHED_W_1(63, W5, W0, W1, W2, W3, W4); + + /* Transform 60-62 + Precalc 63 */ + R2(a, b, c, d, e, f, g, h, 60, 0, XW); + R2(d, a, b, c, h, e, f, g, 61, 1, XW); SCHED_W_2(63, W5, W0, W1, W2, W3, W4); + R2(c, d, a, b, g, h, e, f, 62, 2, XW); + + /* Transform 63 */ + R2(b, c, d, a, f, g, h, e, 63, 0, XW); + + /* Update the chaining variables. */ + xorl state_h0(RSTATE), a; + xorl state_h1(RSTATE), b; + xorl state_h2(RSTATE), c; + xorl state_h3(RSTATE), d; + movl a, state_h0(RSTATE); + movl b, state_h1(RSTATE); + movl c, state_h2(RSTATE); + movl d, state_h3(RSTATE); + xorl state_h4(RSTATE), e; + xorl state_h5(RSTATE), f; + xorl state_h6(RSTATE), g; + xorl state_h7(RSTATE), h; + movl e, state_h4(RSTATE); + movl f, state_h5(RSTATE); + movl g, state_h6(RSTATE); + movl h, state_h7(RSTATE); + + cmpq $0, RNBLKS; + jne .Loop; + + vzeroall; + + movq (STACK_REG_SAVE + 0 * 8)(%rsp), %rbx; + movq (STACK_REG_SAVE + 1 * 8)(%rsp), %r15; + movq (STACK_REG_SAVE + 2 * 8)(%rsp), %r14; + movq (STACK_REG_SAVE + 3 * 8)(%rsp), %r13; + movq (STACK_REG_SAVE + 4 * 8)(%rsp), %r12; + + vmovdqa %xmm0, IW_W1_ADDR(0, 0); + vmovdqa %xmm0, IW_W1W2_ADDR(0, 0); + vmovdqa %xmm0, IW_W1_ADDR(4, 0); + vmovdqa %xmm0, IW_W1W2_ADDR(4, 0); + vmovdqa %xmm0, IW_W1_ADDR(8, 0); + vmovdqa %xmm0, IW_W1W2_ADDR(8, 0); + + movq %rbp, %rsp; + popq %rbp; + ret; +SYM_FUNC_END(sm3_transform_avx) diff --git a/arch/x86/crypto/sm3_avx_glue.c b/arch/x86/crypto/sm3_avx_glue.c new file mode 100644 index 000000000000..661b6f22ffcd --- /dev/null +++ b/arch/x86/crypto/sm3_avx_glue.c @@ -0,0 +1,134 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ +/* + * SM3 Secure Hash Algorithm, AVX assembler accelerated. + * specified in: https://datatracker.ietf.org/doc/html/draft-sca-cfrg-sm3-02 + * + * Copyright (C) 2021 Tianjia Zhang <tianjia.zhang@linux.alibaba.com> + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <crypto/internal/hash.h> +#include <crypto/internal/simd.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/types.h> +#include <crypto/sm3.h> +#include <crypto/sm3_base.h> +#include <asm/simd.h> + +asmlinkage void sm3_transform_avx(struct sm3_state *state, + const u8 *data, int nblocks); + +static int sm3_avx_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + struct sm3_state *sctx = shash_desc_ctx(desc); + + if (!crypto_simd_usable() || + (sctx->count % SM3_BLOCK_SIZE) + len < SM3_BLOCK_SIZE) { + sm3_update(sctx, data, len); + return 0; + } + + /* + * Make sure struct sm3_state begins directly with the SM3 + * 256-bit internal state, as this is what the asm functions expect. + */ + BUILD_BUG_ON(offsetof(struct sm3_state, state) != 0); + + kernel_fpu_begin(); + sm3_base_do_update(desc, data, len, sm3_transform_avx); + kernel_fpu_end(); + + return 0; +} + +static int sm3_avx_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + if (!crypto_simd_usable()) { + struct sm3_state *sctx = shash_desc_ctx(desc); + + if (len) + sm3_update(sctx, data, len); + + sm3_final(sctx, out); + return 0; + } + + kernel_fpu_begin(); + if (len) + sm3_base_do_update(desc, data, len, sm3_transform_avx); + sm3_base_do_finalize(desc, sm3_transform_avx); + kernel_fpu_end(); + + return sm3_base_finish(desc, out); +} + +static int sm3_avx_final(struct shash_desc *desc, u8 *out) +{ + if (!crypto_simd_usable()) { + sm3_final(shash_desc_ctx(desc), out); + return 0; + } + + kernel_fpu_begin(); + sm3_base_do_finalize(desc, sm3_transform_avx); + kernel_fpu_end(); + + return sm3_base_finish(desc, out); +} + +static struct shash_alg sm3_avx_alg = { + .digestsize = SM3_DIGEST_SIZE, + .init = sm3_base_init, + .update = sm3_avx_update, + .final = sm3_avx_final, + .finup = sm3_avx_finup, + .descsize = sizeof(struct sm3_state), + .base = { + .cra_name = "sm3", + .cra_driver_name = "sm3-avx", + .cra_priority = 300, + .cra_blocksize = SM3_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static int __init sm3_avx_mod_init(void) +{ + const char *feature_name; + + if (!boot_cpu_has(X86_FEATURE_AVX)) { + pr_info("AVX instruction are not detected.\n"); + return -ENODEV; + } + + if (!boot_cpu_has(X86_FEATURE_BMI2)) { + pr_info("BMI2 instruction are not detected.\n"); + return -ENODEV; + } + + if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, + &feature_name)) { + pr_info("CPU feature '%s' is not supported.\n", feature_name); + return -ENODEV; + } + + return crypto_register_shash(&sm3_avx_alg); +} + +static void __exit sm3_avx_mod_exit(void) +{ + crypto_unregister_shash(&sm3_avx_alg); +} + +module_init(sm3_avx_mod_init); +module_exit(sm3_avx_mod_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Tianjia Zhang <tianjia.zhang@linux.alibaba.com>"); +MODULE_DESCRIPTION("SM3 Secure Hash Algorithm, AVX assembler accelerated"); +MODULE_ALIAS_CRYPTO("sm3"); +MODULE_ALIAS_CRYPTO("sm3-avx"); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a3c7ca876aeb..e88791b420ee 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -181,6 +181,27 @@ static struct event_constraint intel_gen_event_constraints[] __read_mostly = EVENT_CONSTRAINT_END }; +static struct event_constraint intel_v5_gen_event_constraints[] __read_mostly = +{ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */ + FIXED_EVENT_CONSTRAINT(0x0400, 3), /* SLOTS */ + FIXED_EVENT_CONSTRAINT(0x0500, 4), + FIXED_EVENT_CONSTRAINT(0x0600, 5), + FIXED_EVENT_CONSTRAINT(0x0700, 6), + FIXED_EVENT_CONSTRAINT(0x0800, 7), + FIXED_EVENT_CONSTRAINT(0x0900, 8), + FIXED_EVENT_CONSTRAINT(0x0a00, 9), + FIXED_EVENT_CONSTRAINT(0x0b00, 10), + FIXED_EVENT_CONSTRAINT(0x0c00, 11), + FIXED_EVENT_CONSTRAINT(0x0d00, 12), + FIXED_EVENT_CONSTRAINT(0x0e00, 13), + FIXED_EVENT_CONSTRAINT(0x0f00, 14), + FIXED_EVENT_CONSTRAINT(0x1000, 15), + EVENT_CONSTRAINT_END +}; + static struct event_constraint intel_slm_event_constraints[] __read_mostly = { FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ @@ -6308,7 +6329,9 @@ __init int intel_pmu_init(void) pr_cont("generic architected perfmon v1, "); name = "generic_arch_v1"; break; - default: + case 2: + case 3: + case 4: /* * default constraints for v2 and up */ @@ -6316,6 +6339,21 @@ __init int intel_pmu_init(void) pr_cont("generic architected perfmon, "); name = "generic_arch_v2+"; break; + default: + /* + * The default constraints for v5 and up can support up to + * 16 fixed counters. For the fixed counters 4 and later, + * the pseudo-encoding is applied. + * The constraints may be cut according to the CPUID enumeration + * by inserting the EVENT_CONSTRAINT_END. + */ + if (x86_pmu.num_counters_fixed > INTEL_PMC_MAX_FIXED) + x86_pmu.num_counters_fixed = INTEL_PMC_MAX_FIXED; + intel_v5_gen_event_constraints[x86_pmu.num_counters_fixed].weight = -1; + x86_pmu.event_constraints = intel_v5_gen_event_constraints; + pr_cont("generic architected perfmon, "); + name = "generic_arch_v5+"; + break; } } diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 2e215369df4a..376cc3d66094 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1203,7 +1203,10 @@ static void intel_pmu_pebs_via_pt_enable(struct perf_event *event) if (hwc->idx >= INTEL_PMC_IDX_FIXED) { base = MSR_RELOAD_FIXED_CTR0; idx = hwc->idx - INTEL_PMC_IDX_FIXED; - value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx]; + if (x86_pmu.intel_cap.pebs_format < 5) + value = ds->pebs_event_reset[MAX_PEBS_EVENTS_FMT4 + idx]; + else + value = ds->pebs_event_reset[MAX_PEBS_EVENTS + idx]; } wrmsrl(base + idx, value); } @@ -1232,8 +1235,12 @@ void intel_pmu_pebs_enable(struct perf_event *event) } } - if (idx >= INTEL_PMC_IDX_FIXED) - idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED); + if (idx >= INTEL_PMC_IDX_FIXED) { + if (x86_pmu.intel_cap.pebs_format < 5) + idx = MAX_PEBS_EVENTS_FMT4 + (idx - INTEL_PMC_IDX_FIXED); + else + idx = MAX_PEBS_EVENTS + (idx - INTEL_PMC_IDX_FIXED); + } /* * Use auto-reload if possible to save a MSR write in the PMI. @@ -2204,6 +2211,7 @@ void __init intel_ds_init(void) break; case 4: + case 5: x86_pmu.drain_pebs = intel_pmu_drain_pebs_icl; x86_pmu.pebs_record_size = sizeof(struct pebs_basic); if (x86_pmu.intel_cap.pebs_baseline) { diff --git a/arch/x86/events/intel/lbr.c b/arch/x86/events/intel/lbr.c index 669c2be14784..fe1742c4ca49 100644 --- a/arch/x86/events/intel/lbr.c +++ b/arch/x86/events/intel/lbr.c @@ -1329,10 +1329,10 @@ static int branch_map[X86_BR_TYPE_MAP_MAX] = { PERF_BR_SYSCALL, /* X86_BR_SYSCALL */ PERF_BR_SYSRET, /* X86_BR_SYSRET */ PERF_BR_UNKNOWN, /* X86_BR_INT */ - PERF_BR_UNKNOWN, /* X86_BR_IRET */ + PERF_BR_ERET, /* X86_BR_IRET */ PERF_BR_COND, /* X86_BR_JCC */ PERF_BR_UNCOND, /* X86_BR_JMP */ - PERF_BR_UNKNOWN, /* X86_BR_IRQ */ + PERF_BR_IRQ, /* X86_BR_IRQ */ PERF_BR_IND_CALL, /* X86_BR_IND_CALL */ PERF_BR_UNKNOWN, /* X86_BR_ABORT */ PERF_BR_UNKNOWN, /* X86_BR_IN_TX */ diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 2d33bba9a144..82ef87e9a897 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -13,6 +13,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include <linux/types.h> +#include <linux/bits.h> +#include <linux/limits.h> #include <linux/slab.h> #include <linux/device.h> @@ -57,6 +59,8 @@ static struct pt_cap_desc { PT_CAP(mtc, 0, CPUID_EBX, BIT(3)), PT_CAP(ptwrite, 0, CPUID_EBX, BIT(4)), PT_CAP(power_event_trace, 0, CPUID_EBX, BIT(5)), + PT_CAP(event_trace, 0, CPUID_EBX, BIT(7)), + PT_CAP(tnt_disable, 0, CPUID_EBX, BIT(8)), PT_CAP(topa_output, 0, CPUID_ECX, BIT(0)), PT_CAP(topa_multiple_entries, 0, CPUID_ECX, BIT(1)), PT_CAP(single_range_output, 0, CPUID_ECX, BIT(2)), @@ -108,6 +112,8 @@ PMU_FORMAT_ATTR(tsc, "config:10" ); PMU_FORMAT_ATTR(noretcomp, "config:11" ); PMU_FORMAT_ATTR(ptw, "config:12" ); PMU_FORMAT_ATTR(branch, "config:13" ); +PMU_FORMAT_ATTR(event, "config:31" ); +PMU_FORMAT_ATTR(notnt, "config:55" ); PMU_FORMAT_ATTR(mtc_period, "config:14-17" ); PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" ); PMU_FORMAT_ATTR(psb_period, "config:24-27" ); @@ -116,6 +122,8 @@ static struct attribute *pt_formats_attr[] = { &format_attr_pt.attr, &format_attr_cyc.attr, &format_attr_pwr_evt.attr, + &format_attr_event.attr, + &format_attr_notnt.attr, &format_attr_fup_on_ptw.attr, &format_attr_mtc.attr, &format_attr_tsc.attr, @@ -296,6 +304,8 @@ fail: RTIT_CTL_CYC_PSB | \ RTIT_CTL_MTC | \ RTIT_CTL_PWR_EVT_EN | \ + RTIT_CTL_EVENT_EN | \ + RTIT_CTL_NOTNT | \ RTIT_CTL_FUP_ON_PTW | \ RTIT_CTL_PTW_EN) @@ -350,6 +360,14 @@ static bool pt_event_valid(struct perf_event *event) !intel_pt_validate_hw_cap(PT_CAP_power_event_trace)) return false; + if (config & RTIT_CTL_EVENT_EN && + !intel_pt_validate_hw_cap(PT_CAP_event_trace)) + return false; + + if (config & RTIT_CTL_NOTNT && + !intel_pt_validate_hw_cap(PT_CAP_tnt_disable)) + return false; + if (config & RTIT_CTL_PTW) { if (!intel_pt_validate_hw_cap(PT_CAP_ptwrite)) return false; @@ -472,7 +490,7 @@ static u64 pt_config_filters(struct perf_event *event) pt->filters.filter[range].msr_b = filter->msr_b; } - rtit_ctl |= filter->config << pt_address_ranges[range].reg_off; + rtit_ctl |= (u64)filter->config << pt_address_ranges[range].reg_off; } return rtit_ctl; @@ -1348,10 +1366,26 @@ static void pt_addr_filters_fini(struct perf_event *event) event->hw.addr_filters = NULL; } -static inline bool valid_kernel_ip(unsigned long ip) +#ifdef CONFIG_X86_64 +/* Clamp to a canonical address greater-than-or-equal-to the address given */ +static u64 clamp_to_ge_canonical_addr(u64 vaddr, u8 vaddr_bits) +{ + return __is_canonical_address(vaddr, vaddr_bits) ? + vaddr : + -BIT_ULL(vaddr_bits - 1); +} + +/* Clamp to a canonical address less-than-or-equal-to the address given */ +static u64 clamp_to_le_canonical_addr(u64 vaddr, u8 vaddr_bits) { - return virt_addr_valid(ip) && kernel_ip(ip); + return __is_canonical_address(vaddr, vaddr_bits) ? + vaddr : + BIT_ULL(vaddr_bits - 1) - 1; } +#else +#define clamp_to_ge_canonical_addr(x, y) (x) +#define clamp_to_le_canonical_addr(x, y) (x) +#endif static int pt_event_addr_filters_validate(struct list_head *filters) { @@ -1367,14 +1401,6 @@ static int pt_event_addr_filters_validate(struct list_head *filters) filter->action == PERF_ADDR_FILTER_ACTION_START) return -EOPNOTSUPP; - if (!filter->path.dentry) { - if (!valid_kernel_ip(filter->offset)) - return -EINVAL; - - if (!valid_kernel_ip(filter->offset + filter->size)) - return -EINVAL; - } - if (++range > intel_pt_validate_hw_cap(PT_CAP_num_address_ranges)) return -EOPNOTSUPP; } @@ -1398,9 +1424,26 @@ static void pt_event_addr_filters_sync(struct perf_event *event) if (filter->path.dentry && !fr[range].start) { msr_a = msr_b = 0; } else { - /* apply the offset */ - msr_a = fr[range].start; - msr_b = msr_a + fr[range].size - 1; + unsigned long n = fr[range].size - 1; + unsigned long a = fr[range].start; + unsigned long b; + + if (a > ULONG_MAX - n) + b = ULONG_MAX; + else + b = a + n; + /* + * Apply the offset. 64-bit addresses written to the + * MSRs must be canonical, but the range can encompass + * non-canonical addresses. Since software cannot + * execute at non-canonical addresses, adjusting to + * canonical addresses does not affect the result of the + * address filter. + */ + msr_a = clamp_to_ge_canonical_addr(a, boot_cpu_data.x86_virt_bits); + msr_b = clamp_to_le_canonical_addr(b, boot_cpu_data.x86_virt_bits); + if (msr_b < msr_a) + msr_a = msr_b = 0; } filters->filter[range].msr_a = msr_a; diff --git a/arch/x86/events/intel/uncore_discovery.c b/arch/x86/events/intel/uncore_discovery.c index 6ddadb482f68..5fd72d4b8bbb 100644 --- a/arch/x86/events/intel/uncore_discovery.c +++ b/arch/x86/events/intel/uncore_discovery.c @@ -215,10 +215,18 @@ static int parse_discovery_table(struct pci_dev *dev, int die, pci_read_config_dword(dev, bar_offset, &val); - if (val & UNCORE_DISCOVERY_MASK) + if (val & ~PCI_BASE_ADDRESS_MEM_MASK & ~PCI_BASE_ADDRESS_MEM_TYPE_64) return -EINVAL; - addr = (resource_size_t)(val & ~UNCORE_DISCOVERY_MASK); + addr = (resource_size_t)(val & PCI_BASE_ADDRESS_MEM_MASK); +#ifdef CONFIG_PHYS_ADDR_T_64BIT + if ((val & PCI_BASE_ADDRESS_MEM_TYPE_MASK) == PCI_BASE_ADDRESS_MEM_TYPE_64) { + u32 val2; + + pci_read_config_dword(dev, bar_offset + 4, &val2); + addr |= ((resource_size_t)val2) << 32; + } +#endif size = UNCORE_DISCOVERY_GLOBAL_MAP_SIZE; io_addr = ioremap(addr, size); if (!io_addr) @@ -444,7 +452,7 @@ static struct intel_uncore_ops generic_uncore_pci_ops = { #define UNCORE_GENERIC_MMIO_SIZE 0x4000 -static unsigned int generic_uncore_mmio_box_ctl(struct intel_uncore_box *box) +static u64 generic_uncore_mmio_box_ctl(struct intel_uncore_box *box) { struct intel_uncore_type *type = box->pmu->type; @@ -456,7 +464,7 @@ static unsigned int generic_uncore_mmio_box_ctl(struct intel_uncore_box *box) void intel_generic_uncore_mmio_init_box(struct intel_uncore_box *box) { - unsigned int box_ctl = generic_uncore_mmio_box_ctl(box); + u64 box_ctl = generic_uncore_mmio_box_ctl(box); struct intel_uncore_type *type = box->pmu->type; resource_size_t addr; diff --git a/arch/x86/events/intel/uncore_discovery.h b/arch/x86/events/intel/uncore_discovery.h index cfaf558bdb6b..f4439357779a 100644 --- a/arch/x86/events/intel/uncore_discovery.h +++ b/arch/x86/events/intel/uncore_discovery.h @@ -18,8 +18,6 @@ #define UNCORE_DISCOVERY_BIR_BASE 0x10 /* Discovery table BAR step */ #define UNCORE_DISCOVERY_BIR_STEP 0x4 -/* Mask of the discovery table offset */ -#define UNCORE_DISCOVERY_MASK 0xf /* Global discovery table size */ #define UNCORE_DISCOVERY_GLOBAL_MAP_SIZE 0x20 diff --git a/arch/x86/include/asm/coco.h b/arch/x86/include/asm/coco.h new file mode 100644 index 000000000000..3d98c3a60d34 --- /dev/null +++ b/arch/x86/include/asm/coco.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_COCO_H +#define _ASM_X86_COCO_H + +#include <asm/types.h> + +enum cc_vendor { + CC_VENDOR_NONE, + CC_VENDOR_AMD, + CC_VENDOR_HYPERV, + CC_VENDOR_INTEL, +}; + +void cc_set_vendor(enum cc_vendor v); +void cc_set_mask(u64 mask); + +#ifdef CONFIG_ARCH_HAS_CC_PLATFORM +u64 cc_mkenc(u64 val); +u64 cc_mkdec(u64 val); +#else +static inline u64 cc_mkenc(u64 val) +{ + return val; +} + +static inline u64 cc_mkdec(u64 val) +{ + return val; +} +#endif + +#endif /* _ASM_X86_COCO_H */ diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 65d147974f8d..3edf05e98e58 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -299,9 +299,6 @@ /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */ #define X86_FEATURE_AVX_VNNI (12*32+ 4) /* AVX VNNI instructions */ #define X86_FEATURE_AVX512_BF16 (12*32+ 5) /* AVX512 BFLOAT16 instructions */ -#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ -#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ -#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */ #define X86_FEATURE_CLZERO (13*32+ 0) /* CLZERO instruction */ @@ -330,6 +327,7 @@ #define X86_FEATURE_HWP_ACT_WINDOW (14*32+ 9) /* HWP Activity Window */ #define X86_FEATURE_HWP_EPP (14*32+10) /* HWP Energy Perf. Preference */ #define X86_FEATURE_HWP_PKG_REQ (14*32+11) /* HWP Package Level Request */ +#define X86_FEATURE_HFI (14*32+19) /* Hardware Feedback Interface */ /* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */ #define X86_FEATURE_NPT (15*32+ 0) /* Nested Page Table support */ @@ -390,7 +388,10 @@ #define X86_FEATURE_TSXLDTRK (18*32+16) /* TSX Suspend Load Address Tracking */ #define X86_FEATURE_PCONFIG (18*32+18) /* Intel PCONFIG */ #define X86_FEATURE_ARCH_LBR (18*32+19) /* Intel ARCH LBR */ +#define X86_FEATURE_AMX_BF16 (18*32+22) /* AMX bf16 Support */ #define X86_FEATURE_AVX512_FP16 (18*32+23) /* AVX512 FP16 */ +#define X86_FEATURE_AMX_TILE (18*32+24) /* AMX tile Support */ +#define X86_FEATURE_AMX_INT8 (18*32+25) /* AMX int8 Support */ #define X86_FEATURE_SPEC_CTRL (18*32+26) /* "" Speculation Control (IBRS + IBPB) */ #define X86_FEATURE_INTEL_STIBP (18*32+27) /* "" Single Thread Indirect Branch Predictors */ #define X86_FEATURE_FLUSH_L1D (18*32+28) /* Flush L1D cache */ diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h index 3afa990d756b..c5aed9e9226c 100644 --- a/arch/x86/include/asm/cpumask.h +++ b/arch/x86/include/asm/cpumask.h @@ -20,11 +20,21 @@ static __always_inline bool arch_cpu_online(int cpu) { return arch_test_bit(cpu, cpumask_bits(cpu_online_mask)); } + +static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp) +{ + arch_clear_bit(cpumask_check(cpu), cpumask_bits(dstp)); +} #else static __always_inline bool arch_cpu_online(int cpu) { return cpu == 0; } + +static __always_inline void arch_cpumask_clear_cpu(int cpu, struct cpumask *dstp) +{ + return; +} #endif #define arch_cpu_is_offline(cpu) unlikely(!arch_cpu_online(cpu)) diff --git a/arch/x86/include/asm/disabled-features.h b/arch/x86/include/asm/disabled-features.h index 8f28fafa98b3..1231d63f836d 100644 --- a/arch/x86/include/asm/disabled-features.h +++ b/arch/x86/include/asm/disabled-features.h @@ -56,8 +56,11 @@ # define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) #endif -/* Force disable because it's broken beyond repair */ -#define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#ifdef CONFIG_INTEL_IOMMU_SVM +# define DISABLE_ENQCMD 0 +#else +# define DISABLE_ENQCMD (1 << (X86_FEATURE_ENQCMD & 31)) +#endif #ifdef CONFIG_X86_SGX # define DISABLE_SGX 0 diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 05a6ab940f45..1b29f58f730f 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -124,7 +124,7 @@ struct insn { #define X86_VEX_B(vex) ((vex) & 0x20) /* VEX3 Byte1 */ #define X86_VEX_L(vex) ((vex) & 0x04) /* VEX3 Byte2, VEX2 Byte1 */ /* VEX bit fields */ -#define X86_EVEX_M(vex) ((vex) & 0x03) /* EVEX Byte1 */ +#define X86_EVEX_M(vex) ((vex) & 0x07) /* EVEX Byte1 */ #define X86_VEX3_M(vex) ((vex) & 0x1f) /* VEX3 Byte1 */ #define X86_VEX2_M 1 /* VEX2.M always 1 */ #define X86_VEX_V(vex) (((vex) & 0x78) >> 3) /* VEX3 Byte2, VEX2 Byte1 */ diff --git a/arch/x86/include/asm/intel_ds.h b/arch/x86/include/asm/intel_ds.h index 8380c3ddd4b2..2f9eeb5c3069 100644 --- a/arch/x86/include/asm/intel_ds.h +++ b/arch/x86/include/asm/intel_ds.h @@ -7,8 +7,9 @@ #define PEBS_BUFFER_SIZE (PAGE_SIZE << 4) /* The maximal number of PEBS events: */ -#define MAX_PEBS_EVENTS 8 -#define MAX_FIXED_PEBS_EVENTS 4 +#define MAX_PEBS_EVENTS_FMT4 8 +#define MAX_PEBS_EVENTS 32 +#define MAX_FIXED_PEBS_EVENTS 16 /* * A debug store configuration. diff --git a/arch/x86/include/asm/intel_pt.h b/arch/x86/include/asm/intel_pt.h index ebe8d2ea44fe..c796e9bc98b6 100644 --- a/arch/x86/include/asm/intel_pt.h +++ b/arch/x86/include/asm/intel_pt.h @@ -13,6 +13,8 @@ enum pt_capabilities { PT_CAP_mtc, PT_CAP_ptwrite, PT_CAP_power_event_trace, + PT_CAP_event_trace, + PT_CAP_tnt_disable, PT_CAP_topa_output, PT_CAP_topa_multiple_entries, PT_CAP_single_range_output, diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ec9830d2aabf..b61e46743184 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -498,6 +498,7 @@ struct kvm_pmc { bool intr; }; +#define KVM_PMC_MAX_FIXED 3 struct kvm_pmu { unsigned nr_arch_gp_counters; unsigned nr_arch_fixed_counters; @@ -511,7 +512,7 @@ struct kvm_pmu { u64 reserved_bits; u8 version; struct kvm_pmc gp_counters[INTEL_PMC_MAX_GENERIC]; - struct kvm_pmc fixed_counters[INTEL_PMC_MAX_FIXED]; + struct kvm_pmc fixed_counters[KVM_PMC_MAX_FIXED]; struct irq_work irq_work; DECLARE_BITMAP(reprogram_pmi, X86_PMC_IDX_MAX); DECLARE_BITMAP(all_valid_pmc_idx, X86_PMC_IDX_MAX); diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index a4a39c3e0f19..9f1741ac4769 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -205,6 +205,8 @@ #define RTIT_CTL_DISRETC BIT(11) #define RTIT_CTL_PTW_EN BIT(12) #define RTIT_CTL_BRANCH_EN BIT(13) +#define RTIT_CTL_EVENT_EN BIT(31) +#define RTIT_CTL_NOTNT BIT_ULL(55) #define RTIT_CTL_MTC_RANGE_OFFSET 14 #define RTIT_CTL_MTC_RANGE (0x0full << RTIT_CTL_MTC_RANGE_OFFSET) #define RTIT_CTL_CYC_THRESH_OFFSET 19 @@ -705,12 +707,14 @@ #define PACKAGE_THERM_STATUS_PROCHOT (1 << 0) #define PACKAGE_THERM_STATUS_POWER_LIMIT (1 << 10) +#define PACKAGE_THERM_STATUS_HFI_UPDATED (1 << 26) #define MSR_IA32_PACKAGE_THERM_INTERRUPT 0x000001b2 #define PACKAGE_THERM_INT_HIGH_ENABLE (1 << 0) #define PACKAGE_THERM_INT_LOW_ENABLE (1 << 1) #define PACKAGE_THERM_INT_PLN_ENABLE (1 << 24) +#define PACKAGE_THERM_INT_HFI_ENABLE (1 << 25) /* Thermal Thresholds Support */ #define THERM_INT_THRESHOLD0_ENABLE (1 << 15) @@ -959,4 +963,8 @@ #define MSR_VM_IGNNE 0xc0010115 #define MSR_VM_HSAVE_PA 0xc0010117 +/* Hardware Feedback Interface */ +#define MSR_IA32_HW_FEEDBACK_PTR 0x17d0 +#define MSR_IA32_HW_FEEDBACK_CONFIG 0x17d1 + #endif /* _ASM_X86_MSR_INDEX_H */ diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 4d5810c8fab7..9cc82f305f4b 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -71,6 +71,16 @@ static inline void copy_user_page(void *to, void *from, unsigned long vaddr, extern bool __virt_addr_valid(unsigned long kaddr); #define virt_addr_valid(kaddr) __virt_addr_valid((unsigned long) (kaddr)) +static __always_inline u64 __canonical_address(u64 vaddr, u8 vaddr_bits) +{ + return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits); +} + +static __always_inline u64 __is_canonical_address(u64 vaddr, u8 vaddr_bits) +{ + return __canonical_address(vaddr, vaddr_bits) == vaddr; +} + #endif /* __ASSEMBLY__ */ #include <asm-generic/memory_model.h> diff --git a/arch/x86/include/asm/paravirt_api_clock.h b/arch/x86/include/asm/paravirt_api_clock.h new file mode 100644 index 000000000000..65ac7cee0dad --- /dev/null +++ b/arch/x86/include/asm/paravirt_api_clock.h @@ -0,0 +1 @@ +#include <asm/paravirt.h> diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index a69012e1903f..e1591467668e 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -279,7 +279,7 @@ extern void (*paravirt_iret)(void); #define paravirt_type(op) \ [paravirt_typenum] "i" (PARAVIRT_PATCH(op)), \ - [paravirt_opptr] "i" (&(pv_ops.op)) + [paravirt_opptr] "m" (pv_ops.op) #define paravirt_clobber(clobber) \ [paravirt_clobber] "i" (clobber) @@ -316,7 +316,7 @@ int paravirt_disable_iospace(void); */ #define PARAVIRT_CALL \ ANNOTATE_RETPOLINE_SAFE \ - "call *%c[paravirt_opptr];" + "call *%[paravirt_opptr];" /* * These macros are intended to wrap calls through one of the paravirt diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8fc1b5003713..58d9e4b1fa0a 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -7,7 +7,7 @@ */ #define INTEL_PMC_MAX_GENERIC 32 -#define INTEL_PMC_MAX_FIXED 4 +#define INTEL_PMC_MAX_FIXED 16 #define INTEL_PMC_IDX_FIXED 32 #define X86_PMC_IDX_MAX 64 diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 8a9432fb3802..62ab07e24aef 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -15,17 +15,12 @@ cachemode2protval(_PAGE_CACHE_MODE_UC_MINUS))) \ : (prot)) -/* - * Macros to add or remove encryption attribute - */ -#define pgprot_encrypted(prot) __pgprot(__sme_set(pgprot_val(prot))) -#define pgprot_decrypted(prot) __pgprot(__sme_clr(pgprot_val(prot))) - #ifndef __ASSEMBLY__ #include <linux/spinlock.h> #include <asm/x86_init.h> #include <asm/pkru.h> #include <asm/fpu/api.h> +#include <asm/coco.h> #include <asm-generic/pgtable_uffd.h> #include <linux/page_table_check.h> @@ -38,6 +33,12 @@ void ptdump_walk_pgd_level_debugfs(struct seq_file *m, struct mm_struct *mm, void ptdump_walk_pgd_level_checkwx(void); void ptdump_walk_user_pgd_level_checkwx(void); +/* + * Macros to add or remove encryption attribute + */ +#define pgprot_encrypted(prot) __pgprot(cc_mkenc(pgprot_val(prot))) +#define pgprot_decrypted(prot) __pgprot(cc_mkdec(pgprot_val(prot))) + #ifdef CONFIG_DEBUG_WX #define debug_checkwx() ptdump_walk_pgd_level_checkwx() #define debug_checkwx_user() ptdump_walk_user_pgd_level_checkwx() diff --git a/arch/x86/include/asm/preempt.h b/arch/x86/include/asm/preempt.h index fe5efbcba824..5f6daea1ee24 100644 --- a/arch/x86/include/asm/preempt.h +++ b/arch/x86/include/asm/preempt.h @@ -108,16 +108,18 @@ static __always_inline bool should_resched(int preempt_offset) extern asmlinkage void preempt_schedule(void); extern asmlinkage void preempt_schedule_thunk(void); -#define __preempt_schedule_func preempt_schedule_thunk +#define preempt_schedule_dynamic_enabled preempt_schedule_thunk +#define preempt_schedule_dynamic_disabled NULL extern asmlinkage void preempt_schedule_notrace(void); extern asmlinkage void preempt_schedule_notrace_thunk(void); -#define __preempt_schedule_notrace_func preempt_schedule_notrace_thunk +#define preempt_schedule_notrace_dynamic_enabled preempt_schedule_notrace_thunk +#define preempt_schedule_notrace_dynamic_disabled NULL #ifdef CONFIG_PREEMPT_DYNAMIC -DECLARE_STATIC_CALL(preempt_schedule, __preempt_schedule_func); +DECLARE_STATIC_CALL(preempt_schedule, preempt_schedule_dynamic_enabled); #define __preempt_schedule() \ do { \ @@ -125,7 +127,7 @@ do { \ asm volatile ("call " STATIC_CALL_TRAMP_STR(preempt_schedule) : ASM_CALL_CONSTRAINT); \ } while (0) -DECLARE_STATIC_CALL(preempt_schedule_notrace, __preempt_schedule_notrace_func); +DECLARE_STATIC_CALL(preempt_schedule_notrace, preempt_schedule_notrace_dynamic_enabled); #define __preempt_schedule_notrace() \ do { \ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 2c5f12ae7d04..a87e7c33d5ac 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -119,6 +119,8 @@ struct cpuinfo_x86 { int x86_cache_mbm_width_offset; int x86_power; unsigned long loops_per_jiffy; + /* protected processor identification number */ + u64 ppin; /* cpuid returned max cores value: */ u16 x86_max_cores; u16 apicid; diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 703663175a5a..4357e0f2cd5f 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -137,7 +137,7 @@ static __always_inline int user_mode(struct pt_regs *regs) #endif } -static inline int v8086_mode(struct pt_regs *regs) +static __always_inline int v8086_mode(struct pt_regs *regs) { #ifdef CONFIG_X86_32 return (regs->flags & X86_VM_MASK); diff --git a/arch/x86/include/asm/set_memory.h b/arch/x86/include/asm/set_memory.h index ff0f2d90338a..78ca53512486 100644 --- a/arch/x86/include/asm/set_memory.h +++ b/arch/x86/include/asm/set_memory.h @@ -53,7 +53,6 @@ int set_memory_global(unsigned long addr, int numpages); int set_pages_array_uc(struct page **pages, int addrinarray); int set_pages_array_wc(struct page **pages, int addrinarray); -int set_pages_array_wt(struct page **pages, int addrinarray); int set_pages_array_wb(struct page **pages, int addrinarray); /* @@ -84,7 +83,6 @@ int set_pages_rw(struct page *page, int numpages); int set_direct_map_invalid_noflush(struct page *page); int set_direct_map_default_noflush(struct page *page); bool kernel_page_present(struct page *page); -void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc); extern int kernel_set_to_readonly; diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h index 2f0b6be8eaab..9619385bf749 100644 --- a/arch/x86/include/asm/topology.h +++ b/arch/x86/include/asm/topology.h @@ -110,6 +110,7 @@ extern const struct cpumask *cpu_clustergroup_mask(int cpu); #define topology_logical_die_id(cpu) (cpu_data(cpu).logical_die_id) #define topology_die_id(cpu) (cpu_data(cpu).cpu_die_id) #define topology_core_id(cpu) (cpu_data(cpu).cpu_core_id) +#define topology_ppin(cpu) (cpu_data(cpu).ppin) extern unsigned int __max_die_per_package; @@ -215,15 +216,26 @@ extern void arch_scale_freq_tick(void); #define arch_scale_freq_tick arch_scale_freq_tick extern void arch_set_max_freq_ratio(bool turbo_disabled); +void init_freq_invariance(bool secondary, bool cppc_ready); #else static inline void arch_set_max_freq_ratio(bool turbo_disabled) { } +static inline void init_freq_invariance(bool secondary, bool cppc_ready) +{ +} #endif -#if defined(CONFIG_ACPI_CPPC_LIB) && defined(CONFIG_SMP) +#ifdef CONFIG_ACPI_CPPC_LIB void init_freq_invariance_cppc(void); -#define init_freq_invariance_cppc init_freq_invariance_cppc +#define arch_init_invariance_cppc init_freq_invariance_cppc + +bool amd_set_max_freq_ratio(u64 *ratio); +#else +static inline bool amd_set_max_freq_ratio(u64 *ratio) +{ + return false; +} #endif #endif /* _ASM_X86_TOPOLOGY_H */ diff --git a/arch/x86/include/asm/x86_init.h b/arch/x86/include/asm/x86_init.h index 22b7412c08f6..e9170457697e 100644 --- a/arch/x86/include/asm/x86_init.h +++ b/arch/x86/include/asm/x86_init.h @@ -142,6 +142,21 @@ struct x86_init_acpi { }; /** + * struct x86_guest - Functions used by misc guest incarnations like SEV, TDX, etc. + * + * @enc_status_change_prepare Notify HV before the encryption status of a range is changed + * @enc_status_change_finish Notify HV after the encryption status of a range is changed + * @enc_tlb_flush_required Returns true if a TLB flush is needed before changing page encryption status + * @enc_cache_flush_required Returns true if a cache flush is needed before changing page encryption status + */ +struct x86_guest { + void (*enc_status_change_prepare)(unsigned long vaddr, int npages, bool enc); + bool (*enc_status_change_finish)(unsigned long vaddr, int npages, bool enc); + bool (*enc_tlb_flush_required)(bool enc); + bool (*enc_cache_flush_required)(void); +}; + +/** * struct x86_init_ops - functions for platform specific setup * */ @@ -287,6 +302,7 @@ struct x86_platform_ops { struct x86_legacy_features legacy; void (*set_legacy_features)(void); struct x86_hyper_runtime hyper; + struct x86_guest guest; }; struct x86_apic_ops { diff --git a/arch/x86/include/asm/xor.h b/arch/x86/include/asm/xor.h index 2ee95a7769e6..7b0307acc410 100644 --- a/arch/x86/include/asm/xor.h +++ b/arch/x86/include/asm/xor.h @@ -57,7 +57,8 @@ op(i + 3, 3) static void -xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_sse_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 8; @@ -108,7 +109,8 @@ xor_sse_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_sse_2_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 8; @@ -142,8 +144,9 @@ xor_sse_2_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_sse_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 8; @@ -201,8 +204,9 @@ xor_sse_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_sse_3_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 8; @@ -238,8 +242,10 @@ xor_sse_3_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_sse_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 8; @@ -304,8 +310,10 @@ xor_sse_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_sse_4_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 8; @@ -343,8 +351,11 @@ xor_sse_4_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_sse_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned long lines = bytes >> 8; @@ -416,8 +427,11 @@ xor_sse_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_sse_5_pf64(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_sse_5_pf64(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned long lines = bytes >> 8; diff --git a/arch/x86/include/asm/xor_32.h b/arch/x86/include/asm/xor_32.h index 67ceb790e639..7a6b9474591e 100644 --- a/arch/x86/include/asm/xor_32.h +++ b/arch/x86/include/asm/xor_32.h @@ -21,7 +21,8 @@ #include <asm/fpu/api.h> static void -xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_pII_mmx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 7; @@ -64,8 +65,9 @@ xor_pII_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_pII_mmx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 7; @@ -113,8 +115,10 @@ xor_pII_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_pII_mmx_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 7; @@ -168,8 +172,11 @@ xor_pII_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, static void -xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_pII_mmx_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned long lines = bytes >> 7; @@ -248,7 +255,8 @@ xor_pII_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, #undef BLOCK static void -xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) +xor_p5_mmx_2(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 6; @@ -295,8 +303,9 @@ xor_p5_mmx_2(unsigned long bytes, unsigned long *p1, unsigned long *p2) } static void -xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3) +xor_p5_mmx_3(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 6; @@ -352,8 +361,10 @@ xor_p5_mmx_3(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4) +xor_p5_mmx_4(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 6; @@ -418,8 +429,11 @@ xor_p5_mmx_4(unsigned long bytes, unsigned long *p1, unsigned long *p2, } static void -xor_p5_mmx_5(unsigned long bytes, unsigned long *p1, unsigned long *p2, - unsigned long *p3, unsigned long *p4, unsigned long *p5) +xor_p5_mmx_5(unsigned long bytes, unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4, + const unsigned long * __restrict p5) { unsigned long lines = bytes >> 6; diff --git a/arch/x86/include/asm/xor_avx.h b/arch/x86/include/asm/xor_avx.h index 0c4e5b5e3852..7f81dd5897f4 100644 --- a/arch/x86/include/asm/xor_avx.h +++ b/arch/x86/include/asm/xor_avx.h @@ -26,7 +26,8 @@ BLOCK4(8) \ BLOCK4(12) -static void xor_avx_2(unsigned long bytes, unsigned long *p0, unsigned long *p1) +static void xor_avx_2(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1) { unsigned long lines = bytes >> 9; @@ -52,8 +53,9 @@ do { \ kernel_fpu_end(); } -static void xor_avx_3(unsigned long bytes, unsigned long *p0, unsigned long *p1, - unsigned long *p2) +static void xor_avx_3(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1, + const unsigned long * __restrict p2) { unsigned long lines = bytes >> 9; @@ -82,8 +84,10 @@ do { \ kernel_fpu_end(); } -static void xor_avx_4(unsigned long bytes, unsigned long *p0, unsigned long *p1, - unsigned long *p2, unsigned long *p3) +static void xor_avx_4(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3) { unsigned long lines = bytes >> 9; @@ -115,8 +119,11 @@ do { \ kernel_fpu_end(); } -static void xor_avx_5(unsigned long bytes, unsigned long *p0, unsigned long *p1, - unsigned long *p2, unsigned long *p3, unsigned long *p4) +static void xor_avx_5(unsigned long bytes, unsigned long * __restrict p0, + const unsigned long * __restrict p1, + const unsigned long * __restrict p2, + const unsigned long * __restrict p3, + const unsigned long * __restrict p4) { unsigned long lines = bytes >> 9; diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 6aef9ee28a39..6462e3dd98f4 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -21,7 +21,6 @@ CFLAGS_REMOVE_ftrace.o = -pg CFLAGS_REMOVE_early_printk.o = -pg CFLAGS_REMOVE_head64.o = -pg CFLAGS_REMOVE_sev.o = -pg -CFLAGS_REMOVE_cc_platform.o = -pg endif KASAN_SANITIZE_head$(BITS).o := n @@ -30,7 +29,6 @@ KASAN_SANITIZE_dumpstack_$(BITS).o := n KASAN_SANITIZE_stacktrace.o := n KASAN_SANITIZE_paravirt.o := n KASAN_SANITIZE_sev.o := n -KASAN_SANITIZE_cc_platform.o := n # With some compiler versions the generated code results in boot hangs, caused # by several compilation units. To be safe, disable all instrumentation. @@ -49,7 +47,6 @@ endif KCOV_INSTRUMENT := n CFLAGS_head$(BITS).o += -fno-stack-protector -CFLAGS_cc_platform.o += -fno-stack-protector CFLAGS_irq.o := -I $(srctree)/$(src)/../include/asm/trace @@ -151,8 +148,6 @@ obj-$(CONFIG_UNWINDER_GUESS) += unwind_guess.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += sev.o -obj-$(CONFIG_ARCH_HAS_CC_PLATFORM) += cc_platform.o - ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index cf340d85946a..fc17b3f136fe 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -3,7 +3,7 @@ obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup_$(BITS).o obj-$(CONFIG_ACPI_APEI) += apei.o -obj-$(CONFIG_ACPI_CPPC_LIB) += cppc_msr.o +obj-$(CONFIG_ACPI_CPPC_LIB) += cppc.o ifneq ($(CONFIG_ACPI_PROCESSOR),) obj-y += cstate.o diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 5b6d1a95776f..0d01e7f5078c 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1328,6 +1328,17 @@ static int __init disable_acpi_pci(const struct dmi_system_id *d) return 0; } +static int __init disable_acpi_xsdt(const struct dmi_system_id *d) +{ + if (!acpi_force) { + pr_notice("%s detected: force use of acpi=rsdt\n", d->ident); + acpi_gbl_do_not_use_xsdt = TRUE; + } else { + pr_notice("Warning: DMI blacklist says broken, but acpi XSDT forced\n"); + } + return 0; +} + static int __init dmi_disable_acpi(const struct dmi_system_id *d) { if (!acpi_force) { @@ -1451,6 +1462,19 @@ static const struct dmi_system_id acpi_dmi_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "TravelMate 360"), }, }, + /* + * Boxes that need ACPI XSDT use disabled due to corrupted tables + */ + { + .callback = disable_acpi_xsdt, + .ident = "Advantech DAC-BJ01", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "NEC"), + DMI_MATCH(DMI_PRODUCT_NAME, "Bearlake CRB Board"), + DMI_MATCH(DMI_BIOS_VERSION, "V1.12"), + DMI_MATCH(DMI_BIOS_DATE, "02/01/2011"), + }, + }, {} }; diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c new file mode 100644 index 000000000000..df1644d9b3b6 --- /dev/null +++ b/arch/x86/kernel/acpi/cppc.c @@ -0,0 +1,103 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * cppc.c: CPPC Interface for x86 + * Copyright (c) 2016, Intel Corporation. + */ + +#include <acpi/cppc_acpi.h> +#include <asm/msr.h> +#include <asm/processor.h> +#include <asm/topology.h> + +/* Refer to drivers/acpi/cppc_acpi.c for the description of functions */ + +bool cpc_ffh_supported(void) +{ + return true; +} + +int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val) +{ + int err; + + err = rdmsrl_safe_on_cpu(cpunum, reg->address, val); + if (!err) { + u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1, + reg->bit_offset); + + *val &= mask; + *val >>= reg->bit_offset; + } + return err; +} + +int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) +{ + u64 rd_val; + int err; + + err = rdmsrl_safe_on_cpu(cpunum, reg->address, &rd_val); + if (!err) { + u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1, + reg->bit_offset); + + val <<= reg->bit_offset; + val &= mask; + rd_val &= ~mask; + rd_val |= val; + err = wrmsrl_safe_on_cpu(cpunum, reg->address, rd_val); + } + return err; +} + +bool amd_set_max_freq_ratio(u64 *ratio) +{ + struct cppc_perf_caps perf_caps; + u64 highest_perf, nominal_perf; + u64 perf_ratio; + int rc; + + if (!ratio) + return false; + + rc = cppc_get_perf_caps(0, &perf_caps); + if (rc) { + pr_debug("Could not retrieve perf counters (%d)\n", rc); + return false; + } + + highest_perf = amd_get_highest_perf(); + nominal_perf = perf_caps.nominal_perf; + + if (!highest_perf || !nominal_perf) { + pr_debug("Could not retrieve highest or nominal performance\n"); + return false; + } + + perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf); + /* midpoint between max_boost and max_P */ + perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1; + if (!perf_ratio) { + pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n"); + return false; + } + + *ratio = perf_ratio; + arch_set_max_freq_ratio(false); + + return true; +} + +static DEFINE_MUTEX(freq_invariance_lock); + +void init_freq_invariance_cppc(void) +{ + static bool secondary; + + mutex_lock(&freq_invariance_lock); + + init_freq_invariance(secondary, true); + secondary = true; + + mutex_unlock(&freq_invariance_lock); +} diff --git a/arch/x86/kernel/acpi/cppc_msr.c b/arch/x86/kernel/acpi/cppc_msr.c deleted file mode 100644 index b961de569e7e..000000000000 --- a/arch/x86/kernel/acpi/cppc_msr.c +++ /dev/null @@ -1,49 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * cppc_msr.c: MSR Interface for CPPC - * Copyright (c) 2016, Intel Corporation. - */ - -#include <acpi/cppc_acpi.h> -#include <asm/msr.h> - -/* Refer to drivers/acpi/cppc_acpi.c for the description of functions */ - -bool cpc_ffh_supported(void) -{ - return true; -} - -int cpc_read_ffh(int cpunum, struct cpc_reg *reg, u64 *val) -{ - int err; - - err = rdmsrl_safe_on_cpu(cpunum, reg->address, val); - if (!err) { - u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1, - reg->bit_offset); - - *val &= mask; - *val >>= reg->bit_offset; - } - return err; -} - -int cpc_write_ffh(int cpunum, struct cpc_reg *reg, u64 val) -{ - u64 rd_val; - int err; - - err = rdmsrl_safe_on_cpu(cpunum, reg->address, &rd_val); - if (!err) { - u64 mask = GENMASK_ULL(reg->bit_offset + reg->bit_width - 1, - reg->bit_offset); - - val <<= reg->bit_offset; - val &= mask; - rd_val &= ~mask; - rd_val |= val; - err = wrmsrl_safe_on_cpu(cpunum, reg->address, rd_val); - } - return err; -} diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 1e97f944b47d..3b7f4cdbf2e0 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -15,6 +15,7 @@ #include <asm/desc.h> #include <asm/cacheflush.h> #include <asm/realmode.h> +#include <asm/hypervisor.h> #include <linux/ftrace.h> #include "../../realmode/rm/wakeup.h" @@ -140,9 +141,9 @@ static int __init acpi_sleep_setup(char *str) acpi_realmode_flags |= 4; #ifdef CONFIG_HIBERNATION if (strncmp(str, "s4_hwsig", 8) == 0) - acpi_check_s4_hw_signature(1); + acpi_check_s4_hw_signature = 1; if (strncmp(str, "s4_nohwsig", 10) == 0) - acpi_check_s4_hw_signature(0); + acpi_check_s4_hw_signature = 0; #endif if (strncmp(str, "nonvs", 5) == 0) acpi_nvs_nosave(); @@ -160,3 +161,21 @@ static int __init acpi_sleep_setup(char *str) } __setup("acpi_sleep=", acpi_sleep_setup); + +#if defined(CONFIG_HIBERNATION) && defined(CONFIG_HYPERVISOR_GUEST) +static int __init init_s4_sigcheck(void) +{ + /* + * If running on a hypervisor, honour the ACPI specification + * by default and trigger a clean reboot when the hardware + * signature in FACS is changed after hibernation. + */ + if (acpi_check_s4_hw_signature == -1 && + !hypervisor_is_type(X86_HYPER_NATIVE)) + acpi_check_s4_hw_signature = 1; + + return 0; +} +/* This must happen before acpi_init() which is a subsys initcall */ +arch_initcall(init_s4_sigcheck); +#endif diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4edb6f0f628c..0c0b09796ced 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -394,35 +394,6 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c) per_cpu(cpu_llc_id, cpu) = c->cpu_die_id = c->phys_proc_id; } -static void amd_detect_ppin(struct cpuinfo_x86 *c) -{ - unsigned long long val; - - if (!cpu_has(c, X86_FEATURE_AMD_PPIN)) - return; - - /* When PPIN is defined in CPUID, still need to check PPIN_CTL MSR */ - if (rdmsrl_safe(MSR_AMD_PPIN_CTL, &val)) - goto clear_ppin; - - /* PPIN is locked in disabled mode, clear feature bit */ - if ((val & 3UL) == 1UL) - goto clear_ppin; - - /* If PPIN is disabled, try to enable it */ - if (!(val & 2UL)) { - wrmsrl_safe(MSR_AMD_PPIN_CTL, val | 2UL); - rdmsrl_safe(MSR_AMD_PPIN_CTL, &val); - } - - /* If PPIN_EN bit is 1, return from here; otherwise fall through */ - if (val & 2UL) - return; - -clear_ppin: - clear_cpu_cap(c, X86_FEATURE_AMD_PPIN); -} - u32 amd_get_nodes_per_socket(void) { return nodes_per_socket; @@ -585,6 +556,8 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) * the SME physical address space reduction value. * If BIOS has not enabled SME then don't advertise the * SME feature (set in scattered.c). + * If the kernel has not enabled SME via any means then + * don't advertise the SME feature. * For SEV: If BIOS has not enabled SEV then don't advertise the * SEV and SEV_ES feature (set in scattered.c). * @@ -607,6 +580,9 @@ static void early_detect_mem_encrypt(struct cpuinfo_x86 *c) if (IS_ENABLED(CONFIG_X86_32)) goto clear_all; + if (!sme_me_mask) + setup_clear_cpu_cap(X86_FEATURE_SME); + rdmsrl(MSR_K7_HWCR, msr); if (!(msr & MSR_K7_HWCR_SMMLOCK)) goto clear_sev; @@ -947,7 +923,6 @@ static void init_amd(struct cpuinfo_x86 *c) amd_detect_cmp(c); amd_get_topology(c); srat_detect_node(c); - amd_detect_ppin(c); init_amd_cacheinfo(c); diff --git a/arch/x86/kernel/cpu/aperfmperf.c b/arch/x86/kernel/cpu/aperfmperf.c index 22911deacb6e..9ca008f9e9b1 100644 --- a/arch/x86/kernel/cpu/aperfmperf.c +++ b/arch/x86/kernel/cpu/aperfmperf.c @@ -91,7 +91,7 @@ unsigned int aperfmperf_get_khz(int cpu) if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) return 0; - if (!housekeeping_cpu(cpu, HK_FLAG_MISC)) + if (!housekeeping_cpu(cpu, HK_TYPE_MISC)) return 0; if (rcu_is_idle_cpu(cpu)) @@ -114,7 +114,7 @@ void arch_freq_prepare_all(void) return; for_each_online_cpu(cpu) { - if (!housekeeping_cpu(cpu, HK_FLAG_MISC)) + if (!housekeeping_cpu(cpu, HK_TYPE_MISC)) continue; if (rcu_is_idle_cpu(cpu)) continue; /* Idle CPUs are completely uninteresting. */ @@ -136,7 +136,7 @@ unsigned int arch_freq_get_on_cpu(int cpu) if (!boot_cpu_has(X86_FEATURE_APERFMPERF)) return 0; - if (!housekeeping_cpu(cpu, HK_FLAG_MISC)) + if (!housekeeping_cpu(cpu, HK_TYPE_MISC)) return 0; if (aperfmperf_snapshot_cpu(cpu, ktime_get(), true)) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 7b8382c11788..64deb7727d00 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -88,6 +88,83 @@ EXPORT_SYMBOL_GPL(get_llc_id); /* L2 cache ID of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id) = BAD_APICID; +static struct ppin_info { + int feature; + int msr_ppin_ctl; + int msr_ppin; +} ppin_info[] = { + [X86_VENDOR_INTEL] = { + .feature = X86_FEATURE_INTEL_PPIN, + .msr_ppin_ctl = MSR_PPIN_CTL, + .msr_ppin = MSR_PPIN + }, + [X86_VENDOR_AMD] = { + .feature = X86_FEATURE_AMD_PPIN, + .msr_ppin_ctl = MSR_AMD_PPIN_CTL, + .msr_ppin = MSR_AMD_PPIN + }, +}; + +static const struct x86_cpu_id ppin_cpuids[] = { + X86_MATCH_FEATURE(X86_FEATURE_AMD_PPIN, &ppin_info[X86_VENDOR_AMD]), + X86_MATCH_FEATURE(X86_FEATURE_INTEL_PPIN, &ppin_info[X86_VENDOR_INTEL]), + + /* Legacy models without CPUID enumeration */ + X86_MATCH_INTEL_FAM6_MODEL(IVYBRIDGE_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(HASWELL_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_D, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(BROADWELL_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(SKYLAKE_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(ICELAKE_D, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(SAPPHIRERAPIDS_X, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNL, &ppin_info[X86_VENDOR_INTEL]), + X86_MATCH_INTEL_FAM6_MODEL(XEON_PHI_KNM, &ppin_info[X86_VENDOR_INTEL]), + + {} +}; + +static void ppin_init(struct cpuinfo_x86 *c) +{ + const struct x86_cpu_id *id; + unsigned long long val; + struct ppin_info *info; + + id = x86_match_cpu(ppin_cpuids); + if (!id) + return; + + /* + * Testing the presence of the MSR is not enough. Need to check + * that the PPIN_CTL allows reading of the PPIN. + */ + info = (struct ppin_info *)id->driver_data; + + if (rdmsrl_safe(info->msr_ppin_ctl, &val)) + goto clear_ppin; + + if ((val & 3UL) == 1UL) { + /* PPIN locked in disabled mode */ + goto clear_ppin; + } + + /* If PPIN is disabled, try to enable */ + if (!(val & 2UL)) { + wrmsrl_safe(info->msr_ppin_ctl, val | 2UL); + rdmsrl_safe(info->msr_ppin_ctl, &val); + } + + /* Is the enable bit set? */ + if (val & 2UL) { + c->ppin = __rdmsr(info->msr_ppin); + set_cpu_cap(c, info->feature); + return; + } + +clear_ppin: + clear_cpu_cap(c, info->feature); +} + /* correctly size the local cpu masks */ void __init setup_cpu_local_masks(void) { @@ -1655,6 +1732,8 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_capability[i] |= boot_cpu_data.x86_capability[i]; } + ppin_init(c); + /* Init Machine Check Exception if available. */ mcheck_cpu_init(c); diff --git a/arch/x86/kernel/cpu/mce/amd.c b/arch/x86/kernel/cpu/mce/amd.c index 9f4b508886dd..1940d305db1c 100644 --- a/arch/x86/kernel/cpu/mce/amd.c +++ b/arch/x86/kernel/cpu/mce/amd.c @@ -993,6 +993,7 @@ static struct attribute *default_attrs[] = { NULL, /* possibly interrupt_enable if supported, see below */ NULL, }; +ATTRIBUTE_GROUPS(default); #define to_block(k) container_of(k, struct threshold_block, kobj) #define to_attr(a) container_of(a, struct threshold_attr, attr) @@ -1029,7 +1030,7 @@ static void threshold_block_release(struct kobject *kobj); static struct kobj_type threshold_ktype = { .sysfs_ops = &threshold_ops, - .default_attrs = default_attrs, + .default_groups = default_groups, .release = threshold_block_release, }; @@ -1101,10 +1102,10 @@ static int allocate_threshold_blocks(unsigned int cpu, struct threshold_bank *tb b->threshold_limit = THRESHOLD_MAX; if (b->interrupt_capable) { - threshold_ktype.default_attrs[2] = &interrupt_enable.attr; + default_attrs[2] = &interrupt_enable.attr; b->interrupt_enable = 1; } else { - threshold_ktype.default_attrs[2] = NULL; + default_attrs[2] = NULL; } INIT_LIST_HEAD(&b->miscj); diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c index 05c6469db769..e53d3e6096fb 100644 --- a/arch/x86/kernel/cpu/mce/core.c +++ b/arch/x86/kernel/cpu/mce/core.c @@ -138,12 +138,7 @@ void mce_setup(struct mce *m) m->socketid = cpu_data(m->extcpu).phys_proc_id; m->apicid = cpu_data(m->extcpu).initial_apicid; m->mcgcap = __rdmsr(MSR_IA32_MCG_CAP); - - if (this_cpu_has(X86_FEATURE_INTEL_PPIN)) - m->ppin = __rdmsr(MSR_PPIN); - else if (this_cpu_has(X86_FEATURE_AMD_PPIN)) - m->ppin = __rdmsr(MSR_AMD_PPIN); - + m->ppin = cpu_data(m->extcpu).ppin; m->microcode = boot_cpu_data.microcode; } diff --git a/arch/x86/kernel/cpu/mce/intel.c b/arch/x86/kernel/cpu/mce/intel.c index baafbb37be67..95275a5e57e0 100644 --- a/arch/x86/kernel/cpu/mce/intel.c +++ b/arch/x86/kernel/cpu/mce/intel.c @@ -470,47 +470,6 @@ void intel_clear_lmce(void) wrmsrl(MSR_IA32_MCG_EXT_CTL, val); } -static void intel_ppin_init(struct cpuinfo_x86 *c) -{ - unsigned long long val; - - /* - * Even if testing the presence of the MSR would be enough, we don't - * want to risk the situation where other models reuse this MSR for - * other purposes. - */ - switch (c->x86_model) { - case INTEL_FAM6_IVYBRIDGE_X: - case INTEL_FAM6_HASWELL_X: - case INTEL_FAM6_BROADWELL_D: - case INTEL_FAM6_BROADWELL_X: - case INTEL_FAM6_SKYLAKE_X: - case INTEL_FAM6_ICELAKE_X: - case INTEL_FAM6_ICELAKE_D: - case INTEL_FAM6_SAPPHIRERAPIDS_X: - case INTEL_FAM6_XEON_PHI_KNL: - case INTEL_FAM6_XEON_PHI_KNM: - - if (rdmsrl_safe(MSR_PPIN_CTL, &val)) - return; - - if ((val & 3UL) == 1UL) { - /* PPIN locked in disabled mode */ - return; - } - - /* If PPIN is disabled, try to enable */ - if (!(val & 2UL)) { - wrmsrl_safe(MSR_PPIN_CTL, val | 2UL); - rdmsrl_safe(MSR_PPIN_CTL, &val); - } - - /* Is the enable bit set? */ - if (val & 2UL) - set_cpu_cap(c, X86_FEATURE_INTEL_PPIN); - } -} - /* * Enable additional error logs from the integrated * memory controller on processors that support this. @@ -535,7 +494,6 @@ void mce_intel_feature_init(struct cpuinfo_x86 *c) { intel_init_cmci(); intel_init_lmce(); - intel_ppin_init(c); intel_imc_init(c); } diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 5a99f993e639..e0a572472052 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -33,6 +33,7 @@ #include <asm/nmi.h> #include <clocksource/hyperv_timer.h> #include <asm/numa.h> +#include <asm/coco.h> /* Is Linux running as the root partition? */ bool hv_root_partition; @@ -344,6 +345,11 @@ static void __init ms_hyperv_init_platform(void) */ swiotlb_force = SWIOTLB_FORCE; #endif + /* Isolation VMs are unenlightened SEV-based VMs, thus this check: */ + if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) { + if (hv_get_isolation_type() != HV_ISOLATION_TYPE_NONE) + cc_set_vendor(CC_VENDOR_HYPERV); + } } if (hv_max_functions_eax >= HYPERV_CPUID_NESTED_FEATURES) { diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 21d1f062895a..4143b1e4c5c6 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -26,6 +26,7 @@ struct cpuid_bit { static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_APERFMPERF, CPUID_ECX, 0, 0x00000006, 0 }, { X86_FEATURE_EPB, CPUID_ECX, 3, 0x00000006, 0 }, + { X86_FEATURE_INTEL_PPIN, CPUID_EBX, 0, 0x00000007, 1 }, { X86_FEATURE_CQM_LLC, CPUID_EDX, 1, 0x0000000f, 0 }, { X86_FEATURE_CQM_OCCUP_LLC, CPUID_EDX, 0, 0x0000000f, 1 }, { X86_FEATURE_CQM_MBM_TOTAL, CPUID_EDX, 1, 0x0000000f, 1 }, diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index 8dea01ffc5c1..19821f027cb3 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -612,6 +612,13 @@ int fpu_clone(struct task_struct *dst, unsigned long clone_flags) fpu_inherit_perms(dst_fpu); fpregs_unlock(); + /* + * Children never inherit PASID state. + * Force it to have its init value: + */ + if (use_xsave()) + dst_fpu->fpstate->regs.xsave.header.xfeatures &= ~XFEATURE_MASK_PASID; + trace_x86_fpu_copy_src(src_fpu); trace_x86_fpu_copy_dst(dst_fpu); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index de563db9cdcd..4f5ecbbaae77 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -126,7 +126,7 @@ static bool __head check_la57_support(unsigned long physaddr) } #endif -static unsigned long sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd) +static unsigned long __head sme_postprocess_startup(struct boot_params *bp, pmdval_t *pmd) { unsigned long vaddr, vaddr_end; int i; diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index 4bce802d25fb..e73f7df362f5 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -292,7 +292,6 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", reason, smp_processor_id()); - pr_emerg("Do you have a strange power saving mode enabled?\n"); if (unknown_nmi_panic || panic_on_unrecovered_nmi) nmi_panic(regs, "NMI: Not continuing"); diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 81d8ef036637..e131d71b3cae 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -765,8 +765,11 @@ void stop_this_cpu(void *dummy) * without the encryption bit, they don't race each other when flushed * and potentially end up with the wrong entry being committed to * memory. + * + * Test the CPUID bit directly because the machine might've cleared + * X86_FEATURE_SME due to cmdline options. */ - if (boot_cpu_has(X86_FEATURE_SME)) + if (cpuid_eax(0x8000001f) & BIT(0)) native_wbinvd(); for (;;) { /* diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 617012f4619f..2ef14772dc04 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -83,10 +83,6 @@ #include <asm/hw_irq.h> #include <asm/stackprotector.h> -#ifdef CONFIG_ACPI_CPPC_LIB -#include <acpi/cppc_acpi.h> -#endif - /* representing HT siblings of each logical CPU */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map); EXPORT_PER_CPU_SYMBOL(cpu_sibling_map); @@ -155,8 +151,6 @@ static inline void smpboot_restore_warm_reset_vector(void) *((volatile u32 *)phys_to_virt(TRAMPOLINE_PHYS_LOW)) = 0; } -static void init_freq_invariance(bool secondary, bool cppc_ready); - /* * Report back to the Boot Processor during boot time or to the caller processor * during CPU online. @@ -2097,48 +2091,6 @@ out: return true; } -#ifdef CONFIG_ACPI_CPPC_LIB -static bool amd_set_max_freq_ratio(void) -{ - struct cppc_perf_caps perf_caps; - u64 highest_perf, nominal_perf; - u64 perf_ratio; - int rc; - - rc = cppc_get_perf_caps(0, &perf_caps); - if (rc) { - pr_debug("Could not retrieve perf counters (%d)\n", rc); - return false; - } - - highest_perf = amd_get_highest_perf(); - nominal_perf = perf_caps.nominal_perf; - - if (!highest_perf || !nominal_perf) { - pr_debug("Could not retrieve highest or nominal performance\n"); - return false; - } - - perf_ratio = div_u64(highest_perf * SCHED_CAPACITY_SCALE, nominal_perf); - /* midpoint between max_boost and max_P */ - perf_ratio = (perf_ratio + SCHED_CAPACITY_SCALE) >> 1; - if (!perf_ratio) { - pr_debug("Non-zero highest/nominal perf values led to a 0 ratio\n"); - return false; - } - - arch_turbo_freq_ratio = perf_ratio; - arch_set_max_freq_ratio(false); - - return true; -} -#else -static bool amd_set_max_freq_ratio(void) -{ - return false; -} -#endif - static void init_counter_refs(void) { u64 aperf, mperf; @@ -2167,7 +2119,7 @@ static void register_freq_invariance_syscore_ops(void) static inline void register_freq_invariance_syscore_ops(void) {} #endif -static void init_freq_invariance(bool secondary, bool cppc_ready) +void init_freq_invariance(bool secondary, bool cppc_ready) { bool ret = false; @@ -2187,7 +2139,7 @@ static void init_freq_invariance(bool secondary, bool cppc_ready) if (!cppc_ready) { return; } - ret = amd_set_max_freq_ratio(); + ret = amd_set_max_freq_ratio(&arch_turbo_freq_ratio); } if (ret) { @@ -2200,22 +2152,6 @@ static void init_freq_invariance(bool secondary, bool cppc_ready) } } -#ifdef CONFIG_ACPI_CPPC_LIB -static DEFINE_MUTEX(freq_invariance_lock); - -void init_freq_invariance_cppc(void) -{ - static bool secondary; - - mutex_lock(&freq_invariance_lock); - - init_freq_invariance(secondary, true); - secondary = true; - - mutex_unlock(&freq_invariance_lock); -} -#endif - static void disable_freq_invariance_workfn(struct work_struct *work) { static_branch_disable(&arch_scale_freq_key); @@ -2264,8 +2200,4 @@ error: pr_warn("Scheduler frequency invariance went wobbly, disabling!\n"); schedule_work(&disable_freq_invariance_work); } -#else -static inline void init_freq_invariance(bool secondary, bool cppc_ready) -{ -} #endif /* CONFIG_X86_64 */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 8143693a7ea6..2e37862e3a8c 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -39,6 +39,7 @@ #include <linux/io.h> #include <linux/hardirq.h> #include <linux/atomic.h> +#include <linux/ioasid.h> #include <asm/stacktrace.h> #include <asm/processor.h> @@ -559,6 +560,57 @@ static bool fixup_iopl_exception(struct pt_regs *regs) return true; } +/* + * The unprivileged ENQCMD instruction generates #GPs if the + * IA32_PASID MSR has not been populated. If possible, populate + * the MSR from a PASID previously allocated to the mm. + */ +static bool try_fixup_enqcmd_gp(void) +{ +#ifdef CONFIG_IOMMU_SVA + u32 pasid; + + /* + * MSR_IA32_PASID is managed using XSAVE. Directly + * writing to the MSR is only possible when fpregs + * are valid and the fpstate is not. This is + * guaranteed when handling a userspace exception + * in *before* interrupts are re-enabled. + */ + lockdep_assert_irqs_disabled(); + + /* + * Hardware without ENQCMD will not generate + * #GPs that can be fixed up here. + */ + if (!cpu_feature_enabled(X86_FEATURE_ENQCMD)) + return false; + + pasid = current->mm->pasid; + + /* + * If the mm has not been allocated a + * PASID, the #GP can not be fixed up. + */ + if (!pasid_valid(pasid)) + return false; + + /* + * Did this thread already have its PASID activated? + * If so, the #GP must be from something else. + */ + if (current->pasid_activated) + return false; + + wrmsrl(MSR_IA32_PASID, pasid | MSR_IA32_PASID_VALID); + current->pasid_activated = 1; + + return true; +#else + return false; +#endif +} + DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) { char desc[sizeof(GPFSTR) + 50 + 2*sizeof(unsigned long) + 1] = GPFSTR; @@ -567,6 +619,9 @@ DEFINE_IDTENTRY_ERRORCODE(exc_general_protection) unsigned long gp_addr; int ret; + if (user_mode(regs) && try_fixup_enqcmd_gp()) + return; + cond_local_irq_enable(regs); if (static_cpu_has(X86_FEATURE_UMIP)) { diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c index 7d20c1d34a3c..e84ee5cdbd8c 100644 --- a/arch/x86/kernel/x86_init.c +++ b/arch/x86/kernel/x86_init.c @@ -129,6 +129,11 @@ struct x86_cpuinit_ops x86_cpuinit = { static void default_nmi_init(void) { }; +static void enc_status_change_prepare_noop(unsigned long vaddr, int npages, bool enc) { } +static bool enc_status_change_finish_noop(unsigned long vaddr, int npages, bool enc) { return false; } +static bool enc_tlb_flush_required_noop(bool enc) { return false; } +static bool enc_cache_flush_required_noop(void) { return false; } + struct x86_platform_ops x86_platform __ro_after_init = { .calibrate_cpu = native_calibrate_cpu_early, .calibrate_tsc = native_calibrate_tsc, @@ -138,9 +143,16 @@ struct x86_platform_ops x86_platform __ro_after_init = { .is_untracked_pat_range = is_ISA_range, .nmi_init = default_nmi_init, .get_nmi_reason = default_get_nmi_reason, - .save_sched_clock_state = tsc_save_sched_clock_state, - .restore_sched_clock_state = tsc_restore_sched_clock_state, + .save_sched_clock_state = tsc_save_sched_clock_state, + .restore_sched_clock_state = tsc_restore_sched_clock_state, .hyper.pin_vcpu = x86_op_int_noop, + + .guest = { + .enc_status_change_prepare = enc_status_change_prepare_noop, + .enc_status_change_finish = enc_status_change_finish_noop, + .enc_tlb_flush_required = enc_tlb_flush_required_noop, + .enc_cache_flush_required = enc_cache_flush_required_noop, + }, }; EXPORT_SYMBOL_GPL(x86_platform); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index b8f8d268d058..8bf541b24b4b 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -879,7 +879,8 @@ static inline int __do_cpuid_func(struct kvm_cpuid_array *array, u32 function) eax.split.bit_width = cap.bit_width_gp; eax.split.mask_length = cap.events_mask_len; - edx.split.num_counters_fixed = min(cap.num_counters_fixed, MAX_FIXED_COUNTERS); + edx.split.num_counters_fixed = + min(cap.num_counters_fixed, KVM_PMC_MAX_FIXED); edx.split.bit_width_fixed = cap.bit_width_fixed; if (cap.version) edx.split.anythread_deprecated = 1; diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index e86d610dc6b7..50ca1db7247c 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -680,7 +680,7 @@ static inline u8 ctxt_virt_addr_bits(struct x86_emulate_ctxt *ctxt) static inline bool emul_is_noncanonical_address(u64 la, struct x86_emulate_ctxt *ctxt) { - return get_canonical(la, ctxt_virt_addr_bits(ctxt)) != la; + return !__is_canonical_address(la, ctxt_virt_addr_bits(ctxt)); } /* @@ -730,7 +730,7 @@ static __always_inline int __linearize(struct x86_emulate_ctxt *ctxt, case X86EMUL_MODE_PROT64: *linear = la; va_bits = ctxt_virt_addr_bits(ctxt); - if (get_canonical(la, va_bits) != la) + if (!__is_canonical_address(la, va_bits)) goto bad; *max_size = min_t(u64, ~0u, (1ull << va_bits) - la); @@ -5395,8 +5395,13 @@ static int fastop(struct x86_emulate_ctxt *ctxt, fastop_t fop) void init_decode_cache(struct x86_emulate_ctxt *ctxt) { - memset(&ctxt->rip_relative, 0, - (void *)&ctxt->modrm - (void *)&ctxt->rip_relative); + /* Clear fields that are set conditionally but read without a guard. */ + ctxt->rip_relative = false; + ctxt->rex_prefix = 0; + ctxt->lock_prefix = 0; + ctxt->rep_prefix = 0; + ctxt->regs_valid = 0; + ctxt->regs_dirty = 0; ctxt->io_read.pos = 0; ctxt->io_read.end = 0; diff --git a/arch/x86/kvm/kvm_emulate.h b/arch/x86/kvm/kvm_emulate.h index 39eded2426ff..840ddb4a9302 100644 --- a/arch/x86/kvm/kvm_emulate.h +++ b/arch/x86/kvm/kvm_emulate.h @@ -336,11 +336,7 @@ struct x86_emulate_ctxt { fastop_t fop; }; int (*check_perm)(struct x86_emulate_ctxt *ctxt); - /* - * The following six fields are cleared together, - * the rest are initialized unconditionally in x86_decode_insn - * or elsewhere - */ + bool rip_relative; u8 rex_prefix; u8 lock_prefix; diff --git a/arch/x86/kvm/pmu.h b/arch/x86/kvm/pmu.h index 7a7b8d5b775e..9e66fba1d6a3 100644 --- a/arch/x86/kvm/pmu.h +++ b/arch/x86/kvm/pmu.h @@ -15,8 +15,6 @@ #define VMWARE_BACKDOOR_PMC_REAL_TIME 0x10001 #define VMWARE_BACKDOOR_PMC_APPARENT_TIME 0x10002 -#define MAX_FIXED_COUNTERS 3 - struct kvm_event_hw_type_mapping { u8 eventsel; u8 unit_mask; diff --git a/arch/x86/kvm/vmx/pmu_intel.c b/arch/x86/kvm/vmx/pmu_intel.c index 466d18fc0c5d..9b26596099a1 100644 --- a/arch/x86/kvm/vmx/pmu_intel.c +++ b/arch/x86/kvm/vmx/pmu_intel.c @@ -565,7 +565,7 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu) pmu->gp_counters[i].current_config = 0; } - for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) { + for (i = 0; i < KVM_PMC_MAX_FIXED; i++) { pmu->fixed_counters[i].type = KVM_PMC_FIXED; pmu->fixed_counters[i].vcpu = vcpu; pmu->fixed_counters[i].idx = i + INTEL_PMC_IDX_FIXED; @@ -591,7 +591,7 @@ static void intel_pmu_reset(struct kvm_vcpu *vcpu) pmc->counter = pmc->eventsel = 0; } - for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) { + for (i = 0; i < KVM_PMC_MAX_FIXED; i++) { pmc = &pmu->fixed_counters[i]; pmc_stop_counter(pmc); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index eb4029660bd9..6db3a506b402 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1749,7 +1749,7 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data, * value, and that something deterministic happens if the guest * invokes 64-bit SYSENTER. */ - data = get_canonical(data, vcpu_virt_addr_bits(vcpu)); + data = __canonical_address(data, vcpu_virt_addr_bits(vcpu)); break; case MSR_TSC_AUX: if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX)) @@ -6529,7 +6529,7 @@ static void kvm_init_msr_list(void) u32 dummy[2]; unsigned i; - BUILD_BUG_ON_MSG(INTEL_PMC_MAX_FIXED != 4, + BUILD_BUG_ON_MSG(KVM_PMC_MAX_FIXED != 3, "Please update the fixed PMCs in msrs_to_saved_all[]"); perf_get_x86_pmu_capability(&x86_pmu); @@ -8853,7 +8853,7 @@ int kvm_arch_init(void *opaque) } if (pi_inject_timer == -1) - pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER); + pi_inject_timer = housekeeping_enabled(HK_TYPE_TIMER); #ifdef CONFIG_X86_64 pvclock_gtod_register_notifier(&pvclock_gtod_notifier); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 767ec7f99516..f11d945ac41f 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -166,14 +166,9 @@ static inline u8 vcpu_virt_addr_bits(struct kvm_vcpu *vcpu) return kvm_read_cr4_bits(vcpu, X86_CR4_LA57) ? 57 : 48; } -static inline u64 get_canonical(u64 la, u8 vaddr_bits) -{ - return ((int64_t)la << (64 - vaddr_bits)) >> (64 - vaddr_bits); -} - static inline bool is_noncanonical_address(u64 la, struct kvm_vcpu *vcpu) { - return get_canonical(la, vcpu_virt_addr_bits(vcpu)) != la; + return !__is_canonical_address(la, vcpu_virt_addr_bits(vcpu)); } static inline void vcpu_cache_mmio_info(struct kvm_vcpu *vcpu, diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 59cf2343f3d9..d0d7b9bc6cad 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -27,8 +27,7 @@ * Output: * rax original destination */ -SYM_FUNC_START_ALIAS(__memcpy) -SYM_FUNC_START_WEAK(memcpy) +SYM_FUNC_START(__memcpy) ALTERNATIVE_2 "jmp memcpy_orig", "", X86_FEATURE_REP_GOOD, \ "jmp memcpy_erms", X86_FEATURE_ERMS @@ -40,11 +39,12 @@ SYM_FUNC_START_WEAK(memcpy) movl %edx, %ecx rep movsb RET -SYM_FUNC_END(memcpy) -SYM_FUNC_END_ALIAS(__memcpy) -EXPORT_SYMBOL(memcpy) +SYM_FUNC_END(__memcpy) EXPORT_SYMBOL(__memcpy) +SYM_FUNC_ALIAS_WEAK(memcpy, __memcpy) +EXPORT_SYMBOL(memcpy) + /* * memcpy_erms() - enhanced fast string memcpy. This is faster and * simpler than memcpy. Use memcpy_erms when possible. diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S index 50ea390df712..d83cba364e31 100644 --- a/arch/x86/lib/memmove_64.S +++ b/arch/x86/lib/memmove_64.S @@ -24,7 +24,6 @@ * Output: * rax: dest */ -SYM_FUNC_START_WEAK(memmove) SYM_FUNC_START(__memmove) mov %rdi, %rax @@ -207,6 +206,7 @@ SYM_FUNC_START(__memmove) 13: RET SYM_FUNC_END(__memmove) -SYM_FUNC_END_ALIAS(memmove) EXPORT_SYMBOL(__memmove) + +SYM_FUNC_ALIAS_WEAK(memmove, __memmove) EXPORT_SYMBOL(memmove) diff --git a/arch/x86/lib/memset_64.S b/arch/x86/lib/memset_64.S index d624f2bc42f1..fc9ffd3ff3b2 100644 --- a/arch/x86/lib/memset_64.S +++ b/arch/x86/lib/memset_64.S @@ -17,7 +17,6 @@ * * rax original destination */ -SYM_FUNC_START_WEAK(memset) SYM_FUNC_START(__memset) /* * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended @@ -42,10 +41,11 @@ SYM_FUNC_START(__memset) movq %r9,%rax RET SYM_FUNC_END(__memset) -SYM_FUNC_END_ALIAS(memset) -EXPORT_SYMBOL(memset) EXPORT_SYMBOL(__memset) +SYM_FUNC_ALIAS_WEAK(memset, __memset) +EXPORT_SYMBOL(memset) + /* * ISO C memset - set a memory block to a byte value. This function uses * enhanced rep stosb to override the fast string function. diff --git a/arch/x86/lib/x86-opcode-map.txt b/arch/x86/lib/x86-opcode-map.txt index ec31f5b60323..d12d1358f96d 100644 --- a/arch/x86/lib/x86-opcode-map.txt +++ b/arch/x86/lib/x86-opcode-map.txt @@ -690,7 +690,10 @@ AVXcode: 2 45: vpsrlvd/q Vx,Hx,Wx (66),(v) 46: vpsravd Vx,Hx,Wx (66),(v) | vpsravd/q Vx,Hx,Wx (66),(evo) 47: vpsllvd/q Vx,Hx,Wx (66),(v) -# Skip 0x48-0x4b +# Skip 0x48 +49: TILERELEASE (v1),(000),(11B) | LDTILECFG Mtc (v1)(000) | STTILECFG Mtc (66),(v1),(000) | TILEZERO Vt (F2),(v1),(11B) +# Skip 0x4a +4b: TILELOADD Vt,Wsm (F2),(v1) | TILELOADDT1 Vt,Wsm (66),(v1) | TILESTORED Wsm,Vt (F3),(v) 4c: vrcp14ps/d Vpd,Wpd (66),(ev) 4d: vrcp14ss/d Vsd,Hpd,Wsd (66),(ev) 4e: vrsqrt14ps/d Vpd,Wpd (66),(ev) @@ -705,7 +708,10 @@ AVXcode: 2 59: vpbroadcastq Vx,Wx (66),(v) | vbroadcasti32x2 Vx,Wx (66),(evo) 5a: vbroadcasti128 Vqq,Mdq (66),(v) | vbroadcasti32x4/64x2 Vx,Wx (66),(evo) 5b: vbroadcasti32x8/64x4 Vqq,Mdq (66),(ev) -# Skip 0x5c-0x61 +5c: TDPBF16PS Vt,Wt,Ht (F3),(v1) +# Skip 0x5d +5e: TDPBSSD Vt,Wt,Ht (F2),(v1) | TDPBSUD Vt,Wt,Ht (F3),(v1) | TDPBUSD Vt,Wt,Ht (66),(v1) | TDPBUUD Vt,Wt,Ht (v1) +# Skip 0x5f-0x61 62: vpexpandb/w Vx,Wx (66),(ev) 63: vpcompressb/w Wx,Vx (66),(ev) 64: vpblendmd/q Vx,Hx,Wx (66),(ev) @@ -822,9 +828,9 @@ AVXcode: 3 05: vpermilpd Vx,Wx,Ib (66),(v) 06: vperm2f128 Vqq,Hqq,Wqq,Ib (66),(v) 07: -08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) +08: vroundps Vx,Wx,Ib (66) | vrndscaleps Vx,Wx,Ib (66),(evo) | vrndscaleph Vx,Wx,Ib (evo) 09: vroundpd Vx,Wx,Ib (66) | vrndscalepd Vx,Wx,Ib (66),(evo) -0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) +0a: vroundss Vss,Wss,Ib (66),(v1) | vrndscaless Vx,Hx,Wx,Ib (66),(evo) | vrndscalesh Vx,Hx,Wx,Ib (evo) 0b: vroundsd Vsd,Wsd,Ib (66),(v1) | vrndscalesd Vx,Hx,Wx,Ib (66),(evo) 0c: vblendps Vx,Hx,Wx,Ib (66) 0d: vblendpd Vx,Hx,Wx,Ib (66) @@ -846,8 +852,8 @@ AVXcode: 3 22: vpinsrd/q Vdq,Hdq,Ey,Ib (66),(v1) 23: vshuff32x4/64x2 Vx,Hx,Wx,Ib (66),(ev) 25: vpternlogd/q Vx,Hx,Wx,Ib (66),(ev) -26: vgetmantps/d Vx,Wx,Ib (66),(ev) -27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) +26: vgetmantps/d Vx,Wx,Ib (66),(ev) | vgetmantph Vx,Wx,Ib (ev) +27: vgetmantss/d Vx,Hx,Wx,Ib (66),(ev) | vgetmantsh Vx,Hx,Wx,Ib (ev) 30: kshiftrb/w Vk,Uk,Ib (66),(v) 31: kshiftrd/q Vk,Uk,Ib (66),(v) 32: kshiftlb/w Vk,Uk,Ib (66),(v) @@ -871,23 +877,102 @@ AVXcode: 3 51: vrangess/d Vx,Hx,Wx,Ib (66),(ev) 54: vfixupimmps/d Vx,Hx,Wx,Ib (66),(ev) 55: vfixupimmss/d Vx,Hx,Wx,Ib (66),(ev) -56: vreduceps/d Vx,Wx,Ib (66),(ev) -57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) +56: vreduceps/d Vx,Wx,Ib (66),(ev) | vreduceph Vx,Wx,Ib (ev) +57: vreducess/d Vx,Hx,Wx,Ib (66),(ev) | vreducesh Vx,Hx,Wx,Ib (ev) 60: vpcmpestrm Vdq,Wdq,Ib (66),(v1) 61: vpcmpestri Vdq,Wdq,Ib (66),(v1) 62: vpcmpistrm Vdq,Wdq,Ib (66),(v1) 63: vpcmpistri Vdq,Wdq,Ib (66),(v1) -66: vfpclassps/d Vk,Wx,Ib (66),(ev) -67: vfpclassss/d Vk,Wx,Ib (66),(ev) +66: vfpclassps/d Vk,Wx,Ib (66),(ev) | vfpclassph Vx,Wx,Ib (ev) +67: vfpclassss/d Vk,Wx,Ib (66),(ev) | vfpclasssh Vx,Wx,Ib (ev) 70: vpshldw Vx,Hx,Wx,Ib (66),(ev) 71: vpshldd/q Vx,Hx,Wx,Ib (66),(ev) 72: vpshrdw Vx,Hx,Wx,Ib (66),(ev) 73: vpshrdd/q Vx,Hx,Wx,Ib (66),(ev) +c2: vcmpph Vx,Hx,Wx,Ib (ev) | vcmpsh Vx,Hx,Wx,Ib (F3),(ev) cc: sha1rnds4 Vdq,Wdq,Ib ce: vgf2p8affineqb Vx,Wx,Ib (66) cf: vgf2p8affineinvqb Vx,Wx,Ib (66) df: VAESKEYGEN Vdq,Wdq,Ib (66),(v1) -f0: RORX Gy,Ey,Ib (F2),(v) +f0: RORX Gy,Ey,Ib (F2),(v) | HRESET Gv,Ib (F3),(000),(11B) +EndTable + +Table: EVEX map 5 +Referrer: +AVXcode: 5 +10: vmovsh Vx,Hx,Wx (F3),(ev) | vmovsh Vx,Wx (F3),(ev) +11: vmovsh Wx,Hx,Vx (F3),(ev) | vmovsh Wx,Vx (F3),(ev) +1d: vcvtps2phx Vx,Wx (66),(ev) | vcvtss2sh Vx,Hx,Wx (ev) +2a: vcvtsi2sh Vx,Hx,Wx (F3),(ev) +2c: vcvttsh2si Vx,Wx (F3),(ev) +2d: vcvtsh2si Vx,Wx (F3),(ev) +2e: vucomish Vx,Wx (ev) +2f: vcomish Vx,Wx (ev) +51: vsqrtph Vx,Wx (ev) | vsqrtsh Vx,Hx,Wx (F3),(ev) +58: vaddph Vx,Hx,Wx (ev) | vaddsh Vx,Hx,Wx (F3),(ev) +59: vmulph Vx,Hx,Wx (ev) | vmulsh Vx,Hx,Wx (F3),(ev) +5a: vcvtpd2ph Vx,Wx (66),(ev) | vcvtph2pd Vx,Wx (ev) | vcvtsd2sh Vx,Hx,Wx (F2),(ev) | vcvtsh2sd Vx,Hx,Wx (F3),(ev) +5b: vcvtdq2ph Vx,Wx (ev) | vcvtph2dq Vx,Wx (66),(ev) | vcvtqq2ph Vx,Wx (ev) | vcvttph2dq Vx,Wx (F3),(ev) +5c: vsubph Vx,Hx,Wx (ev) | vsubsh Vx,Hx,Wx (F3),(ev) +5d: vminph Vx,Hx,Wx (ev) | vminsh Vx,Hx,Wx (F3),(ev) +5e: vdivph Vx,Hx,Wx (ev) | vdivsh Vx,Hx,Wx (F3),(ev) +5f: vmaxph Vx,Hx,Wx (ev) | vmaxsh Vx,Hx,Wx (F3),(ev) +6e: vmovw Vx,Wx (66),(ev) +78: vcvttph2udq Vx,Wx (ev) | vcvttph2uqq Vx,Wx (66),(ev) | vcvttsh2usi Vx,Wx (F3),(ev) +79: vcvtph2udq Vx,Wx (ev) | vcvtph2uqq Vx,Wx (66),(ev) | vcvtsh2usi Vx,Wx (F3),(ev) +7a: vcvttph2qq Vx,Wx (66),(ev) | vcvtudq2ph Vx,Wx (F2),(ev) | vcvtuqq2ph Vx,Wx (F2),(ev) +7b: vcvtph2qq Vx,Wx (66),(ev) | vcvtusi2sh Vx,Hx,Wx (F3),(ev) +7c: vcvttph2uw Vx,Wx (ev) | vcvttph2w Vx,Wx (66),(ev) +7d: vcvtph2uw Vx,Wx (ev) | vcvtph2w Vx,Wx (66),(ev) | vcvtuw2ph Vx,Wx (F2),(ev) | vcvtw2ph Vx,Wx (F3),(ev) +7e: vmovw Wx,Vx (66),(ev) +EndTable + +Table: EVEX map 6 +Referrer: +AVXcode: 6 +13: vcvtph2psx Vx,Wx (66),(ev) | vcvtsh2ss Vx,Hx,Wx (ev) +2c: vscalefph Vx,Hx,Wx (66),(ev) +2d: vscalefsh Vx,Hx,Wx (66),(ev) +42: vgetexpph Vx,Wx (66),(ev) +43: vgetexpsh Vx,Hx,Wx (66),(ev) +4c: vrcpph Vx,Wx (66),(ev) +4d: vrcpsh Vx,Hx,Wx (66),(ev) +4e: vrsqrtph Vx,Wx (66),(ev) +4f: vrsqrtsh Vx,Hx,Wx (66),(ev) +56: vfcmaddcph Vx,Hx,Wx (F2),(ev) | vfmaddcph Vx,Hx,Wx (F3),(ev) +57: vfcmaddcsh Vx,Hx,Wx (F2),(ev) | vfmaddcsh Vx,Hx,Wx (F3),(ev) +96: vfmaddsub132ph Vx,Hx,Wx (66),(ev) +97: vfmsubadd132ph Vx,Hx,Wx (66),(ev) +98: vfmadd132ph Vx,Hx,Wx (66),(ev) +99: vfmadd132sh Vx,Hx,Wx (66),(ev) +9a: vfmsub132ph Vx,Hx,Wx (66),(ev) +9b: vfmsub132sh Vx,Hx,Wx (66),(ev) +9c: vfnmadd132ph Vx,Hx,Wx (66),(ev) +9d: vfnmadd132sh Vx,Hx,Wx (66),(ev) +9e: vfnmsub132ph Vx,Hx,Wx (66),(ev) +9f: vfnmsub132sh Vx,Hx,Wx (66),(ev) +a6: vfmaddsub213ph Vx,Hx,Wx (66),(ev) +a7: vfmsubadd213ph Vx,Hx,Wx (66),(ev) +a8: vfmadd213ph Vx,Hx,Wx (66),(ev) +a9: vfmadd213sh Vx,Hx,Wx (66),(ev) +aa: vfmsub213ph Vx,Hx,Wx (66),(ev) +ab: vfmsub213sh Vx,Hx,Wx (66),(ev) +ac: vfnmadd213ph Vx,Hx,Wx (66),(ev) +ad: vfnmadd213sh Vx,Hx,Wx (66),(ev) +ae: vfnmsub213ph Vx,Hx,Wx (66),(ev) +af: vfnmsub213sh Vx,Hx,Wx (66),(ev) +b6: vfmaddsub231ph Vx,Hx,Wx (66),(ev) +b7: vfmsubadd231ph Vx,Hx,Wx (66),(ev) +b8: vfmadd231ph Vx,Hx,Wx (66),(ev) +b9: vfmadd231sh Vx,Hx,Wx (66),(ev) +ba: vfmsub231ph Vx,Hx,Wx (66),(ev) +bb: vfmsub231sh Vx,Hx,Wx (66),(ev) +bc: vfnmadd231ph Vx,Hx,Wx (66),(ev) +bd: vfnmadd231sh Vx,Hx,Wx (66),(ev) +be: vfnmsub231ph Vx,Hx,Wx (66),(ev) +bf: vfnmsub231sh Vx,Hx,Wx (66),(ev) +d6: vfcmulcph Vx,Hx,Wx (F2),(ev) | vfmulcph Vx,Hx,Wx (F3),(ev) +d7: vfcmulcsh Vx,Hx,Wx (F2),(ev) | vfmulcsh Vx,Hx,Wx (F3),(ev) EndTable GrpTable: Grp1 @@ -970,7 +1055,7 @@ GrpTable: Grp7 2: LGDT Ms | XGETBV (000),(11B) | XSETBV (001),(11B) | VMFUNC (100),(11B) | XEND (101)(11B) | XTEST (110)(11B) | ENCLU (111),(11B) 3: LIDT Ms 4: SMSW Mw/Rv -5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) +5: rdpkru (110),(11B) | wrpkru (111),(11B) | SAVEPREVSSP (F3),(010),(11B) | RSTORSSP Mq (F3) | SETSSBSY (F3),(000),(11B) | CLUI (F3),(110),(11B) | SERIALIZE (000),(11B) | STUI (F3),(111),(11B) | TESTUI (F3)(101)(11B) | UIRET (F3),(100),(11B) | XRESLDTRK (F2),(000),(11B) | XSUSLDTRK (F2),(001),(11B) 6: LMSW Ew 7: INVLPG Mb | SWAPGS (o64),(000),(11B) | RDTSCP (001),(11B) EndTable @@ -987,7 +1072,7 @@ GrpTable: Grp9 3: xrstors 4: xsavec 5: xsaves -6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) +6: VMPTRLD Mq | VMCLEAR Mq (66) | VMXON Mq (F3) | RDRAND Rv (11B) | SENDUIPI Gq (F3) 7: VMPTRST Mq | VMPTRST Mq (F3) | RDSEED Rv (11B) EndTable diff --git a/arch/x86/mm/maccess.c b/arch/x86/mm/maccess.c index 92ec176a7293..5a53c2cc169c 100644 --- a/arch/x86/mm/maccess.c +++ b/arch/x86/mm/maccess.c @@ -4,11 +4,6 @@ #include <linux/kernel.h> #ifdef CONFIG_X86_64 -static __always_inline u64 canonical_address(u64 vaddr, u8 vaddr_bits) -{ - return ((s64)vaddr << (64 - vaddr_bits)) >> (64 - vaddr_bits); -} - bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) { unsigned long vaddr = (unsigned long)unsafe_src; @@ -19,7 +14,7 @@ bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) * we also need to include the userspace guard page. */ return vaddr >= TASK_SIZE_MAX + PAGE_SIZE && - canonical_address(vaddr, boot_cpu_data.x86_virt_bits) == vaddr; + __is_canonical_address(vaddr, boot_cpu_data.x86_virt_bits); } #else bool copy_from_kernel_nofault_allowed(const void *unsafe_src, size_t size) diff --git a/arch/x86/mm/mem_encrypt_amd.c b/arch/x86/mm/mem_encrypt_amd.c index 2b2d018ea345..6169053c2854 100644 --- a/arch/x86/mm/mem_encrypt_amd.c +++ b/arch/x86/mm/mem_encrypt_amd.c @@ -177,25 +177,6 @@ void __init sme_map_bootdata(char *real_mode_data) __sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true); } -void __init sme_early_init(void) -{ - unsigned int i; - - if (!sme_me_mask) - return; - - early_pmd_flags = __sme_set(early_pmd_flags); - - __supported_pte_mask = __sme_set(__supported_pte_mask); - - /* Update the protection map with memory encryption mask */ - for (i = 0; i < ARRAY_SIZE(protection_map); i++) - protection_map[i] = pgprot_encrypted(protection_map[i]); - - if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) - swiotlb_force = SWIOTLB_FORCE; -} - void __init sev_setup_arch(void) { phys_addr_t total_mem = memblock_phys_mem_size(); @@ -256,7 +237,17 @@ static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot) return pfn; } -void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc) +static bool amd_enc_tlb_flush_required(bool enc) +{ + return true; +} + +static bool amd_enc_cache_flush_required(void) +{ + return !cpu_feature_enabled(X86_FEATURE_SME_COHERENT); +} + +static void enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) { #ifdef CONFIG_PARAVIRT unsigned long sz = npages << PAGE_SHIFT; @@ -287,6 +278,19 @@ void notify_range_enc_status_changed(unsigned long vaddr, int npages, bool enc) #endif } +static void amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc) +{ +} + +/* Return true unconditionally: return value doesn't matter for the SEV side */ +static bool amd_enc_status_change_finish(unsigned long vaddr, int npages, bool enc) +{ + if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT)) + enc_dec_hypercall(vaddr, npages, enc); + + return true; +} + static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc) { pgprot_t old_prot, new_prot; @@ -392,7 +396,7 @@ static int __init early_set_memory_enc_dec(unsigned long vaddr, ret = 0; - notify_range_enc_status_changed(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc); + early_set_mem_enc_dec_hypercall(start, PAGE_ALIGN(size) >> PAGE_SHIFT, enc); out: __flush_tlb_all(); return ret; @@ -410,7 +414,31 @@ int __init early_set_memory_encrypted(unsigned long vaddr, unsigned long size) void __init early_set_mem_enc_dec_hypercall(unsigned long vaddr, int npages, bool enc) { - notify_range_enc_status_changed(vaddr, npages, enc); + enc_dec_hypercall(vaddr, npages, enc); +} + +void __init sme_early_init(void) +{ + unsigned int i; + + if (!sme_me_mask) + return; + + early_pmd_flags = __sme_set(early_pmd_flags); + + __supported_pte_mask = __sme_set(__supported_pte_mask); + + /* Update the protection map with memory encryption mask */ + for (i = 0; i < ARRAY_SIZE(protection_map); i++) + protection_map[i] = pgprot_encrypted(protection_map[i]); + + if (cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT)) + swiotlb_force = SWIOTLB_FORCE; + + x86_platform.guest.enc_status_change_prepare = amd_enc_status_change_prepare; + x86_platform.guest.enc_status_change_finish = amd_enc_status_change_finish; + x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required; + x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required; } void __init mem_encrypt_free_decrypted_mem(void) diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c index 3f0abb403340..b43bc24d2bb6 100644 --- a/arch/x86/mm/mem_encrypt_identity.c +++ b/arch/x86/mm/mem_encrypt_identity.c @@ -44,6 +44,7 @@ #include <asm/setup.h> #include <asm/sections.h> #include <asm/cmdline.h> +#include <asm/coco.h> #include "mm_internal.h" @@ -565,8 +566,7 @@ void __init sme_enable(struct boot_params *bp) } else { /* SEV state cannot be controlled by a command line option */ sme_me_mask = me_mask; - physical_mask &= ~sme_me_mask; - return; + goto out; } /* @@ -600,6 +600,10 @@ void __init sme_enable(struct boot_params *bp) sme_me_mask = 0; else sme_me_mask = active_by_default ? me_mask : 0; - - physical_mask &= ~sme_me_mask; +out: + if (sme_me_mask) { + physical_mask &= ~sme_me_mask; + cc_set_vendor(CC_VENDOR_AMD); + cc_set_mask(sme_me_mask); + } } diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c index b4072115c8ef..abf5ed76e4b7 100644 --- a/arch/x86/mm/pat/set_memory.c +++ b/arch/x86/mm/pat/set_memory.c @@ -1989,6 +1989,7 @@ int set_memory_global(unsigned long addr, int numpages) */ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) { + pgprot_t empty = __pgprot(0); struct cpa_data cpa; int ret; @@ -1999,18 +2000,20 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) memset(&cpa, 0, sizeof(cpa)); cpa.vaddr = &addr; cpa.numpages = numpages; - cpa.mask_set = enc ? __pgprot(_PAGE_ENC) : __pgprot(0); - cpa.mask_clr = enc ? __pgprot(0) : __pgprot(_PAGE_ENC); + cpa.mask_set = enc ? pgprot_encrypted(empty) : pgprot_decrypted(empty); + cpa.mask_clr = enc ? pgprot_decrypted(empty) : pgprot_encrypted(empty); cpa.pgd = init_mm.pgd; /* Must avoid aliasing mappings in the highmem code */ kmap_flush_unused(); vm_unmap_aliases(); - /* - * Before changing the encryption attribute, we need to flush caches. - */ - cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT)); + /* Flush the caches as needed before changing the encryption attribute. */ + if (x86_platform.guest.enc_tlb_flush_required(enc)) + cpa_flush(&cpa, x86_platform.guest.enc_cache_flush_required()); + + /* Notify hypervisor that we are about to set/clr encryption attribute. */ + x86_platform.guest.enc_status_change_prepare(addr, numpages, enc); ret = __change_page_attr_set_clr(&cpa, 1); @@ -2023,11 +2026,11 @@ static int __set_memory_enc_pgtable(unsigned long addr, int numpages, bool enc) */ cpa_flush(&cpa, 0); - /* - * Notify hypervisor that a given memory range is mapped encrypted - * or decrypted. - */ - notify_range_enc_status_changed(addr, numpages, enc); + /* Notify hypervisor that we have successfully set/clr encryption attribute. */ + if (!ret) { + if (!x86_platform.guest.enc_status_change_finish(addr, numpages, enc)) + ret = -EIO; + } return ret; } @@ -2121,12 +2124,6 @@ int set_pages_array_wc(struct page **pages, int numpages) } EXPORT_SYMBOL(set_pages_array_wc); -int set_pages_array_wt(struct page **pages, int numpages) -{ - return _set_pages_array(pages, numpages, _PAGE_CACHE_MODE_WT); -} -EXPORT_SYMBOL_GPL(set_pages_array_wt); - int set_pages_wb(struct page *page, int numpages) { unsigned long addr = (unsigned long)page_address(page); diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig index 40d6a06e41c8..ead7e5b3a975 100644 --- a/arch/x86/um/Kconfig +++ b/arch/x86/um/Kconfig @@ -8,6 +8,7 @@ endmenu config UML_X86 def_bool y + select ARCH_BINFMT_ELF_EXTRA_PHDRS if X86_32 config 64BIT bool "64-bit kernel" if "$(SUBARCH)" = "x86" diff --git a/arch/x86/um/user-offsets.c b/arch/x86/um/user-offsets.c index bae61554abcc..e54a9814ccf1 100644 --- a/arch/x86/um/user-offsets.c +++ b/arch/x86/um/user-offsets.c @@ -8,12 +8,11 @@ #define __FRAME_OFFSETS #include <linux/ptrace.h> #include <asm/types.h> +#include <linux/kbuild.h> -#define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) - -#define DEFINE_LONGS(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val/sizeof(unsigned long))) +#define DEFINE_LONGS(sym, val) \ + COMMENT(#val " / sizeof(unsigned long)"); \ + DEFINE(sym, val / sizeof(unsigned long)) void foo(void) { diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig index 8ac599aa6d99..887432327613 100644 --- a/arch/xtensa/Kconfig +++ b/arch/xtensa/Kconfig @@ -3,6 +3,7 @@ config XTENSA def_bool y select ARCH_32BIT_OFF_T select ARCH_HAS_BINFMT_FLAT if !MMU + select ARCH_HAS_CURRENT_STACK_POINTER select ARCH_HAS_DMA_PREP_COHERENT if MMU select ARCH_HAS_SYNC_DMA_FOR_CPU if MMU select ARCH_HAS_SYNC_DMA_FOR_DEVICE if MMU diff --git a/arch/xtensa/include/asm/current.h b/arch/xtensa/include/asm/current.h index 5d98a7ad4251..08010dbf5e09 100644 --- a/arch/xtensa/include/asm/current.h +++ b/arch/xtensa/include/asm/current.h @@ -26,6 +26,8 @@ static inline struct task_struct *get_current(void) #define current get_current() +register unsigned long current_stack_pointer __asm__("a1"); + #else #define GET_CURRENT(reg,sp) \ diff --git a/arch/xtensa/include/asm/stacktrace.h b/arch/xtensa/include/asm/stacktrace.h index fe06e8ed162b..a85e785a6288 100644 --- a/arch/xtensa/include/asm/stacktrace.h +++ b/arch/xtensa/include/asm/stacktrace.h @@ -19,14 +19,14 @@ struct stackframe { static __always_inline unsigned long *stack_pointer(struct task_struct *task) { - unsigned long *sp; + unsigned long sp; if (!task || task == current) - __asm__ __volatile__ ("mov %0, a1\n" : "=a"(sp)); + sp = current_stack_pointer; else - sp = (unsigned long *)task->thread.sp; + sp = task->thread.sp; - return sp; + return (unsigned long *)sp; } void walk_stackframe(unsigned long *sp, diff --git a/arch/xtensa/kernel/irq.c b/arch/xtensa/kernel/irq.c index 15051a8a1539..529fe9245821 100644 --- a/arch/xtensa/kernel/irq.c +++ b/arch/xtensa/kernel/irq.c @@ -36,9 +36,8 @@ asmlinkage void do_IRQ(int hwirq, struct pt_regs *regs) #ifdef CONFIG_DEBUG_STACKOVERFLOW /* Debugging check for stack overflow: is there less than 1KB free? */ { - unsigned long sp; + unsigned long sp = current_stack_pointer; - __asm__ __volatile__ ("mov %0, a1\n" : "=a" (sp)); sp &= THREAD_SIZE - 1; if (unlikely(sp < (sizeof(thread_info) + 1024))) diff --git a/arch/xtensa/platforms/iss/simdisk.c b/arch/xtensa/platforms/iss/simdisk.c index 8eb6ad1a3a1d..0f0e0724397f 100644 --- a/arch/xtensa/platforms/iss/simdisk.c +++ b/arch/xtensa/platforms/iss/simdisk.c @@ -108,13 +108,13 @@ static void simdisk_submit_bio(struct bio *bio) sector_t sector = bio->bi_iter.bi_sector; bio_for_each_segment(bvec, bio, iter) { - char *buffer = kmap_atomic(bvec.bv_page) + bvec.bv_offset; + char *buffer = bvec_kmap_local(&bvec); unsigned len = bvec.bv_len >> SECTOR_SHIFT; simdisk_transfer(dev, sector, len, buffer, bio_data_dir(bio) == WRITE); sector += len; - kunmap_atomic(buffer); + kunmap_local(buffer); } bio_endio(bio); |