diff options
Diffstat (limited to 'arch/mips')
38 files changed, 2215 insertions, 127 deletions
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 827bbda105f3..ed8e28b0fb3e 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -74,6 +74,7 @@ config MIPS select HAVE_PERF_EVENTS select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RSEQ + select HAVE_SPARSE_SYSCALL_NR select HAVE_STACKPROTECTOR select HAVE_SYSCALL_TRACEPOINTS select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP @@ -1193,9 +1194,9 @@ config DMA_NONCOHERENT select ARCH_HAS_DMA_WRITE_COMBINE select ARCH_HAS_SYNC_DMA_FOR_DEVICE select ARCH_HAS_UNCACHED_SEGMENT - select NEED_DMA_MAP_STATE - select ARCH_HAS_DMA_COHERENT_TO_PFN + select DMA_NONCOHERENT_MMAP select DMA_NONCOHERENT_CACHE_SYNC + select NEED_DMA_MAP_STATE config SYS_HAS_EARLY_PRINTK bool diff --git a/arch/mips/Makefile b/arch/mips/Makefile index 0a5eab626260..e1c44aed8156 100644 --- a/arch/mips/Makefile +++ b/arch/mips/Makefile @@ -326,7 +326,7 @@ libs-$(CONFIG_MIPS_FP_SUPPORT) += arch/mips/math-emu/ # See arch/mips/Kbuild for content of core part of the kernel core-y += arch/mips/ -drivers-$(CONFIG_MIPS_CRC_SUPPORT) += arch/mips/crypto/ +drivers-y += arch/mips/crypto/ drivers-$(CONFIG_OPROFILE) += arch/mips/oprofile/ # suspend and hibernation support diff --git a/arch/mips/bmips/dma.c b/arch/mips/bmips/dma.c index 3d13c77c125f..df56bf4179e3 100644 --- a/arch/mips/bmips/dma.c +++ b/arch/mips/bmips/dma.c @@ -64,7 +64,7 @@ phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t dma_addr) return dma_addr; } -void arch_sync_dma_for_cpu_all(struct device *dev) +void arch_sync_dma_for_cpu_all(void) { void __iomem *cbr = BMIPS_GET_CBR(); u32 cfg; diff --git a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c index ba8f82a29a81..e794b2d53adf 100644 --- a/arch/mips/cavium-octeon/executive/cvmx-bootmem.c +++ b/arch/mips/cavium-octeon/executive/cvmx-bootmem.c @@ -45,13 +45,6 @@ static struct cvmx_bootmem_desc *cvmx_bootmem_desc; /* See header file for descriptions of functions */ /** - * This 
macro returns the size of a member of a structure. - * Logically it is the same as "sizeof(s::field)" in C++, but - * C lacks the "::" operator. - */ -#define SIZEOF_FIELD(s, field) sizeof(((s *)NULL)->field) - -/** * This macro returns a member of the * cvmx_bootmem_named_block_desc_t structure. These members can't * be directly addressed as they might be in memory not directly @@ -65,7 +58,7 @@ static struct cvmx_bootmem_desc *cvmx_bootmem_desc; #define CVMX_BOOTMEM_NAMED_GET_FIELD(addr, field) \ __cvmx_bootmem_desc_get(addr, \ offsetof(struct cvmx_bootmem_named_block_desc, field), \ - SIZEOF_FIELD(struct cvmx_bootmem_named_block_desc, field)) + sizeof_field(struct cvmx_bootmem_named_block_desc, field)) /** * This function is the implementation of the get macros defined diff --git a/arch/mips/configs/ci20_defconfig b/arch/mips/configs/ci20_defconfig index cb4aa23a2bf4..be41df2a81fb 100644 --- a/arch/mips/configs/ci20_defconfig +++ b/arch/mips/configs/ci20_defconfig @@ -17,7 +17,6 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/mips/configs/loongson3_defconfig b/arch/mips/configs/loongson3_defconfig index c16a2330e84d..360c6b2d397a 100644 --- a/arch/mips/configs/loongson3_defconfig +++ b/arch/mips/configs/loongson3_defconfig @@ -20,7 +20,6 @@ CONFIG_SCHED_AUTOGROUP=y CONFIG_SYSFS_DEPRECATED=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y -CONFIG_SYSCTL_SYSCALL=y CONFIG_EMBEDDED=y CONFIG_MACH_LOONGSON64=y CONFIG_SMP=y diff --git a/arch/mips/configs/malta_qemu_32r6_defconfig b/arch/mips/configs/malta_qemu_32r6_defconfig index e6c600dc1814..614af02d83e6 100644 --- a/arch/mips/configs/malta_qemu_32r6_defconfig +++ b/arch/mips/configs/malta_qemu_32r6_defconfig @@ -5,7 +5,6 @@ CONFIG_NO_HZ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 -CONFIG_SYSCTL_SYSCALL=y CONFIG_EMBEDDED=y CONFIG_SLAB=y 
CONFIG_MIPS_MALTA=y diff --git a/arch/mips/configs/maltaaprp_defconfig b/arch/mips/configs/maltaaprp_defconfig index 82b44b774553..9c051f8fd330 100644 --- a/arch/mips/configs/maltaaprp_defconfig +++ b/arch/mips/configs/maltaaprp_defconfig @@ -5,7 +5,6 @@ CONFIG_AUDIT=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 -CONFIG_SYSCTL_SYSCALL=y CONFIG_EMBEDDED=y CONFIG_SLAB=y CONFIG_MIPS_MALTA=y diff --git a/arch/mips/configs/maltasmvp_defconfig b/arch/mips/configs/maltasmvp_defconfig index 4190fc6189a0..2e90d97551d6 100644 --- a/arch/mips/configs/maltasmvp_defconfig +++ b/arch/mips/configs/maltasmvp_defconfig @@ -5,7 +5,6 @@ CONFIG_NO_HZ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 -CONFIG_SYSCTL_SYSCALL=y CONFIG_EMBEDDED=y CONFIG_SLAB=y CONFIG_MIPS_MALTA=y diff --git a/arch/mips/configs/maltasmvp_eva_defconfig b/arch/mips/configs/maltasmvp_eva_defconfig index a13c10e910ec..d1f7fdb27284 100644 --- a/arch/mips/configs/maltasmvp_eva_defconfig +++ b/arch/mips/configs/maltasmvp_eva_defconfig @@ -5,7 +5,6 @@ CONFIG_NO_HZ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 -CONFIG_SYSCTL_SYSCALL=y CONFIG_EMBEDDED=y CONFIG_SLAB=y CONFIG_MIPS_MALTA=y diff --git a/arch/mips/configs/maltaup_defconfig b/arch/mips/configs/maltaup_defconfig index b35f1fc690fb..48e5bd492452 100644 --- a/arch/mips/configs/maltaup_defconfig +++ b/arch/mips/configs/maltaup_defconfig @@ -6,7 +6,6 @@ CONFIG_NO_HZ=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y CONFIG_LOG_BUF_SHIFT=15 -CONFIG_SYSCTL_SYSCALL=y CONFIG_EMBEDDED=y CONFIG_SLAB=y CONFIG_MIPS_MALTA=y diff --git a/arch/mips/configs/omega2p_defconfig b/arch/mips/configs/omega2p_defconfig index a39426e57e91..fc39ddf610a9 100644 --- a/arch/mips/configs/omega2p_defconfig +++ b/arch/mips/configs/omega2p_defconfig @@ -16,7 +16,6 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y # 
CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/mips/configs/qi_lb60_defconfig b/arch/mips/configs/qi_lb60_defconfig index d3f4d5248d9f..97c9a69d1528 100644 --- a/arch/mips/configs/qi_lb60_defconfig +++ b/arch/mips/configs/qi_lb60_defconfig @@ -2,7 +2,6 @@ CONFIG_SYSVIPC=y # CONFIG_CROSS_MEMORY_ATTACH is not set CONFIG_LOG_BUF_SHIFT=14 -CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/mips/configs/vocore2_defconfig b/arch/mips/configs/vocore2_defconfig index 523b944fd527..a14f8ea5c386 100644 --- a/arch/mips/configs/vocore2_defconfig +++ b/arch/mips/configs/vocore2_defconfig @@ -16,7 +16,6 @@ CONFIG_CGROUP_CPUACCT=y CONFIG_NAMESPACES=y CONFIG_USER_NS=y CONFIG_CC_OPTIMIZE_FOR_SIZE=y -CONFIG_SYSCTL_SYSCALL=y CONFIG_KALLSYMS_ALL=y CONFIG_EMBEDDED=y # CONFIG_VM_EVENT_COUNTERS is not set diff --git a/arch/mips/crypto/Makefile b/arch/mips/crypto/Makefile index e07aca572c2e..8e1deaf00e0c 100644 --- a/arch/mips/crypto/Makefile +++ b/arch/mips/crypto/Makefile @@ -4,3 +4,21 @@ # obj-$(CONFIG_CRYPTO_CRC32_MIPS) += crc32-mips.o + +obj-$(CONFIG_CRYPTO_CHACHA_MIPS) += chacha-mips.o +chacha-mips-y := chacha-core.o chacha-glue.o +AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots + +obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o +poly1305-mips-y := poly1305-core.o poly1305-glue.o + +perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32 +perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64 + +quiet_cmd_perlasm = PERLASM $@ + cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@) + +$(obj)/poly1305-core.S: $(src)/poly1305-mips.pl FORCE + $(call if_changed,perlasm) + +targets += poly1305-core.S diff --git a/arch/mips/crypto/chacha-core.S b/arch/mips/crypto/chacha-core.S new file mode 100644 index 000000000000..5755f69cfe00 --- /dev/null +++ b/arch/mips/crypto/chacha-core.S @@ -0,0 +1,497 @@ +/* SPDX-License-Identifier: GPL-2.0 OR MIT */ +/* + * Copyright (C) 2016-2018 René van Dorst <opensource@vdorst.com>. 
All Rights Reserved. + * Copyright (C) 2015-2019 Jason A. Donenfeld <Jason@zx2c4.com>. All Rights Reserved. + */ + +#define MASK_U32 0x3c +#define CHACHA20_BLOCK_SIZE 64 +#define STACK_SIZE 32 + +#define X0 $t0 +#define X1 $t1 +#define X2 $t2 +#define X3 $t3 +#define X4 $t4 +#define X5 $t5 +#define X6 $t6 +#define X7 $t7 +#define X8 $t8 +#define X9 $t9 +#define X10 $v1 +#define X11 $s6 +#define X12 $s5 +#define X13 $s4 +#define X14 $s3 +#define X15 $s2 +/* Use regs which are overwritten on exit for Tx so we don't leak clear data. */ +#define T0 $s1 +#define T1 $s0 +#define T(n) T ## n +#define X(n) X ## n + +/* Input arguments */ +#define STATE $a0 +#define OUT $a1 +#define IN $a2 +#define BYTES $a3 + +/* Output argument */ +/* NONCE[0] is kept in a register and not in memory. + * We don't want to touch original value in memory. + * Must be incremented every loop iteration. + */ +#define NONCE_0 $v0 + +/* SAVED_X and SAVED_CA are set in the jump table. + * Use regs which are overwritten on exit so we don't leak clear data. + * They are used to handle the last bytes which are not a multiple of 4. 
+ */ +#define SAVED_X X15 +#define SAVED_CA $s7 + +#define IS_UNALIGNED $s7 + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define MSB 0 +#define LSB 3 +#define ROTx rotl +#define ROTR(n) rotr n, 24 +#define CPU_TO_LE32(n) \ + wsbh n; \ + rotr n, 16; +#else +#define MSB 3 +#define LSB 0 +#define ROTx rotr +#define CPU_TO_LE32(n) +#define ROTR(n) +#endif + +#define FOR_EACH_WORD(x) \ + x( 0); \ + x( 1); \ + x( 2); \ + x( 3); \ + x( 4); \ + x( 5); \ + x( 6); \ + x( 7); \ + x( 8); \ + x( 9); \ + x(10); \ + x(11); \ + x(12); \ + x(13); \ + x(14); \ + x(15); + +#define FOR_EACH_WORD_REV(x) \ + x(15); \ + x(14); \ + x(13); \ + x(12); \ + x(11); \ + x(10); \ + x( 9); \ + x( 8); \ + x( 7); \ + x( 6); \ + x( 5); \ + x( 4); \ + x( 3); \ + x( 2); \ + x( 1); \ + x( 0); + +#define PLUS_ONE_0 1 +#define PLUS_ONE_1 2 +#define PLUS_ONE_2 3 +#define PLUS_ONE_3 4 +#define PLUS_ONE_4 5 +#define PLUS_ONE_5 6 +#define PLUS_ONE_6 7 +#define PLUS_ONE_7 8 +#define PLUS_ONE_8 9 +#define PLUS_ONE_9 10 +#define PLUS_ONE_10 11 +#define PLUS_ONE_11 12 +#define PLUS_ONE_12 13 +#define PLUS_ONE_13 14 +#define PLUS_ONE_14 15 +#define PLUS_ONE_15 16 +#define PLUS_ONE(x) PLUS_ONE_ ## x +#define _CONCAT3(a,b,c) a ## b ## c +#define CONCAT3(a,b,c) _CONCAT3(a,b,c) + +#define STORE_UNALIGNED(x) \ +CONCAT3(.Lchacha_mips_xor_unaligned_, PLUS_ONE(x), _b: ;) \ + .if (x != 12); \ + lw T0, (x*4)(STATE); \ + .endif; \ + lwl T1, (x*4)+MSB ## (IN); \ + lwr T1, (x*4)+LSB ## (IN); \ + .if (x == 12); \ + addu X ## x, NONCE_0; \ + .else; \ + addu X ## x, T0; \ + .endif; \ + CPU_TO_LE32(X ## x); \ + xor X ## x, T1; \ + swl X ## x, (x*4)+MSB ## (OUT); \ + swr X ## x, (x*4)+LSB ## (OUT); + +#define STORE_ALIGNED(x) \ +CONCAT3(.Lchacha_mips_xor_aligned_, PLUS_ONE(x), _b: ;) \ + .if (x != 12); \ + lw T0, (x*4)(STATE); \ + .endif; \ + lw T1, (x*4) ## (IN); \ + .if (x == 12); \ + addu X ## x, NONCE_0; \ + .else; \ + addu X ## x, T0; \ + .endif; \ + CPU_TO_LE32(X ## x); \ + xor X ## x, T1; \ + sw X ## x, (x*4) ## (OUT); + 
+/* Jump table macro. + * Used for setup and handling the last bytes, which are not multiple of 4. + * X15 is free to store Xn + * Every jumptable entry must be equal in size. + */ +#define JMPTBL_ALIGNED(x) \ +.Lchacha_mips_jmptbl_aligned_ ## x: ; \ + .set noreorder; \ + b .Lchacha_mips_xor_aligned_ ## x ## _b; \ + .if (x == 12); \ + addu SAVED_X, X ## x, NONCE_0; \ + .else; \ + addu SAVED_X, X ## x, SAVED_CA; \ + .endif; \ + .set reorder + +#define JMPTBL_UNALIGNED(x) \ +.Lchacha_mips_jmptbl_unaligned_ ## x: ; \ + .set noreorder; \ + b .Lchacha_mips_xor_unaligned_ ## x ## _b; \ + .if (x == 12); \ + addu SAVED_X, X ## x, NONCE_0; \ + .else; \ + addu SAVED_X, X ## x, SAVED_CA; \ + .endif; \ + .set reorder + +#define AXR(A, B, C, D, K, L, M, N, V, W, Y, Z, S) \ + addu X(A), X(K); \ + addu X(B), X(L); \ + addu X(C), X(M); \ + addu X(D), X(N); \ + xor X(V), X(A); \ + xor X(W), X(B); \ + xor X(Y), X(C); \ + xor X(Z), X(D); \ + rotl X(V), S; \ + rotl X(W), S; \ + rotl X(Y), S; \ + rotl X(Z), S; + +.text +.set reorder +.set noat +.globl chacha_crypt_arch +.ent chacha_crypt_arch +chacha_crypt_arch: + .frame $sp, STACK_SIZE, $ra + + /* Load number of rounds */ + lw $at, 16($sp) + + addiu $sp, -STACK_SIZE + + /* Return bytes = 0. */ + beqz BYTES, .Lchacha_mips_end + + lw NONCE_0, 48(STATE) + + /* Save s0-s7 */ + sw $s0, 0($sp) + sw $s1, 4($sp) + sw $s2, 8($sp) + sw $s3, 12($sp) + sw $s4, 16($sp) + sw $s5, 20($sp) + sw $s6, 24($sp) + sw $s7, 28($sp) + + /* Test IN or OUT is unaligned. 
+ * IS_UNALIGNED = ( IN | OUT ) & 0x00000003 + */ + or IS_UNALIGNED, IN, OUT + andi IS_UNALIGNED, 0x3 + + b .Lchacha_rounds_start + +.align 4 +.Loop_chacha_rounds: + addiu IN, CHACHA20_BLOCK_SIZE + addiu OUT, CHACHA20_BLOCK_SIZE + addiu NONCE_0, 1 + +.Lchacha_rounds_start: + lw X0, 0(STATE) + lw X1, 4(STATE) + lw X2, 8(STATE) + lw X3, 12(STATE) + + lw X4, 16(STATE) + lw X5, 20(STATE) + lw X6, 24(STATE) + lw X7, 28(STATE) + lw X8, 32(STATE) + lw X9, 36(STATE) + lw X10, 40(STATE) + lw X11, 44(STATE) + + move X12, NONCE_0 + lw X13, 52(STATE) + lw X14, 56(STATE) + lw X15, 60(STATE) + +.Loop_chacha_xor_rounds: + addiu $at, -2 + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); + bnez $at, .Loop_chacha_xor_rounds + + addiu BYTES, -(CHACHA20_BLOCK_SIZE) + + /* Is data src/dst unaligned? Jump */ + bnez IS_UNALIGNED, .Loop_chacha_unaligned + + /* Set number rounds here to fill delayslot. */ + lw $at, (STACK_SIZE+16)($sp) + + /* BYTES < 0, it has no full block. */ + bltz BYTES, .Lchacha_mips_no_full_block_aligned + + FOR_EACH_WORD_REV(STORE_ALIGNED) + + /* BYTES > 0? Loop again. */ + bgtz BYTES, .Loop_chacha_rounds + + /* Place this here to fill delay slot */ + addiu NONCE_0, 1 + + /* BYTES < 0? 
Handle last bytes */ + bltz BYTES, .Lchacha_mips_xor_bytes + +.Lchacha_mips_xor_done: + /* Restore used registers */ + lw $s0, 0($sp) + lw $s1, 4($sp) + lw $s2, 8($sp) + lw $s3, 12($sp) + lw $s4, 16($sp) + lw $s5, 20($sp) + lw $s6, 24($sp) + lw $s7, 28($sp) + + /* Write NONCE_0 back to right location in state */ + sw NONCE_0, 48(STATE) + +.Lchacha_mips_end: + addiu $sp, STACK_SIZE + jr $ra + +.Lchacha_mips_no_full_block_aligned: + /* Restore the offset on BYTES */ + addiu BYTES, CHACHA20_BLOCK_SIZE + + /* Get number of full WORDS */ + andi $at, BYTES, MASK_U32 + + /* Load upper half of jump table addr */ + lui T0, %hi(.Lchacha_mips_jmptbl_aligned_0) + + /* Calculate lower half jump table offset */ + ins T0, $at, 1, 6 + + /* Add offset to STATE */ + addu T1, STATE, $at + + /* Add lower half jump table addr */ + addiu T0, %lo(.Lchacha_mips_jmptbl_aligned_0) + + /* Read value from STATE */ + lw SAVED_CA, 0(T1) + + /* Store remaining bytecounter as negative value */ + subu BYTES, $at, BYTES + + jr T0 + + /* Jump table */ + FOR_EACH_WORD(JMPTBL_ALIGNED) + + +.Loop_chacha_unaligned: + /* Set number rounds here to fill delayslot. */ + lw $at, (STACK_SIZE+16)($sp) + + /* BYTES < 0, it has no full block. */ + bltz BYTES, .Lchacha_mips_no_full_block_unaligned + + FOR_EACH_WORD_REV(STORE_UNALIGNED) + + /* BYTES > 0? Loop again. 
*/ + bgtz BYTES, .Loop_chacha_rounds + + /* Write NONCE_0 back to right location in state */ + sw NONCE_0, 48(STATE) + + .set noreorder + /* Fall through to byte handling */ + bgez BYTES, .Lchacha_mips_xor_done +.Lchacha_mips_xor_unaligned_0_b: +.Lchacha_mips_xor_aligned_0_b: + /* Place this here to fill delay slot */ + addiu NONCE_0, 1 + .set reorder + +.Lchacha_mips_xor_bytes: + addu IN, $at + addu OUT, $at + /* First byte */ + lbu T1, 0(IN) + addiu $at, BYTES, 1 + CPU_TO_LE32(SAVED_X) + ROTR(SAVED_X) + xor T1, SAVED_X + sb T1, 0(OUT) + beqz $at, .Lchacha_mips_xor_done + /* Second byte */ + lbu T1, 1(IN) + addiu $at, BYTES, 2 + ROTx SAVED_X, 8 + xor T1, SAVED_X + sb T1, 1(OUT) + beqz $at, .Lchacha_mips_xor_done + /* Third byte */ + lbu T1, 2(IN) + ROTx SAVED_X, 8 + xor T1, SAVED_X + sb T1, 2(OUT) + b .Lchacha_mips_xor_done + +.Lchacha_mips_no_full_block_unaligned: + /* Restore the offset on BYTES */ + addiu BYTES, CHACHA20_BLOCK_SIZE + + /* Get number of full WORDS */ + andi $at, BYTES, MASK_U32 + + /* Load upper half of jump table addr */ + lui T0, %hi(.Lchacha_mips_jmptbl_unaligned_0) + + /* Calculate lower half jump table offset */ + ins T0, $at, 1, 6 + + /* Add offset to STATE */ + addu T1, STATE, $at + + /* Add lower half jump table addr */ + addiu T0, %lo(.Lchacha_mips_jmptbl_unaligned_0) + + /* Read value from STATE */ + lw SAVED_CA, 0(T1) + + /* Store remaining bytecounter as negative value */ + subu BYTES, $at, BYTES + + jr T0 + + /* Jump table */ + FOR_EACH_WORD(JMPTBL_UNALIGNED) +.end chacha_crypt_arch +.set at + +/* Input arguments + * STATE $a0 + * OUT $a1 + * NROUND $a2 + */ + +#undef X12 +#undef X13 +#undef X14 +#undef X15 + +#define X12 $a3 +#define X13 $at +#define X14 $v0 +#define X15 STATE + +.set noat +.globl hchacha_block_arch +.ent hchacha_block_arch +hchacha_block_arch: + .frame $sp, STACK_SIZE, $ra + + addiu $sp, -STACK_SIZE + + /* Save X11(s6) */ + sw X11, 0($sp) + + lw X0, 0(STATE) + lw X1, 4(STATE) + lw X2, 8(STATE) + lw X3, 12(STATE) + 
lw X4, 16(STATE) + lw X5, 20(STATE) + lw X6, 24(STATE) + lw X7, 28(STATE) + lw X8, 32(STATE) + lw X9, 36(STATE) + lw X10, 40(STATE) + lw X11, 44(STATE) + lw X12, 48(STATE) + lw X13, 52(STATE) + lw X14, 56(STATE) + lw X15, 60(STATE) + +.Loop_hchacha_xor_rounds: + addiu $a2, -2 + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 16); + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 12); + AXR( 0, 1, 2, 3, 4, 5, 6, 7, 12,13,14,15, 8); + AXR( 8, 9,10,11, 12,13,14,15, 4, 5, 6, 7, 7); + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 16); + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 12); + AXR( 0, 1, 2, 3, 5, 6, 7, 4, 15,12,13,14, 8); + AXR(10,11, 8, 9, 15,12,13,14, 5, 6, 7, 4, 7); + bnez $a2, .Loop_hchacha_xor_rounds + + /* Restore used register */ + lw X11, 0($sp) + + sw X0, 0(OUT) + sw X1, 4(OUT) + sw X2, 8(OUT) + sw X3, 12(OUT) + sw X12, 16(OUT) + sw X13, 20(OUT) + sw X14, 24(OUT) + sw X15, 28(OUT) + + addiu $sp, STACK_SIZE + jr $ra +.end hchacha_block_arch +.set at diff --git a/arch/mips/crypto/chacha-glue.c b/arch/mips/crypto/chacha-glue.c new file mode 100644 index 000000000000..d1fd23e6ef84 --- /dev/null +++ b/arch/mips/crypto/chacha-glue.c @@ -0,0 +1,152 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * MIPS accelerated ChaCha and XChaCha stream ciphers, + * including ChaCha20 (RFC7539) + * + * Copyright (C) 2019 Linaro, Ltd. 
<ard.biesheuvel@linaro.org> + */ + +#include <asm/byteorder.h> +#include <crypto/algapi.h> +#include <crypto/internal/chacha.h> +#include <crypto/internal/skcipher.h> +#include <linux/kernel.h> +#include <linux/module.h> + /* Block routines defined in chacha-core.S; declared and exported here. */ +asmlinkage void chacha_crypt_arch(u32 *state, u8 *dst, const u8 *src, + unsigned int bytes, int nrounds); +EXPORT_SYMBOL(chacha_crypt_arch); + +asmlinkage void hchacha_block_arch(const u32 *state, u32 *stream, int nrounds); +EXPORT_SYMBOL(hchacha_block_arch); + /* Arch entry point for state setup: forwards unchanged to chacha_init_generic(). */ +void chacha_init_arch(u32 *state, const u32 *key, const u8 *iv) +{ + chacha_init_generic(state, key, iv); +} +EXPORT_SYMBOL(chacha_init_arch); + /* Walk the request with skcipher_walk_virt() and run chacha_crypt() over each chunk, using a local 16-word state built from ctx->key and iv. A chunk that is smaller than the remaining total is rounded down to walk.stride and the unprocessed bytes are handed back through skcipher_walk_done(). Returns the status of the last walk call. */ +static int chacha_mips_stream_xor(struct skcipher_request *req, + const struct chacha_ctx *ctx, const u8 *iv) +{ + struct skcipher_walk walk; + u32 state[16]; + int err; + + err = skcipher_walk_virt(&walk, req, false); + + chacha_init_generic(state, ctx->key, iv); + + while (walk.nbytes > 0) { + unsigned int nbytes = walk.nbytes; + + if (nbytes < walk.total) + nbytes = round_down(nbytes, walk.stride); + + chacha_crypt(state, walk.dst.virt.addr, walk.src.virt.addr, + nbytes, ctx->nrounds); + err = skcipher_walk_done(&walk, walk.nbytes - nbytes); + } + + return err; +} + /* Request handler that uses the transform context and req->iv as they are. */ +static int chacha_mips(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + + return chacha_mips_stream_xor(req, ctx, req->iv); +} + /* Request handler for the extended-nonce variants: hchacha_block() derives a sub-key from the key and req->iv into subctx, a 16-byte IV is assembled from bytes 24..31 of req->iv followed by bytes 16..23, and the regular stream XOR then runs with that sub-context. */ +static int xchacha_mips(struct skcipher_request *req) +{ + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha_ctx *ctx = crypto_skcipher_ctx(tfm); + struct chacha_ctx subctx; + u32 state[16]; + u8 real_iv[16]; + + chacha_init_generic(state, ctx->key, req->iv); + + hchacha_block(state, subctx.key, ctx->nrounds); + subctx.nrounds = ctx->nrounds; + + memcpy(&real_iv[0], req->iv + 24, 8); + memcpy(&real_iv[8], req->iv + 16, 8); + return chacha_mips_stream_xor(req, &subctx, real_iv); +} + +static struct skcipher_alg algs[] = { + { + 
.base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-mips", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = CHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = chacha_mips, + .decrypt = chacha_mips, + }, { + .base.cra_name = "xchacha20", + .base.cra_driver_name = "xchacha20-mips", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha20_setkey, + .encrypt = xchacha_mips, + .decrypt = xchacha_mips, + }, { + .base.cra_name = "xchacha12", + .base.cra_driver_name = "xchacha12-mips", + .base.cra_priority = 200, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha_ctx), + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA_KEY_SIZE, + .max_keysize = CHACHA_KEY_SIZE, + .ivsize = XCHACHA_IV_SIZE, + .chunksize = CHACHA_BLOCK_SIZE, + .setkey = chacha12_setkey, + .encrypt = xchacha_mips, + .decrypt = xchacha_mips, + } +}; + +static int __init chacha_simd_mod_init(void) +{ + return IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER) ? 
+ crypto_register_skciphers(algs, ARRAY_SIZE(algs)) : 0; +} + +static void __exit chacha_simd_mod_fini(void) +{ + if (IS_REACHABLE(CONFIG_CRYPTO_SKCIPHER)) + crypto_unregister_skciphers(algs, ARRAY_SIZE(algs)); +} + +module_init(chacha_simd_mod_init); +module_exit(chacha_simd_mod_fini); + +MODULE_DESCRIPTION("ChaCha and XChaCha stream ciphers (MIPS accelerated)"); +MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>"); +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("chacha20"); +MODULE_ALIAS_CRYPTO("chacha20-mips"); +MODULE_ALIAS_CRYPTO("xchacha20"); +MODULE_ALIAS_CRYPTO("xchacha20-mips"); +MODULE_ALIAS_CRYPTO("xchacha12"); +MODULE_ALIAS_CRYPTO("xchacha12-mips"); diff --git a/arch/mips/crypto/poly1305-glue.c b/arch/mips/crypto/poly1305-glue.c new file mode 100644 index 000000000000..b37d29cf5d0a --- /dev/null +++ b/arch/mips/crypto/poly1305-glue.c @@ -0,0 +1,205 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * OpenSSL/Cryptogams accelerated Poly1305 transform for MIPS + * + * Copyright (C) 2019 Linaro Ltd. 
<ard.biesheuvel@linaro.org> + */ + +#include <asm/unaligned.h> +#include <crypto/algapi.h> +#include <crypto/internal/hash.h> +#include <crypto/internal/poly1305.h> +#include <linux/cpufeature.h> +#include <linux/crypto.h> +#include <linux/module.h> + +asmlinkage void poly1305_init_mips(void *state, const u8 *key); +asmlinkage void poly1305_blocks_mips(void *state, const u8 *src, u32 len, u32 hibit); +asmlinkage void poly1305_emit_mips(void *state, __le32 *digest, const u32 *nonce); + /* Library-style init: hands key to poly1305_init_mips() for the state in dctx->h, loads the four little-endian words at key[16..31] into dctx->s and empties the partial-block buffer. */ +void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key) +{ + poly1305_init_mips(&dctx->h, key); + dctx->s[0] = get_unaligned_le32(key + 16); + dctx->s[1] = get_unaligned_le32(key + 20); + dctx->s[2] = get_unaligned_le32(key + 24); + dctx->s[3] = get_unaligned_le32(key + 28); + dctx->buflen = 0; +} +EXPORT_SYMBOL(poly1305_init_arch); + /* shash init: clears the buffered length and marks both key parts (rset, sset) as not yet seen. Always returns 0. */ +static int mips_poly1305_init(struct shash_desc *desc) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + dctx->buflen = 0; + dctx->rset = 0; + dctx->sset = false; + + return 0; +} + /* Hash whole blocks from src. While the key is incomplete the leading input is consumed as key material: the first block goes to poly1305_init_mips() and the following block fills dctx->s. Only data after that is hashed, with len truncated to a multiple of POLY1305_BLOCK_SIZE. The rset branch subtracts a block from len without checking it; the callers visible in this file pass at least one full block. */ +static void mips_poly1305_blocks(struct poly1305_desc_ctx *dctx, const u8 *src, + u32 len, u32 hibit) +{ + if (unlikely(!dctx->sset)) { + if (!dctx->rset) { + poly1305_init_mips(&dctx->h, src); + src += POLY1305_BLOCK_SIZE; + len -= POLY1305_BLOCK_SIZE; + dctx->rset = 1; + } + if (len >= POLY1305_BLOCK_SIZE) { + dctx->s[0] = get_unaligned_le32(src + 0); + dctx->s[1] = get_unaligned_le32(src + 4); + dctx->s[2] = get_unaligned_le32(src + 8); + dctx->s[3] = get_unaligned_le32(src + 12); + src += POLY1305_BLOCK_SIZE; + len -= POLY1305_BLOCK_SIZE; + dctx->sset = true; + } + if (len < POLY1305_BLOCK_SIZE) + return; + } + + len &= ~(POLY1305_BLOCK_SIZE - 1); + + poly1305_blocks_mips(&dctx->h, src, len, hibit); +} + /* shash update: first tops up and flushes the partial-block buffer, then hashes the whole blocks straight from src, and finally stashes any tail bytes in dctx->buf. Always returns 0. */ +static int mips_poly1305_update(struct shash_desc *desc, const u8 *src, + unsigned int len) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + if (unlikely(dctx->buflen)) { + u32 bytes = min(len, POLY1305_BLOCK_SIZE - 
dctx->buflen); + + memcpy(dctx->buf + dctx->buflen, src, bytes); + src += bytes; + len -= bytes; + dctx->buflen += bytes; + + if (dctx->buflen == POLY1305_BLOCK_SIZE) { + mips_poly1305_blocks(dctx, dctx->buf, POLY1305_BLOCK_SIZE, 1); + dctx->buflen = 0; + } + } + + if (likely(len >= POLY1305_BLOCK_SIZE)) { + mips_poly1305_blocks(dctx, src, len, 1); + src += round_down(len, POLY1305_BLOCK_SIZE); + len %= POLY1305_BLOCK_SIZE; + } + + if (unlikely(len)) { + dctx->buflen = len; + memcpy(dctx->buf, src, len); + } + return 0; +} + /* Library-style update: same buffering scheme as mips_poly1305_update() but calls poly1305_blocks_mips() directly with hibit = 1, without the in-band key handling of mips_poly1305_blocks(). */ +void poly1305_update_arch(struct poly1305_desc_ctx *dctx, const u8 *src, + unsigned int nbytes) +{ + if (unlikely(dctx->buflen)) { + u32 bytes = min(nbytes, POLY1305_BLOCK_SIZE - dctx->buflen); + + memcpy(dctx->buf + dctx->buflen, src, bytes); + src += bytes; + nbytes -= bytes; + dctx->buflen += bytes; + + if (dctx->buflen == POLY1305_BLOCK_SIZE) { + poly1305_blocks_mips(&dctx->h, dctx->buf, + POLY1305_BLOCK_SIZE, 1); + dctx->buflen = 0; + } + } + + if (likely(nbytes >= POLY1305_BLOCK_SIZE)) { + unsigned int len = round_down(nbytes, POLY1305_BLOCK_SIZE); + + poly1305_blocks_mips(&dctx->h, src, len, 1); + src += len; + nbytes %= POLY1305_BLOCK_SIZE; + } + + if (unlikely(nbytes)) { + dctx->buflen = nbytes; + memcpy(dctx->buf, src, nbytes); + } +} +EXPORT_SYMBOL(poly1305_update_arch); + /* Finish the MAC: a pending partial block is terminated with a 0x01 byte, zero padded and hashed with hibit = 0; the tag is then produced by poly1305_emit_mips() and written to dst as four little-endian words. */ +void poly1305_final_arch(struct poly1305_desc_ctx *dctx, u8 *dst) +{ + __le32 digest[4]; + u64 f = 0; + + if (unlikely(dctx->buflen)) { + dctx->buf[dctx->buflen++] = 1; + memset(dctx->buf + dctx->buflen, 0, + POLY1305_BLOCK_SIZE - dctx->buflen); + poly1305_blocks_mips(&dctx->h, dctx->buf, POLY1305_BLOCK_SIZE, 0); + } + + poly1305_emit_mips(&dctx->h, digest, dctx->s); + + /* mac = (h + s) % (2^128). NOTE(review): dctx->s is handed to poly1305_emit_mips() above and nothing is added here; f starts at 0 and each digest word is 32 bits wide, so (f >> 32) is always 0 and these statements only copy digest[] to dst in little-endian byte order. Presumably the addition of s happens inside the emit routine; confirm against the assembly. */ + f = (f >> 32) + le32_to_cpu(digest[0]); + put_unaligned_le32(f, dst); + f = (f >> 32) + le32_to_cpu(digest[1]); + put_unaligned_le32(f, dst + 4); + f = (f >> 32) + le32_to_cpu(digest[2]); + put_unaligned_le32(f, dst + 8); + f = (f >> 32) + le32_to_cpu(digest[3]); + 
put_unaligned_le32(f, dst + 12); + + *dctx = (struct poly1305_desc_ctx){}; +} +EXPORT_SYMBOL(poly1305_final_arch); + +static int mips_poly1305_final(struct shash_desc *desc, u8 *dst) +{ + struct poly1305_desc_ctx *dctx = shash_desc_ctx(desc); + + if (unlikely(!dctx->sset)) + return -ENOKEY; + + poly1305_final_arch(dctx, dst); + return 0; +} + +static struct shash_alg mips_poly1305_alg = { + .init = mips_poly1305_init, + .update = mips_poly1305_update, + .final = mips_poly1305_final, + .digestsize = POLY1305_DIGEST_SIZE, + .descsize = sizeof(struct poly1305_desc_ctx), + + .base.cra_name = "poly1305", + .base.cra_driver_name = "poly1305-mips", + .base.cra_priority = 200, + .base.cra_blocksize = POLY1305_BLOCK_SIZE, + .base.cra_module = THIS_MODULE, +}; + +static int __init mips_poly1305_mod_init(void) +{ + return IS_REACHABLE(CONFIG_CRYPTO_HASH) ? + crypto_register_shash(&mips_poly1305_alg) : 0; +} + +static void __exit mips_poly1305_mod_exit(void) +{ + if (IS_REACHABLE(CONFIG_CRYPTO_HASH)) + crypto_unregister_shash(&mips_poly1305_alg); +} + +module_init(mips_poly1305_mod_init); +module_exit(mips_poly1305_mod_exit); + +MODULE_LICENSE("GPL v2"); +MODULE_ALIAS_CRYPTO("poly1305"); +MODULE_ALIAS_CRYPTO("poly1305-mips"); diff --git a/arch/mips/crypto/poly1305-mips.pl b/arch/mips/crypto/poly1305-mips.pl new file mode 100644 index 000000000000..b05bab884ed2 --- /dev/null +++ b/arch/mips/crypto/poly1305-mips.pl @@ -0,0 +1,1273 @@ +#!/usr/bin/env perl +# SPDX-License-Identifier: GPL-1.0+ OR BSD-3-Clause +# +# ==================================================================== +# Written by Andy Polyakov, @dot-asm, originally for the OpenSSL +# project. +# ==================================================================== + +# Poly1305 hash for MIPS. +# +# May 2016 +# +# Numbers are cycles per processed byte with poly1305_blocks alone. +# +# IALU/gcc +# R1x000 ~5.5/+130% (big-endian) +# Octeon II 2.50/+70% (little-endian) +# +# March 2019 +# +# Add 32-bit code path. 
+# +# October 2019 +# +# Modulo-scheduling reduction allows omitting the dependency chain at the +# end of inner loop and improves performance. Also optimize MIPS32R2 +# code path for MIPS 1004K core. Per René van Dorst's suggestions. +# +# IALU/gcc +# R1x000 ~9.8/? (big-endian) +# Octeon II 3.65/+140% (little-endian) +# MT7621/1004K 4.75/? (little-endian) +# +###################################################################### +# There are a number of MIPS ABIs in use, O32 and N32/64 are most +# widely used. Then there is a new contender: NUBI. It appears that if +# one picks the latter, it's possible to arrange code in ABI neutral +# manner. Therefore let's stick to NUBI register layout: +# +($zero,$at,$t0,$t1,$t2)=map("\$$_",(0..2,24,25)); +($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7,$s8,$s9,$s10,$s11)=map("\$$_",(12..23)); +($gp,$tp,$sp,$fp,$ra)=map("\$$_",(3,28..31)); +# +# The return value is placed in $a0. Following coding rules facilitate +# interoperability: +# +# - never ever touch $tp, "thread pointer", former $gp [o32 can be +# excluded from the rule, because it's specified volatile]; +# - copy return value to $t0, former $v0 [or to $a0 if you're adapting +# old code]; +# - on O32 populate $a4-$a7 with 'lw $aN,4*N($sp)' if necessary; +# +# For reference here is register layout for N32/64 MIPS ABIs: +# +# ($zero,$at,$v0,$v1)=map("\$$_",(0..3)); +# ($a0,$a1,$a2,$a3,$a4,$a5,$a6,$a7)=map("\$$_",(4..11)); +# ($t0,$t1,$t2,$t3,$t8,$t9)=map("\$$_",(12..15,24,25)); +# ($s0,$s1,$s2,$s3,$s4,$s5,$s6,$s7)=map("\$$_",(16..23)); +# ($gp,$sp,$fp,$ra)=map("\$$_",(28..31)); +# +# <appro@openssl.org> +# +###################################################################### + +$flavour = shift || "64"; # supported flavours are o32,n32,64,nubi32,nubi64 + +$v0 = ($flavour =~ /nubi/i) ? 
$a0 : $t0; + +if ($flavour =~ /64|n32/i) {{{ +###################################################################### +# 64-bit code path +# + +my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); +my ($in0,$in1,$tmp0,$tmp1,$tmp2,$tmp3,$tmp4) = ($a4,$a5,$a6,$a7,$at,$t0,$t1); + +$code.=<<___; +#if (defined(_MIPS_ARCH_MIPS64R3) || defined(_MIPS_ARCH_MIPS64R5) || \\ + defined(_MIPS_ARCH_MIPS64R6)) \\ + && !defined(_MIPS_ARCH_MIPS64R2) +# define _MIPS_ARCH_MIPS64R2 +#endif + +#if defined(_MIPS_ARCH_MIPS64R6) +# define dmultu(rs,rt) +# define mflo(rd,rs,rt) dmulu rd,rs,rt +# define mfhi(rd,rs,rt) dmuhu rd,rs,rt +#else +# define dmultu(rs,rt) dmultu rs,rt +# define mflo(rd,rs,rt) mflo rd +# define mfhi(rd,rs,rt) mfhi rd +#endif + +#ifdef __KERNEL__ +# define poly1305_init poly1305_init_mips +# define poly1305_blocks poly1305_blocks_mips +# define poly1305_emit poly1305_emit_mips +#endif + +#if defined(__MIPSEB__) && !defined(MIPSEB) +# define MIPSEB +#endif + +#ifdef MIPSEB +# define MSB 0 +# define LSB 7 +#else +# define MSB 7 +# define LSB 0 +#endif + +.text +.set noat +.set noreorder + +.align 5 +.globl poly1305_init +.ent poly1305_init +poly1305_init: + .frame $sp,0,$ra + .set reorder + + sd $zero,0($ctx) + sd $zero,8($ctx) + sd $zero,16($ctx) + + beqz $inp,.Lno_key + +#if defined(_MIPS_ARCH_MIPS64R6) + andi $tmp0,$inp,7 # $inp % 8 + dsubu $inp,$inp,$tmp0 # align $inp + sll $tmp0,$tmp0,3 # byte to bit offset + ld $in0,0($inp) + ld $in1,8($inp) + beqz $tmp0,.Laligned_key + ld $tmp2,16($inp) + + subu $tmp1,$zero,$tmp0 +# ifdef MIPSEB + dsllv $in0,$in0,$tmp0 + dsrlv $tmp3,$in1,$tmp1 + dsllv $in1,$in1,$tmp0 + dsrlv $tmp2,$tmp2,$tmp1 +# else + dsrlv $in0,$in0,$tmp0 + dsllv $tmp3,$in1,$tmp1 + dsrlv $in1,$in1,$tmp0 + dsllv $tmp2,$tmp2,$tmp1 +# endif + or $in0,$in0,$tmp3 + or $in1,$in1,$tmp2 +.Laligned_key: +#else + ldl $in0,0+MSB($inp) + ldl $in1,8+MSB($inp) + ldr $in0,0+LSB($inp) + ldr $in1,8+LSB($inp) +#endif +#ifdef MIPSEB +# if defined(_MIPS_ARCH_MIPS64R2) + dsbh $in0,$in0 # 
byte swap + dsbh $in1,$in1 + dshd $in0,$in0 + dshd $in1,$in1 +# else + ori $tmp0,$zero,0xFF + dsll $tmp2,$tmp0,32 + or $tmp0,$tmp2 # 0x000000FF000000FF + + and $tmp1,$in0,$tmp0 # byte swap + and $tmp3,$in1,$tmp0 + dsrl $tmp2,$in0,24 + dsrl $tmp4,$in1,24 + dsll $tmp1,24 + dsll $tmp3,24 + and $tmp2,$tmp0 + and $tmp4,$tmp0 + dsll $tmp0,8 # 0x0000FF000000FF00 + or $tmp1,$tmp2 + or $tmp3,$tmp4 + and $tmp2,$in0,$tmp0 + and $tmp4,$in1,$tmp0 + dsrl $in0,8 + dsrl $in1,8 + dsll $tmp2,8 + dsll $tmp4,8 + and $in0,$tmp0 + and $in1,$tmp0 + or $tmp1,$tmp2 + or $tmp3,$tmp4 + or $in0,$tmp1 + or $in1,$tmp3 + dsrl $tmp1,$in0,32 + dsrl $tmp3,$in1,32 + dsll $in0,32 + dsll $in1,32 + or $in0,$tmp1 + or $in1,$tmp3 +# endif +#endif + li $tmp0,1 + dsll $tmp0,32 # 0x0000000100000000 + daddiu $tmp0,-63 # 0x00000000ffffffc1 + dsll $tmp0,28 # 0x0ffffffc10000000 + daddiu $tmp0,-1 # 0x0ffffffc0fffffff + + and $in0,$tmp0 + daddiu $tmp0,-3 # 0x0ffffffc0ffffffc + and $in1,$tmp0 + + sd $in0,24($ctx) + dsrl $tmp0,$in1,2 + sd $in1,32($ctx) + daddu $tmp0,$in1 # s1 = r1 + (r1 >> 2) + sd $tmp0,40($ctx) + +.Lno_key: + li $v0,0 # return 0 + jr $ra +.end poly1305_init +___ +{ +my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 
"0x0003f000" : "0x00030000"; + +my ($h0,$h1,$h2,$r0,$r1,$rs1,$d0,$d1,$d2) = + ($s0,$s1,$s2,$s3,$s4,$s5,$in0,$in1,$t2); +my ($shr,$shl) = ($s6,$s7); # used on R6 + +$code.=<<___; +.align 5 +.globl poly1305_blocks +.ent poly1305_blocks +poly1305_blocks: + .set noreorder + dsrl $len,4 # number of complete blocks + bnez $len,poly1305_blocks_internal + nop + jr $ra + nop +.end poly1305_blocks + +.align 5 +.ent poly1305_blocks_internal +poly1305_blocks_internal: + .set noreorder +#if defined(_MIPS_ARCH_MIPS64R6) + .frame $sp,8*8,$ra + .mask $SAVED_REGS_MASK|0x000c0000,-8 + dsubu $sp,8*8 + sd $s7,56($sp) + sd $s6,48($sp) +#else + .frame $sp,6*8,$ra + .mask $SAVED_REGS_MASK,-8 + dsubu $sp,6*8 +#endif + sd $s5,40($sp) + sd $s4,32($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + sd $s3,24($sp) + sd $s2,16($sp) + sd $s1,8($sp) + sd $s0,0($sp) +___ +$code.=<<___; + .set reorder + +#if defined(_MIPS_ARCH_MIPS64R6) + andi $shr,$inp,7 + dsubu $inp,$inp,$shr # align $inp + sll $shr,$shr,3 # byte to bit offset + subu $shl,$zero,$shr +#endif + + ld $h0,0($ctx) # load hash value + ld $h1,8($ctx) + ld $h2,16($ctx) + + ld $r0,24($ctx) # load key + ld $r1,32($ctx) + ld $rs1,40($ctx) + + dsll $len,4 + daddu $len,$inp # end of buffer + b .Loop + +.align 4 +.Loop: +#if defined(_MIPS_ARCH_MIPS64R6) + ld $in0,0($inp) # load input + ld $in1,8($inp) + beqz $shr,.Laligned_inp + + ld $tmp2,16($inp) +# ifdef MIPSEB + dsllv $in0,$in0,$shr + dsrlv $tmp3,$in1,$shl + dsllv $in1,$in1,$shr + dsrlv $tmp2,$tmp2,$shl +# else + dsrlv $in0,$in0,$shr + dsllv $tmp3,$in1,$shl + dsrlv $in1,$in1,$shr + dsllv $tmp2,$tmp2,$shl +# endif + or $in0,$in0,$tmp3 + or $in1,$in1,$tmp2 +.Laligned_inp: +#else + ldl $in0,0+MSB($inp) # load input + ldl $in1,8+MSB($inp) + ldr $in0,0+LSB($inp) + ldr $in1,8+LSB($inp) +#endif + daddiu $inp,16 +#ifdef MIPSEB +# if defined(_MIPS_ARCH_MIPS64R2) + dsbh $in0,$in0 # byte swap + dsbh $in1,$in1 + dshd $in0,$in0 + dshd $in1,$in1 +# else + ori 
$tmp0,$zero,0xFF + dsll $tmp2,$tmp0,32 + or $tmp0,$tmp2 # 0x000000FF000000FF + + and $tmp1,$in0,$tmp0 # byte swap + and $tmp3,$in1,$tmp0 + dsrl $tmp2,$in0,24 + dsrl $tmp4,$in1,24 + dsll $tmp1,24 + dsll $tmp3,24 + and $tmp2,$tmp0 + and $tmp4,$tmp0 + dsll $tmp0,8 # 0x0000FF000000FF00 + or $tmp1,$tmp2 + or $tmp3,$tmp4 + and $tmp2,$in0,$tmp0 + and $tmp4,$in1,$tmp0 + dsrl $in0,8 + dsrl $in1,8 + dsll $tmp2,8 + dsll $tmp4,8 + and $in0,$tmp0 + and $in1,$tmp0 + or $tmp1,$tmp2 + or $tmp3,$tmp4 + or $in0,$tmp1 + or $in1,$tmp3 + dsrl $tmp1,$in0,32 + dsrl $tmp3,$in1,32 + dsll $in0,32 + dsll $in1,32 + or $in0,$tmp1 + or $in1,$tmp3 +# endif +#endif + dsrl $tmp1,$h2,2 # modulo-scheduled reduction + andi $h2,$h2,3 + dsll $tmp0,$tmp1,2 + + daddu $d0,$h0,$in0 # accumulate input + daddu $tmp1,$tmp0 + sltu $tmp0,$d0,$h0 + daddu $d0,$d0,$tmp1 # ... and residue + sltu $tmp1,$d0,$tmp1 + daddu $d1,$h1,$in1 + daddu $tmp0,$tmp1 + sltu $tmp1,$d1,$h1 + daddu $d1,$tmp0 + + dmultu ($r0,$d0) # h0*r0 + daddu $d2,$h2,$padbit + sltu $tmp0,$d1,$tmp0 + mflo ($h0,$r0,$d0) + mfhi ($h1,$r0,$d0) + + dmultu ($rs1,$d1) # h1*5*r1 + daddu $d2,$tmp1 + daddu $d2,$tmp0 + mflo ($tmp0,$rs1,$d1) + mfhi ($tmp1,$rs1,$d1) + + dmultu ($r1,$d0) # h0*r1 + mflo ($tmp2,$r1,$d0) + mfhi ($h2,$r1,$d0) + daddu $h0,$tmp0 + daddu $h1,$tmp1 + sltu $tmp0,$h0,$tmp0 + + dmultu ($r0,$d1) # h1*r0 + daddu $h1,$tmp0 + daddu $h1,$tmp2 + mflo ($tmp0,$r0,$d1) + mfhi ($tmp1,$r0,$d1) + + dmultu ($rs1,$d2) # h2*5*r1 + sltu $tmp2,$h1,$tmp2 + daddu $h2,$tmp2 + mflo ($tmp2,$rs1,$d2) + + dmultu ($r0,$d2) # h2*r0 + daddu $h1,$tmp0 + daddu $h2,$tmp1 + mflo ($tmp3,$r0,$d2) + sltu $tmp0,$h1,$tmp0 + daddu $h2,$tmp0 + + daddu $h1,$tmp2 + sltu $tmp2,$h1,$tmp2 + daddu $h2,$tmp2 + daddu $h2,$tmp3 + + bne $inp,$len,.Loop + + sd $h0,0($ctx) # store hash value + sd $h1,8($ctx) + sd $h2,16($ctx) + + .set noreorder +#if defined(_MIPS_ARCH_MIPS64R6) + ld $s7,56($sp) + ld $s6,48($sp) +#endif + ld $s5,40($sp) # epilogue + ld $s4,32($sp) +___ +$code.=<<___ if 
($flavour =~ /nubi/i); # optimize non-nubi epilogue + ld $s3,24($sp) + ld $s2,16($sp) + ld $s1,8($sp) + ld $s0,0($sp) +___ +$code.=<<___; + jr $ra +#if defined(_MIPS_ARCH_MIPS64R6) + daddu $sp,8*8 +#else + daddu $sp,6*8 +#endif +.end poly1305_blocks_internal +___ +} +{ +my ($ctx,$mac,$nonce) = ($a0,$a1,$a2); + +$code.=<<___; +.align 5 +.globl poly1305_emit +.ent poly1305_emit +poly1305_emit: + .frame $sp,0,$ra + .set reorder + + ld $tmp2,16($ctx) + ld $tmp0,0($ctx) + ld $tmp1,8($ctx) + + li $in0,-4 # final reduction + dsrl $in1,$tmp2,2 + and $in0,$tmp2 + andi $tmp2,$tmp2,3 + daddu $in0,$in1 + + daddu $tmp0,$tmp0,$in0 + sltu $in1,$tmp0,$in0 + daddiu $in0,$tmp0,5 # compare to modulus + daddu $tmp1,$tmp1,$in1 + sltiu $tmp3,$in0,5 + sltu $tmp4,$tmp1,$in1 + daddu $in1,$tmp1,$tmp3 + daddu $tmp2,$tmp2,$tmp4 + sltu $tmp3,$in1,$tmp3 + daddu $tmp2,$tmp2,$tmp3 + + dsrl $tmp2,2 # see if it carried/borrowed + dsubu $tmp2,$zero,$tmp2 + + xor $in0,$tmp0 + xor $in1,$tmp1 + and $in0,$tmp2 + and $in1,$tmp2 + xor $in0,$tmp0 + xor $in1,$tmp1 + + lwu $tmp0,0($nonce) # load nonce + lwu $tmp1,4($nonce) + lwu $tmp2,8($nonce) + lwu $tmp3,12($nonce) + dsll $tmp1,32 + dsll $tmp3,32 + or $tmp0,$tmp1 + or $tmp2,$tmp3 + + daddu $in0,$tmp0 # accumulate nonce + daddu $in1,$tmp2 + sltu $tmp0,$in0,$tmp0 + daddu $in1,$tmp0 + + dsrl $tmp0,$in0,8 # write mac value + dsrl $tmp1,$in0,16 + dsrl $tmp2,$in0,24 + sb $in0,0($mac) + dsrl $tmp3,$in0,32 + sb $tmp0,1($mac) + dsrl $tmp0,$in0,40 + sb $tmp1,2($mac) + dsrl $tmp1,$in0,48 + sb $tmp2,3($mac) + dsrl $tmp2,$in0,56 + sb $tmp3,4($mac) + dsrl $tmp3,$in1,8 + sb $tmp0,5($mac) + dsrl $tmp0,$in1,16 + sb $tmp1,6($mac) + dsrl $tmp1,$in1,24 + sb $tmp2,7($mac) + + sb $in1,8($mac) + dsrl $tmp2,$in1,32 + sb $tmp3,9($mac) + dsrl $tmp3,$in1,40 + sb $tmp0,10($mac) + dsrl $tmp0,$in1,48 + sb $tmp1,11($mac) + dsrl $tmp1,$in1,56 + sb $tmp2,12($mac) + sb $tmp3,13($mac) + sb $tmp0,14($mac) + sb $tmp1,15($mac) + + jr $ra +.end poly1305_emit +.rdata +.asciiz "Poly1305 for 
MIPS64, CRYPTOGAMS by \@dot-asm" +.align 2 +___ +} +}}} else {{{ +###################################################################### +# 32-bit code path +# + +my ($ctx,$inp,$len,$padbit) = ($a0,$a1,$a2,$a3); +my ($in0,$in1,$in2,$in3,$tmp0,$tmp1,$tmp2,$tmp3) = + ($a4,$a5,$a6,$a7,$at,$t0,$t1,$t2); + +$code.=<<___; +#if (defined(_MIPS_ARCH_MIPS32R3) || defined(_MIPS_ARCH_MIPS32R5) || \\ + defined(_MIPS_ARCH_MIPS32R6)) \\ + && !defined(_MIPS_ARCH_MIPS32R2) +# define _MIPS_ARCH_MIPS32R2 +#endif + +#if defined(_MIPS_ARCH_MIPS32R6) +# define multu(rs,rt) +# define mflo(rd,rs,rt) mulu rd,rs,rt +# define mfhi(rd,rs,rt) muhu rd,rs,rt +#else +# define multu(rs,rt) multu rs,rt +# define mflo(rd,rs,rt) mflo rd +# define mfhi(rd,rs,rt) mfhi rd +#endif + +#ifdef __KERNEL__ +# define poly1305_init poly1305_init_mips +# define poly1305_blocks poly1305_blocks_mips +# define poly1305_emit poly1305_emit_mips +#endif + +#if defined(__MIPSEB__) && !defined(MIPSEB) +# define MIPSEB +#endif + +#ifdef MIPSEB +# define MSB 0 +# define LSB 3 +#else +# define MSB 3 +# define LSB 0 +#endif + +.text +.set noat +.set noreorder + +.align 5 +.globl poly1305_init +.ent poly1305_init +poly1305_init: + .frame $sp,0,$ra + .set reorder + + sw $zero,0($ctx) + sw $zero,4($ctx) + sw $zero,8($ctx) + sw $zero,12($ctx) + sw $zero,16($ctx) + + beqz $inp,.Lno_key + +#if defined(_MIPS_ARCH_MIPS32R6) + andi $tmp0,$inp,3 # $inp % 4 + subu $inp,$inp,$tmp0 # align $inp + sll $tmp0,$tmp0,3 # byte to bit offset + lw $in0,0($inp) + lw $in1,4($inp) + lw $in2,8($inp) + lw $in3,12($inp) + beqz $tmp0,.Laligned_key + + lw $tmp2,16($inp) + subu $tmp1,$zero,$tmp0 +# ifdef MIPSEB + sllv $in0,$in0,$tmp0 + srlv $tmp3,$in1,$tmp1 + sllv $in1,$in1,$tmp0 + or $in0,$in0,$tmp3 + srlv $tmp3,$in2,$tmp1 + sllv $in2,$in2,$tmp0 + or $in1,$in1,$tmp3 + srlv $tmp3,$in3,$tmp1 + sllv $in3,$in3,$tmp0 + or $in2,$in2,$tmp3 + srlv $tmp2,$tmp2,$tmp1 + or $in3,$in3,$tmp2 +# else + srlv $in0,$in0,$tmp0 + sllv $tmp3,$in1,$tmp1 + srlv 
$in1,$in1,$tmp0 + or $in0,$in0,$tmp3 + sllv $tmp3,$in2,$tmp1 + srlv $in2,$in2,$tmp0 + or $in1,$in1,$tmp3 + sllv $tmp3,$in3,$tmp1 + srlv $in3,$in3,$tmp0 + or $in2,$in2,$tmp3 + sllv $tmp2,$tmp2,$tmp1 + or $in3,$in3,$tmp2 +# endif +.Laligned_key: +#else + lwl $in0,0+MSB($inp) + lwl $in1,4+MSB($inp) + lwl $in2,8+MSB($inp) + lwl $in3,12+MSB($inp) + lwr $in0,0+LSB($inp) + lwr $in1,4+LSB($inp) + lwr $in2,8+LSB($inp) + lwr $in3,12+LSB($inp) +#endif +#ifdef MIPSEB +# if defined(_MIPS_ARCH_MIPS32R2) + wsbh $in0,$in0 # byte swap + wsbh $in1,$in1 + wsbh $in2,$in2 + wsbh $in3,$in3 + rotr $in0,$in0,16 + rotr $in1,$in1,16 + rotr $in2,$in2,16 + rotr $in3,$in3,16 +# else + srl $tmp0,$in0,24 # byte swap + srl $tmp1,$in0,8 + andi $tmp2,$in0,0xFF00 + sll $in0,$in0,24 + andi $tmp1,0xFF00 + sll $tmp2,$tmp2,8 + or $in0,$tmp0 + srl $tmp0,$in1,24 + or $tmp1,$tmp2 + srl $tmp2,$in1,8 + or $in0,$tmp1 + andi $tmp1,$in1,0xFF00 + sll $in1,$in1,24 + andi $tmp2,0xFF00 + sll $tmp1,$tmp1,8 + or $in1,$tmp0 + srl $tmp0,$in2,24 + or $tmp2,$tmp1 + srl $tmp1,$in2,8 + or $in1,$tmp2 + andi $tmp2,$in2,0xFF00 + sll $in2,$in2,24 + andi $tmp1,0xFF00 + sll $tmp2,$tmp2,8 + or $in2,$tmp0 + srl $tmp0,$in3,24 + or $tmp1,$tmp2 + srl $tmp2,$in3,8 + or $in2,$tmp1 + andi $tmp1,$in3,0xFF00 + sll $in3,$in3,24 + andi $tmp2,0xFF00 + sll $tmp1,$tmp1,8 + or $in3,$tmp0 + or $tmp2,$tmp1 + or $in3,$tmp2 +# endif +#endif + lui $tmp0,0x0fff + ori $tmp0,0xffff # 0x0fffffff + and $in0,$in0,$tmp0 + subu $tmp0,3 # 0x0ffffffc + and $in1,$in1,$tmp0 + and $in2,$in2,$tmp0 + and $in3,$in3,$tmp0 + + sw $in0,20($ctx) + sw $in1,24($ctx) + sw $in2,28($ctx) + sw $in3,32($ctx) + + srl $tmp1,$in1,2 + srl $tmp2,$in2,2 + srl $tmp3,$in3,2 + addu $in1,$in1,$tmp1 # s1 = r1 + (r1 >> 2) + addu $in2,$in2,$tmp2 + addu $in3,$in3,$tmp3 + sw $in1,36($ctx) + sw $in2,40($ctx) + sw $in3,44($ctx) +.Lno_key: + li $v0,0 + jr $ra +.end poly1305_init +___ +{ +my $SAVED_REGS_MASK = ($flavour =~ /nubi/i) ? 
"0x00fff000" : "0x00ff0000"; + +my ($h0,$h1,$h2,$h3,$h4, $r0,$r1,$r2,$r3, $rs1,$rs2,$rs3) = + ($s0,$s1,$s2,$s3,$s4, $s5,$s6,$s7,$s8, $s9,$s10,$s11); +my ($d0,$d1,$d2,$d3) = + ($a4,$a5,$a6,$a7); +my $shr = $t2; # used on R6 +my $one = $t2; # used on R2 + +$code.=<<___; +.globl poly1305_blocks +.align 5 +.ent poly1305_blocks +poly1305_blocks: + .frame $sp,16*4,$ra + .mask $SAVED_REGS_MASK,-4 + .set noreorder + subu $sp, $sp,4*12 + sw $s11,4*11($sp) + sw $s10,4*10($sp) + sw $s9, 4*9($sp) + sw $s8, 4*8($sp) + sw $s7, 4*7($sp) + sw $s6, 4*6($sp) + sw $s5, 4*5($sp) + sw $s4, 4*4($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi prologue + sw $s3, 4*3($sp) + sw $s2, 4*2($sp) + sw $s1, 4*1($sp) + sw $s0, 4*0($sp) +___ +$code.=<<___; + .set reorder + + srl $len,4 # number of complete blocks + li $one,1 + beqz $len,.Labort + +#if defined(_MIPS_ARCH_MIPS32R6) + andi $shr,$inp,3 + subu $inp,$inp,$shr # align $inp + sll $shr,$shr,3 # byte to bit offset +#endif + + lw $h0,0($ctx) # load hash value + lw $h1,4($ctx) + lw $h2,8($ctx) + lw $h3,12($ctx) + lw $h4,16($ctx) + + lw $r0,20($ctx) # load key + lw $r1,24($ctx) + lw $r2,28($ctx) + lw $r3,32($ctx) + lw $rs1,36($ctx) + lw $rs2,40($ctx) + lw $rs3,44($ctx) + + sll $len,4 + addu $len,$len,$inp # end of buffer + b .Loop + +.align 4 +.Loop: +#if defined(_MIPS_ARCH_MIPS32R6) + lw $d0,0($inp) # load input + lw $d1,4($inp) + lw $d2,8($inp) + lw $d3,12($inp) + beqz $shr,.Laligned_inp + + lw $t0,16($inp) + subu $t1,$zero,$shr +# ifdef MIPSEB + sllv $d0,$d0,$shr + srlv $at,$d1,$t1 + sllv $d1,$d1,$shr + or $d0,$d0,$at + srlv $at,$d2,$t1 + sllv $d2,$d2,$shr + or $d1,$d1,$at + srlv $at,$d3,$t1 + sllv $d3,$d3,$shr + or $d2,$d2,$at + srlv $t0,$t0,$t1 + or $d3,$d3,$t0 +# else + srlv $d0,$d0,$shr + sllv $at,$d1,$t1 + srlv $d1,$d1,$shr + or $d0,$d0,$at + sllv $at,$d2,$t1 + srlv $d2,$d2,$shr + or $d1,$d1,$at + sllv $at,$d3,$t1 + srlv $d3,$d3,$shr + or $d2,$d2,$at + sllv $t0,$t0,$t1 + or $d3,$d3,$t0 +# endif +.Laligned_inp: 
+#else + lwl $d0,0+MSB($inp) # load input + lwl $d1,4+MSB($inp) + lwl $d2,8+MSB($inp) + lwl $d3,12+MSB($inp) + lwr $d0,0+LSB($inp) + lwr $d1,4+LSB($inp) + lwr $d2,8+LSB($inp) + lwr $d3,12+LSB($inp) +#endif +#ifdef MIPSEB +# if defined(_MIPS_ARCH_MIPS32R2) + wsbh $d0,$d0 # byte swap + wsbh $d1,$d1 + wsbh $d2,$d2 + wsbh $d3,$d3 + rotr $d0,$d0,16 + rotr $d1,$d1,16 + rotr $d2,$d2,16 + rotr $d3,$d3,16 +# else + srl $at,$d0,24 # byte swap + srl $t0,$d0,8 + andi $t1,$d0,0xFF00 + sll $d0,$d0,24 + andi $t0,0xFF00 + sll $t1,$t1,8 + or $d0,$at + srl $at,$d1,24 + or $t0,$t1 + srl $t1,$d1,8 + or $d0,$t0 + andi $t0,$d1,0xFF00 + sll $d1,$d1,24 + andi $t1,0xFF00 + sll $t0,$t0,8 + or $d1,$at + srl $at,$d2,24 + or $t1,$t0 + srl $t0,$d2,8 + or $d1,$t1 + andi $t1,$d2,0xFF00 + sll $d2,$d2,24 + andi $t0,0xFF00 + sll $t1,$t1,8 + or $d2,$at + srl $at,$d3,24 + or $t0,$t1 + srl $t1,$d3,8 + or $d2,$t0 + andi $t0,$d3,0xFF00 + sll $d3,$d3,24 + andi $t1,0xFF00 + sll $t0,$t0,8 + or $d3,$at + or $t1,$t0 + or $d3,$t1 +# endif +#endif + srl $t0,$h4,2 # modulo-scheduled reduction + andi $h4,$h4,3 + sll $at,$t0,2 + + addu $d0,$d0,$h0 # accumulate input + addu $t0,$t0,$at + sltu $h0,$d0,$h0 + addu $d0,$d0,$t0 # ... 
and residue + sltu $at,$d0,$t0 + + addu $d1,$d1,$h1 + addu $h0,$h0,$at # carry + sltu $h1,$d1,$h1 + addu $d1,$d1,$h0 + sltu $h0,$d1,$h0 + + addu $d2,$d2,$h2 + addu $h1,$h1,$h0 # carry + sltu $h2,$d2,$h2 + addu $d2,$d2,$h1 + sltu $h1,$d2,$h1 + + addu $d3,$d3,$h3 + addu $h2,$h2,$h1 # carry + sltu $h3,$d3,$h3 + addu $d3,$d3,$h2 + +#if defined(_MIPS_ARCH_MIPS32R2) && !defined(_MIPS_ARCH_MIPS32R6) + multu $r0,$d0 # d0*r0 + sltu $h2,$d3,$h2 + maddu $rs3,$d1 # d1*s3 + addu $h3,$h3,$h2 # carry + maddu $rs2,$d2 # d2*s2 + addu $h4,$h4,$padbit + maddu $rs1,$d3 # d3*s1 + addu $h4,$h4,$h3 + mfhi $at + mflo $h0 + + multu $r1,$d0 # d0*r1 + maddu $r0,$d1 # d1*r0 + maddu $rs3,$d2 # d2*s3 + maddu $rs2,$d3 # d3*s2 + maddu $rs1,$h4 # h4*s1 + maddu $at,$one # hi*1 + mfhi $at + mflo $h1 + + multu $r2,$d0 # d0*r2 + maddu $r1,$d1 # d1*r1 + maddu $r0,$d2 # d2*r0 + maddu $rs3,$d3 # d3*s3 + maddu $rs2,$h4 # h4*s2 + maddu $at,$one # hi*1 + mfhi $at + mflo $h2 + + mul $t0,$r0,$h4 # h4*r0 + + multu $r3,$d0 # d0*r3 + maddu $r2,$d1 # d1*r2 + maddu $r1,$d2 # d2*r1 + maddu $r0,$d3 # d3*r0 + maddu $rs3,$h4 # h4*s3 + maddu $at,$one # hi*1 + mfhi $at + mflo $h3 + + addiu $inp,$inp,16 + + addu $h4,$t0,$at +#else + multu ($r0,$d0) # d0*r0 + mflo ($h0,$r0,$d0) + mfhi ($h1,$r0,$d0) + + sltu $h2,$d3,$h2 + addu $h3,$h3,$h2 # carry + + multu ($rs3,$d1) # d1*s3 + mflo ($at,$rs3,$d1) + mfhi ($t0,$rs3,$d1) + + addu $h4,$h4,$padbit + addiu $inp,$inp,16 + addu $h4,$h4,$h3 + + multu ($rs2,$d2) # d2*s2 + mflo ($a3,$rs2,$d2) + mfhi ($t1,$rs2,$d2) + addu $h0,$h0,$at + addu $h1,$h1,$t0 + multu ($rs1,$d3) # d3*s1 + sltu $at,$h0,$at + addu $h1,$h1,$at + + mflo ($at,$rs1,$d3) + mfhi ($t0,$rs1,$d3) + addu $h0,$h0,$a3 + addu $h1,$h1,$t1 + multu ($r1,$d0) # d0*r1 + sltu $a3,$h0,$a3 + addu $h1,$h1,$a3 + + + mflo ($a3,$r1,$d0) + mfhi ($h2,$r1,$d0) + addu $h0,$h0,$at + addu $h1,$h1,$t0 + multu ($r0,$d1) # d1*r0 + sltu $at,$h0,$at + addu $h1,$h1,$at + + mflo ($at,$r0,$d1) + mfhi ($t0,$r0,$d1) + addu $h1,$h1,$a3 + sltu 
$a3,$h1,$a3 + multu ($rs3,$d2) # d2*s3 + addu $h2,$h2,$a3 + + mflo ($a3,$rs3,$d2) + mfhi ($t1,$rs3,$d2) + addu $h1,$h1,$at + addu $h2,$h2,$t0 + multu ($rs2,$d3) # d3*s2 + sltu $at,$h1,$at + addu $h2,$h2,$at + + mflo ($at,$rs2,$d3) + mfhi ($t0,$rs2,$d3) + addu $h1,$h1,$a3 + addu $h2,$h2,$t1 + multu ($rs1,$h4) # h4*s1 + sltu $a3,$h1,$a3 + addu $h2,$h2,$a3 + + mflo ($a3,$rs1,$h4) + addu $h1,$h1,$at + addu $h2,$h2,$t0 + multu ($r2,$d0) # d0*r2 + sltu $at,$h1,$at + addu $h2,$h2,$at + + + mflo ($at,$r2,$d0) + mfhi ($h3,$r2,$d0) + addu $h1,$h1,$a3 + sltu $a3,$h1,$a3 + multu ($r1,$d1) # d1*r1 + addu $h2,$h2,$a3 + + mflo ($a3,$r1,$d1) + mfhi ($t1,$r1,$d1) + addu $h2,$h2,$at + sltu $at,$h2,$at + multu ($r0,$d2) # d2*r0 + addu $h3,$h3,$at + + mflo ($at,$r0,$d2) + mfhi ($t0,$r0,$d2) + addu $h2,$h2,$a3 + addu $h3,$h3,$t1 + multu ($rs3,$d3) # d3*s3 + sltu $a3,$h2,$a3 + addu $h3,$h3,$a3 + + mflo ($a3,$rs3,$d3) + mfhi ($t1,$rs3,$d3) + addu $h2,$h2,$at + addu $h3,$h3,$t0 + multu ($rs2,$h4) # h4*s2 + sltu $at,$h2,$at + addu $h3,$h3,$at + + mflo ($at,$rs2,$h4) + addu $h2,$h2,$a3 + addu $h3,$h3,$t1 + multu ($r3,$d0) # d0*r3 + sltu $a3,$h2,$a3 + addu $h3,$h3,$a3 + + + mflo ($a3,$r3,$d0) + mfhi ($t1,$r3,$d0) + addu $h2,$h2,$at + sltu $at,$h2,$at + multu ($r2,$d1) # d1*r2 + addu $h3,$h3,$at + + mflo ($at,$r2,$d1) + mfhi ($t0,$r2,$d1) + addu $h3,$h3,$a3 + sltu $a3,$h3,$a3 + multu ($r0,$d3) # d3*r0 + addu $t1,$t1,$a3 + + mflo ($a3,$r0,$d3) + mfhi ($d3,$r0,$d3) + addu $h3,$h3,$at + addu $t1,$t1,$t0 + multu ($r1,$d2) # d2*r1 + sltu $at,$h3,$at + addu $t1,$t1,$at + + mflo ($at,$r1,$d2) + mfhi ($t0,$r1,$d2) + addu $h3,$h3,$a3 + addu $t1,$t1,$d3 + multu ($rs3,$h4) # h4*s3 + sltu $a3,$h3,$a3 + addu $t1,$t1,$a3 + + mflo ($a3,$rs3,$h4) + addu $h3,$h3,$at + addu $t1,$t1,$t0 + multu ($r0,$h4) # h4*r0 + sltu $at,$h3,$at + addu $t1,$t1,$at + + + mflo ($h4,$r0,$h4) + addu $h3,$h3,$a3 + sltu $a3,$h3,$a3 + addu $t1,$t1,$a3 + addu $h4,$h4,$t1 + + li $padbit,1 # if we loop, padbit is 1 +#endif + bne 
$inp,$len,.Loop + + sw $h0,0($ctx) # store hash value + sw $h1,4($ctx) + sw $h2,8($ctx) + sw $h3,12($ctx) + sw $h4,16($ctx) + + .set noreorder +.Labort: + lw $s11,4*11($sp) + lw $s10,4*10($sp) + lw $s9, 4*9($sp) + lw $s8, 4*8($sp) + lw $s7, 4*7($sp) + lw $s6, 4*6($sp) + lw $s5, 4*5($sp) + lw $s4, 4*4($sp) +___ +$code.=<<___ if ($flavour =~ /nubi/i); # optimize non-nubi epilogue + lw $s3, 4*3($sp) + lw $s2, 4*2($sp) + lw $s1, 4*1($sp) + lw $s0, 4*0($sp) +___ +$code.=<<___; + jr $ra + addu $sp,$sp,4*12 +.end poly1305_blocks +___ +} +{ +my ($ctx,$mac,$nonce,$tmp4) = ($a0,$a1,$a2,$a3); + +$code.=<<___; +.align 5 +.globl poly1305_emit +.ent poly1305_emit +poly1305_emit: + .frame $sp,0,$ra + .set reorder + + lw $tmp4,16($ctx) + lw $tmp0,0($ctx) + lw $tmp1,4($ctx) + lw $tmp2,8($ctx) + lw $tmp3,12($ctx) + + li $in0,-4 # final reduction + srl $ctx,$tmp4,2 + and $in0,$in0,$tmp4 + andi $tmp4,$tmp4,3 + addu $ctx,$ctx,$in0 + + addu $tmp0,$tmp0,$ctx + sltu $ctx,$tmp0,$ctx + addiu $in0,$tmp0,5 # compare to modulus + addu $tmp1,$tmp1,$ctx + sltiu $in1,$in0,5 + sltu $ctx,$tmp1,$ctx + addu $in1,$in1,$tmp1 + addu $tmp2,$tmp2,$ctx + sltu $in2,$in1,$tmp1 + sltu $ctx,$tmp2,$ctx + addu $in2,$in2,$tmp2 + addu $tmp3,$tmp3,$ctx + sltu $in3,$in2,$tmp2 + sltu $ctx,$tmp3,$ctx + addu $in3,$in3,$tmp3 + addu $tmp4,$tmp4,$ctx + sltu $ctx,$in3,$tmp3 + addu $ctx,$tmp4 + + srl $ctx,2 # see if it carried/borrowed + subu $ctx,$zero,$ctx + + xor $in0,$tmp0 + xor $in1,$tmp1 + xor $in2,$tmp2 + xor $in3,$tmp3 + and $in0,$ctx + and $in1,$ctx + and $in2,$ctx + and $in3,$ctx + xor $in0,$tmp0 + xor $in1,$tmp1 + xor $in2,$tmp2 + xor $in3,$tmp3 + + lw $tmp0,0($nonce) # load nonce + lw $tmp1,4($nonce) + lw $tmp2,8($nonce) + lw $tmp3,12($nonce) + + addu $in0,$tmp0 # accumulate nonce + sltu $ctx,$in0,$tmp0 + + addu $in1,$tmp1 + sltu $tmp1,$in1,$tmp1 + addu $in1,$ctx + sltu $ctx,$in1,$ctx + addu $ctx,$tmp1 + + addu $in2,$tmp2 + sltu $tmp2,$in2,$tmp2 + addu $in2,$ctx + sltu $ctx,$in2,$ctx + addu $ctx,$tmp2 + + addu 
$in3,$tmp3 + addu $in3,$ctx + + srl $tmp0,$in0,8 # write mac value + srl $tmp1,$in0,16 + srl $tmp2,$in0,24 + sb $in0, 0($mac) + sb $tmp0,1($mac) + srl $tmp0,$in1,8 + sb $tmp1,2($mac) + srl $tmp1,$in1,16 + sb $tmp2,3($mac) + srl $tmp2,$in1,24 + sb $in1, 4($mac) + sb $tmp0,5($mac) + srl $tmp0,$in2,8 + sb $tmp1,6($mac) + srl $tmp1,$in2,16 + sb $tmp2,7($mac) + srl $tmp2,$in2,24 + sb $in2, 8($mac) + sb $tmp0,9($mac) + srl $tmp0,$in3,8 + sb $tmp1,10($mac) + srl $tmp1,$in3,16 + sb $tmp2,11($mac) + srl $tmp2,$in3,24 + sb $in3, 12($mac) + sb $tmp0,13($mac) + sb $tmp1,14($mac) + sb $tmp2,15($mac) + + jr $ra +.end poly1305_emit +.rdata +.asciiz "Poly1305 for MIPS32, CRYPTOGAMS by \@dot-asm" +.align 2 +___ +} +}}} + +$output=pop and open STDOUT,">$output"; +print $code; +close STDOUT; diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index c8b595c60910..61b0fc2026e6 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -13,7 +13,6 @@ generic-y += irq_work.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h -generic-y += msi.h generic-y += parport.h generic-y += percpu.h generic-y += preempt.h diff --git a/arch/mips/include/asm/dma-direct.h b/arch/mips/include/asm/dma-direct.h index b5c240806e1b..14e352651ce9 100644 --- a/arch/mips/include/asm/dma-direct.h +++ b/arch/mips/include/asm/dma-direct.h @@ -2,14 +2,6 @@ #ifndef _MIPS_DMA_DIRECT_H #define _MIPS_DMA_DIRECT_H 1 -static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) -{ - if (!dev->dma_mask) - return false; - - return addr + size - 1 <= *dev->dma_mask; -} - dma_addr_t __phys_to_dma(struct device *dev, phys_addr_t paddr); phys_addr_t __dma_to_phys(struct device *dev, dma_addr_t daddr); diff --git a/arch/mips/include/asm/vdso/vsyscall.h b/arch/mips/include/asm/vdso/vsyscall.h index 195314732233..00d41b94ba31 100644 --- a/arch/mips/include/asm/vdso/vsyscall.h +++ b/arch/mips/include/asm/vdso/vsyscall.h @@ -28,13 +28,6 @@ 
int __mips_get_clock_mode(struct timekeeper *tk) } #define __arch_get_clock_mode __mips_get_clock_mode -static __always_inline -int __mips_use_vsyscall(struct vdso_data *vdata) -{ - return (vdata[CS_HRES_COARSE].clock_mode != VDSO_CLOCK_NONE); -} -#define __arch_use_vsyscall __mips_use_vsyscall - /* The asm-generic header needs to be included after the definitions above */ #include <asm-generic/vdso/vsyscall.h> diff --git a/arch/mips/include/uapi/asm/msgbuf.h b/arch/mips/include/uapi/asm/msgbuf.h index 46aa15b13e4e..128af72f2dfe 100644 --- a/arch/mips/include/uapi/asm/msgbuf.h +++ b/arch/mips/include/uapi/asm/msgbuf.h @@ -2,6 +2,7 @@ #ifndef _ASM_MSGBUF_H #define _ASM_MSGBUF_H +#include <asm/ipcbuf.h> /* * The msqid64_ds structure for the MIPS architecture. @@ -15,9 +16,9 @@ #if defined(__mips64) struct msqid64_ds { struct ipc64_perm msg_perm; - __kernel_time_t msg_stime; /* last msgsnd time */ - __kernel_time_t msg_rtime; /* last msgrcv time */ - __kernel_time_t msg_ctime; /* last change time */ + long msg_stime; /* last msgsnd time */ + long msg_rtime; /* last msgrcv time */ + long msg_ctime; /* last change time */ unsigned long msg_cbytes; /* current number of bytes on queue */ unsigned long msg_qnum; /* number of messages in queue */ unsigned long msg_qbytes; /* max number of bytes on queue */ diff --git a/arch/mips/include/uapi/asm/sembuf.h b/arch/mips/include/uapi/asm/sembuf.h index 60c89e6cb25b..ba7fe0c89e7d 100644 --- a/arch/mips/include/uapi/asm/sembuf.h +++ b/arch/mips/include/uapi/asm/sembuf.h @@ -2,6 +2,8 @@ #ifndef _ASM_SEMBUF_H #define _ASM_SEMBUF_H +#include <asm/ipcbuf.h> + /* * The semid64_ds structure for the MIPS architecture. * Note extra padding because this structure is passed back and forth @@ -14,8 +16,8 @@ #ifdef __mips64 struct semid64_ds { struct ipc64_perm sem_perm; /* permissions .. 
see ipc.h */ - __kernel_time_t sem_otime; /* last semop time */ - __kernel_time_t sem_ctime; /* last change time */ + long sem_otime; /* last semop time */ + long sem_ctime; /* last change time */ unsigned long sem_nsems; /* no. of semaphores in array */ unsigned long __unused1; unsigned long __unused2; diff --git a/arch/mips/include/uapi/asm/shmbuf.h b/arch/mips/include/uapi/asm/shmbuf.h index 9b9bba3401f2..680bb95b2240 100644 --- a/arch/mips/include/uapi/asm/shmbuf.h +++ b/arch/mips/include/uapi/asm/shmbuf.h @@ -17,9 +17,9 @@ struct shmid64_ds { struct ipc64_perm shm_perm; /* operation perms */ size_t shm_segsz; /* size of segment (bytes) */ - __kernel_time_t shm_atime; /* last attach time */ - __kernel_time_t shm_dtime; /* last detach time */ - __kernel_time_t shm_ctime; /* last change time */ + long shm_atime; /* last attach time */ + long shm_dtime; /* last detach time */ + long shm_ctime; /* last change time */ __kernel_pid_t shm_cpid; /* pid of creator */ __kernel_pid_t shm_lpid; /* pid of last operator */ unsigned long shm_nattch; /* no. of current attaches */ diff --git a/arch/mips/include/uapi/asm/stat.h b/arch/mips/include/uapi/asm/stat.h index 95416f366d7f..3d2a3b71845c 100644 --- a/arch/mips/include/uapi/asm/stat.h +++ b/arch/mips/include/uapi/asm/stat.h @@ -26,17 +26,17 @@ struct stat { gid_t st_gid; unsigned st_rdev; long st_pad2[2]; - off_t st_size; + long st_size; long st_pad3; /* * Actually this should be timestruc_t st_atime, st_mtime and st_ctime * but we don't have it under Linux. */ - time_t st_atime; + long st_atime; long st_atime_nsec; - time_t st_mtime; + long st_mtime; long st_mtime_nsec; - time_t st_ctime; + long st_ctime; long st_ctime_nsec; long st_blksize; long st_blocks; @@ -70,13 +70,13 @@ struct stat64 { * Actually this should be timestruc_t st_atime, st_mtime and st_ctime * but we don't have it under Linux. 
*/ - time_t st_atime; + long st_atime; unsigned long st_atime_nsec; /* Reserved for st_atime expansion */ - time_t st_mtime; + long st_mtime; unsigned long st_mtime_nsec; /* Reserved for st_mtime expansion */ - time_t st_ctime; + long st_ctime; unsigned long st_ctime_nsec; /* Reserved for st_ctime expansion */ unsigned long st_blksize; @@ -105,7 +105,7 @@ struct stat { unsigned int st_rdev; unsigned int st_pad1[3]; /* Reserved for st_rdev expansion */ - off_t st_size; + long st_size; /* * Actually this should be timestruc_t st_atime, st_mtime and st_ctime diff --git a/arch/mips/jazz/jazzdma.c b/arch/mips/jazz/jazzdma.c index a01e14955187..c64a297e82b3 100644 --- a/arch/mips/jazz/jazzdma.c +++ b/arch/mips/jazz/jazzdma.c @@ -592,7 +592,7 @@ static dma_addr_t jazz_dma_map_page(struct device *dev, struct page *page, phys_addr_t phys = page_to_phys(page) + offset; if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_device(dev, phys, size, dir); + arch_sync_dma_for_device(phys, size, dir); return vdma_alloc(phys, size); } @@ -600,7 +600,7 @@ static void jazz_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_cpu(dev, vdma_log2phys(dma_addr), size, dir); + arch_sync_dma_for_cpu(vdma_log2phys(dma_addr), size, dir); vdma_free(dma_addr); } @@ -612,7 +612,7 @@ static int jazz_dma_map_sg(struct device *dev, struct scatterlist *sglist, for_each_sg(sglist, sg, nents, i) { if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, + arch_sync_dma_for_device(sg_phys(sg), sg->length, dir); sg->dma_address = vdma_alloc(sg_phys(sg), sg->length); if (sg->dma_address == DMA_MAPPING_ERROR) @@ -631,8 +631,7 @@ static void jazz_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, for_each_sg(sglist, sg, nents, i) { if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, - 
dir); + arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir); vdma_free(sg->dma_address); } } @@ -640,13 +639,13 @@ static void jazz_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, static void jazz_dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { - arch_sync_dma_for_device(dev, vdma_log2phys(addr), size, dir); + arch_sync_dma_for_device(vdma_log2phys(addr), size, dir); } static void jazz_dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { - arch_sync_dma_for_cpu(dev, vdma_log2phys(addr), size, dir); + arch_sync_dma_for_cpu(vdma_log2phys(addr), size, dir); } static void jazz_dma_sync_sg_for_device(struct device *dev, @@ -656,7 +655,7 @@ static void jazz_dma_sync_sg_for_device(struct device *dev, int i; for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); + arch_sync_dma_for_device(sg_phys(sg), sg->length, dir); } static void jazz_dma_sync_sg_for_cpu(struct device *dev, @@ -666,7 +665,7 @@ static void jazz_dma_sync_sg_for_cpu(struct device *dev, int i; for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); + arch_sync_dma_for_cpu(sg_phys(sg), sg->length, dir); } const struct dma_map_ops jazz_dma_ops = { diff --git a/arch/mips/kernel/binfmt_elfn32.c b/arch/mips/kernel/binfmt_elfn32.c index 7a12763d553a..6ee3f7218c67 100644 --- a/arch/mips/kernel/binfmt_elfn32.c +++ b/arch/mips/kernel/binfmt_elfn32.c @@ -100,7 +100,7 @@ jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value) #undef TASK_SIZE #define TASK_SIZE TASK_SIZE32 -#undef ns_to_timeval -#define ns_to_timeval ns_to_old_timeval32 +#undef ns_to_kernel_old_timeval +#define ns_to_kernel_old_timeval ns_to_old_timeval32 #include "../../../fs/binfmt_elf.c" diff --git a/arch/mips/kernel/binfmt_elfo32.c b/arch/mips/kernel/binfmt_elfo32.c index e6db06a1d31a..6dd103d3cebb 100644 --- 
a/arch/mips/kernel/binfmt_elfo32.c +++ b/arch/mips/kernel/binfmt_elfo32.c @@ -103,7 +103,7 @@ jiffies_to_old_timeval32(unsigned long jiffies, struct old_timeval32 *value) #undef TASK_SIZE #define TASK_SIZE TASK_SIZE32 -#undef ns_to_timeval -#define ns_to_timeval ns_to_old_timeval32 +#undef ns_to_kernel_old_timeval +#define ns_to_kernel_old_timeval ns_to_old_timeval32 #include "../../../fs/binfmt_elf.c" diff --git a/arch/mips/kernel/vmlinux.lds.S b/arch/mips/kernel/vmlinux.lds.S index 33ee0d18fb0a..a5f00ec73ea6 100644 --- a/arch/mips/kernel/vmlinux.lds.S +++ b/arch/mips/kernel/vmlinux.lds.S @@ -10,6 +10,11 @@ */ #define BSS_FIRST_SECTIONS *(.bss..swapper_pg_dir) +/* Cavium Octeon should not have a separate PT_NOTE Program Header. */ +#ifndef CONFIG_CAVIUM_OCTEON_SOC +#define EMITS_PT_NOTE +#endif + #include <asm-generic/vmlinux.lds.h> #undef mips @@ -76,16 +81,8 @@ SECTIONS __stop___dbe_table = .; } -#ifdef CONFIG_CAVIUM_OCTEON_SOC -#define NOTES_HEADER -#else /* CONFIG_CAVIUM_OCTEON_SOC */ -#define NOTES_HEADER :note -#endif /* CONFIG_CAVIUM_OCTEON_SOC */ - NOTES :text NOTES_HEADER - .dummy : { *(.dummy) } :text - _sdata = .; /* Start of data section */ - RODATA + RO_DATA(4096) /* writeable */ .data : { /* Data */ diff --git a/arch/mips/mm/dma-noncoherent.c b/arch/mips/mm/dma-noncoherent.c index 1d4d57dd9acf..dc42ffc83825 100644 --- a/arch/mips/mm/dma-noncoherent.c +++ b/arch/mips/mm/dma-noncoherent.c @@ -27,7 +27,7 @@ * R10000 and R12000 are used in such systems, the SGI IP28 Indigo² rsp. * SGI IP32 aka O2. 
*/ -static inline bool cpu_needs_post_dma_flush(struct device *dev) +static inline bool cpu_needs_post_dma_flush(void) { switch (boot_cpu_type()) { case CPU_R10000: @@ -59,12 +59,6 @@ void *cached_kernel_address(void *addr) return __va(addr) - UNCAC_BASE; } -long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, - dma_addr_t dma_addr) -{ - return page_to_pfn(virt_to_page(cached_kernel_address(cpu_addr))); -} - static inline void dma_sync_virt(void *addr, size_t size, enum dma_data_direction dir) { @@ -118,17 +112,17 @@ static inline void dma_sync_phys(phys_addr_t paddr, size_t size, } while (left); } -void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, - size_t size, enum dma_data_direction dir) +void arch_sync_dma_for_device(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { dma_sync_phys(paddr, size, dir); } #ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU -void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, - size_t size, enum dma_data_direction dir) +void arch_sync_dma_for_cpu(phys_addr_t paddr, size_t size, + enum dma_data_direction dir) { - if (cpu_needs_post_dma_flush(dev)) + if (cpu_needs_post_dma_flush()) dma_sync_phys(paddr, size, dir); } #endif diff --git a/arch/mips/net/ebpf_jit.c b/arch/mips/net/ebpf_jit.c index a2405d5f7d1e..561154cbcc40 100644 --- a/arch/mips/net/ebpf_jit.c +++ b/arch/mips/net/ebpf_jit.c @@ -604,6 +604,7 @@ static void emit_const_to_reg(struct jit_ctx *ctx, int dst, u64 value) static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) { int off, b_off; + int tcc_reg; ctx->flags |= EBPF_SEEN_TC; /* @@ -616,14 +617,14 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx, int this_idx) b_off = b_imm(this_idx + 1, ctx); emit_instr(ctx, bne, MIPS_R_AT, MIPS_R_ZERO, b_off); /* - * if (--TCC < 0) + * if (TCC-- < 0) * goto out; */ /* Delay slot */ - emit_instr(ctx, daddiu, MIPS_R_T5, - (ctx->flags & EBPF_TCC_IN_V1) ? MIPS_R_V1 : MIPS_R_S4, -1); + tcc_reg = (ctx->flags & EBPF_TCC_IN_V1) ? 
MIPS_R_V1 : MIPS_R_S4; + emit_instr(ctx, daddiu, MIPS_R_T5, tcc_reg, -1); b_off = b_imm(this_idx + 1, ctx); - emit_instr(ctx, bltz, MIPS_R_T5, b_off); + emit_instr(ctx, bltz, tcc_reg, b_off); /* * prog = array->ptrs[index]; * if (prog == NULL) diff --git a/arch/mips/pci/fixup-sb1250.c b/arch/mips/pci/fixup-sb1250.c index 8a41b359cf90..40efc990cdce 100644 --- a/arch/mips/pci/fixup-sb1250.c +++ b/arch/mips/pci/fixup-sb1250.c @@ -21,22 +21,22 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_PCI, /* * The BCM1250, etc. PCI host bridge does not support DAC on its 32-bit - * bus, so we set the bus's DMA mask accordingly. However the HT link + * bus, so we set the bus's DMA limit accordingly. However the HT link * down the artificial PCI-HT bridge supports 40-bit addressing and the * SP1011 HT-PCI bridge downstream supports both DAC and a 64-bit bus * width, so we record the PCI-HT bridge's secondary and subordinate bus - * numbers and do not set the mask for devices present in the inclusive + * numbers and do not set the limit for devices present in the inclusive * range of those. 
*/ -struct sb1250_bus_dma_mask_exclude { +struct sb1250_bus_dma_limit_exclude { bool set; unsigned char start; unsigned char end; }; -static int sb1250_bus_dma_mask(struct pci_dev *dev, void *data) +static int sb1250_bus_dma_limit(struct pci_dev *dev, void *data) { - struct sb1250_bus_dma_mask_exclude *exclude = data; + struct sb1250_bus_dma_limit_exclude *exclude = data; bool exclude_this; bool ht_bridge; @@ -55,7 +55,7 @@ static int sb1250_bus_dma_mask(struct pci_dev *dev, void *data) exclude->start, exclude->end); } else { dev_dbg(&dev->dev, "disabling DAC for device"); - dev->dev.bus_dma_mask = DMA_BIT_MASK(32); + dev->dev.bus_dma_limit = DMA_BIT_MASK(32); } return 0; @@ -63,9 +63,9 @@ static int sb1250_bus_dma_mask(struct pci_dev *dev, void *data) static void quirk_sb1250_pci_dac(struct pci_dev *dev) { - struct sb1250_bus_dma_mask_exclude exclude = { .set = false }; + struct sb1250_bus_dma_limit_exclude exclude = { .set = false }; - pci_walk_bus(dev->bus, sb1250_bus_dma_mask, &exclude); + pci_walk_bus(dev->bus, sb1250_bus_dma_limit, &exclude); } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_SIBYTE, PCI_DEVICE_ID_BCM1250_PCI, quirk_sb1250_pci_dac); diff --git a/arch/mips/ralink/Kconfig b/arch/mips/ralink/Kconfig index 1434fa60f3db..94e9ce994494 100644 --- a/arch/mips/ralink/Kconfig +++ b/arch/mips/ralink/Kconfig @@ -51,6 +51,7 @@ choice select MIPS_GIC select COMMON_CLK select CLKSRC_MIPS_GIC + select HAVE_PCI if PCI_MT7621 endchoice choice diff --git a/arch/mips/sgi-ip27/Kconfig b/arch/mips/sgi-ip27/Kconfig index ef3847e7aee0..e5b6cadbec85 100644 --- a/arch/mips/sgi-ip27/Kconfig +++ b/arch/mips/sgi-ip27/Kconfig @@ -38,10 +38,3 @@ config REPLICATE_KTEXT Say Y here to enable replicating the kernel text across multiple nodes in a NUMA cluster. This trades memory for speed. 
-config REPLICATE_EXHANDLERS - bool "Exception handler replication support" - depends on SGI_IP27 - help - Say Y here to enable replicating the kernel exception handlers - across multiple nodes in a NUMA cluster. This trades memory for - speed. diff --git a/arch/mips/sgi-ip27/ip27-init.c b/arch/mips/sgi-ip27/ip27-init.c index 8fd3505e2b9c..f597e1ee2df7 100644 --- a/arch/mips/sgi-ip27/ip27-init.c +++ b/arch/mips/sgi-ip27/ip27-init.c @@ -64,23 +64,14 @@ static void per_hub_init(nasid_t nasid) hub_rtc_init(nasid); -#ifdef CONFIG_REPLICATE_EXHANDLERS - /* - * If this is not a headless node initialization, - * copy over the caliased exception handlers. - */ - if (get_nasid() == nasid) { - extern char except_vec2_generic, except_vec3_generic; - extern void build_tlb_refill_handler(void); - - memcpy((void *)(CKSEG0 + 0x100), &except_vec2_generic, 0x80); - memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x80); - build_tlb_refill_handler(); - memcpy((void *)(CKSEG0 + 0x100), (void *) CKSEG0, 0x80); - memcpy((void *)(CKSEG0 + 0x180), &except_vec3_generic, 0x100); + if (nasid) { + /* copy exception handlers from first node to current node */ + memcpy((void *)NODE_OFFSET_TO_K0(nasid, 0), + (void *)CKSEG0, 0x200); __flush_cache_all(); + /* switch to node local exception handlers */ + REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K); } -#endif } void per_cpu_init(void) diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index f610fff592a6..563aad5e6398 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -311,11 +311,7 @@ static void __init mlreset(void) * thinks it is a node 0 address. 
*/ REMOTE_HUB_S(nasid, PI_REGION_PRESENT, (region_mask | 1)); -#ifdef CONFIG_REPLICATE_EXHANDLERS - REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_8K); -#else REMOTE_HUB_S(nasid, PI_CALIAS_SIZE, PI_CALIAS_SIZE_0); -#endif #ifdef LATER /* diff --git a/arch/mips/sgi-ip32/ip32-platform.c b/arch/mips/sgi-ip32/ip32-platform.c index 5a2a82148d8d..c3909bd8dd1a 100644 --- a/arch/mips/sgi-ip32/ip32-platform.c +++ b/arch/mips/sgi-ip32/ip32-platform.c @@ -115,7 +115,7 @@ ip32_rtc_platform_data[] = { .bcd_mode = true, .no_irq = false, .uie_unsupported = false, - .alloc_io_resources = true, + .access_type = ds1685_reg_direct, .plat_prepare_poweroff = ip32_prepare_poweroff, }, }; |