Diffstat (limited to 'arch/arm')
 arch/arm/crypto/aes-cipher-core.S    | 42
 arch/arm/crypto/blake2b-neon-glue.c  |  4
 arch/arm/crypto/blake2s-core.S       | 21
 arch/arm/crypto/chacha-scalar-core.S | 43
 arch/arm/crypto/curve25519-core.S    |  2
 arch/arm/crypto/poly1305-glue.c      |  2
 6 files changed, 48 insertions(+), 66 deletions(-)
diff --git a/arch/arm/crypto/aes-cipher-core.S b/arch/arm/crypto/aes-cipher-core.S
index 472e56d09eea..1da3f41359aa 100644
--- a/arch/arm/crypto/aes-cipher-core.S
+++ b/arch/arm/crypto/aes-cipher-core.S
@@ -99,28 +99,6 @@
__hround \out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
.endm
- .macro __rev, out, in
- .if __LINUX_ARM_ARCH__ < 6
- lsl t0, \in, #24
- and t1, \in, #0xff00
- and t2, \in, #0xff0000
- orr \out, t0, \in, lsr #24
- orr \out, \out, t1, lsl #8
- orr \out, \out, t2, lsr #8
- .else
- rev \out, \in
- .endif
- .endm
-
- .macro __adrl, out, sym, c
- .if __LINUX_ARM_ARCH__ < 7
- ldr\c \out, =\sym
- .else
- movw\c \out, #:lower16:\sym
- movt\c \out, #:upper16:\sym
- .endif
- .endm
-
.macro do_crypt, round, ttab, ltab, bsz
push {r3-r11, lr}
@@ -133,10 +111,10 @@
ldr r7, [in, #12]
#ifdef CONFIG_CPU_BIG_ENDIAN
- __rev r4, r4
- __rev r5, r5
- __rev r6, r6
- __rev r7, r7
+ rev_l r4, t0
+ rev_l r5, t0
+ rev_l r6, t0
+ rev_l r7, t0
#endif
eor r4, r4, r8
@@ -144,7 +122,7 @@
eor r6, r6, r10
eor r7, r7, r11
- __adrl ttab, \ttab
+ mov_l ttab, \ttab
/*
* Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
* L1 cache, assuming cacheline size >= 32. This is a hardening measure
@@ -180,7 +158,7 @@
2: .ifb \ltab
add ttab, ttab, #1
.else
- __adrl ttab, \ltab
+ mov_l ttab, \ltab
// Prefetch inverse S-box for final round; see explanation above
.set i, 0
.rept 256 / 64
@@ -194,10 +172,10 @@
\round r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
#ifdef CONFIG_CPU_BIG_ENDIAN
- __rev r4, r4
- __rev r5, r5
- __rev r6, r6
- __rev r7, r7
+ rev_l r4, t0
+ rev_l r5, t0
+ rev_l r6, t0
+ rev_l r7, t0
#endif
ldr out, [sp]
diff --git a/arch/arm/crypto/blake2b-neon-glue.c b/arch/arm/crypto/blake2b-neon-glue.c
index 34d73200e7fa..4b59d027ba4a 100644
--- a/arch/arm/crypto/blake2b-neon-glue.c
+++ b/arch/arm/crypto/blake2b-neon-glue.c
@@ -85,8 +85,8 @@ static int __init blake2b_neon_mod_init(void)
static void __exit blake2b_neon_mod_exit(void)
{
- return crypto_unregister_shashes(blake2b_neon_algs,
- ARRAY_SIZE(blake2b_neon_algs));
+ crypto_unregister_shashes(blake2b_neon_algs,
+ ARRAY_SIZE(blake2b_neon_algs));
}
module_init(blake2b_neon_mod_init);
diff --git a/arch/arm/crypto/blake2s-core.S b/arch/arm/crypto/blake2s-core.S
index bed897e9a181..86345751bbf3 100644
--- a/arch/arm/crypto/blake2s-core.S
+++ b/arch/arm/crypto/blake2s-core.S
@@ -8,6 +8,7 @@
*/
#include <linux/linkage.h>
+#include <asm/assembler.h>
// Registers used to hold message words temporarily. There aren't
// enough ARM registers to hold the whole message block, so we have to
@@ -38,6 +39,23 @@
#endif
.endm
+.macro _le32_bswap a, tmp
+#ifdef __ARMEB__
+ rev_l \a, \tmp
+#endif
+.endm
+
+.macro _le32_bswap_8x a, b, c, d, e, f, g, h, tmp
+ _le32_bswap \a, \tmp
+ _le32_bswap \b, \tmp
+ _le32_bswap \c, \tmp
+ _le32_bswap \d, \tmp
+ _le32_bswap \e, \tmp
+ _le32_bswap \f, \tmp
+ _le32_bswap \g, \tmp
+ _le32_bswap \h, \tmp
+.endm
+
// Execute a quarter-round of BLAKE2s by mixing two columns or two diagonals.
// (a0, b0, c0, d0) and (a1, b1, c1, d1) give the registers containing the two
// columns/diagonals. s0-s1 are the word offsets to the message words the first
@@ -180,8 +198,10 @@ ENTRY(blake2s_compress_arch)
tst r1, #3
bne .Lcopy_block_misaligned
ldmia r1!, {r2-r9}
+ _le32_bswap_8x r2, r3, r4, r5, r6, r7, r8, r9, r14
stmia r12!, {r2-r9}
ldmia r1!, {r2-r9}
+ _le32_bswap_8x r2, r3, r4, r5, r6, r7, r8, r9, r14
stmia r12, {r2-r9}
.Lcopy_block_done:
str r1, [sp, #68] // Update message pointer
@@ -268,6 +288,7 @@ ENTRY(blake2s_compress_arch)
1:
#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
ldr r3, [r1], #4
+ _le32_bswap r3, r4
#else
ldrb r3, [r1, #0]
ldrb r4, [r1, #1]
diff --git a/arch/arm/crypto/chacha-scalar-core.S b/arch/arm/crypto/chacha-scalar-core.S
index 2985b80a45b5..083fe1ab96d0 100644
--- a/arch/arm/crypto/chacha-scalar-core.S
+++ b/arch/arm/crypto/chacha-scalar-core.S
@@ -41,32 +41,15 @@
X14 .req r12
X15 .req r14
-.macro __rev out, in, t0, t1, t2
-.if __LINUX_ARM_ARCH__ >= 6
- rev \out, \in
-.else
- lsl \t0, \in, #24
- and \t1, \in, #0xff00
- and \t2, \in, #0xff0000
- orr \out, \t0, \in, lsr #24
- orr \out, \out, \t1, lsl #8
- orr \out, \out, \t2, lsr #8
-.endif
-.endm
-
-.macro _le32_bswap x, t0, t1, t2
+.macro _le32_bswap_4x a, b, c, d, tmp
#ifdef __ARMEB__
- __rev \x, \x, \t0, \t1, \t2
+ rev_l \a, \tmp
+ rev_l \b, \tmp
+ rev_l \c, \tmp
+ rev_l \d, \tmp
#endif
.endm
-.macro _le32_bswap_4x a, b, c, d, t0, t1, t2
- _le32_bswap \a, \t0, \t1, \t2
- _le32_bswap \b, \t0, \t1, \t2
- _le32_bswap \c, \t0, \t1, \t2
- _le32_bswap \d, \t0, \t1, \t2
-.endm
-
.macro __ldrd a, b, src, offset
#if __LINUX_ARM_ARCH__ >= 6
ldrd \a, \b, [\src, #\offset]
@@ -200,7 +183,7 @@
add X1, X1, r9
add X2, X2, r10
add X3, X3, r11
- _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
+ _le32_bswap_4x X0, X1, X2, X3, r8
ldmia r12!, {r8-r11}
eor X0, X0, r8
eor X1, X1, r9
@@ -216,7 +199,7 @@
ldmia r12!, {X0-X3}
add X6, r10, X6, ror #brot
add X7, r11, X7, ror #brot
- _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
+ _le32_bswap_4x X4, X5, X6, X7, r8
eor X4, X4, X0
eor X5, X5, X1
eor X6, X6, X2
@@ -231,7 +214,7 @@
add r1, r1, r9 // x9
add r6, r6, r10 // x10
add r7, r7, r11 // x11
- _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
+ _le32_bswap_4x r0, r1, r6, r7, r8
ldmia r12!, {r8-r11}
eor r0, r0, r8 // x8
eor r1, r1, r9 // x9
@@ -245,7 +228,7 @@
add r3, r9, r3, ror #drot // x13
add r4, r10, r4, ror #drot // x14
add r5, r11, r5, ror #drot // x15
- _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
+ _le32_bswap_4x r2, r3, r4, r5, r9
ldr r9, [sp, #72] // load LEN
eor r2, r2, r0 // x12
eor r3, r3, r1 // x13
@@ -301,7 +284,7 @@
add X1, X1, r9
add X2, X2, r10
add X3, X3, r11
- _le32_bswap_4x X0, X1, X2, X3, r8, r9, r10
+ _le32_bswap_4x X0, X1, X2, X3, r8
stmia r14!, {X0-X3}
// Save keystream for x4-x7
@@ -311,7 +294,7 @@
add X5, r9, X5, ror #brot
add X6, r10, X6, ror #brot
add X7, r11, X7, ror #brot
- _le32_bswap_4x X4, X5, X6, X7, r8, r9, r10
+ _le32_bswap_4x X4, X5, X6, X7, r8
add r8, sp, #64
stmia r14!, {X4-X7}
@@ -323,7 +306,7 @@
add r1, r1, r9 // x9
add r6, r6, r10 // x10
add r7, r7, r11 // x11
- _le32_bswap_4x r0, r1, r6, r7, r8, r9, r10
+ _le32_bswap_4x r0, r1, r6, r7, r8
stmia r14!, {r0,r1,r6,r7}
__ldrd r8, r9, sp, 144
__ldrd r10, r11, sp, 152
@@ -331,7 +314,7 @@
add r3, r9, r3, ror #drot // x13
add r4, r10, r4, ror #drot // x14
add r5, r11, r5, ror #drot // x15
- _le32_bswap_4x r2, r3, r4, r5, r9, r10, r11
+ _le32_bswap_4x r2, r3, r4, r5, r9
stmia r14, {r2-r5}
// Stack: ks0-ks15 unused0-unused7 x0-x15 OUT IN LEN
diff --git a/arch/arm/crypto/curve25519-core.S b/arch/arm/crypto/curve25519-core.S
index be18af52e7dc..b697fa5d059a 100644
--- a/arch/arm/crypto/curve25519-core.S
+++ b/arch/arm/crypto/curve25519-core.S
@@ -10,8 +10,8 @@
#include <linux/linkage.h>
.text
-.fpu neon
.arch armv7-a
+.fpu neon
.align 4
ENTRY(curve25519_neon)
diff --git a/arch/arm/crypto/poly1305-glue.c b/arch/arm/crypto/poly1305-glue.c
index 3023c1acfa19..c31bd8f7c092 100644
--- a/arch/arm/crypto/poly1305-glue.c
+++ b/arch/arm/crypto/poly1305-glue.c
@@ -29,7 +29,7 @@ void __weak poly1305_blocks_neon(void *state, const u8 *src, u32 len, u32 hibit)
static __ro_after_init DEFINE_STATIC_KEY_FALSE(have_neon);
-void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 *key)
+void poly1305_init_arch(struct poly1305_desc_ctx *dctx, const u8 key[POLY1305_KEY_SIZE])
{
poly1305_init_arm(&dctx->h, key);
dctx->s[0] = get_unaligned_le32(key + 16);