diff options
-rw-r--r-- | arch/arm64/crypto/aes-glue.c | 21 | ||||
-rw-r--r-- | arch/arm64/crypto/aes-modes.S | 18 |
2 files changed, 20 insertions, 19 deletions
diff --git a/arch/arm64/crypto/aes-glue.c b/arch/arm64/crypto/aes-glue.c index 30b7cc6a7079..7d66f8babb1d 100644 --- a/arch/arm64/crypto/aes-glue.c +++ b/arch/arm64/crypto/aes-glue.c @@ -24,7 +24,6 @@ #ifdef USE_V8_CRYPTO_EXTENSIONS #define MODE "ce" #define PRIO 300 -#define STRIDE 5 #define aes_expandkey ce_aes_expandkey #define aes_ecb_encrypt ce_aes_ecb_encrypt #define aes_ecb_decrypt ce_aes_ecb_decrypt @@ -42,7 +41,6 @@ MODULE_DESCRIPTION("AES-ECB/CBC/CTR/XTS using ARMv8 Crypto Extensions"); #else #define MODE "neon" #define PRIO 200 -#define STRIDE 4 #define aes_ecb_encrypt neon_aes_ecb_encrypt #define aes_ecb_decrypt neon_aes_ecb_decrypt #define aes_cbc_encrypt neon_aes_cbc_encrypt @@ -89,7 +87,7 @@ asmlinkage void aes_cbc_cts_decrypt(u8 out[], u8 const in[], u32 const rk[], int rounds, int bytes, u8 const iv[]); asmlinkage void aes_ctr_encrypt(u8 out[], u8 const in[], u32 const rk[], - int rounds, int bytes, u8 ctr[], u8 finalbuf[]); + int rounds, int bytes, u8 ctr[]); asmlinkage void aes_xts_encrypt(u8 out[], u8 const in[], u32 const rk1[], int rounds, int bytes, u32 const rk2[], u8 iv[], @@ -458,26 +456,21 @@ static int __maybe_unused ctr_encrypt(struct skcipher_request *req) unsigned int nbytes = walk.nbytes; u8 *dst = walk.dst.virt.addr; u8 buf[AES_BLOCK_SIZE]; - unsigned int tail; if (unlikely(nbytes < AES_BLOCK_SIZE)) - src = memcpy(buf, src, nbytes); + src = dst = memcpy(buf + sizeof(buf) - nbytes, + src, nbytes); else if (nbytes < walk.total) nbytes &= ~(AES_BLOCK_SIZE - 1); kernel_neon_begin(); aes_ctr_encrypt(dst, src, ctx->key_enc, rounds, nbytes, - walk.iv, buf); + walk.iv); kernel_neon_end(); - tail = nbytes % (STRIDE * AES_BLOCK_SIZE); - if (tail > 0 && tail < AES_BLOCK_SIZE) - /* - * The final partial block could not be returned using - * an overlapping store, so it was passed via buf[] - * instead. - */ - memcpy(dst + nbytes - tail, buf, tail); + if (unlikely(nbytes < AES_BLOCK_SIZE)) + memcpy(walk.dst.virt.addr, + buf + sizeof(buf) - nbytes, nbytes); err = skcipher_walk_done(&walk, walk.nbytes - nbytes); } diff --git a/arch/arm64/crypto/aes-modes.S b/arch/arm64/crypto/aes-modes.S index ff01f0167ba2..dc35eb0245c5 100644 --- a/arch/arm64/crypto/aes-modes.S +++ b/arch/arm64/crypto/aes-modes.S @@ -321,7 +321,7 @@ AES_FUNC_END(aes_cbc_cts_decrypt) /* * aes_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds, - * int bytes, u8 ctr[], u8 finalbuf[]) + * int bytes, u8 ctr[]) */ AES_FUNC_START(aes_ctr_encrypt) @@ -414,8 +414,8 @@ ST5( st1 {v4.16b}, [x0], #16 ) .Lctrtail: /* XOR up to MAX_STRIDE * 16 - 1 bytes of in/output with v0 ... v3/v4 */ mov x16, #16 - ands x13, x4, #0xf - csel x13, x13, x16, ne + ands x6, x4, #0xf + csel x13, x6, x16, ne ST5( cmp w4, #64 - (MAX_STRIDE << 4) ) ST5( csel x14, x16, xzr, gt ) @@ -424,10 +424,10 @@ ST5( csel x14, x16, xzr, gt ) cmp w4, #32 - (MAX_STRIDE << 4) csel x16, x16, xzr, gt cmp w4, #16 - (MAX_STRIDE << 4) - ble .Lctrtail1x adr_l x12, .Lcts_permute_table add x12, x12, x13 + ble .Lctrtail1x ST5( ld1 {v5.16b}, [x1], x14 ) ld1 {v6.16b}, [x1], x15 @@ -462,11 +462,19 @@ ST5( st1 {v5.16b}, [x0], x14 ) b .Lctrout .Lctrtail1x: - csel x0, x0, x6, eq // use finalbuf if less than a full block + sub x7, x6, #16 + csel x6, x6, x7, eq + add x1, x1, x6 + add x0, x0, x6 ld1 {v5.16b}, [x1] + ld1 {v6.16b}, [x0] ST5( mov v3.16b, v4.16b ) encrypt_block v3, w3, x2, x8, w7 + ld1 {v10.16b-v11.16b}, [x12] + tbl v3.16b, {v3.16b}, v10.16b + sshr v11.16b, v11.16b, #7 eor v5.16b, v5.16b, v3.16b + bif v5.16b, v6.16b, v11.16b st1 {v5.16b}, [x0] b .Lctrout AES_FUNC_END(aes_ctr_encrypt) |