summary | refs | log | tree | commit | diff | stats
path: root/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/crypto/camellia-aesni-avx2-asm_64.S')
-rw-r--r-- arch/x86/crypto/camellia-aesni-avx2-asm_64.S 144
1 files changed, 0 insertions, 144 deletions
diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
index 9561dee52de0..782e9712a1ec 100644
--- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
+++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S
@@ -624,10 +624,6 @@ SYM_FUNC_END(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab)
.section .rodata.cst16, "aM", @progbits, 16
.align 16
-/* For CTR-mode IV byteswap */
-.Lbswap128_mask:
- .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0
-
/*
* pre-SubByte transform
*
@@ -1054,143 +1050,3 @@ SYM_FUNC_START(camellia_cbc_dec_32way)
FRAME_END
ret;
SYM_FUNC_END(camellia_cbc_dec_32way)
-
-#define inc_le128(x, minus_one, tmp) \
- vpcmpeqq minus_one, x, tmp; \
- vpsubq minus_one, x, x; \
- vpslldq $8, tmp, tmp; \
- vpsubq tmp, x, x;
-
-#define add2_le128(x, minus_one, minus_two, tmp1, tmp2) \
- vpcmpeqq minus_one, x, tmp1; \
- vpcmpeqq minus_two, x, tmp2; \
- vpsubq minus_two, x, x; \
- vpor tmp2, tmp1, tmp1; \
- vpslldq $8, tmp1, tmp1; \
- vpsubq tmp1, x, x;
-
-SYM_FUNC_START(camellia_ctr_32way)
- /* input:
- * %rdi: ctx, CTX
- * %rsi: dst (32 blocks)
- * %rdx: src (32 blocks)
- * %rcx: iv (little endian, 128bit)
- */
- FRAME_BEGIN
-
- vzeroupper;
-
- movq %rsp, %r10;
- cmpq %rsi, %rdx;
- je .Lctr_use_stack;
-
- /* dst can be used as temporary storage, src is not overwritten. */
- movq %rsi, %rax;
- jmp .Lctr_continue;
-
-.Lctr_use_stack:
- subq $(16 * 32), %rsp;
- movq %rsp, %rax;
-
-.Lctr_continue:
- vpcmpeqd %ymm15, %ymm15, %ymm15;
- vpsrldq $8, %ymm15, %ymm15; /* ab: -1:0 ; cd: -1:0 */
- vpaddq %ymm15, %ymm15, %ymm12; /* ab: -2:0 ; cd: -2:0 */
-
- /* load IV and byteswap */
- vmovdqu (%rcx), %xmm0;
- vmovdqa %xmm0, %xmm1;
- inc_le128(%xmm0, %xmm15, %xmm14);
- vbroadcasti128 .Lbswap128_mask, %ymm14;
- vinserti128 $1, %xmm0, %ymm1, %ymm0;
- vpshufb %ymm14, %ymm0, %ymm13;
- vmovdqu %ymm13, 15 * 32(%rax);
-
- /* construct IVs */
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13); /* ab:le2 ; cd:le3 */
- vpshufb %ymm14, %ymm0, %ymm13;
- vmovdqu %ymm13, 14 * 32(%rax);
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm13;
- vmovdqu %ymm13, 13 * 32(%rax);
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm13;
- vmovdqu %ymm13, 12 * 32(%rax);
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm13;
- vmovdqu %ymm13, 11 * 32(%rax);
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm10;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm9;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm8;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm7;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm6;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm5;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm4;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm3;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm2;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vpshufb %ymm14, %ymm0, %ymm1;
- add2_le128(%ymm0, %ymm15, %ymm12, %ymm11, %ymm13);
- vextracti128 $1, %ymm0, %xmm13;
- vpshufb %ymm14, %ymm0, %ymm0;
- inc_le128(%xmm13, %xmm15, %xmm14);
- vmovdqu %xmm13, (%rcx);
-
- /* inpack32_pre: */
- vpbroadcastq (key_table)(CTX), %ymm15;
- vpshufb .Lpack_bswap, %ymm15, %ymm15;
- vpxor %ymm0, %ymm15, %ymm0;
- vpxor %ymm1, %ymm15, %ymm1;
- vpxor %ymm2, %ymm15, %ymm2;
- vpxor %ymm3, %ymm15, %ymm3;
- vpxor %ymm4, %ymm15, %ymm4;
- vpxor %ymm5, %ymm15, %ymm5;
- vpxor %ymm6, %ymm15, %ymm6;
- vpxor %ymm7, %ymm15, %ymm7;
- vpxor %ymm8, %ymm15, %ymm8;
- vpxor %ymm9, %ymm15, %ymm9;
- vpxor %ymm10, %ymm15, %ymm10;
- vpxor 11 * 32(%rax), %ymm15, %ymm11;
- vpxor 12 * 32(%rax), %ymm15, %ymm12;
- vpxor 13 * 32(%rax), %ymm15, %ymm13;
- vpxor 14 * 32(%rax), %ymm15, %ymm14;
- vpxor 15 * 32(%rax), %ymm15, %ymm15;
-
- call __camellia_enc_blk32;
-
- movq %r10, %rsp;
-
- vpxor 0 * 32(%rdx), %ymm7, %ymm7;
- vpxor 1 * 32(%rdx), %ymm6, %ymm6;
- vpxor 2 * 32(%rdx), %ymm5, %ymm5;
- vpxor 3 * 32(%rdx), %ymm4, %ymm4;
- vpxor 4 * 32(%rdx), %ymm3, %ymm3;
- vpxor 5 * 32(%rdx), %ymm2, %ymm2;
- vpxor 6 * 32(%rdx), %ymm1, %ymm1;
- vpxor 7 * 32(%rdx), %ymm0, %ymm0;
- vpxor 8 * 32(%rdx), %ymm15, %ymm15;
- vpxor 9 * 32(%rdx), %ymm14, %ymm14;
- vpxor 10 * 32(%rdx), %ymm13, %ymm13;
- vpxor 11 * 32(%rdx), %ymm12, %ymm12;
- vpxor 12 * 32(%rdx), %ymm11, %ymm11;
- vpxor 13 * 32(%rdx), %ymm10, %ymm10;
- vpxor 14 * 32(%rdx), %ymm9, %ymm9;
- vpxor 15 * 32(%rdx), %ymm8, %ymm8;
- write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0,
- %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9,
- %ymm8, %rsi);
-
- vzeroupper;
-
- FRAME_END
- ret;
-SYM_FUNC_END(camellia_ctr_32way)