diff options
-rw-r--r-- | arch/x86/crypto/aes_ctrby8_avx-x86_64.S | 17 |
1 files changed, 15 insertions, 2 deletions
diff --git a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S index f091f122ed24..a029bc744244 100644 --- a/arch/x86/crypto/aes_ctrby8_avx-x86_64.S +++ b/arch/x86/crypto/aes_ctrby8_avx-x86_64.S @@ -108,6 +108,10 @@ byteswap_const: .octa 0x000102030405060708090A0B0C0D0E0F +ddq_low_msk: + .octa 0x0000000000000000FFFFFFFFFFFFFFFF +ddq_high_add_1: + .octa 0x00000000000000010000000000000000 ddq_add_1: .octa 0x00000000000000000000000000000001 ddq_add_2: @@ -169,7 +173,12 @@ ddq_add_8: .rept (by - 1) club DDQ_DATA, i club XDATA, i - vpaddd var_ddq_add(%rip), xcounter, var_xdata + vpaddq var_ddq_add(%rip), xcounter, var_xdata + vptest ddq_low_msk(%rip), var_xdata + jnz 1f + vpaddq ddq_high_add_1(%rip), var_xdata, var_xdata + vpaddq ddq_high_add_1(%rip), xcounter, xcounter + 1: vpshufb xbyteswap, var_xdata, var_xdata .set i, (i +1) .endr @@ -178,7 +187,11 @@ ddq_add_8: vpxor xkey0, xdata0, xdata0 club DDQ_DATA, by - vpaddd var_ddq_add(%rip), xcounter, xcounter + vpaddq var_ddq_add(%rip), xcounter, xcounter + vptest ddq_low_msk(%rip), xcounter + jnz 1f + vpaddq ddq_high_add_1(%rip), xcounter, xcounter + 1: .set i, 1 .rept (by - 1) |