From ba25d8b4011bc496afff65b2f28136aa141d7d6b Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Thu, 17 Sep 2015 14:45:56 +0100 Subject: ARM: STi: STiH407: Enable the 2 HW Random Number Generators for STiH4{07, 10} Signed-off-by: Lee Jones Signed-off-by: Herbert Xu --- arch/arm/boot/dts/stih407-family.dtsi | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/stih407-family.dtsi b/arch/arm/boot/dts/stih407-family.dtsi index ae0527754000..0c24fcb03577 100644 --- a/arch/arm/boot/dts/stih407-family.dtsi +++ b/arch/arm/boot/dts/stih407-family.dtsi @@ -610,5 +610,19 @@ clocks = <&clk_sysin>; st,pwm-num-chan = <4>; }; + + rng10: rng@08a89000 { + compatible = "st,rng"; + reg = <0x08a89000 0x1000>; + clocks = <&clk_sysin>; + status = "okay"; + }; + + rng11: rng@08a8a000 { + compatible = "st,rng"; + reg = <0x08a8a000 0x1000>; + clocks = <&clk_sysin>; + status = "okay"; + }; }; }; -- cgit v1.2.3 From c356a7e975a25e8867961c1b7a4a965d506f0a04 Mon Sep 17 00:00:00 2001 From: tim Date: Thu, 10 Sep 2015 15:26:59 -0700 Subject: crypto: x86/sha - Intel SHA Extensions optimized SHA1 transform function This patch includes the Intel SHA Extensions optimized implementation of SHA-1 update function. This function has been tested on Broxton platform and measured a speed up of 3.6x over the SSSE3 implementiation for 4K blocks. Originally-by: Chandramouli Narayanan Signed-off-by: Tim Chen Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1_ni_asm.S | 302 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 302 insertions(+) create mode 100644 arch/x86/crypto/sha1_ni_asm.S (limited to 'arch') diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S new file mode 100644 index 000000000000..874a651b9e7d --- /dev/null +++ b/arch/x86/crypto/sha1_ni_asm.S @@ -0,0 +1,302 @@ +/* + * Intel SHA Extensions optimized implementation of a SHA-1 update function + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Sean Gulley + * Tim Chen + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include + +#define DIGEST_PTR %rdi /* 1st arg */ +#define DATA_PTR %rsi /* 2nd arg */ +#define NUM_BLKS %rdx /* 3rd arg */ + +#define RSPSAVE %rax + +/* gcc conversion */ +#define FRAME_SIZE 32 /* space for 2x16 bytes */ + +#define ABCD %xmm0 +#define E0 %xmm1 /* Need two E's b/c they ping pong */ +#define E1 %xmm2 +#define MSG0 %xmm3 +#define MSG1 %xmm4 +#define MSG2 %xmm5 +#define MSG3 %xmm6 +#define SHUF_MASK %xmm7 + + +/* + * Intel SHA Extensions optimized implementation of a SHA-1 update function + * + * The function takes a pointer to the current hash values, a pointer to the + * input data, and a number of 64 byte blocks to process. Once all blocks have + * been processed, the digest pointer is updated with the resulting hash value. + * The function only processes complete blocks, there is no functionality to + * store partial blocks. All message padding and hash value initialization must + * be done outside the update function. + * + * The indented lines in the loop are instructions related to rounds processing. + * The non-indented lines are instructions related to the message schedule. + * + * void sha1_ni_transform(uint32_t *digest, const void *data, + uint32_t numBlocks) + * digest : pointer to digest + * data: pointer to input data + * numBlocks: Number of blocks to process + */ +.text +.align 32 +ENTRY(sha1_ni_transform) + mov %rsp, RSPSAVE + sub $FRAME_SIZE, %rsp + and $~0xF, %rsp + + shl $6, NUM_BLKS /* convert to bytes */ + jz .Ldone_hash + add DATA_PTR, NUM_BLKS /* pointer to end of data */ + + /* load initial hash values */ + pinsrd $3, 1*16(DIGEST_PTR), E0 + movdqu 0*16(DIGEST_PTR), ABCD + pand UPPER_WORD_MASK(%rip), E0 + pshufd $0x1B, ABCD, ABCD + + movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK + +.Lloop0: + /* Save hash values for addition after rounds */ + movdqa E0, (0*16)(%rsp) + movdqa ABCD, (1*16)(%rsp) + + /* Rounds 0-3 */ + movdqu 0*16(DATA_PTR), MSG0 + pshufb SHUF_MASK, MSG0 + paddd MSG0, E0 + movdqa ABCD, E1 + sha1rnds4 $0, E0, ABCD + + /* Rounds 4-7 */ + movdqu 1*16(DATA_PTR), MSG1 + pshufb SHUF_MASK, MSG1 + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1rnds4 $0, E1, ABCD + sha1msg1 MSG1, MSG0 + + /* Rounds 8-11 */ + movdqu 2*16(DATA_PTR), MSG2 + pshufb SHUF_MASK, MSG2 + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1rnds4 $0, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 12-15 */ + movdqu 3*16(DATA_PTR), MSG3 + pshufb SHUF_MASK, MSG3 + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $0, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 16-19 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $0, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 20-23 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG1, MSG0 + pxor MSG1, MSG3 + + /* Rounds 24-27 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $1, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 28-31 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 32-35 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $1, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 36-39 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $1, E1, ABCD + sha1msg1 MSG1, MSG0 + pxor MSG1, MSG3 + + /* Rounds 40-43 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 44-47 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $2, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 48-51 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 52-55 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $2, E1, ABCD + sha1msg1 MSG1, MSG0 + pxor MSG1, MSG3 + + /* Rounds 56-59 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $2, E0, ABCD + sha1msg1 MSG2, MSG1 + pxor MSG2, MSG0 + + /* Rounds 60-63 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1msg2 MSG3, MSG0 + sha1rnds4 $3, E1, ABCD + sha1msg1 MSG3, MSG2 + pxor MSG3, MSG1 + + /* Rounds 64-67 */ + sha1nexte MSG0, E0 + movdqa ABCD, E1 + sha1msg2 MSG0, MSG1 + sha1rnds4 $3, E0, ABCD + sha1msg1 MSG0, MSG3 + pxor MSG0, MSG2 + + /* Rounds 68-71 */ + sha1nexte MSG1, E1 + movdqa ABCD, E0 + sha1msg2 MSG1, MSG2 + sha1rnds4 $3, E1, ABCD + pxor MSG1, MSG3 + + /* Rounds 72-75 */ + sha1nexte MSG2, E0 + movdqa ABCD, E1 + sha1msg2 MSG2, MSG3 + sha1rnds4 $3, E0, ABCD + + /* Rounds 76-79 */ + sha1nexte MSG3, E1 + movdqa ABCD, E0 + sha1rnds4 $3, E1, ABCD + + /* Add current hash values with previously saved */ + sha1nexte (0*16)(%rsp), E0 + paddd (1*16)(%rsp), ABCD + + /* Increment data pointer and loop if more to process */ + add $64, DATA_PTR + cmp NUM_BLKS, DATA_PTR + jne .Lloop0 + + /* Write hash values back in the correct order */ + pshufd $0x1B, ABCD, ABCD + movdqu ABCD, 0*16(DIGEST_PTR) + pextrd $3, E0, 1*16(DIGEST_PTR) + +.Ldone_hash: + mov RSPSAVE, %rsp + + ret +ENDPROC(sha1_ni_transform) + +.data + +.align 64 +PSHUFFLE_BYTE_FLIP_MASK: + .octa 0x000102030405060708090a0b0c0d0e0f +UPPER_WORD_MASK: + .octa 0xFFFFFFFF000000000000000000000000 -- cgit v1.2.3 From 600a2334e83d22e5c3f7ff2581f545bfc354d206 Mon Sep 17 00:00:00 2001 From: tim Date: Thu, 10 Sep 2015 15:27:13 -0700 Subject: crypto: x86/sha - Intel SHA Extensions optimized SHA256 transform function This patch includes the Intel SHA Extensions optimized implementation of SHA-256 update function. This function has been tested on Broxton platform and measured a speed up of 3.6x over the SSSE3 implementiation for 4K blocks. Originally-by: Chandramouli Narayanan Signed-off-by: Tim Chen Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256_ni_asm.S | 353 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 353 insertions(+) create mode 100644 arch/x86/crypto/sha256_ni_asm.S (limited to 'arch') diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S new file mode 100644 index 000000000000..748cdf21a938 --- /dev/null +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -0,0 +1,353 @@ +/* + * Intel SHA Extensions optimized implementation of a SHA-256 update function + * + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2015 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * Contact Information: + * Sean Gulley + * Tim Chen + * + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include + +#define DIGEST_PTR %rdi /* 1st arg */ +#define DATA_PTR %rsi /* 2nd arg */ +#define NUM_BLKS %rdx /* 3rd arg */ + +#define SHA256CONSTANTS %rax + +#define MSG %xmm0 +#define STATE0 %xmm1 +#define STATE1 %xmm2 +#define MSGTMP0 %xmm3 +#define MSGTMP1 %xmm4 +#define MSGTMP2 %xmm5 +#define MSGTMP3 %xmm6 +#define MSGTMP4 %xmm7 + +#define SHUF_MASK %xmm8 + +#define ABEF_SAVE %xmm9 +#define CDGH_SAVE %xmm10 + +/* + * Intel SHA Extensions optimized implementation of a SHA-256 update function + * + * The function takes a pointer to the current hash values, a pointer to the + * input data, and a number of 64 byte blocks to process. Once all blocks have + * been processed, the digest pointer is updated with the resulting hash value. + * The function only processes complete blocks, there is no functionality to + * store partial blocks. All message padding and hash value initialization must + * be done outside the update function. + * + * The indented lines in the loop are instructions related to rounds processing. + * The non-indented lines are instructions related to the message schedule. + * + * void sha256_ni_transform(uint32_t *digest, const void *data, + uint32_t numBlocks); + * digest : pointer to digest + * data: pointer to input data + * numBlocks: Number of blocks to process + */ + +.text +.align 32 +ENTRY(sha256_ni_transform) + + shl $6, NUM_BLKS /* convert to bytes */ + jz .Ldone_hash + add DATA_PTR, NUM_BLKS /* pointer to end of data */ + + /* + * load initial hash values + * Need to reorder these appropriately + * DCBA, HGFE -> ABEF, CDGH + */ + movdqu 0*16(DIGEST_PTR), STATE0 + movdqu 1*16(DIGEST_PTR), STATE1 + + pshufd $0xB1, STATE0, STATE0 /* CDAB */ + pshufd $0x1B, STATE1, STATE1 /* EFGH */ + movdqa STATE0, MSGTMP4 + palignr $8, STATE1, STATE0 /* ABEF */ + pblendw $0xF0, MSGTMP4, STATE1 /* CDGH */ + + movdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), SHUF_MASK + lea K256(%rip), SHA256CONSTANTS + +.Lloop0: + /* Save hash values for addition after rounds */ + movdqa STATE0, ABEF_SAVE + movdqa STATE1, CDGH_SAVE + + /* Rounds 0-3 */ + movdqu 0*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP0 + paddd 0*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Rounds 4-7 */ + movdqu 1*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP1 + paddd 1*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP1, MSGTMP0 + + /* Rounds 8-11 */ + movdqu 2*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP2 + paddd 2*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP2, MSGTMP1 + + /* Rounds 12-15 */ + movdqu 3*16(DATA_PTR), MSG + pshufb SHUF_MASK, MSG + movdqa MSG, MSGTMP3 + paddd 3*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP3, MSGTMP4 + palignr $4, MSGTMP2, MSGTMP4 + paddd MSGTMP4, MSGTMP0 + sha256msg2 MSGTMP3, MSGTMP0 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP3, MSGTMP2 + + /* Rounds 16-19 */ + movdqa MSGTMP0, MSG + paddd 4*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP0, MSGTMP4 + palignr $4, MSGTMP3, MSGTMP4 + paddd MSGTMP4, MSGTMP1 + sha256msg2 MSGTMP0, MSGTMP1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP0, MSGTMP3 + + /* Rounds 20-23 */ + movdqa MSGTMP1, MSG + paddd 5*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP1, MSGTMP4 + palignr $4, MSGTMP0, MSGTMP4 + paddd MSGTMP4, MSGTMP2 + sha256msg2 MSGTMP1, MSGTMP2 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP1, MSGTMP0 + + /* Rounds 24-27 */ + movdqa MSGTMP2, MSG + paddd 6*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP2, MSGTMP4 + palignr $4, MSGTMP1, MSGTMP4 + paddd MSGTMP4, MSGTMP3 + sha256msg2 MSGTMP2, MSGTMP3 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP2, MSGTMP1 + + /* Rounds 28-31 */ + movdqa MSGTMP3, MSG + paddd 7*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP3, MSGTMP4 + palignr $4, MSGTMP2, MSGTMP4 + paddd MSGTMP4, MSGTMP0 + sha256msg2 MSGTMP3, MSGTMP0 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP3, MSGTMP2 + + /* Rounds 32-35 */ + movdqa MSGTMP0, MSG + paddd 8*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP0, MSGTMP4 + palignr $4, MSGTMP3, MSGTMP4 + paddd MSGTMP4, MSGTMP1 + sha256msg2 MSGTMP0, MSGTMP1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP0, MSGTMP3 + + /* Rounds 36-39 */ + movdqa MSGTMP1, MSG + paddd 9*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP1, MSGTMP4 + palignr $4, MSGTMP0, MSGTMP4 + paddd MSGTMP4, MSGTMP2 + sha256msg2 MSGTMP1, MSGTMP2 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP1, MSGTMP0 + + /* Rounds 40-43 */ + movdqa MSGTMP2, MSG + paddd 10*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP2, MSGTMP4 + palignr $4, MSGTMP1, MSGTMP4 + paddd MSGTMP4, MSGTMP3 + sha256msg2 MSGTMP2, MSGTMP3 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP2, MSGTMP1 + + /* Rounds 44-47 */ + movdqa MSGTMP3, MSG + paddd 11*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP3, MSGTMP4 + palignr $4, MSGTMP2, MSGTMP4 + paddd MSGTMP4, MSGTMP0 + sha256msg2 MSGTMP3, MSGTMP0 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP3, MSGTMP2 + + /* Rounds 48-51 */ + movdqa MSGTMP0, MSG + paddd 12*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP0, MSGTMP4 + palignr $4, MSGTMP3, MSGTMP4 + paddd MSGTMP4, MSGTMP1 + sha256msg2 MSGTMP0, MSGTMP1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + sha256msg1 MSGTMP0, MSGTMP3 + + /* Rounds 52-55 */ + movdqa MSGTMP1, MSG + paddd 13*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP1, MSGTMP4 + palignr $4, MSGTMP0, MSGTMP4 + paddd MSGTMP4, MSGTMP2 + sha256msg2 MSGTMP1, MSGTMP2 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Rounds 56-59 */ + movdqa MSGTMP2, MSG + paddd 14*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + movdqa MSGTMP2, MSGTMP4 + palignr $4, MSGTMP1, MSGTMP4 + paddd MSGTMP4, MSGTMP3 + sha256msg2 MSGTMP2, MSGTMP3 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Rounds 60-63 */ + movdqa MSGTMP3, MSG + paddd 15*16(SHA256CONSTANTS), MSG + sha256rnds2 STATE0, STATE1 + pshufd $0x0E, MSG, MSG + sha256rnds2 STATE1, STATE0 + + /* Add current hash values with previously saved */ + paddd ABEF_SAVE, STATE0 + paddd CDGH_SAVE, STATE1 + + /* Increment data pointer and loop if more to process */ + add $64, DATA_PTR + cmp NUM_BLKS, DATA_PTR + jne .Lloop0 + + /* Write hash values back in the correct order */ + pshufd $0x1B, STATE0, STATE0 /* FEBA */ + pshufd $0xB1, STATE1, STATE1 /* DCHG */ + movdqa STATE0, MSGTMP4 + pblendw $0xF0, STATE1, STATE0 /* DCBA */ + palignr $8, MSGTMP4, STATE1 /* HGFE */ + + movdqu STATE0, 0*16(DIGEST_PTR) + movdqu STATE1, 1*16(DIGEST_PTR) + +.Ldone_hash: + + ret +ENDPROC(sha256_ni_transform) + +.data +.align 64 +K256: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +PSHUFFLE_BYTE_FLIP_MASK: + .octa 0x0c0d0e0f08090a0b0405060700010203 -- cgit v1.2.3 From 95fca7df0b4964fbe3fe159e3d6e681e6b5b7a53 Mon Sep 17 00:00:00 2001 From: tim Date: Thu, 10 Sep 2015 15:27:20 -0700 Subject: crypto: x86/sha - glue code for Intel SHA extensions optimized SHA1 & SHA256 This patch adds the glue code to detect and utilize the Intel SHA extensions optimized SHA1 and SHA256 update transforms when available. This code has been tested on Broxton for functionality. Originally-by: Chandramouli Narayanan Signed-off-by: Tim Chen Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1_ssse3_glue.c | 12 +++++++++++- arch/x86/crypto/sha256_ssse3_glue.c | 38 ++++++++++++++++++++++--------------- 2 files changed, 34 insertions(+), 16 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 7c48e8b20848..98be8cc17ca2 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -44,6 +44,10 @@ asmlinkage void sha1_transform_avx(u32 *digest, const char *data, asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, unsigned int rounds); #endif +#ifdef CONFIG_AS_SHA1_NI +asmlinkage void sha1_ni_transform(u32 *digest, const char *data, + unsigned int rounds); +#endif static void (*sha1_transform_asm)(u32 *, const char *, unsigned int); @@ -166,12 +170,18 @@ static int __init sha1_ssse3_mod_init(void) #endif } #endif +#ifdef CONFIG_AS_SHA1_NI + if (boot_cpu_has(X86_FEATURE_SHA_NI)) { + sha1_transform_asm = sha1_ni_transform; + algo_name = "SHA-NI"; + } +#endif if (sha1_transform_asm) { pr_info("Using %s optimized SHA-1 implementation\n", algo_name); return crypto_register_shash(&alg); } - pr_info("Neither AVX nor AVX2 nor SSSE3 is available/usable.\n"); + pr_info("Neither AVX nor AVX2 nor SSSE3/SHA-NI is available/usable.\n"); return -ENODEV; } diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index f8097fc0d1d1..9c7b22c489f6 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -50,6 +50,10 @@ asmlinkage void sha256_transform_avx(u32 *digest, const char *data, asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, u64 rounds); #endif +#ifdef CONFIG_AS_SHA256_NI +asmlinkage void sha256_ni_transform(u32 *digest, const char *data, + u64 rounds); /*unsigned int rounds);*/ +#endif static void (*sha256_transform_asm)(u32 *, const char *, u64); @@ -142,36 +146,40 @@ static bool __init avx_usable(void) static int __init sha256_ssse3_mod_init(void) { + char *algo; + /* test for SSSE3 first */ - if (cpu_has_ssse3) + if (cpu_has_ssse3) { sha256_transform_asm = sha256_transform_ssse3; + algo = "SSSE3"; + } #ifdef CONFIG_AS_AVX /* allow AVX to override SSSE3, it's a little faster */ if (avx_usable()) { + sha256_transform_asm = sha256_transform_avx; + algo = "AVX"; #ifdef CONFIG_AS_AVX2 - if (boot_cpu_has(X86_FEATURE_AVX2) && boot_cpu_has(X86_FEATURE_BMI2)) + if (boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_BMI2)) { sha256_transform_asm = sha256_transform_rorx; - else + algo = "AVX2"; + } +#endif + } #endif - sha256_transform_asm = sha256_transform_avx; +#ifdef CONFIG_AS_SHA256_NI + if (boot_cpu_has(X86_FEATURE_SHA_NI)) { + sha256_transform_asm = sha256_ni_transform; + algo = "SHA-256-NI"; } #endif if (sha256_transform_asm) { -#ifdef CONFIG_AS_AVX - if (sha256_transform_asm == sha256_transform_avx) - pr_info("Using AVX optimized SHA-256 implementation\n"); -#ifdef CONFIG_AS_AVX2 - else if (sha256_transform_asm == sha256_transform_rorx) - pr_info("Using AVX2 optimized SHA-256 implementation\n"); -#endif - else -#endif - pr_info("Using SSSE3 optimized SHA-256 implementation\n"); + pr_info("Using %s optimized SHA-256 implementation\n", algo); return crypto_register_shashes(algs, ARRAY_SIZE(algs)); } - pr_info("Neither AVX nor SSSE3 is available/usable.\n"); + pr_info("Neither AVX nor SSSE3/SHA-NI is available/usable.\n"); return -ENODEV; } -- cgit v1.2.3 From e38b6b7fcfd11fb83dcac54a33cbca3739c45a09 Mon Sep 17 00:00:00 2001 From: tim Date: Thu, 10 Sep 2015 15:27:26 -0700 Subject: crypto: x86/sha - Add build support for Intel SHA Extensions optimized SHA1 and SHA256 This patch provides the configuration and build support to include and build the optimized SHA1 and SHA256 update transforms for the kernel's crypto library. Originally-by: Chandramouli Narayanan Signed-off-by: Tim Chen Acked-by: David S. Miller Signed-off-by: Herbert Xu --- arch/x86/Makefile | 6 ++++-- arch/x86/crypto/Makefile | 8 ++++++++ crypto/Kconfig | 10 ++++++---- 3 files changed, 18 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 747860c696e1..a8009c77918a 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -165,9 +165,11 @@ asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1) asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1) avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1) avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1) +sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1) +sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1) -KBUILD_AFLAGS += $(asinstr) $(avx_instr) $(avx2_instr) -KBUILD_CFLAGS += $(asinstr) $(avx_instr) $(avx2_instr) +KBUILD_AFLAGS += $(asinstr) $(avx_instr) $(avx2_instr) $(sha1_ni_instr) $(sha256_ni_instr) +KBUILD_CFLAGS += $(asinstr) $(avx_instr) $(avx2_instr) $(sha1_ni_instr) $(sha256_ni_instr) LDFLAGS := -m elf_$(UTS_MACHINE) diff --git a/arch/x86/crypto/Makefile b/arch/x86/crypto/Makefile index 9a2838cf0591..b9b912a44d61 100644 --- a/arch/x86/crypto/Makefile +++ b/arch/x86/crypto/Makefile @@ -5,6 +5,8 @@ avx_supported := $(call as-instr,vpxor %xmm0$(comma)%xmm0$(comma)%xmm0,yes,no) avx2_supported := $(call as-instr,vpgatherdd %ymm0$(comma)(%eax$(comma)%ymm1\ $(comma)4)$(comma)%ymm2,yes,no) +sha1_ni_supported :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,yes,no) +sha256_ni_supported :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,yes,no) obj-$(CONFIG_CRYPTO_GLUE_HELPER_X86) += glue_helper.o @@ -91,9 +93,15 @@ ifeq ($(avx2_supported),yes) sha1-ssse3-y += sha1_avx2_x86_64_asm.o poly1305-x86_64-y += poly1305-avx2-x86_64.o endif +ifeq ($(sha1_ni_supported),yes) +sha1-ssse3-y += sha1_ni_asm.o +endif crc32c-intel-y := crc32c-intel_glue.o crc32c-intel-$(CONFIG_64BIT) += crc32c-pcl-intel-asm_64.o crc32-pclmul-y := crc32-pclmul_asm.o crc32-pclmul_glue.o sha256-ssse3-y := sha256-ssse3-asm.o sha256-avx-asm.o sha256-avx2-asm.o sha256_ssse3_glue.o +ifeq ($(sha256_ni_supported),yes) +sha256-ssse3-y += sha256_ni_asm.o +endif sha512-ssse3-y := sha512-ssse3-asm.o sha512-avx-asm.o sha512-avx2-asm.o sha512_ssse3_glue.o crct10dif-pclmul-y := crct10dif-pcl-asm_64.o crct10dif-pclmul_glue.o diff --git a/crypto/Kconfig b/crypto/Kconfig index 48ee3e175dac..fc934444d3a2 100644 --- a/crypto/Kconfig +++ b/crypto/Kconfig @@ -597,17 +597,18 @@ config CRYPTO_SHA1 SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2). config CRYPTO_SHA1_SSSE3 - tristate "SHA1 digest algorithm (SSSE3/AVX/AVX2)" + tristate "SHA1 digest algorithm (SSSE3/AVX/AVX2/SHA-NI)" depends on X86 && 64BIT select CRYPTO_SHA1 select CRYPTO_HASH help SHA-1 secure hash standard (FIPS 180-1/DFIPS 180-2) implemented using Supplemental SSE3 (SSSE3) instructions or Advanced Vector - Extensions (AVX/AVX2), when available. + Extensions (AVX/AVX2) or SHA-NI(SHA Extensions New Instructions), + when available. config CRYPTO_SHA256_SSSE3 - tristate "SHA256 digest algorithm (SSSE3/AVX/AVX2)" + tristate "SHA256 digest algorithm (SSSE3/AVX/AVX2/SHA-NI)" depends on X86 && 64BIT select CRYPTO_SHA256 select CRYPTO_HASH @@ -615,7 +616,8 @@ config CRYPTO_SHA256_SSSE3 SHA-256 secure hash standard (DFIPS 180-2) implemented using Supplemental SSE3 (SSSE3) instructions, or Advanced Vector Extensions version 1 (AVX1), or Advanced Vector Extensions - version 2 (AVX2) instructions, when available. + version 2 (AVX2) instructions, or SHA-NI (SHA Extensions New + Instructions) when available. config CRYPTO_SHA512_SSSE3 tristate "SHA512 digest algorithm (SSSE3/AVX/AVX2)" -- cgit v1.2.3 From 85c66ecd6f2144c075044292359e179b20af1f2d Mon Sep 17 00:00:00 2001 From: tim Date: Wed, 16 Sep 2015 16:34:53 -0700 Subject: crypto: x86/sha - Restructure x86 sha1 glue code to expose all the available sha1 transforms Restructure the x86 sha1 glue code so we will expose sha1 transforms based on SSSE3, AVX, AVX2 or SHA-NI extension as separate individual drivers when cpu provides such support. This will make it easy for alternative algorithms to be used if desired and makes the code cleaner and easier to maintain. Signed-off-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/sha1_ssse3_glue.c | 324 +++++++++++++++++++++++++++++--------- 1 file changed, 250 insertions(+), 74 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/sha1_ssse3_glue.c b/arch/x86/crypto/sha1_ssse3_glue.c index 98be8cc17ca2..c934197fe84a 100644 --- a/arch/x86/crypto/sha1_ssse3_glue.c +++ b/arch/x86/crypto/sha1_ssse3_glue.c @@ -31,28 +31,11 @@ #include #include +typedef void (sha1_transform_fn)(u32 *digest, const char *data, + unsigned int rounds); -asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, - unsigned int rounds); -#ifdef CONFIG_AS_AVX -asmlinkage void sha1_transform_avx(u32 *digest, const char *data, - unsigned int rounds); -#endif -#ifdef CONFIG_AS_AVX2 -#define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ - -asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, - unsigned int rounds); -#endif -#ifdef CONFIG_AS_SHA1_NI -asmlinkage void sha1_ni_transform(u32 *digest, const char *data, - unsigned int rounds); -#endif - -static void (*sha1_transform_asm)(u32 *, const char *, unsigned int); - -static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int sha1_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha1_transform_fn *sha1_xform) { struct sha1_state *sctx = shash_desc_ctx(desc); @@ -65,14 +48,14 @@ static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, kernel_fpu_begin(); sha1_base_do_update(desc, data, len, - (sha1_block_fn *)sha1_transform_asm); + (sha1_block_fn *)sha1_xform); kernel_fpu_end(); return 0; } -static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) +static int sha1_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out, sha1_transform_fn *sha1_xform) { if (!irq_fpu_usable()) return crypto_sha1_finup(desc, data, len, out); @@ -80,32 +63,37 @@ static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data, kernel_fpu_begin(); if (len) sha1_base_do_update(desc, data, len, - (sha1_block_fn *)sha1_transform_asm); - sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_transform_asm); + (sha1_block_fn *)sha1_xform); + sha1_base_do_finalize(desc, (sha1_block_fn *)sha1_xform); kernel_fpu_end(); return sha1_base_finish(desc, out); } -/* Add padding and return the message digest. */ -static int sha1_ssse3_final(struct shash_desc *desc, u8 *out) +asmlinkage void sha1_transform_ssse3(u32 *digest, const char *data, + unsigned int rounds); + +static int sha1_ssse3_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { - return sha1_ssse3_finup(desc, NULL, 0, out); + return sha1_update(desc, data, len, + (sha1_transform_fn *) sha1_transform_ssse3); } -#ifdef CONFIG_AS_AVX2 -static void sha1_apply_transform_avx2(u32 *digest, const char *data, - unsigned int rounds) +static int sha1_ssse3_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) { - /* Select the optimal transform based on data block size */ - if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE) - sha1_transform_avx2(digest, data, rounds); - else - sha1_transform_avx(digest, data, rounds); + return sha1_finup(desc, data, len, out, + (sha1_transform_fn *) sha1_transform_ssse3); +} + +/* Add padding and return the message digest. */ +static int sha1_ssse3_final(struct shash_desc *desc, u8 *out) +{ + return sha1_ssse3_finup(desc, NULL, 0, out); } -#endif -static struct shash_alg alg = { +static struct shash_alg sha1_ssse3_alg = { .digestsize = SHA1_DIGEST_SIZE, .init = sha1_base_init, .update = sha1_ssse3_update, @@ -114,7 +102,7 @@ static struct shash_alg alg = { .descsize = sizeof(struct sha1_state), .base = { .cra_name = "sha1", - .cra_driver_name= "sha1-ssse3", + .cra_driver_name = "sha1-ssse3", .cra_priority = 150, .cra_flags = CRYPTO_ALG_TYPE_SHASH, .cra_blocksize = SHA1_BLOCK_SIZE, @@ -122,8 +110,60 @@ static struct shash_alg alg = { } }; +static int register_sha1_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + return crypto_register_shash(&sha1_ssse3_alg); + return 0; +} + +static void unregister_sha1_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + crypto_unregister_shash(&sha1_ssse3_alg); +} + #ifdef CONFIG_AS_AVX -static bool __init avx_usable(void) +asmlinkage void sha1_transform_avx(u32 *digest, const char *data, + unsigned int rounds); + +static int sha1_avx_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha1_update(desc, data, len, + (sha1_transform_fn *) sha1_transform_avx); +} + +static int sha1_avx_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha1_finup(desc, data, len, out, + (sha1_transform_fn *) sha1_transform_avx); +} + +static int sha1_avx_final(struct shash_desc *desc, u8 *out) +{ + return sha1_avx_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha1_avx_alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_base_init, + .update = sha1_avx_update, + .final = sha1_avx_final, + .finup = sha1_avx_finup, + .descsize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-avx", + .cra_priority = 160, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}; + +static bool avx_usable(void) { if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) { if (cpu_has_avx) @@ -134,61 +174,197 @@ static bool __init avx_usable(void) return true; } -#ifdef CONFIG_AS_AVX2 -static bool __init avx2_usable(void) +static int register_sha1_avx(void) +{ + if (avx_usable()) + return crypto_register_shash(&sha1_avx_alg); + return 0; +} + +static void unregister_sha1_avx(void) +{ + if (avx_usable()) + crypto_unregister_shash(&sha1_avx_alg); +} + +#else /* CONFIG_AS_AVX */ +static inline int register_sha1_avx(void) { return 0; } +static inline void unregister_sha1_avx(void) { } +#endif /* CONFIG_AS_AVX */ + + +#if defined(CONFIG_AS_AVX2) && (CONFIG_AS_AVX) +#define SHA1_AVX2_BLOCK_OPTSIZE 4 /* optimal 4*64 bytes of SHA1 blocks */ + +asmlinkage void sha1_transform_avx2(u32 *digest, const char *data, + unsigned int rounds); + +static bool avx2_usable(void) { - if (avx_usable() && cpu_has_avx2 && boot_cpu_has(X86_FEATURE_BMI1) && - boot_cpu_has(X86_FEATURE_BMI2)) + if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) + && boot_cpu_has(X86_FEATURE_BMI1) + && boot_cpu_has(X86_FEATURE_BMI2)) return true; return false; } -#endif -#endif -static int __init sha1_ssse3_mod_init(void) +static void sha1_apply_transform_avx2(u32 *digest, const char *data, + unsigned int rounds) { - char *algo_name; + /* Select the optimal transform based on data block size */ + if (rounds >= SHA1_AVX2_BLOCK_OPTSIZE) + sha1_transform_avx2(digest, data, rounds); + else + sha1_transform_avx(digest, data, rounds); +} - /* test for SSSE3 first */ - if (cpu_has_ssse3) { - sha1_transform_asm = sha1_transform_ssse3; - algo_name = "SSSE3"; - } +static int sha1_avx2_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha1_update(desc, data, len, + (sha1_transform_fn *) sha1_apply_transform_avx2); +} -#ifdef CONFIG_AS_AVX - /* allow AVX to override SSSE3, it's a little faster */ - if (avx_usable()) { - sha1_transform_asm = sha1_transform_avx; - algo_name = "AVX"; -#ifdef CONFIG_AS_AVX2 - /* allow AVX2 to override AVX, it's a little faster */ - if (avx2_usable()) { - sha1_transform_asm = sha1_apply_transform_avx2; - algo_name = "AVX2"; - } -#endif +static int sha1_avx2_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha1_finup(desc, data, len, out, + (sha1_transform_fn *) sha1_apply_transform_avx2); +} + +static int sha1_avx2_final(struct shash_desc *desc, u8 *out) +{ + return sha1_avx2_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha1_avx2_alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_base_init, + .update = sha1_avx2_update, + .final = sha1_avx2_final, + .finup = sha1_avx2_finup, + .descsize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-avx2", + .cra_priority = 170, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, } +}; + +static int register_sha1_avx2(void) +{ + if (avx2_usable()) + return crypto_register_shash(&sha1_avx2_alg); + return 0; +} + +static void unregister_sha1_avx2(void) +{ + if (avx2_usable()) + crypto_unregister_shash(&sha1_avx2_alg); +} + +#else +static inline int register_sha1_avx2(void) { return 0; } +static inline void unregister_sha1_avx2(void) { } #endif + #ifdef CONFIG_AS_SHA1_NI - if (boot_cpu_has(X86_FEATURE_SHA_NI)) { - sha1_transform_asm = sha1_ni_transform; - algo_name = "SHA-NI"; +asmlinkage void sha1_ni_transform(u32 *digest, const char *data, + unsigned int rounds); + +static int sha1_ni_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha1_update(desc, data, len, + (sha1_transform_fn *) sha1_ni_transform); +} + +static int sha1_ni_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha1_finup(desc, data, len, out, + (sha1_transform_fn *) sha1_ni_transform); +} + +static int sha1_ni_final(struct shash_desc *desc, u8 *out) +{ + return sha1_ni_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha1_ni_alg = { + .digestsize = SHA1_DIGEST_SIZE, + .init = sha1_base_init, + .update = sha1_ni_update, + .final = sha1_ni_final, + .finup = sha1_ni_finup, + .descsize = sizeof(struct sha1_state), + .base = { + .cra_name = "sha1", + .cra_driver_name = "sha1-ni", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA1_BLOCK_SIZE, + .cra_module = THIS_MODULE, } +}; + +static int register_sha1_ni(void) +{ + if (boot_cpu_has(X86_FEATURE_SHA_NI)) + return crypto_register_shash(&sha1_ni_alg); + return 0; +} + +static void unregister_sha1_ni(void) +{ + if (boot_cpu_has(X86_FEATURE_SHA_NI)) + crypto_unregister_shash(&sha1_ni_alg); +} + +#else +static inline int register_sha1_ni(void) { return 0; } +static inline void unregister_sha1_ni(void) { } #endif - if (sha1_transform_asm) { - pr_info("Using %s optimized SHA-1 implementation\n", algo_name); - return crypto_register_shash(&alg); +static int __init sha1_ssse3_mod_init(void) +{ + if (register_sha1_ssse3()) + goto fail; + + if (register_sha1_avx()) { + unregister_sha1_ssse3(); + goto fail; } - pr_info("Neither AVX nor AVX2 nor SSSE3/SHA-NI is available/usable.\n"); + if (register_sha1_avx2()) { + unregister_sha1_avx(); + unregister_sha1_ssse3(); + goto fail; + } + + if (register_sha1_ni()) { + unregister_sha1_avx2(); + unregister_sha1_avx(); + unregister_sha1_ssse3(); + goto fail; + } + + return 0; +fail: return -ENODEV; } static void __exit sha1_ssse3_mod_fini(void) { - crypto_unregister_shash(&alg); + unregister_sha1_ni(); + unregister_sha1_avx2(); + unregister_sha1_avx(); + unregister_sha1_ssse3(); } module_init(sha1_ssse3_mod_init); -- cgit v1.2.3 From 5dda42fc89f26fb3b6312076b17feda8c397d2b8 Mon Sep 17 00:00:00 2001 From: tim Date: Wed, 16 Sep 2015 16:35:23 -0700 Subject: crypto: x86/sha - Restructure x86 sha256 glue code to expose all the available sha256 transforms Restructure the x86 sha256 glue code so we will expose sha256 transforms based on SSSE3, AVX, AVX2 or SHA-NI extension as separate individual drivers when cpu provides such support. This will make it easy for alternative algorithms to be used if desired and makes the code cleaner and easier to maintain. Signed-off-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/sha256_ssse3_glue.c | 331 ++++++++++++++++++++++++++++++------ 1 file changed, 281 insertions(+), 50 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/sha256_ssse3_glue.c b/arch/x86/crypto/sha256_ssse3_glue.c index 9c7b22c489f6..863e2f6aad13 100644 --- a/arch/x86/crypto/sha256_ssse3_glue.c +++ b/arch/x86/crypto/sha256_ssse3_glue.c @@ -42,23 +42,10 @@ asmlinkage void sha256_transform_ssse3(u32 *digest, const char *data, u64 rounds); -#ifdef CONFIG_AS_AVX -asmlinkage void sha256_transform_avx(u32 *digest, const char *data, - u64 rounds); -#endif -#ifdef CONFIG_AS_AVX2 -asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, - u64 rounds); -#endif -#ifdef CONFIG_AS_SHA256_NI -asmlinkage void sha256_ni_transform(u32 *digest, const char *data, - u64 rounds); /*unsigned int rounds);*/ -#endif +typedef void (sha256_transform_fn)(u32 *digest, const char *data, u64 rounds); -static void (*sha256_transform_asm)(u32 *, const char *, u64); - -static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int sha256_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha256_transform_fn *sha256_xform) { struct sha256_state *sctx = shash_desc_ctx(desc); @@ -71,14 +58,14 @@ static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, kernel_fpu_begin(); sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_transform_asm); + (sha256_block_fn *)sha256_xform); kernel_fpu_end(); return 0; } -static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) +static int sha256_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out, sha256_transform_fn *sha256_xform) { if (!irq_fpu_usable()) return crypto_sha256_finup(desc, data, len, out); @@ -86,20 +73,32 @@ static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, kernel_fpu_begin(); if (len) sha256_base_do_update(desc, data, len, - (sha256_block_fn *)sha256_transform_asm); - sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_transform_asm); + (sha256_block_fn *)sha256_xform); + sha256_base_do_finalize(desc, (sha256_block_fn *)sha256_xform); kernel_fpu_end(); return sha256_base_finish(desc, out); } +static int sha256_ssse3_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha256_update(desc, data, len, sha256_transform_ssse3); +} + +static int sha256_ssse3_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha256_finup(desc, data, len, out, sha256_transform_ssse3); +} + /* Add padding and return the message digest. */ static int sha256_ssse3_final(struct shash_desc *desc, u8 *out) { return sha256_ssse3_finup(desc, NULL, 0, out); } -static struct shash_alg algs[] = { { +static struct shash_alg sha256_ssse3_algs[] = { { .digestsize = SHA256_DIGEST_SIZE, .init = sha256_base_init, .update = sha256_ssse3_update, @@ -131,8 +130,75 @@ static struct shash_alg algs[] = { { } } }; +static int register_sha256_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + return crypto_register_shashes(sha256_ssse3_algs, + ARRAY_SIZE(sha256_ssse3_algs)); + return 0; +} + +static void unregister_sha256_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + crypto_unregister_shashes(sha256_ssse3_algs, + ARRAY_SIZE(sha256_ssse3_algs)); +} + #ifdef CONFIG_AS_AVX -static bool __init avx_usable(void) +asmlinkage void sha256_transform_avx(u32 *digest, const char *data, + u64 rounds); + +static int sha256_avx_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha256_update(desc, data, len, sha256_transform_avx); +} + +static int sha256_avx_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha256_finup(desc, data, len, out, sha256_transform_avx); +} + +static int sha256_avx_final(struct shash_desc *desc, u8 *out) +{ + return sha256_avx_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha256_avx_algs[] = { { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_base_init, + .update = sha256_avx_update, + .final = sha256_avx_final, + .finup = sha256_avx_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-avx", + .cra_priority = 160, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_base_init, + .update = sha256_avx_update, + .final = sha256_avx_final, + .finup = sha256_avx_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-avx", + .cra_priority = 160, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static bool avx_usable(void) { if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) { if (cpu_has_avx) @@ -142,51 +208,216 @@ static bool __init avx_usable(void) return true; } -#endif -static int __init sha256_ssse3_mod_init(void) +static int register_sha256_avx(void) { - char *algo; + if (avx_usable()) + return crypto_register_shashes(sha256_avx_algs, + ARRAY_SIZE(sha256_avx_algs)); + return 0; +} - /* test for SSSE3 first */ - if (cpu_has_ssse3) { - sha256_transform_asm = sha256_transform_ssse3; - algo = "SSSE3"; - } +static void unregister_sha256_avx(void) +{ + if (avx_usable()) + crypto_unregister_shashes(sha256_avx_algs, + ARRAY_SIZE(sha256_avx_algs)); +} -#ifdef CONFIG_AS_AVX - /* allow AVX to override SSSE3, it's a little faster */ - if (avx_usable()) { - sha256_transform_asm = sha256_transform_avx; - algo = "AVX"; -#ifdef CONFIG_AS_AVX2 - if (boot_cpu_has(X86_FEATURE_AVX2) && - boot_cpu_has(X86_FEATURE_BMI2)) { - sha256_transform_asm = sha256_transform_rorx; - algo = "AVX2"; - } +#else +static inline int register_sha256_avx(void) { return 0; } +static inline void unregister_sha256_avx(void) { } #endif + +#if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX) +asmlinkage void sha256_transform_rorx(u32 *digest, const char *data, + u64 rounds); + +static int sha256_avx2_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha256_update(desc, data, len, sha256_transform_rorx); +} + +static int sha256_avx2_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha256_finup(desc, data, len, out, sha256_transform_rorx); +} + +static int sha256_avx2_final(struct shash_desc *desc, u8 *out) +{ + return sha256_avx2_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha256_avx2_algs[] = { { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_base_init, + .update = sha256_avx2_update, + .final = sha256_avx2_final, + .finup = sha256_avx2_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-avx2", + .cra_priority = 170, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, } +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_base_init, + .update = sha256_avx2_update, + .final = sha256_avx2_final, + .finup = sha256_avx2_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-avx2", + .cra_priority = 170, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static bool avx2_usable(void) +{ + if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_BMI2)) + return true; + + return false; +} + +static int register_sha256_avx2(void) +{ + if (avx2_usable()) + return crypto_register_shashes(sha256_avx2_algs, + ARRAY_SIZE(sha256_avx2_algs)); + return 0; +} + +static void unregister_sha256_avx2(void) +{ + if (avx2_usable()) + crypto_unregister_shashes(sha256_avx2_algs, + ARRAY_SIZE(sha256_avx2_algs)); +} + +#else +static inline int register_sha256_avx2(void) { return 0; } +static inline void unregister_sha256_avx2(void) { } #endif + #ifdef CONFIG_AS_SHA256_NI - if (boot_cpu_has(X86_FEATURE_SHA_NI)) { - sha256_transform_asm = sha256_ni_transform; - algo = "SHA-256-NI"; +asmlinkage void sha256_ni_transform(u32 *digest, const char *data, + u64 rounds); /*unsigned int rounds);*/ + +static int sha256_ni_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha256_update(desc, data, len, sha256_ni_transform); +} + +static int sha256_ni_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha256_finup(desc, data, len, out, sha256_ni_transform); +} + +static int sha256_ni_final(struct shash_desc *desc, u8 *out) +{ + return sha256_ni_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha256_ni_algs[] = { { + .digestsize = SHA256_DIGEST_SIZE, + .init = sha256_base_init, + .update = sha256_ni_update, + .final = sha256_ni_final, + .finup = sha256_ni_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha256", + .cra_driver_name = "sha256-ni", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA256_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA224_DIGEST_SIZE, + .init = sha224_base_init, + .update = sha256_ni_update, + .final = sha256_ni_final, + .finup = sha256_ni_finup, + .descsize = sizeof(struct sha256_state), + .base = { + .cra_name = "sha224", + .cra_driver_name = "sha224-ni", + .cra_priority = 250, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA224_BLOCK_SIZE, + .cra_module = THIS_MODULE, } +} }; + +static int register_sha256_ni(void) +{ + if (boot_cpu_has(X86_FEATURE_SHA_NI)) + return crypto_register_shashes(sha256_ni_algs, + ARRAY_SIZE(sha256_ni_algs)); + return 0; +} + +static void unregister_sha256_ni(void) +{ + if (boot_cpu_has(X86_FEATURE_SHA_NI)) + crypto_unregister_shashes(sha256_ni_algs, + ARRAY_SIZE(sha256_ni_algs)); +} + +#else +static inline int register_sha256_ni(void) { return 0; } +static inline void unregister_sha256_ni(void) { } #endif - if (sha256_transform_asm) { - pr_info("Using %s optimized SHA-256 implementation\n", algo); - return crypto_register_shashes(algs, ARRAY_SIZE(algs)); +static int __init sha256_ssse3_mod_init(void) +{ + if (register_sha256_ssse3()) + goto fail; + + if (register_sha256_avx()) { + unregister_sha256_ssse3(); + goto fail; } - pr_info("Neither AVX nor SSSE3/SHA-NI is available/usable.\n"); + if (register_sha256_avx2()) { + unregister_sha256_avx(); + unregister_sha256_ssse3(); + goto fail; + } + + if (register_sha256_ni()) { + unregister_sha256_avx2(); + unregister_sha256_avx(); + unregister_sha256_ssse3(); + goto fail; + } + + return 0; +fail: return -ENODEV; } static void __exit sha256_ssse3_mod_fini(void) { - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); + unregister_sha256_ni(); + unregister_sha256_avx2(); + unregister_sha256_avx(); + unregister_sha256_ssse3(); } module_init(sha256_ssse3_mod_init); -- cgit v1.2.3 From be6ec98ddb6749bba0fc7f67bd2f89a2396805de Mon Sep 17 00:00:00 2001 From: tim Date: Wed, 16 Sep 2015 16:35:53 -0700 Subject: crypto: x86/sha - Restructure x86 sha512 glue code to expose all the available sha512 transforms Restructure the x86 sha512 glue code so we will expose sha512 transforms based on SSSE3, AVX or AVX2 as separate individual drivers when cpu provides support. This will make it easy for alternative algorithms to be used if desired and makes the code cleaner and easier to maintain. Signed-off-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/sha512_ssse3_glue.c | 249 +++++++++++++++++++++++++++++------- 1 file changed, 204 insertions(+), 45 deletions(-) (limited to 'arch') diff --git a/arch/x86/crypto/sha512_ssse3_glue.c b/arch/x86/crypto/sha512_ssse3_glue.c index 2edad7b81870..0dfe9a2ba64b 100644 --- a/arch/x86/crypto/sha512_ssse3_glue.c +++ b/arch/x86/crypto/sha512_ssse3_glue.c @@ -41,19 +41,11 @@ asmlinkage void sha512_transform_ssse3(u64 *digest, const char *data, u64 rounds); -#ifdef CONFIG_AS_AVX -asmlinkage void sha512_transform_avx(u64 *digest, const char *data, - u64 rounds); -#endif -#ifdef CONFIG_AS_AVX2 -asmlinkage void sha512_transform_rorx(u64 *digest, const char *data, - u64 rounds); -#endif -static void (*sha512_transform_asm)(u64 *, const char *, u64); +typedef void (sha512_transform_fn)(u64 *digest, const char *data, u64 rounds); -static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, - unsigned int len) +static int sha512_update(struct shash_desc *desc, const u8 *data, + unsigned int len, sha512_transform_fn *sha512_xform) { struct sha512_state *sctx = shash_desc_ctx(desc); @@ -66,14 +58,14 @@ static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, kernel_fpu_begin(); sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_transform_asm); + (sha512_block_fn *)sha512_xform); kernel_fpu_end(); return 0; } -static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data, - unsigned int len, u8 *out) +static int sha512_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out, sha512_transform_fn *sha512_xform) { if (!irq_fpu_usable()) return crypto_sha512_finup(desc, data, len, out); @@ -81,20 +73,32 @@ static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data, kernel_fpu_begin(); if (len) sha512_base_do_update(desc, data, len, - (sha512_block_fn *)sha512_transform_asm); - sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_transform_asm); + (sha512_block_fn *)sha512_xform); + sha512_base_do_finalize(desc, (sha512_block_fn *)sha512_xform); kernel_fpu_end(); return sha512_base_finish(desc, out); } +static int sha512_ssse3_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha512_update(desc, data, len, sha512_transform_ssse3); +} + +static int sha512_ssse3_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha512_finup(desc, data, len, out, sha512_transform_ssse3); +} + /* Add padding and return the message digest. */ static int sha512_ssse3_final(struct shash_desc *desc, u8 *out) { return sha512_ssse3_finup(desc, NULL, 0, out); } -static struct shash_alg algs[] = { { +static struct shash_alg sha512_ssse3_algs[] = { { .digestsize = SHA512_DIGEST_SIZE, .init = sha512_base_init, .update = sha512_ssse3_update, @@ -126,8 +130,25 @@ static struct shash_alg algs[] = { { } } }; +static int register_sha512_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + return crypto_register_shashes(sha512_ssse3_algs, + ARRAY_SIZE(sha512_ssse3_algs)); + return 0; +} + +static void unregister_sha512_ssse3(void) +{ + if (boot_cpu_has(X86_FEATURE_SSSE3)) + crypto_unregister_shashes(sha512_ssse3_algs, + ARRAY_SIZE(sha512_ssse3_algs)); +} + #ifdef CONFIG_AS_AVX -static bool __init avx_usable(void) +asmlinkage void sha512_transform_avx(u64 *digest, const char *data, + u64 rounds); +static bool avx_usable(void) { if (!cpu_has_xfeatures(XSTATE_SSE | XSTATE_YMM, NULL)) { if (cpu_has_avx) @@ -137,47 +158,185 @@ static bool __init avx_usable(void) return true; } -#endif -static int __init sha512_ssse3_mod_init(void) +static int sha512_avx_update(struct shash_desc *desc, const u8 *data, + unsigned int len) { - /* test for SSSE3 first */ - if (cpu_has_ssse3) - sha512_transform_asm = sha512_transform_ssse3; + return sha512_update(desc, data, len, sha512_transform_avx); +} -#ifdef CONFIG_AS_AVX - /* allow AVX to override SSSE3, it's a little faster */ - if (avx_usable()) { -#ifdef CONFIG_AS_AVX2 - if (boot_cpu_has(X86_FEATURE_AVX2)) - sha512_transform_asm = sha512_transform_rorx; - else -#endif - sha512_transform_asm = sha512_transform_avx; +static int sha512_avx_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha512_finup(desc, data, len, out, sha512_transform_avx); +} + +/* Add padding and return the message digest. */ +static int sha512_avx_final(struct shash_desc *desc, u8 *out) +{ + return sha512_avx_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha512_avx_algs[] = { { + .digestsize = SHA512_DIGEST_SIZE, + .init = sha512_base_init, + .update = sha512_avx_update, + .final = sha512_avx_final, + .finup = sha512_avx_finup, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-avx", + .cra_priority = 160, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, } -#endif +}, { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_base_init, + .update = sha512_avx_update, + .final = sha512_avx_final, + .finup = sha512_avx_finup, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-avx", + .cra_priority = 160, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; - if (sha512_transform_asm) { -#ifdef CONFIG_AS_AVX - if (sha512_transform_asm == sha512_transform_avx) - pr_info("Using AVX optimized SHA-512 implementation\n"); -#ifdef CONFIG_AS_AVX2 - else if (sha512_transform_asm == sha512_transform_rorx) - pr_info("Using AVX2 optimized SHA-512 implementation\n"); +static int register_sha512_avx(void) +{ + if (avx_usable()) + return crypto_register_shashes(sha512_avx_algs, + ARRAY_SIZE(sha512_avx_algs)); + return 0; +} + +static void unregister_sha512_avx(void) +{ + if (avx_usable()) + crypto_unregister_shashes(sha512_avx_algs, + ARRAY_SIZE(sha512_avx_algs)); +} +#else +static inline int register_sha512_avx(void) { return 0; } +static inline void unregister_sha512_avx(void) { } #endif - else + +#if defined(CONFIG_AS_AVX2) && defined(CONFIG_AS_AVX) +asmlinkage void sha512_transform_rorx(u64 *digest, const char *data, + u64 rounds); + +static int sha512_avx2_update(struct shash_desc *desc, const u8 *data, + unsigned int len) +{ + return sha512_update(desc, data, len, sha512_transform_rorx); +} + +static int sha512_avx2_finup(struct shash_desc *desc, const u8 *data, + unsigned int len, u8 *out) +{ + return sha512_finup(desc, data, len, out, sha512_transform_rorx); +} + +/* Add padding and return the message digest. */ +static int sha512_avx2_final(struct shash_desc *desc, u8 *out) +{ + return sha512_avx2_finup(desc, NULL, 0, out); +} + +static struct shash_alg sha512_avx2_algs[] = { { + .digestsize = SHA512_DIGEST_SIZE, + .init = sha512_base_init, + .update = sha512_avx2_update, + .final = sha512_avx2_final, + .finup = sha512_avx2_finup, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha512", + .cra_driver_name = "sha512-avx2", + .cra_priority = 170, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA512_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +}, { + .digestsize = SHA384_DIGEST_SIZE, + .init = sha384_base_init, + .update = sha512_avx2_update, + .final = sha512_avx2_final, + .finup = sha512_avx2_finup, + .descsize = sizeof(struct sha512_state), + .base = { + .cra_name = "sha384", + .cra_driver_name = "sha384-avx2", + .cra_priority = 170, + .cra_flags = CRYPTO_ALG_TYPE_SHASH, + .cra_blocksize = SHA384_BLOCK_SIZE, + .cra_module = THIS_MODULE, + } +} }; + +static bool avx2_usable(void) +{ + if (avx_usable() && boot_cpu_has(X86_FEATURE_AVX2) && + boot_cpu_has(X86_FEATURE_BMI2)) + return true; + + return false; +} + +static int register_sha512_avx2(void) +{ + if (avx2_usable()) + return crypto_register_shashes(sha512_avx2_algs, + ARRAY_SIZE(sha512_avx2_algs)); + return 0; +} + +static void unregister_sha512_avx2(void) +{ + if (avx2_usable()) + crypto_unregister_shashes(sha512_avx2_algs, + ARRAY_SIZE(sha512_avx2_algs)); +} +#else +static inline int register_sha512_avx2(void) { return 0; } +static inline void unregister_sha512_avx2(void) { } #endif - pr_info("Using SSSE3 optimized SHA-512 implementation\n"); - return crypto_register_shashes(algs, ARRAY_SIZE(algs)); + +static int __init sha512_ssse3_mod_init(void) +{ + + if (register_sha512_ssse3()) + goto fail; + + if (register_sha512_avx()) { + unregister_sha512_ssse3(); + goto fail; } - pr_info("Neither AVX nor SSSE3 is available/usable.\n"); + if (register_sha512_avx2()) { + unregister_sha512_avx(); + unregister_sha512_ssse3(); + goto fail; + } + + return 0; +fail: return -ENODEV; } static void __exit sha512_ssse3_mod_fini(void) { - crypto_unregister_shashes(algs, ARRAY_SIZE(algs)); + unregister_sha512_avx2(); + unregister_sha512_avx(); + unregister_sha512_ssse3(); } module_init(sha512_ssse3_mod_init); -- cgit v1.2.3 From 97bce7e0b58dfc7d159ded329f57961868fb060b Mon Sep 17 00:00:00 2001 From: Nicolas Iooss Date: Sun, 20 Sep 2015 16:42:36 +0200 Subject: crypto: crc32c-pclmul - use .rodata instead of .rotata Module crc32c-intel uses a special read-only data section named .rotata. This section is defined for K_table, and its name seems to be a spelling mistake for .rodata. Fixes: 473946e674eb ("crypto: crc32c-pclmul - Shrink K_table to 32-bit words") Signed-off-by: Nicolas Iooss Signed-off-by: Herbert Xu --- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index 225be06edc80..4fe27e074194 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -330,7 +330,7 @@ ENDPROC(crc_pcl) ## PCLMULQDQ tables ## Table is 128 entries x 2 words (8 bytes) each ################################################################ -.section .rotata, "a", %progbits +.section .rodata, "a", %progbits .align 8 K_table: .long 0x493c7d27, 0x00000001 -- cgit v1.2.3 From b47c9fab25fe0a2f92d32fcbb2eb55ad2111cd38 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Mon, 12 Oct 2015 09:21:30 +0100 Subject: ARM: dts: stm32f429: Adopt STM32 RNG driver New bindings and driver have been created for STM32 series parts. This patch integrates this changes. Signed-off-by: Daniel Thompson Acked-by: Maxime Coquelin Signed-off-by: Herbert Xu --- arch/arm/boot/dts/stm32f429.dtsi | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/stm32f429.dtsi b/arch/arm/boot/dts/stm32f429.dtsi index d78a4815da8f..5e1e234e8c0a 100644 --- a/arch/arm/boot/dts/stm32f429.dtsi +++ b/arch/arm/boot/dts/stm32f429.dtsi @@ -174,6 +174,13 @@ reg = <0x40023800 0x400>; clocks = <&clk_hse>; }; + + rng: rng@50060800 { + compatible = "st,stm32-rng"; + reg = <0x50060800 0x400>; + interrupts = <80>; + clocks = <&rcc 0 38>; + }; }; }; -- cgit v1.2.3 From 19b14e7e224f1c119a1756fde02ccacefd280212 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Wed, 14 Oct 2015 21:11:00 +0200 Subject: crypto: s390/sha - replace raw value by their coresponding define SHA_MAX_STATE_SIZE is just the number of u32 word for SHA512. So replace the raw value "16" by their meaning (SHA512_DIGEST_SIZE / 4) Signed-off-by: LABBE Corentin Signed-off-by: Herbert Xu --- arch/s390/crypto/sha.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/s390/crypto/sha.h b/arch/s390/crypto/sha.h index f4e9dc71675f..10f200790079 100644 --- a/arch/s390/crypto/sha.h +++ b/arch/s390/crypto/sha.h @@ -19,7 +19,7 @@ #include /* must be big enough for the largest SHA variant */ -#define SHA_MAX_STATE_SIZE 16 +#define SHA_MAX_STATE_SIZE (SHA512_DIGEST_SIZE / 4) #define SHA_MAX_BLOCK_SIZE SHA512_BLOCK_SIZE struct s390_sha_ctx { -- cgit v1.2.3