summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/devicetree/bindings/crypto/cortina,sl3516-crypto.yaml50
-rw-r--r--Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml47
-rw-r--r--Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml22
-rw-r--r--MAINTAINERS23
-rw-r--r--arch/arm/crypto/Makefile10
-rw-r--r--arch/arm/crypto/poly1305-core.S_shipped1158
-rw-r--r--arch/arm/crypto/sha256-core.S_shipped2816
-rw-r--r--arch/arm/crypto/sha512-core.S_shipped1869
-rw-r--r--arch/arm/mach-ixp4xx/common.c26
-rw-r--r--arch/arm64/crypto/Makefile10
-rw-r--r--arch/arm64/crypto/poly1305-core.S_shipped835
-rw-r--r--arch/arm64/crypto/sha256-core.S_shipped2069
-rw-r--r--arch/arm64/crypto/sha512-core.S_shipped1093
-rw-r--r--arch/x86/crypto/curve25519-x86_64.c2
-rw-r--r--crypto/af_alg.c2
-rw-r--r--crypto/algapi.c18
-rw-r--r--crypto/algboss.c31
-rw-r--r--crypto/drbg.c12
-rw-r--r--crypto/ecdh.c49
-rw-r--r--crypto/internal.h12
-rw-r--r--crypto/khazad.c2
-rw-r--r--crypto/shash.c18
-rw-r--r--crypto/sm2.c24
-rw-r--r--crypto/tcrypt.c36
-rw-r--r--crypto/testmgr.c10
-rw-r--r--crypto/testmgr.h71
-rw-r--r--crypto/wp512.c40
-rw-r--r--drivers/char/hw_random/Kconfig10
-rw-r--r--drivers/char/hw_random/amd-rng.c2
-rw-r--r--drivers/char/hw_random/core.c38
-rw-r--r--drivers/char/hw_random/exynos-trng.c7
-rw-r--r--drivers/char/hw_random/ks-sa-rng.c3
-rw-r--r--drivers/char/hw_random/omap-rng.c6
-rw-r--r--drivers/crypto/Kconfig41
-rw-r--r--drivers/crypto/Makefile1
-rw-r--r--drivers/crypto/cavium/cpt/cptpf_main.c2
-rw-r--r--drivers/crypto/cavium/cpt/cptvf_reqmanager.c10
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_isr.c4
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_main.c21
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_mbx.c4
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_reqmgr.c16
-rw-r--r--drivers/crypto/cavium/nitrox/nitrox_skcipher.c2
-rw-r--r--drivers/crypto/ccp/ccp-dev.c2
-rw-r--r--drivers/crypto/ccp/ccp-dmaengine.c3
-rw-r--r--drivers/crypto/ccp/sev-dev.c4
-rw-r--r--drivers/crypto/ccp/sp-pci.c6
-rw-r--r--drivers/crypto/gemini/Makefile2
-rw-r--r--drivers/crypto/gemini/sl3516-ce-cipher.c387
-rw-r--r--drivers/crypto/gemini/sl3516-ce-core.c535
-rw-r--r--drivers/crypto/gemini/sl3516-ce-rng.c61
-rw-r--r--drivers/crypto/gemini/sl3516-ce.h347
-rw-r--r--drivers/crypto/hisilicon/hpre/hpre_crypto.c185
-rw-r--r--drivers/crypto/hisilicon/hpre/hpre_main.c256
-rw-r--r--drivers/crypto/hisilicon/qm.c1843
-rw-r--r--drivers/crypto/hisilicon/qm.h17
-rw-r--r--drivers/crypto/hisilicon/sec2/sec.h23
-rw-r--r--drivers/crypto/hisilicon/sec2/sec_crypto.c1036
-rw-r--r--drivers/crypto/hisilicon/sec2/sec_crypto.h193
-rw-r--r--drivers/crypto/hisilicon/sec2/sec_main.c100
-rw-r--r--drivers/crypto/hisilicon/zip/zip_main.c99
-rw-r--r--drivers/crypto/ixp4xx_crypto.c413
-rw-r--r--drivers/crypto/marvell/cesa/cesa.h2
-rw-r--r--drivers/crypto/marvell/octeontx2/Makefile13
-rw-r--r--drivers/crypto/marvell/octeontx2/cn10k_cpt.c93
-rw-r--r--drivers/crypto/marvell/octeontx2/cn10k_cpt.h36
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cpt_common.h23
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h16
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptlf.c9
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptlf.h10
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptpf.h1
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c160
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c32
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h8
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptvf.h3
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c49
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c43
-rw-r--r--drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c17
-rw-r--r--drivers/crypto/nx/nx-842-pseries.c31
-rw-r--r--drivers/crypto/nx/nx-aes-cbc.c2
-rw-r--r--drivers/crypto/nx/nx-aes-ccm.c4
-rw-r--r--drivers/crypto/nx/nx-aes-ctr.c4
-rw-r--r--drivers/crypto/nx/nx-aes-ecb.c2
-rw-r--r--drivers/crypto/nx/nx-aes-gcm.c2
-rw-r--r--drivers/crypto/nx/nx-common-powernv.c4
-rw-r--r--drivers/crypto/nx/nx-sha256.c19
-rw-r--r--drivers/crypto/nx/nx-sha512.c19
-rw-r--r--drivers/crypto/nx/nx_csbcpb.h4
-rw-r--r--drivers/crypto/omap-des.c9
-rw-r--r--drivers/crypto/omap-sham.c4
-rw-r--r--drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h2
-rw-r--r--drivers/crypto/qat/qat_common/qat_hal.c14
-rw-r--r--drivers/crypto/qat/qat_common/qat_uclo.c12
-rw-r--r--drivers/crypto/qce/Makefile1
-rw-r--r--drivers/crypto/qce/aead.c847
-rw-r--r--drivers/crypto/qce/aead.h56
-rw-r--r--drivers/crypto/qce/common.c196
-rw-r--r--drivers/crypto/qce/common.h9
-rw-r--r--drivers/crypto/qce/core.c4
-rw-r--r--drivers/crypto/qce/skcipher.c19
-rw-r--r--drivers/crypto/sa2ul.c50
-rw-r--r--drivers/crypto/ux500/hash/hash_core.c1
-rw-r--r--drivers/soc/ixp4xx/ixp4xx-npe.c7
-rw-r--r--include/crypto/aead.h2
-rw-r--r--include/crypto/algapi.h10
-rw-r--r--include/crypto/engine.h2
-rw-r--r--include/crypto/hash.h2
-rw-r--r--include/crypto/internal/hash.h8
-rw-r--r--include/linux/crypto.h26
108 files changed, 6915 insertions, 11006 deletions
diff --git a/Documentation/devicetree/bindings/crypto/cortina,sl3516-crypto.yaml b/Documentation/devicetree/bindings/crypto/cortina,sl3516-crypto.yaml
new file mode 100644
index 000000000000..b633b8d0e6f0
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/cortina,sl3516-crypto.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/crypto/cortina,sl3516-crypto.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: SL3516 cryptographic offloader driver
+
+maintainers:
+ - Corentin Labbe <clabbe@baylibre.com>
+
+properties:
+ compatible:
+ enum:
+ - cortina,sl3516-crypto
+
+ reg:
+ maxItems: 1
+
+ interrupts:
+ maxItems: 1
+
+ clocks:
+ maxItems: 1
+
+ resets:
+ maxItems: 1
+
+required:
+ - compatible
+ - reg
+ - interrupts
+ - clocks
+ - resets
+
+additionalProperties: false
+
+examples:
+ - |
+ #include <dt-bindings/interrupt-controller/irq.h>
+ #include <dt-bindings/clock/cortina,gemini-clock.h>
+ #include <dt-bindings/reset/cortina,gemini-reset.h>
+
+ crypto@62000000 {
+ compatible = "cortina,sl3516-crypto";
+ reg = <0x62000000 0x10000>;
+ interrupts = <7 IRQ_TYPE_EDGE_RISING>;
+ resets = <&syscon GEMINI_RESET_SECURITY>;
+ clocks = <&syscon GEMINI_CLK_GATE_SECURITY>;
+ };
diff --git a/Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml b/Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml
new file mode 100644
index 000000000000..9c53c27bd20a
--- /dev/null
+++ b/Documentation/devicetree/bindings/crypto/intel,ixp4xx-crypto.yaml
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+# Copyright 2018 Linaro Ltd.
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/crypto/intel,ixp4xx-crypto.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: Intel IXP4xx cryptographic engine
+
+maintainers:
+ - Linus Walleij <linus.walleij@linaro.org>
+
+description: |
+ The Intel IXP4xx cryptographic engine makes use of the IXP4xx NPE
+ (Network Processing Engine). Since it is not a device on its own
+ it is defined as a subnode of the NPE, if crypto support is
+ available on the platform.
+
+properties:
+ compatible:
+ const: intel,ixp4xx-crypto
+
+ intel,npe-handle:
+ $ref: '/schemas/types.yaml#/definitions/phandle-array'
+ maxItems: 1
+ description: phandle to the NPE this crypto engine is using, the cell
+ describing the NPE instance to be used.
+
+ queue-rx:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 1
+ description: phandle to the RX queue on the NPE, the cell describing
+ the queue instance to be used.
+
+ queue-txready:
+ $ref: /schemas/types.yaml#/definitions/phandle-array
+ maxItems: 1
+ description: phandle to the TX READY queue on the NPE, the cell describing
+ the queue instance to be used.
+
+required:
+ - compatible
+ - intel,npe-handle
+ - queue-rx
+ - queue-txready
+
+additionalProperties: false
diff --git a/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml b/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml
index 1bd2870c3a9c..c435c9f369a4 100644
--- a/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml
+++ b/Documentation/devicetree/bindings/firmware/intel,ixp4xx-network-processing-engine.yaml
@@ -26,9 +26,16 @@ properties:
reg:
items:
- - description: NPE0 register range
- - description: NPE1 register range
- - description: NPE2 register range
+ - description: NPE0 (NPE-A) register range
+ - description: NPE1 (NPE-B) register range
+ - description: NPE2 (NPE-C) register range
+
+ crypto:
+ $ref: /schemas/crypto/intel,ixp4xx-crypto.yaml#
+ type: object
+ description: Optional node for the embedded crypto engine, the node
+ should be named with the instance number of the NPE engine used for
+ the crypto engine.
required:
- compatible
@@ -38,8 +45,15 @@ additionalProperties: false
examples:
- |
- npe@c8006000 {
+ npe: npe@c8006000 {
compatible = "intel,ixp4xx-network-processing-engine";
reg = <0xc8006000 0x1000>, <0xc8007000 0x1000>, <0xc8008000 0x1000>;
+
+ crypto {
+ compatible = "intel,ixp4xx-crypto";
+ intel,npe-handle = <&npe 2>;
+ queue-rx = <&qmgr 30>;
+ queue-txready = <&qmgr 29>;
+ };
};
...
diff --git a/MAINTAINERS b/MAINTAINERS
index e6e48391625c..8708efec44b4 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1811,6 +1811,7 @@ F: Documentation/devicetree/bindings/net/cortina,gemini-ethernet.txt
F: Documentation/devicetree/bindings/pinctrl/cortina,gemini-pinctrl.txt
F: Documentation/devicetree/bindings/rtc/faraday,ftrtc010.txt
F: arch/arm/mach-gemini/
+F: drivers/crypto/gemini/
F: drivers/net/ethernet/cortina/
F: drivers/pinctrl/pinctrl-gemini.c
F: drivers/rtc/rtc-ftrtc010.c
@@ -1972,6 +1973,7 @@ F: Documentation/devicetree/bindings/interrupt-controller/intel,ixp4xx-interrupt
F: Documentation/devicetree/bindings/timer/intel,ixp4xx-timer.yaml
F: arch/arm/mach-ixp4xx/
F: drivers/clocksource/timer-ixp4xx.c
+F: drivers/crypto/ixp4xx_crypto.c
F: drivers/gpio/gpio-ixp4xx.c
F: drivers/irqchip/irq-ixp4xx.c
F: include/linux/irqchip/irq-ixp4xx.h
@@ -7179,7 +7181,7 @@ F: include/video/
FREESCALE CAAM (Cryptographic Acceleration and Assurance Module) DRIVER
M: Horia Geantă <horia.geanta@nxp.com>
-M: Aymen Sghaier <aymen.sghaier@nxp.com>
+M: Pankaj Gupta <pankaj.gupta@nxp.com>
L: linux-crypto@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/crypto/fsl-sec4.txt
@@ -7569,6 +7571,12 @@ M: Kieran Bingham <kbingham@kernel.org>
S: Supported
F: scripts/gdb/
+GEMINI CRYPTO DRIVER
+M: Corentin Labbe <clabbe@baylibre.com>
+L: linux-crypto@vger.kernel.org
+S: Maintained
+F: drivers/crypto/gemini/
+
GEMTEK FM RADIO RECEIVER DRIVER
M: Hans Verkuil <hverkuil@xs4all.nl>
L: linux-media@vger.kernel.org
@@ -9240,6 +9248,12 @@ F: Documentation/admin-guide/media/ipu3_rcb.svg
F: Documentation/userspace-api/media/v4l/pixfmt-meta-intel-ipu3.rst
F: drivers/staging/media/ipu3/
+INTEL IXP4XX CRYPTO SUPPORT
+M: Corentin Labbe <clabbe@baylibre.com>
+L: linux-crypto@vger.kernel.org
+S: Maintained
+F: drivers/crypto/ixp4xx_crypto.c
+
INTEL IXP4XX QMGR, NPE, ETHERNET and HSS SUPPORT
M: Krzysztof Halasa <khalasa@piap.pl>
S: Maintained
@@ -15143,6 +15157,13 @@ S: Maintained
F: Documentation/devicetree/bindings/opp/qcom-nvmem-cpufreq.txt
F: drivers/cpufreq/qcom-cpufreq-nvmem.c
+QUALCOMM CRYPTO DRIVERS
+M: Thara Gopinath <thara.gopinath@linaro.org>
+L: linux-crypto@vger.kernel.org
+L: linux-arm-msm@vger.kernel.org
+S: Maintained
+F: drivers/crypto/qce/
+
QUALCOMM EMAC GIGABIT ETHERNET DRIVER
M: Timur Tabi <timur@kernel.org>
L: netdev@vger.kernel.org
diff --git a/arch/arm/crypto/Makefile b/arch/arm/crypto/Makefile
index 8f26c454ea12..eafa898ba6a7 100644
--- a/arch/arm/crypto/Makefile
+++ b/arch/arm/crypto/Makefile
@@ -45,20 +45,12 @@ poly1305-arm-y := poly1305-core.o poly1305-glue.o
nhpoly1305-neon-y := nh-neon-core.o nhpoly1305-neon-glue.o
curve25519-neon-y := curve25519-core.o curve25519-glue.o
-ifdef REGENERATE_ARM_CRYPTO
quiet_cmd_perl = PERL $@
cmd_perl = $(PERL) $(<) > $(@)
-$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv4.pl
+$(obj)/%-core.S: $(src)/%-armv4.pl
$(call cmd,perl)
-$(src)/sha256-core.S_shipped: $(src)/sha256-armv4.pl
- $(call cmd,perl)
-
-$(src)/sha512-core.S_shipped: $(src)/sha512-armv4.pl
- $(call cmd,perl)
-endif
-
clean-files += poly1305-core.S sha256-core.S sha512-core.S
# massage the perlasm code a bit so we only get the NEON routine if we need it
diff --git a/arch/arm/crypto/poly1305-core.S_shipped b/arch/arm/crypto/poly1305-core.S_shipped
deleted file mode 100644
index 37b71d990293..000000000000
--- a/arch/arm/crypto/poly1305-core.S_shipped
+++ /dev/null
@@ -1,1158 +0,0 @@
-#ifndef __KERNEL__
-# include "arm_arch.h"
-#else
-# define __ARM_ARCH__ __LINUX_ARM_ARCH__
-# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
-# define poly1305_init poly1305_init_arm
-# define poly1305_blocks poly1305_blocks_arm
-# define poly1305_emit poly1305_emit_arm
-.globl poly1305_blocks_neon
-#endif
-
-#if defined(__thumb2__)
-.syntax unified
-.thumb
-#else
-.code 32
-#endif
-
-.text
-
-.globl poly1305_emit
-.globl poly1305_blocks
-.globl poly1305_init
-.type poly1305_init,%function
-.align 5
-poly1305_init:
-.Lpoly1305_init:
- stmdb sp!,{r4-r11}
-
- eor r3,r3,r3
- cmp r1,#0
- str r3,[r0,#0] @ zero hash value
- str r3,[r0,#4]
- str r3,[r0,#8]
- str r3,[r0,#12]
- str r3,[r0,#16]
- str r3,[r0,#36] @ clear is_base2_26
- add r0,r0,#20
-
-#ifdef __thumb2__
- it eq
-#endif
- moveq r0,#0
- beq .Lno_key
-
-#if __ARM_MAX_ARCH__>=7
- mov r3,#-1
- str r3,[r0,#28] @ impossible key power value
-# ifndef __KERNEL__
- adr r11,.Lpoly1305_init
- ldr r12,.LOPENSSL_armcap
-# endif
-#endif
- ldrb r4,[r1,#0]
- mov r10,#0x0fffffff
- ldrb r5,[r1,#1]
- and r3,r10,#-4 @ 0x0ffffffc
- ldrb r6,[r1,#2]
- ldrb r7,[r1,#3]
- orr r4,r4,r5,lsl#8
- ldrb r5,[r1,#4]
- orr r4,r4,r6,lsl#16
- ldrb r6,[r1,#5]
- orr r4,r4,r7,lsl#24
- ldrb r7,[r1,#6]
- and r4,r4,r10
-
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-# if !defined(_WIN32)
- ldr r12,[r11,r12] @ OPENSSL_armcap_P
-# endif
-# if defined(__APPLE__) || defined(_WIN32)
- ldr r12,[r12]
-# endif
-#endif
- ldrb r8,[r1,#7]
- orr r5,r5,r6,lsl#8
- ldrb r6,[r1,#8]
- orr r5,r5,r7,lsl#16
- ldrb r7,[r1,#9]
- orr r5,r5,r8,lsl#24
- ldrb r8,[r1,#10]
- and r5,r5,r3
-
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
- tst r12,#ARMV7_NEON @ check for NEON
-# ifdef __thumb2__
- adr r9,.Lpoly1305_blocks_neon
- adr r11,.Lpoly1305_blocks
- it ne
- movne r11,r9
- adr r12,.Lpoly1305_emit
- orr r11,r11,#1 @ thumb-ify addresses
- orr r12,r12,#1
-# else
- add r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init)
- ite eq
- addeq r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init)
- addne r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init)
-# endif
-#endif
- ldrb r9,[r1,#11]
- orr r6,r6,r7,lsl#8
- ldrb r7,[r1,#12]
- orr r6,r6,r8,lsl#16
- ldrb r8,[r1,#13]
- orr r6,r6,r9,lsl#24
- ldrb r9,[r1,#14]
- and r6,r6,r3
-
- ldrb r10,[r1,#15]
- orr r7,r7,r8,lsl#8
- str r4,[r0,#0]
- orr r7,r7,r9,lsl#16
- str r5,[r0,#4]
- orr r7,r7,r10,lsl#24
- str r6,[r0,#8]
- and r7,r7,r3
- str r7,[r0,#12]
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
- stmia r2,{r11,r12} @ fill functions table
- mov r0,#1
-#else
- mov r0,#0
-#endif
-.Lno_key:
- ldmia sp!,{r4-r11}
-#if __ARM_ARCH__>=5
- bx lr @ bx lr
-#else
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
-#endif
-.size poly1305_init,.-poly1305_init
-.type poly1305_blocks,%function
-.align 5
-poly1305_blocks:
-.Lpoly1305_blocks:
- stmdb sp!,{r3-r11,lr}
-
- ands r2,r2,#-16
- beq .Lno_data
-
- add r2,r2,r1 @ end pointer
- sub sp,sp,#32
-
-#if __ARM_ARCH__<7
- ldmia r0,{r4-r12} @ load context
- add r0,r0,#20
- str r2,[sp,#16] @ offload stuff
- str r0,[sp,#12]
-#else
- ldr lr,[r0,#36] @ is_base2_26
- ldmia r0!,{r4-r8} @ load hash value
- str r2,[sp,#16] @ offload stuff
- str r0,[sp,#12]
-
- adds r9,r4,r5,lsl#26 @ base 2^26 -> base 2^32
- mov r10,r5,lsr#6
- adcs r10,r10,r6,lsl#20
- mov r11,r6,lsr#12
- adcs r11,r11,r7,lsl#14
- mov r12,r7,lsr#18
- adcs r12,r12,r8,lsl#8
- mov r2,#0
- teq lr,#0
- str r2,[r0,#16] @ clear is_base2_26
- adc r2,r2,r8,lsr#24
-
- itttt ne
- movne r4,r9 @ choose between radixes
- movne r5,r10
- movne r6,r11
- movne r7,r12
- ldmia r0,{r9-r12} @ load key
- it ne
- movne r8,r2
-#endif
-
- mov lr,r1
- cmp r3,#0
- str r10,[sp,#20]
- str r11,[sp,#24]
- str r12,[sp,#28]
- b .Loop
-
-.align 4
-.Loop:
-#if __ARM_ARCH__<7
- ldrb r0,[lr],#16 @ load input
-# ifdef __thumb2__
- it hi
-# endif
- addhi r8,r8,#1 @ 1<<128
- ldrb r1,[lr,#-15]
- ldrb r2,[lr,#-14]
- ldrb r3,[lr,#-13]
- orr r1,r0,r1,lsl#8
- ldrb r0,[lr,#-12]
- orr r2,r1,r2,lsl#16
- ldrb r1,[lr,#-11]
- orr r3,r2,r3,lsl#24
- ldrb r2,[lr,#-10]
- adds r4,r4,r3 @ accumulate input
-
- ldrb r3,[lr,#-9]
- orr r1,r0,r1,lsl#8
- ldrb r0,[lr,#-8]
- orr r2,r1,r2,lsl#16
- ldrb r1,[lr,#-7]
- orr r3,r2,r3,lsl#24
- ldrb r2,[lr,#-6]
- adcs r5,r5,r3
-
- ldrb r3,[lr,#-5]
- orr r1,r0,r1,lsl#8
- ldrb r0,[lr,#-4]
- orr r2,r1,r2,lsl#16
- ldrb r1,[lr,#-3]
- orr r3,r2,r3,lsl#24
- ldrb r2,[lr,#-2]
- adcs r6,r6,r3
-
- ldrb r3,[lr,#-1]
- orr r1,r0,r1,lsl#8
- str lr,[sp,#8] @ offload input pointer
- orr r2,r1,r2,lsl#16
- add r10,r10,r10,lsr#2
- orr r3,r2,r3,lsl#24
-#else
- ldr r0,[lr],#16 @ load input
- it hi
- addhi r8,r8,#1 @ padbit
- ldr r1,[lr,#-12]
- ldr r2,[lr,#-8]
- ldr r3,[lr,#-4]
-# ifdef __ARMEB__
- rev r0,r0
- rev r1,r1
- rev r2,r2
- rev r3,r3
-# endif
- adds r4,r4,r0 @ accumulate input
- str lr,[sp,#8] @ offload input pointer
- adcs r5,r5,r1
- add r10,r10,r10,lsr#2
- adcs r6,r6,r2
-#endif
- add r11,r11,r11,lsr#2
- adcs r7,r7,r3
- add r12,r12,r12,lsr#2
-
- umull r2,r3,r5,r9
- adc r8,r8,#0
- umull r0,r1,r4,r9
- umlal r2,r3,r8,r10
- umlal r0,r1,r7,r10
- ldr r10,[sp,#20] @ reload r10
- umlal r2,r3,r6,r12
- umlal r0,r1,r5,r12
- umlal r2,r3,r7,r11
- umlal r0,r1,r6,r11
- umlal r2,r3,r4,r10
- str r0,[sp,#0] @ future r4
- mul r0,r11,r8
- ldr r11,[sp,#24] @ reload r11
- adds r2,r2,r1 @ d1+=d0>>32
- eor r1,r1,r1
- adc lr,r3,#0 @ future r6
- str r2,[sp,#4] @ future r5
-
- mul r2,r12,r8
- eor r3,r3,r3
- umlal r0,r1,r7,r12
- ldr r12,[sp,#28] @ reload r12
- umlal r2,r3,r7,r9
- umlal r0,r1,r6,r9
- umlal r2,r3,r6,r10
- umlal r0,r1,r5,r10
- umlal r2,r3,r5,r11
- umlal r0,r1,r4,r11
- umlal r2,r3,r4,r12
- ldr r4,[sp,#0]
- mul r8,r9,r8
- ldr r5,[sp,#4]
-
- adds r6,lr,r0 @ d2+=d1>>32
- ldr lr,[sp,#8] @ reload input pointer
- adc r1,r1,#0
- adds r7,r2,r1 @ d3+=d2>>32
- ldr r0,[sp,#16] @ reload end pointer
- adc r3,r3,#0
- add r8,r8,r3 @ h4+=d3>>32
-
- and r1,r8,#-4
- and r8,r8,#3
- add r1,r1,r1,lsr#2 @ *=5
- adds r4,r4,r1
- adcs r5,r5,#0
- adcs r6,r6,#0
- adcs r7,r7,#0
- adc r8,r8,#0
-
- cmp r0,lr @ done yet?
- bhi .Loop
-
- ldr r0,[sp,#12]
- add sp,sp,#32
- stmdb r0,{r4-r8} @ store the result
-
-.Lno_data:
-#if __ARM_ARCH__>=5
- ldmia sp!,{r3-r11,pc}
-#else
- ldmia sp!,{r3-r11,lr}
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
-#endif
-.size poly1305_blocks,.-poly1305_blocks
-.type poly1305_emit,%function
-.align 5
-poly1305_emit:
-.Lpoly1305_emit:
- stmdb sp!,{r4-r11}
-
- ldmia r0,{r3-r7}
-
-#if __ARM_ARCH__>=7
- ldr ip,[r0,#36] @ is_base2_26
-
- adds r8,r3,r4,lsl#26 @ base 2^26 -> base 2^32
- mov r9,r4,lsr#6
- adcs r9,r9,r5,lsl#20
- mov r10,r5,lsr#12
- adcs r10,r10,r6,lsl#14
- mov r11,r6,lsr#18
- adcs r11,r11,r7,lsl#8
- mov r0,#0
- adc r0,r0,r7,lsr#24
-
- tst ip,ip
- itttt ne
- movne r3,r8
- movne r4,r9
- movne r5,r10
- movne r6,r11
- it ne
- movne r7,r0
-#endif
-
- adds r8,r3,#5 @ compare to modulus
- adcs r9,r4,#0
- adcs r10,r5,#0
- adcs r11,r6,#0
- adc r0,r7,#0
- tst r0,#4 @ did it carry/borrow?
-
-#ifdef __thumb2__
- it ne
-#endif
- movne r3,r8
- ldr r8,[r2,#0]
-#ifdef __thumb2__
- it ne
-#endif
- movne r4,r9
- ldr r9,[r2,#4]
-#ifdef __thumb2__
- it ne
-#endif
- movne r5,r10
- ldr r10,[r2,#8]
-#ifdef __thumb2__
- it ne
-#endif
- movne r6,r11
- ldr r11,[r2,#12]
-
- adds r3,r3,r8
- adcs r4,r4,r9
- adcs r5,r5,r10
- adc r6,r6,r11
-
-#if __ARM_ARCH__>=7
-# ifdef __ARMEB__
- rev r3,r3
- rev r4,r4
- rev r5,r5
- rev r6,r6
-# endif
- str r3,[r1,#0]
- str r4,[r1,#4]
- str r5,[r1,#8]
- str r6,[r1,#12]
-#else
- strb r3,[r1,#0]
- mov r3,r3,lsr#8
- strb r4,[r1,#4]
- mov r4,r4,lsr#8
- strb r5,[r1,#8]
- mov r5,r5,lsr#8
- strb r6,[r1,#12]
- mov r6,r6,lsr#8
-
- strb r3,[r1,#1]
- mov r3,r3,lsr#8
- strb r4,[r1,#5]
- mov r4,r4,lsr#8
- strb r5,[r1,#9]
- mov r5,r5,lsr#8
- strb r6,[r1,#13]
- mov r6,r6,lsr#8
-
- strb r3,[r1,#2]
- mov r3,r3,lsr#8
- strb r4,[r1,#6]
- mov r4,r4,lsr#8
- strb r5,[r1,#10]
- mov r5,r5,lsr#8
- strb r6,[r1,#14]
- mov r6,r6,lsr#8
-
- strb r3,[r1,#3]
- strb r4,[r1,#7]
- strb r5,[r1,#11]
- strb r6,[r1,#15]
-#endif
- ldmia sp!,{r4-r11}
-#if __ARM_ARCH__>=5
- bx lr @ bx lr
-#else
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
-#endif
-.size poly1305_emit,.-poly1305_emit
-#if __ARM_MAX_ARCH__>=7
-.fpu neon
-
-.type poly1305_init_neon,%function
-.align 5
-poly1305_init_neon:
-.Lpoly1305_init_neon:
- ldr r3,[r0,#48] @ first table element
- cmp r3,#-1 @ is value impossible?
- bne .Lno_init_neon
-
- ldr r4,[r0,#20] @ load key base 2^32
- ldr r5,[r0,#24]
- ldr r6,[r0,#28]
- ldr r7,[r0,#32]
-
- and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
- mov r3,r4,lsr#26
- mov r4,r5,lsr#20
- orr r3,r3,r5,lsl#6
- mov r5,r6,lsr#14
- orr r4,r4,r6,lsl#12
- mov r6,r7,lsr#8
- orr r5,r5,r7,lsl#18
- and r3,r3,#0x03ffffff
- and r4,r4,#0x03ffffff
- and r5,r5,#0x03ffffff
-
- vdup.32 d0,r2 @ r^1 in both lanes
- add r2,r3,r3,lsl#2 @ *5
- vdup.32 d1,r3
- add r3,r4,r4,lsl#2
- vdup.32 d2,r2
- vdup.32 d3,r4
- add r4,r5,r5,lsl#2
- vdup.32 d4,r3
- vdup.32 d5,r5
- add r5,r6,r6,lsl#2
- vdup.32 d6,r4
- vdup.32 d7,r6
- vdup.32 d8,r5
-
- mov r5,#2 @ counter
-
-.Lsquare_neon:
- @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
- @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
- @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
- @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
- @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
- @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
-
- vmull.u32 q5,d0,d0[1]
- vmull.u32 q6,d1,d0[1]
- vmull.u32 q7,d3,d0[1]
- vmull.u32 q8,d5,d0[1]
- vmull.u32 q9,d7,d0[1]
-
- vmlal.u32 q5,d7,d2[1]
- vmlal.u32 q6,d0,d1[1]
- vmlal.u32 q7,d1,d1[1]
- vmlal.u32 q8,d3,d1[1]
- vmlal.u32 q9,d5,d1[1]
-
- vmlal.u32 q5,d5,d4[1]
- vmlal.u32 q6,d7,d4[1]
- vmlal.u32 q8,d1,d3[1]
- vmlal.u32 q7,d0,d3[1]
- vmlal.u32 q9,d3,d3[1]
-
- vmlal.u32 q5,d3,d6[1]
- vmlal.u32 q8,d0,d5[1]
- vmlal.u32 q6,d5,d6[1]
- vmlal.u32 q7,d7,d6[1]
- vmlal.u32 q9,d1,d5[1]
-
- vmlal.u32 q8,d7,d8[1]
- vmlal.u32 q5,d1,d8[1]
- vmlal.u32 q6,d3,d8[1]
- vmlal.u32 q7,d5,d8[1]
- vmlal.u32 q9,d0,d7[1]
-
- @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
- @ lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
- @ and P. Schwabe
- @
- @ H0>>+H1>>+H2>>+H3>>+H4
- @ H3>>+H4>>*5+H0>>+H1
- @
- @ Trivia.
- @
- @ Result of multiplication of n-bit number by m-bit number is
- @ n+m bits wide. However! Even though 2^n is a n+1-bit number,
- @ m-bit number multiplied by 2^n is still n+m bits wide.
- @
- @ Sum of two n-bit numbers is n+1 bits wide, sum of three - n+2,
- @ and so is sum of four. Sum of 2^m n-m-bit numbers and n-bit
- @ one is n+1 bits wide.
- @
- @ >>+ denotes Hnext += Hn>>26, Hn &= 0x3ffffff. This means that
- @ H0, H2, H3 are guaranteed to be 26 bits wide, while H1 and H4
- @ can be 27. However! In cases when their width exceeds 26 bits
- @ they are limited by 2^26+2^6. This in turn means that *sum*
- @ of the products with these values can still be viewed as sum
- @ of 52-bit numbers as long as the amount of addends is not a
- @ power of 2. For example,
- @
- @ H4 = H4*R0 + H3*R1 + H2*R2 + H1*R3 + H0 * R4,
- @
- @ which can't be larger than 5 * (2^26 + 2^6) * (2^26 + 2^6), or
- @ 5 * (2^52 + 2*2^32 + 2^12), which in turn is smaller than
- @ 8 * (2^52) or 2^55. However, the value is then multiplied by
- @ by 5, so we should be looking at 5 * 5 * (2^52 + 2^33 + 2^12),
- @ which is less than 32 * (2^52) or 2^57. And when processing
- @ data we are looking at triple as many addends...
- @
- @ In key setup procedure pre-reduced H0 is limited by 5*4+1 and
- @ 5*H4 - by 5*5 52-bit addends, or 57 bits. But when hashing the
- @ input H0 is limited by (5*4+1)*3 addends, or 58 bits, while
- @ 5*H4 by 5*5*3, or 59[!] bits. How is this relevant? vmlal.u32
- @ instruction accepts 2x32-bit input and writes 2x64-bit result.
- @ This means that result of reduction have to be compressed upon
- @ loop wrap-around. This can be done in the process of reduction
- @ to minimize amount of instructions [as well as amount of
- @ 128-bit instructions, which benefits low-end processors], but
- @ one has to watch for H2 (which is narrower than H0) and 5*H4
- @ not being wider than 58 bits, so that result of right shift
- @ by 26 bits fits in 32 bits. This is also useful on x86,
- @ because it allows to use paddd in place for paddq, which
- @ benefits Atom, where paddq is ridiculously slow.
-
- vshr.u64 q15,q8,#26
- vmovn.i64 d16,q8
- vshr.u64 q4,q5,#26
- vmovn.i64 d10,q5
- vadd.i64 q9,q9,q15 @ h3 -> h4
- vbic.i32 d16,#0xfc000000 @ &=0x03ffffff
- vadd.i64 q6,q6,q4 @ h0 -> h1
- vbic.i32 d10,#0xfc000000
-
- vshrn.u64 d30,q9,#26
- vmovn.i64 d18,q9
- vshr.u64 q4,q6,#26
- vmovn.i64 d12,q6
- vadd.i64 q7,q7,q4 @ h1 -> h2
- vbic.i32 d18,#0xfc000000
- vbic.i32 d12,#0xfc000000
-
- vadd.i32 d10,d10,d30
- vshl.u32 d30,d30,#2
- vshrn.u64 d8,q7,#26
- vmovn.i64 d14,q7
- vadd.i32 d10,d10,d30 @ h4 -> h0
- vadd.i32 d16,d16,d8 @ h2 -> h3
- vbic.i32 d14,#0xfc000000
-
- vshr.u32 d30,d10,#26
- vbic.i32 d10,#0xfc000000
- vshr.u32 d8,d16,#26
- vbic.i32 d16,#0xfc000000
- vadd.i32 d12,d12,d30 @ h0 -> h1
- vadd.i32 d18,d18,d8 @ h3 -> h4
-
- subs r5,r5,#1
- beq .Lsquare_break_neon
-
- add r6,r0,#(48+0*9*4)
- add r7,r0,#(48+1*9*4)
-
- vtrn.32 d0,d10 @ r^2:r^1
- vtrn.32 d3,d14
- vtrn.32 d5,d16
- vtrn.32 d1,d12
- vtrn.32 d7,d18
-
- vshl.u32 d4,d3,#2 @ *5
- vshl.u32 d6,d5,#2
- vshl.u32 d2,d1,#2
- vshl.u32 d8,d7,#2
- vadd.i32 d4,d4,d3
- vadd.i32 d2,d2,d1
- vadd.i32 d6,d6,d5
- vadd.i32 d8,d8,d7
-
- vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]!
- vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]!
- vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
- vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
- vst1.32 {d8[0]},[r6,:32]
- vst1.32 {d8[1]},[r7,:32]
-
- b .Lsquare_neon
-
-.align 4
-.Lsquare_break_neon:
- add r6,r0,#(48+2*4*9)
- add r7,r0,#(48+3*4*9)
-
- vmov d0,d10 @ r^4:r^3
- vshl.u32 d2,d12,#2 @ *5
- vmov d1,d12
- vshl.u32 d4,d14,#2
- vmov d3,d14
- vshl.u32 d6,d16,#2
- vmov d5,d16
- vshl.u32 d8,d18,#2
- vmov d7,d18
- vadd.i32 d2,d2,d12
- vadd.i32 d4,d4,d14
- vadd.i32 d6,d6,d16
- vadd.i32 d8,d8,d18
-
- vst4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]!
- vst4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]!
- vst4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
- vst4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
- vst1.32 {d8[0]},[r6]
- vst1.32 {d8[1]},[r7]
-
-.Lno_init_neon:
- bx lr @ bx lr
-.size poly1305_init_neon,.-poly1305_init_neon
-
-.type poly1305_blocks_neon,%function
-.align 5
-poly1305_blocks_neon:
-.Lpoly1305_blocks_neon:
- ldr ip,[r0,#36] @ is_base2_26
-
- cmp r2,#64
- blo .Lpoly1305_blocks
-
- stmdb sp!,{r4-r7}
- vstmdb sp!,{d8-d15} @ ABI specification says so
-
- tst ip,ip @ is_base2_26?
- bne .Lbase2_26_neon
-
- stmdb sp!,{r1-r3,lr}
- bl .Lpoly1305_init_neon
-
- ldr r4,[r0,#0] @ load hash value base 2^32
- ldr r5,[r0,#4]
- ldr r6,[r0,#8]
- ldr r7,[r0,#12]
- ldr ip,[r0,#16]
-
- and r2,r4,#0x03ffffff @ base 2^32 -> base 2^26
- mov r3,r4,lsr#26
- veor d10,d10,d10
- mov r4,r5,lsr#20
- orr r3,r3,r5,lsl#6
- veor d12,d12,d12
- mov r5,r6,lsr#14
- orr r4,r4,r6,lsl#12
- veor d14,d14,d14
- mov r6,r7,lsr#8
- orr r5,r5,r7,lsl#18
- veor d16,d16,d16
- and r3,r3,#0x03ffffff
- orr r6,r6,ip,lsl#24
- veor d18,d18,d18
- and r4,r4,#0x03ffffff
- mov r1,#1
- and r5,r5,#0x03ffffff
- str r1,[r0,#36] @ set is_base2_26
-
- vmov.32 d10[0],r2
- vmov.32 d12[0],r3
- vmov.32 d14[0],r4
- vmov.32 d16[0],r5
- vmov.32 d18[0],r6
- adr r5,.Lzeros
-
- ldmia sp!,{r1-r3,lr}
- b .Lhash_loaded
-
-.align 4
-.Lbase2_26_neon:
- @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
- @ load hash value
-
- veor d10,d10,d10
- veor d12,d12,d12
- veor d14,d14,d14
- veor d16,d16,d16
- veor d18,d18,d18
- vld4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]!
- adr r5,.Lzeros
- vld1.32 {d18[0]},[r0]
- sub r0,r0,#16 @ rewind
-
-.Lhash_loaded:
- add r4,r1,#32
- mov r3,r3,lsl#24
- tst r2,#31
- beq .Leven
-
- vld4.32 {d20[0],d22[0],d24[0],d26[0]},[r1]!
- vmov.32 d28[0],r3
- sub r2,r2,#16
- add r4,r1,#32
-
-# ifdef __ARMEB__
- vrev32.8 q10,q10
- vrev32.8 q13,q13
- vrev32.8 q11,q11
- vrev32.8 q12,q12
-# endif
- vsri.u32 d28,d26,#8 @ base 2^32 -> base 2^26
- vshl.u32 d26,d26,#18
-
- vsri.u32 d26,d24,#14
- vshl.u32 d24,d24,#12
- vadd.i32 d29,d28,d18 @ add hash value and move to #hi
-
- vbic.i32 d26,#0xfc000000
- vsri.u32 d24,d22,#20
- vshl.u32 d22,d22,#6
-
- vbic.i32 d24,#0xfc000000
- vsri.u32 d22,d20,#26
- vadd.i32 d27,d26,d16
-
- vbic.i32 d20,#0xfc000000
- vbic.i32 d22,#0xfc000000
- vadd.i32 d25,d24,d14
-
- vadd.i32 d21,d20,d10
- vadd.i32 d23,d22,d12
-
- mov r7,r5
- add r6,r0,#48
-
- cmp r2,r2
- b .Long_tail
-
-.align 4
-.Leven:
- subs r2,r2,#64
- it lo
- movlo r4,r5
-
- vmov.i32 q14,#1<<24 @ padbit, yes, always
- vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1]
- add r1,r1,#64
- vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0)
- add r4,r4,#64
- itt hi
- addhi r7,r0,#(48+1*9*4)
- addhi r6,r0,#(48+3*9*4)
-
-# ifdef __ARMEB__
- vrev32.8 q10,q10
- vrev32.8 q13,q13
- vrev32.8 q11,q11
- vrev32.8 q12,q12
-# endif
- vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26
- vshl.u32 q13,q13,#18
-
- vsri.u32 q13,q12,#14
- vshl.u32 q12,q12,#12
-
- vbic.i32 q13,#0xfc000000
- vsri.u32 q12,q11,#20
- vshl.u32 q11,q11,#6
-
- vbic.i32 q12,#0xfc000000
- vsri.u32 q11,q10,#26
-
- vbic.i32 q10,#0xfc000000
- vbic.i32 q11,#0xfc000000
-
- bls .Lskip_loop
-
- vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^2
- vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4
- vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
- vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
- b .Loop_neon
-
-.align 5
-.Loop_neon:
- @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
- @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
- @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
- @ ___________________/
- @ ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
- @ ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
- @ ___________________/ ____________________/
- @
- @ Note that we start with inp[2:3]*r^2. This is because it
- @ doesn't depend on reduction in previous iteration.
- @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
- @ d4 = h4*r0 + h3*r1 + h2*r2 + h1*r3 + h0*r4
- @ d3 = h3*r0 + h2*r1 + h1*r2 + h0*r3 + h4*5*r4
- @ d2 = h2*r0 + h1*r1 + h0*r2 + h4*5*r3 + h3*5*r4
- @ d1 = h1*r0 + h0*r1 + h4*5*r2 + h3*5*r3 + h2*5*r4
- @ d0 = h0*r0 + h4*5*r1 + h3*5*r2 + h2*5*r3 + h1*5*r4
-
- @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
- @ inp[2:3]*r^2
-
- vadd.i32 d24,d24,d14 @ accumulate inp[0:1]
- vmull.u32 q7,d25,d0[1]
- vadd.i32 d20,d20,d10
- vmull.u32 q5,d21,d0[1]
- vadd.i32 d26,d26,d16
- vmull.u32 q8,d27,d0[1]
- vmlal.u32 q7,d23,d1[1]
- vadd.i32 d22,d22,d12
- vmull.u32 q6,d23,d0[1]
-
- vadd.i32 d28,d28,d18
- vmull.u32 q9,d29,d0[1]
- subs r2,r2,#64
- vmlal.u32 q5,d29,d2[1]
- it lo
- movlo r4,r5
- vmlal.u32 q8,d25,d1[1]
- vld1.32 d8[1],[r7,:32]
- vmlal.u32 q6,d21,d1[1]
- vmlal.u32 q9,d27,d1[1]
-
- vmlal.u32 q5,d27,d4[1]
- vmlal.u32 q8,d23,d3[1]
- vmlal.u32 q9,d25,d3[1]
- vmlal.u32 q6,d29,d4[1]
- vmlal.u32 q7,d21,d3[1]
-
- vmlal.u32 q8,d21,d5[1]
- vmlal.u32 q5,d25,d6[1]
- vmlal.u32 q9,d23,d5[1]
- vmlal.u32 q6,d27,d6[1]
- vmlal.u32 q7,d29,d6[1]
-
- vmlal.u32 q8,d29,d8[1]
- vmlal.u32 q5,d23,d8[1]
- vmlal.u32 q9,d21,d7[1]
- vmlal.u32 q6,d25,d8[1]
- vmlal.u32 q7,d27,d8[1]
-
- vld4.32 {d21,d23,d25,d27},[r4] @ inp[2:3] (or 0)
- add r4,r4,#64
-
- @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
- @ (hash+inp[0:1])*r^4 and accumulate
-
- vmlal.u32 q8,d26,d0[0]
- vmlal.u32 q5,d20,d0[0]
- vmlal.u32 q9,d28,d0[0]
- vmlal.u32 q6,d22,d0[0]
- vmlal.u32 q7,d24,d0[0]
- vld1.32 d8[0],[r6,:32]
-
- vmlal.u32 q8,d24,d1[0]
- vmlal.u32 q5,d28,d2[0]
- vmlal.u32 q9,d26,d1[0]
- vmlal.u32 q6,d20,d1[0]
- vmlal.u32 q7,d22,d1[0]
-
- vmlal.u32 q8,d22,d3[0]
- vmlal.u32 q5,d26,d4[0]
- vmlal.u32 q9,d24,d3[0]
- vmlal.u32 q6,d28,d4[0]
- vmlal.u32 q7,d20,d3[0]
-
- vmlal.u32 q8,d20,d5[0]
- vmlal.u32 q5,d24,d6[0]
- vmlal.u32 q9,d22,d5[0]
- vmlal.u32 q6,d26,d6[0]
- vmlal.u32 q8,d28,d8[0]
-
- vmlal.u32 q7,d28,d6[0]
- vmlal.u32 q5,d22,d8[0]
- vmlal.u32 q9,d20,d7[0]
- vmov.i32 q14,#1<<24 @ padbit, yes, always
- vmlal.u32 q6,d24,d8[0]
- vmlal.u32 q7,d26,d8[0]
-
- vld4.32 {d20,d22,d24,d26},[r1] @ inp[0:1]
- add r1,r1,#64
-# ifdef __ARMEB__
- vrev32.8 q10,q10
- vrev32.8 q11,q11
- vrev32.8 q12,q12
- vrev32.8 q13,q13
-# endif
-
- @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
- @ lazy reduction interleaved with base 2^32 -> base 2^26 of
- @ inp[0:3] previously loaded to q10-q13 and smashed to q10-q14.
-
- vshr.u64 q15,q8,#26
- vmovn.i64 d16,q8
- vshr.u64 q4,q5,#26
- vmovn.i64 d10,q5
- vadd.i64 q9,q9,q15 @ h3 -> h4
- vbic.i32 d16,#0xfc000000
- vsri.u32 q14,q13,#8 @ base 2^32 -> base 2^26
- vadd.i64 q6,q6,q4 @ h0 -> h1
- vshl.u32 q13,q13,#18
- vbic.i32 d10,#0xfc000000
-
- vshrn.u64 d30,q9,#26
- vmovn.i64 d18,q9
- vshr.u64 q4,q6,#26
- vmovn.i64 d12,q6
- vadd.i64 q7,q7,q4 @ h1 -> h2
- vsri.u32 q13,q12,#14
- vbic.i32 d18,#0xfc000000
- vshl.u32 q12,q12,#12
- vbic.i32 d12,#0xfc000000
-
- vadd.i32 d10,d10,d30
- vshl.u32 d30,d30,#2
- vbic.i32 q13,#0xfc000000
- vshrn.u64 d8,q7,#26
- vmovn.i64 d14,q7
- vaddl.u32 q5,d10,d30 @ h4 -> h0 [widen for a sec]
- vsri.u32 q12,q11,#20
- vadd.i32 d16,d16,d8 @ h2 -> h3
- vshl.u32 q11,q11,#6
- vbic.i32 d14,#0xfc000000
- vbic.i32 q12,#0xfc000000
-
- vshrn.u64 d30,q5,#26 @ re-narrow
- vmovn.i64 d10,q5
- vsri.u32 q11,q10,#26
- vbic.i32 q10,#0xfc000000
- vshr.u32 d8,d16,#26
- vbic.i32 d16,#0xfc000000
- vbic.i32 d10,#0xfc000000
- vadd.i32 d12,d12,d30 @ h0 -> h1
- vadd.i32 d18,d18,d8 @ h3 -> h4
- vbic.i32 q11,#0xfc000000
-
- bhi .Loop_neon
-
-.Lskip_loop:
- @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
- @ multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
-
- add r7,r0,#(48+0*9*4)
- add r6,r0,#(48+1*9*4)
- adds r2,r2,#32
- it ne
- movne r2,#0
- bne .Long_tail
-
- vadd.i32 d25,d24,d14 @ add hash value and move to #hi
- vadd.i32 d21,d20,d10
- vadd.i32 d27,d26,d16
- vadd.i32 d23,d22,d12
- vadd.i32 d29,d28,d18
-
-.Long_tail:
- vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^1
- vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^2
-
- vadd.i32 d24,d24,d14 @ can be redundant
- vmull.u32 q7,d25,d0
- vadd.i32 d20,d20,d10
- vmull.u32 q5,d21,d0
- vadd.i32 d26,d26,d16
- vmull.u32 q8,d27,d0
- vadd.i32 d22,d22,d12
- vmull.u32 q6,d23,d0
- vadd.i32 d28,d28,d18
- vmull.u32 q9,d29,d0
-
- vmlal.u32 q5,d29,d2
- vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
- vmlal.u32 q8,d25,d1
- vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
- vmlal.u32 q6,d21,d1
- vmlal.u32 q9,d27,d1
- vmlal.u32 q7,d23,d1
-
- vmlal.u32 q8,d23,d3
- vld1.32 d8[1],[r7,:32]
- vmlal.u32 q5,d27,d4
- vld1.32 d8[0],[r6,:32]
- vmlal.u32 q9,d25,d3
- vmlal.u32 q6,d29,d4
- vmlal.u32 q7,d21,d3
-
- vmlal.u32 q8,d21,d5
- it ne
- addne r7,r0,#(48+2*9*4)
- vmlal.u32 q5,d25,d6
- it ne
- addne r6,r0,#(48+3*9*4)
- vmlal.u32 q9,d23,d5
- vmlal.u32 q6,d27,d6
- vmlal.u32 q7,d29,d6
-
- vmlal.u32 q8,d29,d8
- vorn q0,q0,q0 @ all-ones, can be redundant
- vmlal.u32 q5,d23,d8
- vshr.u64 q0,q0,#38
- vmlal.u32 q9,d21,d7
- vmlal.u32 q6,d25,d8
- vmlal.u32 q7,d27,d8
-
- beq .Lshort_tail
-
- @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
- @ (hash+inp[0:1])*r^4:r^3 and accumulate
-
- vld4.32 {d0[1],d1[1],d2[1],d3[1]},[r7]! @ load r^3
- vld4.32 {d0[0],d1[0],d2[0],d3[0]},[r6]! @ load r^4
-
- vmlal.u32 q7,d24,d0
- vmlal.u32 q5,d20,d0
- vmlal.u32 q8,d26,d0
- vmlal.u32 q6,d22,d0
- vmlal.u32 q9,d28,d0
-
- vmlal.u32 q5,d28,d2
- vld4.32 {d4[1],d5[1],d6[1],d7[1]},[r7]!
- vmlal.u32 q8,d24,d1
- vld4.32 {d4[0],d5[0],d6[0],d7[0]},[r6]!
- vmlal.u32 q6,d20,d1
- vmlal.u32 q9,d26,d1
- vmlal.u32 q7,d22,d1
-
- vmlal.u32 q8,d22,d3
- vld1.32 d8[1],[r7,:32]
- vmlal.u32 q5,d26,d4
- vld1.32 d8[0],[r6,:32]
- vmlal.u32 q9,d24,d3
- vmlal.u32 q6,d28,d4
- vmlal.u32 q7,d20,d3
-
- vmlal.u32 q8,d20,d5
- vmlal.u32 q5,d24,d6
- vmlal.u32 q9,d22,d5
- vmlal.u32 q6,d26,d6
- vmlal.u32 q7,d28,d6
-
- vmlal.u32 q8,d28,d8
- vorn q0,q0,q0 @ all-ones
- vmlal.u32 q5,d22,d8
- vshr.u64 q0,q0,#38
- vmlal.u32 q9,d20,d7
- vmlal.u32 q6,d24,d8
- vmlal.u32 q7,d26,d8
-
-.Lshort_tail:
- @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
- @ horizontal addition
-
- vadd.i64 d16,d16,d17
- vadd.i64 d10,d10,d11
- vadd.i64 d18,d18,d19
- vadd.i64 d12,d12,d13
- vadd.i64 d14,d14,d15
-
- @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
- @ lazy reduction, but without narrowing
-
- vshr.u64 q15,q8,#26
- vand.i64 q8,q8,q0
- vshr.u64 q4,q5,#26
- vand.i64 q5,q5,q0
- vadd.i64 q9,q9,q15 @ h3 -> h4
- vadd.i64 q6,q6,q4 @ h0 -> h1
-
- vshr.u64 q15,q9,#26
- vand.i64 q9,q9,q0
- vshr.u64 q4,q6,#26
- vand.i64 q6,q6,q0
- vadd.i64 q7,q7,q4 @ h1 -> h2
-
- vadd.i64 q5,q5,q15
- vshl.u64 q15,q15,#2
- vshr.u64 q4,q7,#26
- vand.i64 q7,q7,q0
- vadd.i64 q5,q5,q15 @ h4 -> h0
- vadd.i64 q8,q8,q4 @ h2 -> h3
-
- vshr.u64 q15,q5,#26
- vand.i64 q5,q5,q0
- vshr.u64 q4,q8,#26
- vand.i64 q8,q8,q0
- vadd.i64 q6,q6,q15 @ h0 -> h1
- vadd.i64 q9,q9,q4 @ h3 -> h4
-
- cmp r2,#0
- bne .Leven
-
- @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
- @ store hash value
-
- vst4.32 {d10[0],d12[0],d14[0],d16[0]},[r0]!
- vst1.32 {d18[0]},[r0]
-
- vldmia sp!,{d8-d15} @ epilogue
- ldmia sp!,{r4-r7}
- bx lr @ bx lr
-.size poly1305_blocks_neon,.-poly1305_blocks_neon
-
-.align 5
-.Lzeros:
-.long 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
-#ifndef __KERNEL__
-.LOPENSSL_armcap:
-# ifdef _WIN32
-.word OPENSSL_armcap_P
-# else
-.word OPENSSL_armcap_P-.Lpoly1305_init
-# endif
-.comm OPENSSL_armcap_P,4,4
-.hidden OPENSSL_armcap_P
-#endif
-#endif
-.asciz "Poly1305 for ARMv4/NEON, CRYPTOGAMS by @dot-asm"
-.align 2
diff --git a/arch/arm/crypto/sha256-core.S_shipped b/arch/arm/crypto/sha256-core.S_shipped
deleted file mode 100644
index 6363014a50d7..000000000000
--- a/arch/arm/crypto/sha256-core.S_shipped
+++ /dev/null
@@ -1,2816 +0,0 @@
-@ SPDX-License-Identifier: GPL-2.0
-
-@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
-@ has relicensed it under the GPLv2. Therefore this program is free software;
-@ you can redistribute it and/or modify it under the terms of the GNU General
-@ Public License version 2 as published by the Free Software Foundation.
-@
-@ The original headers, including the original license headers, are
-@ included below for completeness.
-
-@ ====================================================================
-@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-@ project. The module is, however, dual licensed under OpenSSL and
-@ CRYPTOGAMS licenses depending on where you obtain it. For further
-@ details see https://www.openssl.org/~appro/cryptogams/.
-@ ====================================================================
-
-@ SHA256 block procedure for ARMv4. May 2007.
-
-@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
-@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
-@ byte [on single-issue Xscale PXA250 core].
-
-@ July 2010.
-@
-@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
-@ Cortex A8 core and ~20 cycles per processed byte.
-
-@ February 2011.
-@
-@ Profiler-assisted and platform-specific optimization resulted in 16%
-@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
-
-@ September 2013.
-@
-@ Add NEON implementation. On Cortex A8 it was measured to process one
-@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
-@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
-@ code (meaning that latter performs sub-optimally, nothing was done
-@ about it).
-
-@ May 2014.
-@
-@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
-
-#ifndef __KERNEL__
-# include "arm_arch.h"
-#else
-# define __ARM_ARCH__ __LINUX_ARM_ARCH__
-# define __ARM_MAX_ARCH__ 7
-#endif
-
-.text
-#if __ARM_ARCH__<7
-.code 32
-#else
-.syntax unified
-# ifdef __thumb2__
-.thumb
-# else
-.code 32
-# endif
-#endif
-
-.type K256,%object
-.align 5
-K256:
-.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
-.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
-.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
-.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
-.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
-.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
-.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
-.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
-.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
-.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
-.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
-.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
-.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
-.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
-.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
-.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
-.size K256,.-K256
-.word 0 @ terminator
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-sha256_block_data_order
-#endif
-.align 5
-
-.global sha256_block_data_order
-.type sha256_block_data_order,%function
-sha256_block_data_order:
-.Lsha256_block_data_order:
-#if __ARM_ARCH__<7
- sub r3,pc,#8 @ sha256_block_data_order
-#else
- adr r3,.Lsha256_block_data_order
-#endif
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
- ldr r12,.LOPENSSL_armcap
- ldr r12,[r3,r12] @ OPENSSL_armcap_P
- tst r12,#ARMV8_SHA256
- bne .LARMv8
- tst r12,#ARMV7_NEON
- bne .LNEON
-#endif
- add r2,r1,r2,lsl#6 @ len to point at the end of inp
- stmdb sp!,{r0,r1,r2,r4-r11,lr}
- ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11}
- sub r14,r3,#256+32 @ K256
- sub sp,sp,#16*4 @ alloca(X[16])
-.Loop:
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r5,r6 @ magic
- eor r12,r12,r12
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 0
-# if 0==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r8,r8,ror#5
- add r4,r4,r12 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r8,ror#19 @ Sigma1(e)
-# ifndef __ARMEB__
- rev r2,r2
-# endif
-#else
- @ ldrb r2,[r1,#3] @ 0
- add r4,r4,r12 @ h+=Maj(a,b,c) from the past
- ldrb r12,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r12,lsl#8
- ldrb r12,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 0==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r8,r8,ror#5
- orr r2,r2,r12,lsl#24
- eor r0,r0,r8,ror#19 @ Sigma1(e)
-#endif
- ldr r12,[r14],#4 @ *K256++
- add r11,r11,r2 @ h+=X[i]
- str r2,[sp,#0*4]
- eor r2,r9,r10
- add r11,r11,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r8
- add r11,r11,r12 @ h+=K256[i]
- eor r2,r2,r10 @ Ch(e,f,g)
- eor r0,r4,r4,ror#11
- add r11,r11,r2 @ h+=Ch(e,f,g)
-#if 0==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 0<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r4,r5 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#2*4] @ from future BODY_16_xx
- eor r12,r4,r5 @ a^b, b^c in next round
- ldr r1,[sp,#15*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r4,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r7,r7,r11 @ d+=h
- eor r3,r3,r5 @ Maj(a,b,c)
- add r11,r11,r0,ror#2 @ h+=Sigma0(a)
- @ add r11,r11,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 1
-# if 1==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r7,r7,ror#5
- add r11,r11,r3 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r7,ror#19 @ Sigma1(e)
-# ifndef __ARMEB__
- rev r2,r2
-# endif
-#else
- @ ldrb r2,[r1,#3] @ 1
- add r11,r11,r3 @ h+=Maj(a,b,c) from the past
- ldrb r3,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r3,lsl#8
- ldrb r3,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 1==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r7,r7,ror#5
- orr r2,r2,r3,lsl#24
- eor r0,r0,r7,ror#19 @ Sigma1(e)
-#endif
- ldr r3,[r14],#4 @ *K256++
- add r10,r10,r2 @ h+=X[i]
- str r2,[sp,#1*4]
- eor r2,r8,r9
- add r10,r10,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r7
- add r10,r10,r3 @ h+=K256[i]
- eor r2,r2,r9 @ Ch(e,f,g)
- eor r0,r11,r11,ror#11
- add r10,r10,r2 @ h+=Ch(e,f,g)
-#if 1==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 1<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r11,r4 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#3*4] @ from future BODY_16_xx
- eor r3,r11,r4 @ a^b, b^c in next round
- ldr r1,[sp,#0*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r11,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r6,r6,r10 @ d+=h
- eor r12,r12,r4 @ Maj(a,b,c)
- add r10,r10,r0,ror#2 @ h+=Sigma0(a)
- @ add r10,r10,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 2
-# if 2==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r6,r6,ror#5
- add r10,r10,r12 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r6,ror#19 @ Sigma1(e)
-# ifndef __ARMEB__
- rev r2,r2
-# endif
-#else
- @ ldrb r2,[r1,#3] @ 2
- add r10,r10,r12 @ h+=Maj(a,b,c) from the past
- ldrb r12,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r12,lsl#8
- ldrb r12,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 2==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r6,r6,ror#5
- orr r2,r2,r12,lsl#24
- eor r0,r0,r6,ror#19 @ Sigma1(e)
-#endif
- ldr r12,[r14],#4 @ *K256++
- add r9,r9,r2 @ h+=X[i]
- str r2,[sp,#2*4]
- eor r2,r7,r8
- add r9,r9,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r6
- add r9,r9,r12 @ h+=K256[i]
- eor r2,r2,r8 @ Ch(e,f,g)
- eor r0,r10,r10,ror#11
- add r9,r9,r2 @ h+=Ch(e,f,g)
-#if 2==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 2<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r10,r11 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#4*4] @ from future BODY_16_xx
- eor r12,r10,r11 @ a^b, b^c in next round
- ldr r1,[sp,#1*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r10,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r5,r5,r9 @ d+=h
- eor r3,r3,r11 @ Maj(a,b,c)
- add r9,r9,r0,ror#2 @ h+=Sigma0(a)
- @ add r9,r9,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 3
-# if 3==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r5,r5,ror#5
- add r9,r9,r3 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r5,ror#19 @ Sigma1(e)
-# ifndef __ARMEB__
- rev r2,r2
-# endif
-#else
- @ ldrb r2,[r1,#3] @ 3
- add r9,r9,r3 @ h+=Maj(a,b,c) from the past
- ldrb r3,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r3,lsl#8
- ldrb r3,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 3==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r5,r5,ror#5
- orr r2,r2,r3,lsl#24
- eor r0,r0,r5,ror#19 @ Sigma1(e)
-#endif
- ldr r3,[r14],#4 @ *K256++
- add r8,r8,r2 @ h+=X[i]
- str r2,[sp,#3*4]
- eor r2,r6,r7
- add r8,r8,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r5
- add r8,r8,r3 @ h+=K256[i]
- eor r2,r2,r7 @ Ch(e,f,g)
- eor r0,r9,r9,ror#11
- add r8,r8,r2 @ h+=Ch(e,f,g)
-#if 3==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 3<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r9,r10 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#5*4] @ from future BODY_16_xx
- eor r3,r9,r10 @ a^b, b^c in next round
- ldr r1,[sp,#2*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r9,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r4,r4,r8 @ d+=h
- eor r12,r12,r10 @ Maj(a,b,c)
- add r8,r8,r0,ror#2 @ h+=Sigma0(a)
- @ add r8,r8,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 4
-# if 4==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r4,r4,ror#5
- add r8,r8,r12 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r4,ror#19 @ Sigma1(e)
-# ifndef __ARMEB__
- rev r2,r2
-# endif
-#else
- @ ldrb r2,[r1,#3] @ 4
- add r8,r8,r12 @ h+=Maj(a,b,c) from the past
- ldrb r12,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r12,lsl#8
- ldrb r12,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 4==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r4,r4,ror#5
- orr r2,r2,r12,lsl#24
- eor r0,r0,r4,ror#19 @ Sigma1(e)
-#endif
- ldr r12,[r14],#4 @ *K256++
- add r7,r7,r2 @ h+=X[i]
- str r2,[sp,#4*4]
- eor r2,r5,r6
- add r7,r7,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r4
- add r7,r7,r12 @ h+=K256[i]
- eor r2,r2,r6 @ Ch(e,f,g)
- eor r0,r8,r8,ror#11
- add r7,r7,r2 @ h+=Ch(e,f,g)
-#if 4==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 4<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r8,r9 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#6*4] @ from future BODY_16_xx
- eor r12,r8,r9 @ a^b, b^c in next round
- ldr r1,[sp,#3*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r8,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r11,r11,r7 @ d+=h
- eor r3,r3,r9 @ Maj(a,b,c)
- add r7,r7,r0,ror#2 @ h+=Sigma0(a)
- @ add r7,r7,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 5
-# if 5==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r11,r11,ror#5
- add r7,r7,r3 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r11,ror#19 @ Sigma1(e)
-# ifndef __ARMEB__
- rev r2,r2
-# endif
-#else
- @ ldrb r2,[r1,#3] @ 5
- add r7,r7,r3 @ h+=Maj(a,b,c) from the past
- ldrb r3,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r3,lsl#8
- ldrb r3,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 5==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r11,r11,ror#5
- orr r2,r2,r3,lsl#24
- eor r0,r0,r11,ror#19 @ Sigma1(e)
-#endif
- ldr r3,[r14],#4 @ *K256++
- add r6,r6,r2 @ h+=X[i]
- str r2,[sp,#5*4]
- eor r2,r4,r5
- add r6,r6,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r11
- add r6,r6,r3 @ h+=K256[i]
- eor r2,r2,r5 @ Ch(e,f,g)
- eor r0,r7,r7,ror#11
- add r6,r6,r2 @ h+=Ch(e,f,g)
-#if 5==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 5<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r7,r8 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#7*4] @ from future BODY_16_xx
- eor r3,r7,r8 @ a^b, b^c in next round
- ldr r1,[sp,#4*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r7,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r10,r10,r6 @ d+=h
- eor r12,r12,r8 @ Maj(a,b,c)
- add r6,r6,r0,ror#2 @ h+=Sigma0(a)
- @ add r6,r6,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 6
-# if 6==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r10,r10,ror#5
- add r6,r6,r12 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r10,ror#19 @ Sigma1(e)
-# ifndef __ARMEB__
- rev r2,r2
-# endif
-#else
- @ ldrb r2,[r1,#3] @ 6
- add r6,r6,r12 @ h+=Maj(a,b,c) from the past
- ldrb r12,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r12,lsl#8
- ldrb r12,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 6==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r10,r10,ror#5
- orr r2,r2,r12,lsl#24
- eor r0,r0,r10,ror#19 @ Sigma1(e)
-#endif
- ldr r12,[r14],#4 @ *K256++
- add r5,r5,r2 @ h+=X[i]
- str r2,[sp,#6*4]
- eor r2,r11,r4
- add r5,r5,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r10
- add r5,r5,r12 @ h+=K256[i]
- eor r2,r2,r4 @ Ch(e,f,g)
- eor r0,r6,r6,ror#11
- add r5,r5,r2 @ h+=Ch(e,f,g)
-#if 6==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 6<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r6,r7 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#8*4] @ from future BODY_16_xx
- eor r12,r6,r7 @ a^b, b^c in next round
- ldr r1,[sp,#5*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r6,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r9,r9,r5 @ d+=h
- eor r3,r3,r7 @ Maj(a,b,c)
- add r5,r5,r0,ror#2 @ h+=Sigma0(a)
- @ add r5,r5,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 7
-# if 7==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r9,r9,ror#5
- add r5,r5,r3 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r9,ror#19 @ Sigma1(e)
-# ifndef __ARMEB__
- rev r2,r2
-# endif
-#else
- @ ldrb r2,[r1,#3] @ 7
- add r5,r5,r3 @ h+=Maj(a,b,c) from the past
- ldrb r3,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r3,lsl#8
- ldrb r3,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 7==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r9,r9,ror#5
- orr r2,r2,r3,lsl#24
- eor r0,r0,r9,ror#19 @ Sigma1(e)
-#endif
- ldr r3,[r14],#4 @ *K256++
- add r4,r4,r2 @ h+=X[i]
- str r2,[sp,#7*4]
- eor r2,r10,r11
- add r4,r4,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r9
- add r4,r4,r3 @ h+=K256[i]
- eor r2,r2,r11 @ Ch(e,f,g)
- eor r0,r5,r5,ror#11
- add r4,r4,r2 @ h+=Ch(e,f,g)
-#if 7==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 7<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r5,r6 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#9*4] @ from future BODY_16_xx
- eor r3,r5,r6 @ a^b, b^c in next round
- ldr r1,[sp,#6*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r5,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r8,r8,r4 @ d+=h
- eor r12,r12,r6 @ Maj(a,b,c)
- add r4,r4,r0,ror#2 @ h+=Sigma0(a)
- @ add r4,r4,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 8
-# if 8==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r8,r8,ror#5
- add r4,r4,r12 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r8,ror#19 @ Sigma1(e)
-# ifndef __ARMEB__
- rev r2,r2
-# endif
-#else
- @ ldrb r2,[r1,#3] @ 8
- add r4,r4,r12 @ h+=Maj(a,b,c) from the past
- ldrb r12,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r12,lsl#8
- ldrb r12,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 8==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r8,r8,ror#5
- orr r2,r2,r12,lsl#24
- eor r0,r0,r8,ror#19 @ Sigma1(e)
-#endif
- ldr r12,[r14],#4 @ *K256++
- add r11,r11,r2 @ h+=X[i]
- str r2,[sp,#8*4]
- eor r2,r9,r10
- add r11,r11,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r8
- add r11,r11,r12 @ h+=K256[i]
- eor r2,r2,r10 @ Ch(e,f,g)
- eor r0,r4,r4,ror#11
- add r11,r11,r2 @ h+=Ch(e,f,g)
-#if 8==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 8<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r4,r5 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#10*4] @ from future BODY_16_xx
- eor r12,r4,r5 @ a^b, b^c in next round
- ldr r1,[sp,#7*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r4,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r7,r7,r11 @ d+=h
- eor r3,r3,r5 @ Maj(a,b,c)
- add r11,r11,r0,ror#2 @ h+=Sigma0(a)
- @ add r11,r11,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 9
-# if 9==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r7,r7,ror#5
- add r11,r11,r3 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r7,ror#19 @ Sigma1(e)
-# ifndef __ARMEB__
- rev r2,r2
-# endif
-#else
- @ ldrb r2,[r1,#3] @ 9
- add r11,r11,r3 @ h+=Maj(a,b,c) from the past
- ldrb r3,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r3,lsl#8
- ldrb r3,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 9==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r7,r7,ror#5
- orr r2,r2,r3,lsl#24
- eor r0,r0,r7,ror#19 @ Sigma1(e)
-#endif
- ldr r3,[r14],#4 @ *K256++
- add r10,r10,r2 @ h+=X[i]
- str r2,[sp,#9*4]
- eor r2,r8,r9
- add r10,r10,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r7
- add r10,r10,r3 @ h+=K256[i]
- eor r2,r2,r9 @ Ch(e,f,g)
- eor r0,r11,r11,ror#11
- add r10,r10,r2 @ h+=Ch(e,f,g)
-#if 9==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 9<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r11,r4 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#11*4] @ from future BODY_16_xx
- eor r3,r11,r4 @ a^b, b^c in next round
- ldr r1,[sp,#8*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r11,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r6,r6,r10 @ d+=h
- eor r12,r12,r4 @ Maj(a,b,c)
- add r10,r10,r0,ror#2 @ h+=Sigma0(a)
- @ add r10,r10,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 10
-# if 10==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r6,r6,ror#5
- add r10,r10,r12 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r6,ror#19 @ Sigma1(e)
-# ifndef __ARMEB__
- rev r2,r2
-# endif
-#else
- @ ldrb r2,[r1,#3] @ 10
- add r10,r10,r12 @ h+=Maj(a,b,c) from the past
- ldrb r12,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r12,lsl#8
- ldrb r12,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 10==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r6,r6,ror#5
- orr r2,r2,r12,lsl#24
- eor r0,r0,r6,ror#19 @ Sigma1(e)
-#endif
- ldr r12,[r14],#4 @ *K256++
- add r9,r9,r2 @ h+=X[i]
- str r2,[sp,#10*4]
- eor r2,r7,r8
- add r9,r9,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r6
- add r9,r9,r12 @ h+=K256[i]
- eor r2,r2,r8 @ Ch(e,f,g)
- eor r0,r10,r10,ror#11
- add r9,r9,r2 @ h+=Ch(e,f,g)
-#if 10==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 10<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r10,r11 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#12*4] @ from future BODY_16_xx
- eor r12,r10,r11 @ a^b, b^c in next round
- ldr r1,[sp,#9*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r10,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r5,r5,r9 @ d+=h
- eor r3,r3,r11 @ Maj(a,b,c)
- add r9,r9,r0,ror#2 @ h+=Sigma0(a)
- @ add r9,r9,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 11
-# if 11==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r5,r5,ror#5
- add r9,r9,r3 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r5,ror#19 @ Sigma1(e)
-# ifndef __ARMEB__
- rev r2,r2
-# endif
-#else
- @ ldrb r2,[r1,#3] @ 11
- add r9,r9,r3 @ h+=Maj(a,b,c) from the past
- ldrb r3,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r3,lsl#8
- ldrb r3,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 11==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r5,r5,ror#5
- orr r2,r2,r3,lsl#24
- eor r0,r0,r5,ror#19 @ Sigma1(e)
-#endif
- ldr r3,[r14],#4 @ *K256++
- add r8,r8,r2 @ h+=X[i]
- str r2,[sp,#11*4]
- eor r2,r6,r7
- add r8,r8,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r5
- add r8,r8,r3 @ h+=K256[i]
- eor r2,r2,r7 @ Ch(e,f,g)
- eor r0,r9,r9,ror#11
- add r8,r8,r2 @ h+=Ch(e,f,g)
-#if 11==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 11<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r9,r10 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#13*4] @ from future BODY_16_xx
- eor r3,r9,r10 @ a^b, b^c in next round
- ldr r1,[sp,#10*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r9,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r4,r4,r8 @ d+=h
- eor r12,r12,r10 @ Maj(a,b,c)
- add r8,r8,r0,ror#2 @ h+=Sigma0(a)
- @ add r8,r8,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 12
-# if 12==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r4,r4,ror#5
- add r8,r8,r12 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r4,ror#19 @ Sigma1(e)
-# ifndef __ARMEB__
- rev r2,r2
-# endif
-#else
- @ ldrb r2,[r1,#3] @ 12
- add r8,r8,r12 @ h+=Maj(a,b,c) from the past
- ldrb r12,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r12,lsl#8
- ldrb r12,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 12==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r4,r4,ror#5
- orr r2,r2,r12,lsl#24
- eor r0,r0,r4,ror#19 @ Sigma1(e)
-#endif
- ldr r12,[r14],#4 @ *K256++
- add r7,r7,r2 @ h+=X[i]
- str r2,[sp,#12*4]
- eor r2,r5,r6
- add r7,r7,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r4
- add r7,r7,r12 @ h+=K256[i]
- eor r2,r2,r6 @ Ch(e,f,g)
- eor r0,r8,r8,ror#11
- add r7,r7,r2 @ h+=Ch(e,f,g)
-#if 12==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 12<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r8,r9 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#14*4] @ from future BODY_16_xx
- eor r12,r8,r9 @ a^b, b^c in next round
- ldr r1,[sp,#11*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r8,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r11,r11,r7 @ d+=h
- eor r3,r3,r9 @ Maj(a,b,c)
- add r7,r7,r0,ror#2 @ h+=Sigma0(a)
- @ add r7,r7,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 13
-# if 13==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r11,r11,ror#5
- add r7,r7,r3 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r11,ror#19 @ Sigma1(e)
-# ifndef __ARMEB__
- rev r2,r2
-# endif
-#else
- @ ldrb r2,[r1,#3] @ 13
- add r7,r7,r3 @ h+=Maj(a,b,c) from the past
- ldrb r3,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r3,lsl#8
- ldrb r3,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 13==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r11,r11,ror#5
- orr r2,r2,r3,lsl#24
- eor r0,r0,r11,ror#19 @ Sigma1(e)
-#endif
- ldr r3,[r14],#4 @ *K256++
- add r6,r6,r2 @ h+=X[i]
- str r2,[sp,#13*4]
- eor r2,r4,r5
- add r6,r6,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r11
- add r6,r6,r3 @ h+=K256[i]
- eor r2,r2,r5 @ Ch(e,f,g)
- eor r0,r7,r7,ror#11
- add r6,r6,r2 @ h+=Ch(e,f,g)
-#if 13==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 13<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r7,r8 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#15*4] @ from future BODY_16_xx
- eor r3,r7,r8 @ a^b, b^c in next round
- ldr r1,[sp,#12*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r7,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r10,r10,r6 @ d+=h
- eor r12,r12,r8 @ Maj(a,b,c)
- add r6,r6,r0,ror#2 @ h+=Sigma0(a)
- @ add r6,r6,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 14
-# if 14==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r10,r10,ror#5
- add r6,r6,r12 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r10,ror#19 @ Sigma1(e)
-# ifndef __ARMEB__
- rev r2,r2
-# endif
-#else
- @ ldrb r2,[r1,#3] @ 14
- add r6,r6,r12 @ h+=Maj(a,b,c) from the past
- ldrb r12,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r12,lsl#8
- ldrb r12,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 14==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r10,r10,ror#5
- orr r2,r2,r12,lsl#24
- eor r0,r0,r10,ror#19 @ Sigma1(e)
-#endif
- ldr r12,[r14],#4 @ *K256++
- add r5,r5,r2 @ h+=X[i]
- str r2,[sp,#14*4]
- eor r2,r11,r4
- add r5,r5,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r10
- add r5,r5,r12 @ h+=K256[i]
- eor r2,r2,r4 @ Ch(e,f,g)
- eor r0,r6,r6,ror#11
- add r5,r5,r2 @ h+=Ch(e,f,g)
-#if 14==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 14<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r6,r7 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#0*4] @ from future BODY_16_xx
- eor r12,r6,r7 @ a^b, b^c in next round
- ldr r1,[sp,#13*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r6,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r9,r9,r5 @ d+=h
- eor r3,r3,r7 @ Maj(a,b,c)
- add r5,r5,r0,ror#2 @ h+=Sigma0(a)
- @ add r5,r5,r3 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- @ ldr r2,[r1],#4 @ 15
-# if 15==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r9,r9,ror#5
- add r5,r5,r3 @ h+=Maj(a,b,c) from the past
- eor r0,r0,r9,ror#19 @ Sigma1(e)
-# ifndef __ARMEB__
- rev r2,r2
-# endif
-#else
- @ ldrb r2,[r1,#3] @ 15
- add r5,r5,r3 @ h+=Maj(a,b,c) from the past
- ldrb r3,[r1,#2]
- ldrb r0,[r1,#1]
- orr r2,r2,r3,lsl#8
- ldrb r3,[r1],#4
- orr r2,r2,r0,lsl#16
-# if 15==15
- str r1,[sp,#17*4] @ make room for r1
-# endif
- eor r0,r9,r9,ror#5
- orr r2,r2,r3,lsl#24
- eor r0,r0,r9,ror#19 @ Sigma1(e)
-#endif
- ldr r3,[r14],#4 @ *K256++
- add r4,r4,r2 @ h+=X[i]
- str r2,[sp,#15*4]
- eor r2,r10,r11
- add r4,r4,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r9
- add r4,r4,r3 @ h+=K256[i]
- eor r2,r2,r11 @ Ch(e,f,g)
- eor r0,r5,r5,ror#11
- add r4,r4,r2 @ h+=Ch(e,f,g)
-#if 15==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 15<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r5,r6 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#1*4] @ from future BODY_16_xx
- eor r3,r5,r6 @ a^b, b^c in next round
- ldr r1,[sp,#14*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r5,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r8,r8,r4 @ d+=h
- eor r12,r12,r6 @ Maj(a,b,c)
- add r4,r4,r0,ror#2 @ h+=Sigma0(a)
- @ add r4,r4,r12 @ h+=Maj(a,b,c)
-.Lrounds_16_xx:
- @ ldr r2,[sp,#1*4] @ 16
- @ ldr r1,[sp,#14*4]
- mov r0,r2,ror#7
- add r4,r4,r12 @ h+=Maj(a,b,c) from the past
- mov r12,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r12,r12,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#0*4]
- eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#9*4]
-
- add r12,r12,r0
- eor r0,r8,r8,ror#5 @ from BODY_00_15
- add r2,r2,r12
- eor r0,r0,r8,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r12,[r14],#4 @ *K256++
- add r11,r11,r2 @ h+=X[i]
- str r2,[sp,#0*4]
- eor r2,r9,r10
- add r11,r11,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r8
- add r11,r11,r12 @ h+=K256[i]
- eor r2,r2,r10 @ Ch(e,f,g)
- eor r0,r4,r4,ror#11
- add r11,r11,r2 @ h+=Ch(e,f,g)
-#if 16==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 16<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r4,r5 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#2*4] @ from future BODY_16_xx
- eor r12,r4,r5 @ a^b, b^c in next round
- ldr r1,[sp,#15*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r4,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r7,r7,r11 @ d+=h
- eor r3,r3,r5 @ Maj(a,b,c)
- add r11,r11,r0,ror#2 @ h+=Sigma0(a)
- @ add r11,r11,r3 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#2*4] @ 17
- @ ldr r1,[sp,#15*4]
- mov r0,r2,ror#7
- add r11,r11,r3 @ h+=Maj(a,b,c) from the past
- mov r3,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r3,r3,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#1*4]
- eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#10*4]
-
- add r3,r3,r0
- eor r0,r7,r7,ror#5 @ from BODY_00_15
- add r2,r2,r3
- eor r0,r0,r7,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r3,[r14],#4 @ *K256++
- add r10,r10,r2 @ h+=X[i]
- str r2,[sp,#1*4]
- eor r2,r8,r9
- add r10,r10,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r7
- add r10,r10,r3 @ h+=K256[i]
- eor r2,r2,r9 @ Ch(e,f,g)
- eor r0,r11,r11,ror#11
- add r10,r10,r2 @ h+=Ch(e,f,g)
-#if 17==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 17<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r11,r4 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#3*4] @ from future BODY_16_xx
- eor r3,r11,r4 @ a^b, b^c in next round
- ldr r1,[sp,#0*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r11,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r6,r6,r10 @ d+=h
- eor r12,r12,r4 @ Maj(a,b,c)
- add r10,r10,r0,ror#2 @ h+=Sigma0(a)
- @ add r10,r10,r12 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#3*4] @ 18
- @ ldr r1,[sp,#0*4]
- mov r0,r2,ror#7
- add r10,r10,r12 @ h+=Maj(a,b,c) from the past
- mov r12,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r12,r12,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#2*4]
- eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#11*4]
-
- add r12,r12,r0
- eor r0,r6,r6,ror#5 @ from BODY_00_15
- add r2,r2,r12
- eor r0,r0,r6,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r12,[r14],#4 @ *K256++
- add r9,r9,r2 @ h+=X[i]
- str r2,[sp,#2*4]
- eor r2,r7,r8
- add r9,r9,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r6
- add r9,r9,r12 @ h+=K256[i]
- eor r2,r2,r8 @ Ch(e,f,g)
- eor r0,r10,r10,ror#11
- add r9,r9,r2 @ h+=Ch(e,f,g)
-#if 18==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 18<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r10,r11 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#4*4] @ from future BODY_16_xx
- eor r12,r10,r11 @ a^b, b^c in next round
- ldr r1,[sp,#1*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r10,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r5,r5,r9 @ d+=h
- eor r3,r3,r11 @ Maj(a,b,c)
- add r9,r9,r0,ror#2 @ h+=Sigma0(a)
- @ add r9,r9,r3 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#4*4] @ 19
- @ ldr r1,[sp,#1*4]
- mov r0,r2,ror#7
- add r9,r9,r3 @ h+=Maj(a,b,c) from the past
- mov r3,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r3,r3,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#3*4]
- eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#12*4]
-
- add r3,r3,r0
- eor r0,r5,r5,ror#5 @ from BODY_00_15
- add r2,r2,r3
- eor r0,r0,r5,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r3,[r14],#4 @ *K256++
- add r8,r8,r2 @ h+=X[i]
- str r2,[sp,#3*4]
- eor r2,r6,r7
- add r8,r8,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r5
- add r8,r8,r3 @ h+=K256[i]
- eor r2,r2,r7 @ Ch(e,f,g)
- eor r0,r9,r9,ror#11
- add r8,r8,r2 @ h+=Ch(e,f,g)
-#if 19==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 19<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r9,r10 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#5*4] @ from future BODY_16_xx
- eor r3,r9,r10 @ a^b, b^c in next round
- ldr r1,[sp,#2*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r9,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r4,r4,r8 @ d+=h
- eor r12,r12,r10 @ Maj(a,b,c)
- add r8,r8,r0,ror#2 @ h+=Sigma0(a)
- @ add r8,r8,r12 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#5*4] @ 20
- @ ldr r1,[sp,#2*4]
- mov r0,r2,ror#7
- add r8,r8,r12 @ h+=Maj(a,b,c) from the past
- mov r12,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r12,r12,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#4*4]
- eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#13*4]
-
- add r12,r12,r0
- eor r0,r4,r4,ror#5 @ from BODY_00_15
- add r2,r2,r12
- eor r0,r0,r4,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r12,[r14],#4 @ *K256++
- add r7,r7,r2 @ h+=X[i]
- str r2,[sp,#4*4]
- eor r2,r5,r6
- add r7,r7,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r4
- add r7,r7,r12 @ h+=K256[i]
- eor r2,r2,r6 @ Ch(e,f,g)
- eor r0,r8,r8,ror#11
- add r7,r7,r2 @ h+=Ch(e,f,g)
-#if 20==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 20<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r8,r9 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#6*4] @ from future BODY_16_xx
- eor r12,r8,r9 @ a^b, b^c in next round
- ldr r1,[sp,#3*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r8,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r11,r11,r7 @ d+=h
- eor r3,r3,r9 @ Maj(a,b,c)
- add r7,r7,r0,ror#2 @ h+=Sigma0(a)
- @ add r7,r7,r3 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#6*4] @ 21
- @ ldr r1,[sp,#3*4]
- mov r0,r2,ror#7
- add r7,r7,r3 @ h+=Maj(a,b,c) from the past
- mov r3,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r3,r3,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#5*4]
- eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#14*4]
-
- add r3,r3,r0
- eor r0,r11,r11,ror#5 @ from BODY_00_15
- add r2,r2,r3
- eor r0,r0,r11,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r3,[r14],#4 @ *K256++
- add r6,r6,r2 @ h+=X[i]
- str r2,[sp,#5*4]
- eor r2,r4,r5
- add r6,r6,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r11
- add r6,r6,r3 @ h+=K256[i]
- eor r2,r2,r5 @ Ch(e,f,g)
- eor r0,r7,r7,ror#11
- add r6,r6,r2 @ h+=Ch(e,f,g)
-#if 21==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 21<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r7,r8 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#7*4] @ from future BODY_16_xx
- eor r3,r7,r8 @ a^b, b^c in next round
- ldr r1,[sp,#4*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r7,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r10,r10,r6 @ d+=h
- eor r12,r12,r8 @ Maj(a,b,c)
- add r6,r6,r0,ror#2 @ h+=Sigma0(a)
- @ add r6,r6,r12 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#7*4] @ 22
- @ ldr r1,[sp,#4*4]
- mov r0,r2,ror#7
- add r6,r6,r12 @ h+=Maj(a,b,c) from the past
- mov r12,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r12,r12,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#6*4]
- eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#15*4]
-
- add r12,r12,r0
- eor r0,r10,r10,ror#5 @ from BODY_00_15
- add r2,r2,r12
- eor r0,r0,r10,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r12,[r14],#4 @ *K256++
- add r5,r5,r2 @ h+=X[i]
- str r2,[sp,#6*4]
- eor r2,r11,r4
- add r5,r5,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r10
- add r5,r5,r12 @ h+=K256[i]
- eor r2,r2,r4 @ Ch(e,f,g)
- eor r0,r6,r6,ror#11
- add r5,r5,r2 @ h+=Ch(e,f,g)
-#if 22==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 22<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r6,r7 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#8*4] @ from future BODY_16_xx
- eor r12,r6,r7 @ a^b, b^c in next round
- ldr r1,[sp,#5*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r6,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r9,r9,r5 @ d+=h
- eor r3,r3,r7 @ Maj(a,b,c)
- add r5,r5,r0,ror#2 @ h+=Sigma0(a)
- @ add r5,r5,r3 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#8*4] @ 23
- @ ldr r1,[sp,#5*4]
- mov r0,r2,ror#7
- add r5,r5,r3 @ h+=Maj(a,b,c) from the past
- mov r3,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r3,r3,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#7*4]
- eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#0*4]
-
- add r3,r3,r0
- eor r0,r9,r9,ror#5 @ from BODY_00_15
- add r2,r2,r3
- eor r0,r0,r9,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r3,[r14],#4 @ *K256++
- add r4,r4,r2 @ h+=X[i]
- str r2,[sp,#7*4]
- eor r2,r10,r11
- add r4,r4,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r9
- add r4,r4,r3 @ h+=K256[i]
- eor r2,r2,r11 @ Ch(e,f,g)
- eor r0,r5,r5,ror#11
- add r4,r4,r2 @ h+=Ch(e,f,g)
-#if 23==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 23<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r5,r6 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#9*4] @ from future BODY_16_xx
- eor r3,r5,r6 @ a^b, b^c in next round
- ldr r1,[sp,#6*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r5,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r8,r8,r4 @ d+=h
- eor r12,r12,r6 @ Maj(a,b,c)
- add r4,r4,r0,ror#2 @ h+=Sigma0(a)
- @ add r4,r4,r12 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#9*4] @ 24
- @ ldr r1,[sp,#6*4]
- mov r0,r2,ror#7
- add r4,r4,r12 @ h+=Maj(a,b,c) from the past
- mov r12,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r12,r12,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#8*4]
- eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#1*4]
-
- add r12,r12,r0
- eor r0,r8,r8,ror#5 @ from BODY_00_15
- add r2,r2,r12
- eor r0,r0,r8,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r12,[r14],#4 @ *K256++
- add r11,r11,r2 @ h+=X[i]
- str r2,[sp,#8*4]
- eor r2,r9,r10
- add r11,r11,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r8
- add r11,r11,r12 @ h+=K256[i]
- eor r2,r2,r10 @ Ch(e,f,g)
- eor r0,r4,r4,ror#11
- add r11,r11,r2 @ h+=Ch(e,f,g)
-#if 24==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 24<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r4,r5 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#10*4] @ from future BODY_16_xx
- eor r12,r4,r5 @ a^b, b^c in next round
- ldr r1,[sp,#7*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r4,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r7,r7,r11 @ d+=h
- eor r3,r3,r5 @ Maj(a,b,c)
- add r11,r11,r0,ror#2 @ h+=Sigma0(a)
- @ add r11,r11,r3 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#10*4] @ 25
- @ ldr r1,[sp,#7*4]
- mov r0,r2,ror#7
- add r11,r11,r3 @ h+=Maj(a,b,c) from the past
- mov r3,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r3,r3,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#9*4]
- eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#2*4]
-
- add r3,r3,r0
- eor r0,r7,r7,ror#5 @ from BODY_00_15
- add r2,r2,r3
- eor r0,r0,r7,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r3,[r14],#4 @ *K256++
- add r10,r10,r2 @ h+=X[i]
- str r2,[sp,#9*4]
- eor r2,r8,r9
- add r10,r10,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r7
- add r10,r10,r3 @ h+=K256[i]
- eor r2,r2,r9 @ Ch(e,f,g)
- eor r0,r11,r11,ror#11
- add r10,r10,r2 @ h+=Ch(e,f,g)
-#if 25==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 25<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r11,r4 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#11*4] @ from future BODY_16_xx
- eor r3,r11,r4 @ a^b, b^c in next round
- ldr r1,[sp,#8*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r11,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r6,r6,r10 @ d+=h
- eor r12,r12,r4 @ Maj(a,b,c)
- add r10,r10,r0,ror#2 @ h+=Sigma0(a)
- @ add r10,r10,r12 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#11*4] @ 26
- @ ldr r1,[sp,#8*4]
- mov r0,r2,ror#7
- add r10,r10,r12 @ h+=Maj(a,b,c) from the past
- mov r12,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r12,r12,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#10*4]
- eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#3*4]
-
- add r12,r12,r0
- eor r0,r6,r6,ror#5 @ from BODY_00_15
- add r2,r2,r12
- eor r0,r0,r6,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r12,[r14],#4 @ *K256++
- add r9,r9,r2 @ h+=X[i]
- str r2,[sp,#10*4]
- eor r2,r7,r8
- add r9,r9,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r6
- add r9,r9,r12 @ h+=K256[i]
- eor r2,r2,r8 @ Ch(e,f,g)
- eor r0,r10,r10,ror#11
- add r9,r9,r2 @ h+=Ch(e,f,g)
-#if 26==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 26<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r10,r11 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#12*4] @ from future BODY_16_xx
- eor r12,r10,r11 @ a^b, b^c in next round
- ldr r1,[sp,#9*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r10,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r5,r5,r9 @ d+=h
- eor r3,r3,r11 @ Maj(a,b,c)
- add r9,r9,r0,ror#2 @ h+=Sigma0(a)
- @ add r9,r9,r3 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#12*4] @ 27
- @ ldr r1,[sp,#9*4]
- mov r0,r2,ror#7
- add r9,r9,r3 @ h+=Maj(a,b,c) from the past
- mov r3,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r3,r3,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#11*4]
- eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#4*4]
-
- add r3,r3,r0
- eor r0,r5,r5,ror#5 @ from BODY_00_15
- add r2,r2,r3
- eor r0,r0,r5,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r3,[r14],#4 @ *K256++
- add r8,r8,r2 @ h+=X[i]
- str r2,[sp,#11*4]
- eor r2,r6,r7
- add r8,r8,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r5
- add r8,r8,r3 @ h+=K256[i]
- eor r2,r2,r7 @ Ch(e,f,g)
- eor r0,r9,r9,ror#11
- add r8,r8,r2 @ h+=Ch(e,f,g)
-#if 27==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 27<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r9,r10 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#13*4] @ from future BODY_16_xx
- eor r3,r9,r10 @ a^b, b^c in next round
- ldr r1,[sp,#10*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r9,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r4,r4,r8 @ d+=h
- eor r12,r12,r10 @ Maj(a,b,c)
- add r8,r8,r0,ror#2 @ h+=Sigma0(a)
- @ add r8,r8,r12 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#13*4] @ 28
- @ ldr r1,[sp,#10*4]
- mov r0,r2,ror#7
- add r8,r8,r12 @ h+=Maj(a,b,c) from the past
- mov r12,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r12,r12,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#12*4]
- eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#5*4]
-
- add r12,r12,r0
- eor r0,r4,r4,ror#5 @ from BODY_00_15
- add r2,r2,r12
- eor r0,r0,r4,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r12,[r14],#4 @ *K256++
- add r7,r7,r2 @ h+=X[i]
- str r2,[sp,#12*4]
- eor r2,r5,r6
- add r7,r7,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r4
- add r7,r7,r12 @ h+=K256[i]
- eor r2,r2,r6 @ Ch(e,f,g)
- eor r0,r8,r8,ror#11
- add r7,r7,r2 @ h+=Ch(e,f,g)
-#if 28==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 28<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r8,r9 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#14*4] @ from future BODY_16_xx
- eor r12,r8,r9 @ a^b, b^c in next round
- ldr r1,[sp,#11*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r8,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r11,r11,r7 @ d+=h
- eor r3,r3,r9 @ Maj(a,b,c)
- add r7,r7,r0,ror#2 @ h+=Sigma0(a)
- @ add r7,r7,r3 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#14*4] @ 29
- @ ldr r1,[sp,#11*4]
- mov r0,r2,ror#7
- add r7,r7,r3 @ h+=Maj(a,b,c) from the past
- mov r3,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r3,r3,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#13*4]
- eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#6*4]
-
- add r3,r3,r0
- eor r0,r11,r11,ror#5 @ from BODY_00_15
- add r2,r2,r3
- eor r0,r0,r11,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r3,[r14],#4 @ *K256++
- add r6,r6,r2 @ h+=X[i]
- str r2,[sp,#13*4]
- eor r2,r4,r5
- add r6,r6,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r11
- add r6,r6,r3 @ h+=K256[i]
- eor r2,r2,r5 @ Ch(e,f,g)
- eor r0,r7,r7,ror#11
- add r6,r6,r2 @ h+=Ch(e,f,g)
-#if 29==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 29<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r7,r8 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#15*4] @ from future BODY_16_xx
- eor r3,r7,r8 @ a^b, b^c in next round
- ldr r1,[sp,#12*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r7,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r10,r10,r6 @ d+=h
- eor r12,r12,r8 @ Maj(a,b,c)
- add r6,r6,r0,ror#2 @ h+=Sigma0(a)
- @ add r6,r6,r12 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#15*4] @ 30
- @ ldr r1,[sp,#12*4]
- mov r0,r2,ror#7
- add r6,r6,r12 @ h+=Maj(a,b,c) from the past
- mov r12,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r12,r12,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#14*4]
- eor r12,r12,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#7*4]
-
- add r12,r12,r0
- eor r0,r10,r10,ror#5 @ from BODY_00_15
- add r2,r2,r12
- eor r0,r0,r10,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r12,[r14],#4 @ *K256++
- add r5,r5,r2 @ h+=X[i]
- str r2,[sp,#14*4]
- eor r2,r11,r4
- add r5,r5,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r10
- add r5,r5,r12 @ h+=K256[i]
- eor r2,r2,r4 @ Ch(e,f,g)
- eor r0,r6,r6,ror#11
- add r5,r5,r2 @ h+=Ch(e,f,g)
-#if 30==31
- and r12,r12,#0xff
- cmp r12,#0xf2 @ done?
-#endif
-#if 30<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r12,r6,r7 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#0*4] @ from future BODY_16_xx
- eor r12,r6,r7 @ a^b, b^c in next round
- ldr r1,[sp,#13*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r6,ror#20 @ Sigma0(a)
- and r3,r3,r12 @ (b^c)&=(a^b)
- add r9,r9,r5 @ d+=h
- eor r3,r3,r7 @ Maj(a,b,c)
- add r5,r5,r0,ror#2 @ h+=Sigma0(a)
- @ add r5,r5,r3 @ h+=Maj(a,b,c)
- @ ldr r2,[sp,#0*4] @ 31
- @ ldr r1,[sp,#13*4]
- mov r0,r2,ror#7
- add r5,r5,r3 @ h+=Maj(a,b,c) from the past
- mov r3,r1,ror#17
- eor r0,r0,r2,ror#18
- eor r3,r3,r1,ror#19
- eor r0,r0,r2,lsr#3 @ sigma0(X[i+1])
- ldr r2,[sp,#15*4]
- eor r3,r3,r1,lsr#10 @ sigma1(X[i+14])
- ldr r1,[sp,#8*4]
-
- add r3,r3,r0
- eor r0,r9,r9,ror#5 @ from BODY_00_15
- add r2,r2,r3
- eor r0,r0,r9,ror#19 @ Sigma1(e)
- add r2,r2,r1 @ X[i]
- ldr r3,[r14],#4 @ *K256++
- add r4,r4,r2 @ h+=X[i]
- str r2,[sp,#15*4]
- eor r2,r10,r11
- add r4,r4,r0,ror#6 @ h+=Sigma1(e)
- and r2,r2,r9
- add r4,r4,r3 @ h+=K256[i]
- eor r2,r2,r11 @ Ch(e,f,g)
- eor r0,r5,r5,ror#11
- add r4,r4,r2 @ h+=Ch(e,f,g)
-#if 31==31
- and r3,r3,#0xff
- cmp r3,#0xf2 @ done?
-#endif
-#if 31<15
-# if __ARM_ARCH__>=7
- ldr r2,[r1],#4 @ prefetch
-# else
- ldrb r2,[r1,#3]
-# endif
- eor r3,r5,r6 @ a^b, b^c in next round
-#else
- ldr r2,[sp,#1*4] @ from future BODY_16_xx
- eor r3,r5,r6 @ a^b, b^c in next round
- ldr r1,[sp,#14*4] @ from future BODY_16_xx
-#endif
- eor r0,r0,r5,ror#20 @ Sigma0(a)
- and r12,r12,r3 @ (b^c)&=(a^b)
- add r8,r8,r4 @ d+=h
- eor r12,r12,r6 @ Maj(a,b,c)
- add r4,r4,r0,ror#2 @ h+=Sigma0(a)
- @ add r4,r4,r12 @ h+=Maj(a,b,c)
-#if __ARM_ARCH__>=7
- ite eq @ Thumb2 thing, sanity check in ARM
-#endif
- ldreq r3,[sp,#16*4] @ pull ctx
- bne .Lrounds_16_xx
-
- add r4,r4,r12 @ h+=Maj(a,b,c) from the past
- ldr r0,[r3,#0]
- ldr r2,[r3,#4]
- ldr r12,[r3,#8]
- add r4,r4,r0
- ldr r0,[r3,#12]
- add r5,r5,r2
- ldr r2,[r3,#16]
- add r6,r6,r12
- ldr r12,[r3,#20]
- add r7,r7,r0
- ldr r0,[r3,#24]
- add r8,r8,r2
- ldr r2,[r3,#28]
- add r9,r9,r12
- ldr r1,[sp,#17*4] @ pull inp
- ldr r12,[sp,#18*4] @ pull inp+len
- add r10,r10,r0
- add r11,r11,r2
- stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11}
- cmp r1,r12
- sub r14,r14,#256 @ rewind Ktbl
- bne .Loop
-
- add sp,sp,#19*4 @ destroy frame
-#if __ARM_ARCH__>=5
- ldmia sp!,{r4-r11,pc}
-#else
- ldmia sp!,{r4-r11,lr}
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
-#endif
-.size sha256_block_data_order,.-sha256_block_data_order
-#if __ARM_MAX_ARCH__>=7
-.arch armv7-a
-.fpu neon
-
-.global sha256_block_data_order_neon
-.type sha256_block_data_order_neon,%function
-.align 4
-sha256_block_data_order_neon:
-.LNEON:
- stmdb sp!,{r4-r12,lr}
-
- sub r11,sp,#16*4+16
- adr r14,.Lsha256_block_data_order
- sub r14,r14,#.Lsha256_block_data_order-K256
- bic r11,r11,#15 @ align for 128-bit stores
- mov r12,sp
- mov sp,r11 @ alloca
- add r2,r1,r2,lsl#6 @ len to point at the end of inp
-
- vld1.8 {q0},[r1]!
- vld1.8 {q1},[r1]!
- vld1.8 {q2},[r1]!
- vld1.8 {q3},[r1]!
- vld1.32 {q8},[r14,:128]!
- vld1.32 {q9},[r14,:128]!
- vld1.32 {q10},[r14,:128]!
- vld1.32 {q11},[r14,:128]!
- vrev32.8 q0,q0 @ yes, even on
- str r0,[sp,#64]
- vrev32.8 q1,q1 @ big-endian
- str r1,[sp,#68]
- mov r1,sp
- vrev32.8 q2,q2
- str r2,[sp,#72]
- vrev32.8 q3,q3
- str r12,[sp,#76] @ save original sp
- vadd.i32 q8,q8,q0
- vadd.i32 q9,q9,q1
- vst1.32 {q8},[r1,:128]!
- vadd.i32 q10,q10,q2
- vst1.32 {q9},[r1,:128]!
- vadd.i32 q11,q11,q3
- vst1.32 {q10},[r1,:128]!
- vst1.32 {q11},[r1,:128]!
-
- ldmia r0,{r4-r11}
- sub r1,r1,#64
- ldr r2,[sp,#0]
- eor r12,r12,r12
- eor r3,r5,r6
- b .L_00_48
-
-.align 4
-.L_00_48:
- vext.8 q8,q0,q1,#4
- add r11,r11,r2
- eor r2,r9,r10
- eor r0,r8,r8,ror#5
- vext.8 q9,q2,q3,#4
- add r4,r4,r12
- and r2,r2,r8
- eor r12,r0,r8,ror#19
- vshr.u32 q10,q8,#7
- eor r0,r4,r4,ror#11
- eor r2,r2,r10
- vadd.i32 q0,q0,q9
- add r11,r11,r12,ror#6
- eor r12,r4,r5
- vshr.u32 q9,q8,#3
- eor r0,r0,r4,ror#20
- add r11,r11,r2
- vsli.32 q10,q8,#25
- ldr r2,[sp,#4]
- and r3,r3,r12
- vshr.u32 q11,q8,#18
- add r7,r7,r11
- add r11,r11,r0,ror#2
- eor r3,r3,r5
- veor q9,q9,q10
- add r10,r10,r2
- vsli.32 q11,q8,#14
- eor r2,r8,r9
- eor r0,r7,r7,ror#5
- vshr.u32 d24,d7,#17
- add r11,r11,r3
- and r2,r2,r7
- veor q9,q9,q11
- eor r3,r0,r7,ror#19
- eor r0,r11,r11,ror#11
- vsli.32 d24,d7,#15
- eor r2,r2,r9
- add r10,r10,r3,ror#6
- vshr.u32 d25,d7,#10
- eor r3,r11,r4
- eor r0,r0,r11,ror#20
- vadd.i32 q0,q0,q9
- add r10,r10,r2
- ldr r2,[sp,#8]
- veor d25,d25,d24
- and r12,r12,r3
- add r6,r6,r10
- vshr.u32 d24,d7,#19
- add r10,r10,r0,ror#2
- eor r12,r12,r4
- vsli.32 d24,d7,#13
- add r9,r9,r2
- eor r2,r7,r8
- veor d25,d25,d24
- eor r0,r6,r6,ror#5
- add r10,r10,r12
- vadd.i32 d0,d0,d25
- and r2,r2,r6
- eor r12,r0,r6,ror#19
- vshr.u32 d24,d0,#17
- eor r0,r10,r10,ror#11
- eor r2,r2,r8
- vsli.32 d24,d0,#15
- add r9,r9,r12,ror#6
- eor r12,r10,r11
- vshr.u32 d25,d0,#10
- eor r0,r0,r10,ror#20
- add r9,r9,r2
- veor d25,d25,d24
- ldr r2,[sp,#12]
- and r3,r3,r12
- vshr.u32 d24,d0,#19
- add r5,r5,r9
- add r9,r9,r0,ror#2
- eor r3,r3,r11
- vld1.32 {q8},[r14,:128]!
- add r8,r8,r2
- vsli.32 d24,d0,#13
- eor r2,r6,r7
- eor r0,r5,r5,ror#5
- veor d25,d25,d24
- add r9,r9,r3
- and r2,r2,r5
- vadd.i32 d1,d1,d25
- eor r3,r0,r5,ror#19
- eor r0,r9,r9,ror#11
- vadd.i32 q8,q8,q0
- eor r2,r2,r7
- add r8,r8,r3,ror#6
- eor r3,r9,r10
- eor r0,r0,r9,ror#20
- add r8,r8,r2
- ldr r2,[sp,#16]
- and r12,r12,r3
- add r4,r4,r8
- vst1.32 {q8},[r1,:128]!
- add r8,r8,r0,ror#2
- eor r12,r12,r10
- vext.8 q8,q1,q2,#4
- add r7,r7,r2
- eor r2,r5,r6
- eor r0,r4,r4,ror#5
- vext.8 q9,q3,q0,#4
- add r8,r8,r12
- and r2,r2,r4
- eor r12,r0,r4,ror#19
- vshr.u32 q10,q8,#7
- eor r0,r8,r8,ror#11
- eor r2,r2,r6
- vadd.i32 q1,q1,q9
- add r7,r7,r12,ror#6
- eor r12,r8,r9
- vshr.u32 q9,q8,#3
- eor r0,r0,r8,ror#20
- add r7,r7,r2
- vsli.32 q10,q8,#25
- ldr r2,[sp,#20]
- and r3,r3,r12
- vshr.u32 q11,q8,#18
- add r11,r11,r7
- add r7,r7,r0,ror#2
- eor r3,r3,r9
- veor q9,q9,q10
- add r6,r6,r2
- vsli.32 q11,q8,#14
- eor r2,r4,r5
- eor r0,r11,r11,ror#5
- vshr.u32 d24,d1,#17
- add r7,r7,r3
- and r2,r2,r11
- veor q9,q9,q11
- eor r3,r0,r11,ror#19
- eor r0,r7,r7,ror#11
- vsli.32 d24,d1,#15
- eor r2,r2,r5
- add r6,r6,r3,ror#6
- vshr.u32 d25,d1,#10
- eor r3,r7,r8
- eor r0,r0,r7,ror#20
- vadd.i32 q1,q1,q9
- add r6,r6,r2
- ldr r2,[sp,#24]
- veor d25,d25,d24
- and r12,r12,r3
- add r10,r10,r6
- vshr.u32 d24,d1,#19
- add r6,r6,r0,ror#2
- eor r12,r12,r8
- vsli.32 d24,d1,#13
- add r5,r5,r2
- eor r2,r11,r4
- veor d25,d25,d24
- eor r0,r10,r10,ror#5
- add r6,r6,r12
- vadd.i32 d2,d2,d25
- and r2,r2,r10
- eor r12,r0,r10,ror#19
- vshr.u32 d24,d2,#17
- eor r0,r6,r6,ror#11
- eor r2,r2,r4
- vsli.32 d24,d2,#15
- add r5,r5,r12,ror#6
- eor r12,r6,r7
- vshr.u32 d25,d2,#10
- eor r0,r0,r6,ror#20
- add r5,r5,r2
- veor d25,d25,d24
- ldr r2,[sp,#28]
- and r3,r3,r12
- vshr.u32 d24,d2,#19
- add r9,r9,r5
- add r5,r5,r0,ror#2
- eor r3,r3,r7
- vld1.32 {q8},[r14,:128]!
- add r4,r4,r2
- vsli.32 d24,d2,#13
- eor r2,r10,r11
- eor r0,r9,r9,ror#5
- veor d25,d25,d24
- add r5,r5,r3
- and r2,r2,r9
- vadd.i32 d3,d3,d25
- eor r3,r0,r9,ror#19
- eor r0,r5,r5,ror#11
- vadd.i32 q8,q8,q1
- eor r2,r2,r11
- add r4,r4,r3,ror#6
- eor r3,r5,r6
- eor r0,r0,r5,ror#20
- add r4,r4,r2
- ldr r2,[sp,#32]
- and r12,r12,r3
- add r8,r8,r4
- vst1.32 {q8},[r1,:128]!
- add r4,r4,r0,ror#2
- eor r12,r12,r6
- vext.8 q8,q2,q3,#4
- add r11,r11,r2
- eor r2,r9,r10
- eor r0,r8,r8,ror#5
- vext.8 q9,q0,q1,#4
- add r4,r4,r12
- and r2,r2,r8
- eor r12,r0,r8,ror#19
- vshr.u32 q10,q8,#7
- eor r0,r4,r4,ror#11
- eor r2,r2,r10
- vadd.i32 q2,q2,q9
- add r11,r11,r12,ror#6
- eor r12,r4,r5
- vshr.u32 q9,q8,#3
- eor r0,r0,r4,ror#20
- add r11,r11,r2
- vsli.32 q10,q8,#25
- ldr r2,[sp,#36]
- and r3,r3,r12
- vshr.u32 q11,q8,#18
- add r7,r7,r11
- add r11,r11,r0,ror#2
- eor r3,r3,r5
- veor q9,q9,q10
- add r10,r10,r2
- vsli.32 q11,q8,#14
- eor r2,r8,r9
- eor r0,r7,r7,ror#5
- vshr.u32 d24,d3,#17
- add r11,r11,r3
- and r2,r2,r7
- veor q9,q9,q11
- eor r3,r0,r7,ror#19
- eor r0,r11,r11,ror#11
- vsli.32 d24,d3,#15
- eor r2,r2,r9
- add r10,r10,r3,ror#6
- vshr.u32 d25,d3,#10
- eor r3,r11,r4
- eor r0,r0,r11,ror#20
- vadd.i32 q2,q2,q9
- add r10,r10,r2
- ldr r2,[sp,#40]
- veor d25,d25,d24
- and r12,r12,r3
- add r6,r6,r10
- vshr.u32 d24,d3,#19
- add r10,r10,r0,ror#2
- eor r12,r12,r4
- vsli.32 d24,d3,#13
- add r9,r9,r2
- eor r2,r7,r8
- veor d25,d25,d24
- eor r0,r6,r6,ror#5
- add r10,r10,r12
- vadd.i32 d4,d4,d25
- and r2,r2,r6
- eor r12,r0,r6,ror#19
- vshr.u32 d24,d4,#17
- eor r0,r10,r10,ror#11
- eor r2,r2,r8
- vsli.32 d24,d4,#15
- add r9,r9,r12,ror#6
- eor r12,r10,r11
- vshr.u32 d25,d4,#10
- eor r0,r0,r10,ror#20
- add r9,r9,r2
- veor d25,d25,d24
- ldr r2,[sp,#44]
- and r3,r3,r12
- vshr.u32 d24,d4,#19
- add r5,r5,r9
- add r9,r9,r0,ror#2
- eor r3,r3,r11
- vld1.32 {q8},[r14,:128]!
- add r8,r8,r2
- vsli.32 d24,d4,#13
- eor r2,r6,r7
- eor r0,r5,r5,ror#5
- veor d25,d25,d24
- add r9,r9,r3
- and r2,r2,r5
- vadd.i32 d5,d5,d25
- eor r3,r0,r5,ror#19
- eor r0,r9,r9,ror#11
- vadd.i32 q8,q8,q2
- eor r2,r2,r7
- add r8,r8,r3,ror#6
- eor r3,r9,r10
- eor r0,r0,r9,ror#20
- add r8,r8,r2
- ldr r2,[sp,#48]
- and r12,r12,r3
- add r4,r4,r8
- vst1.32 {q8},[r1,:128]!
- add r8,r8,r0,ror#2
- eor r12,r12,r10
- vext.8 q8,q3,q0,#4
- add r7,r7,r2
- eor r2,r5,r6
- eor r0,r4,r4,ror#5
- vext.8 q9,q1,q2,#4
- add r8,r8,r12
- and r2,r2,r4
- eor r12,r0,r4,ror#19
- vshr.u32 q10,q8,#7
- eor r0,r8,r8,ror#11
- eor r2,r2,r6
- vadd.i32 q3,q3,q9
- add r7,r7,r12,ror#6
- eor r12,r8,r9
- vshr.u32 q9,q8,#3
- eor r0,r0,r8,ror#20
- add r7,r7,r2
- vsli.32 q10,q8,#25
- ldr r2,[sp,#52]
- and r3,r3,r12
- vshr.u32 q11,q8,#18
- add r11,r11,r7
- add r7,r7,r0,ror#2
- eor r3,r3,r9
- veor q9,q9,q10
- add r6,r6,r2
- vsli.32 q11,q8,#14
- eor r2,r4,r5
- eor r0,r11,r11,ror#5
- vshr.u32 d24,d5,#17
- add r7,r7,r3
- and r2,r2,r11
- veor q9,q9,q11
- eor r3,r0,r11,ror#19
- eor r0,r7,r7,ror#11
- vsli.32 d24,d5,#15
- eor r2,r2,r5
- add r6,r6,r3,ror#6
- vshr.u32 d25,d5,#10
- eor r3,r7,r8
- eor r0,r0,r7,ror#20
- vadd.i32 q3,q3,q9
- add r6,r6,r2
- ldr r2,[sp,#56]
- veor d25,d25,d24
- and r12,r12,r3
- add r10,r10,r6
- vshr.u32 d24,d5,#19
- add r6,r6,r0,ror#2
- eor r12,r12,r8
- vsli.32 d24,d5,#13
- add r5,r5,r2
- eor r2,r11,r4
- veor d25,d25,d24
- eor r0,r10,r10,ror#5
- add r6,r6,r12
- vadd.i32 d6,d6,d25
- and r2,r2,r10
- eor r12,r0,r10,ror#19
- vshr.u32 d24,d6,#17
- eor r0,r6,r6,ror#11
- eor r2,r2,r4
- vsli.32 d24,d6,#15
- add r5,r5,r12,ror#6
- eor r12,r6,r7
- vshr.u32 d25,d6,#10
- eor r0,r0,r6,ror#20
- add r5,r5,r2
- veor d25,d25,d24
- ldr r2,[sp,#60]
- and r3,r3,r12
- vshr.u32 d24,d6,#19
- add r9,r9,r5
- add r5,r5,r0,ror#2
- eor r3,r3,r7
- vld1.32 {q8},[r14,:128]!
- add r4,r4,r2
- vsli.32 d24,d6,#13
- eor r2,r10,r11
- eor r0,r9,r9,ror#5
- veor d25,d25,d24
- add r5,r5,r3
- and r2,r2,r9
- vadd.i32 d7,d7,d25
- eor r3,r0,r9,ror#19
- eor r0,r5,r5,ror#11
- vadd.i32 q8,q8,q3
- eor r2,r2,r11
- add r4,r4,r3,ror#6
- eor r3,r5,r6
- eor r0,r0,r5,ror#20
- add r4,r4,r2
- ldr r2,[r14]
- and r12,r12,r3
- add r8,r8,r4
- vst1.32 {q8},[r1,:128]!
- add r4,r4,r0,ror#2
- eor r12,r12,r6
- teq r2,#0 @ check for K256 terminator
- ldr r2,[sp,#0]
- sub r1,r1,#64
- bne .L_00_48
-
- ldr r1,[sp,#68]
- ldr r0,[sp,#72]
- sub r14,r14,#256 @ rewind r14
- teq r1,r0
- it eq
- subeq r1,r1,#64 @ avoid SEGV
- vld1.8 {q0},[r1]! @ load next input block
- vld1.8 {q1},[r1]!
- vld1.8 {q2},[r1]!
- vld1.8 {q3},[r1]!
- it ne
- strne r1,[sp,#68]
- mov r1,sp
- add r11,r11,r2
- eor r2,r9,r10
- eor r0,r8,r8,ror#5
- add r4,r4,r12
- vld1.32 {q8},[r14,:128]!
- and r2,r2,r8
- eor r12,r0,r8,ror#19
- eor r0,r4,r4,ror#11
- eor r2,r2,r10
- vrev32.8 q0,q0
- add r11,r11,r12,ror#6
- eor r12,r4,r5
- eor r0,r0,r4,ror#20
- add r11,r11,r2
- vadd.i32 q8,q8,q0
- ldr r2,[sp,#4]
- and r3,r3,r12
- add r7,r7,r11
- add r11,r11,r0,ror#2
- eor r3,r3,r5
- add r10,r10,r2
- eor r2,r8,r9
- eor r0,r7,r7,ror#5
- add r11,r11,r3
- and r2,r2,r7
- eor r3,r0,r7,ror#19
- eor r0,r11,r11,ror#11
- eor r2,r2,r9
- add r10,r10,r3,ror#6
- eor r3,r11,r4
- eor r0,r0,r11,ror#20
- add r10,r10,r2
- ldr r2,[sp,#8]
- and r12,r12,r3
- add r6,r6,r10
- add r10,r10,r0,ror#2
- eor r12,r12,r4
- add r9,r9,r2
- eor r2,r7,r8
- eor r0,r6,r6,ror#5
- add r10,r10,r12
- and r2,r2,r6
- eor r12,r0,r6,ror#19
- eor r0,r10,r10,ror#11
- eor r2,r2,r8
- add r9,r9,r12,ror#6
- eor r12,r10,r11
- eor r0,r0,r10,ror#20
- add r9,r9,r2
- ldr r2,[sp,#12]
- and r3,r3,r12
- add r5,r5,r9
- add r9,r9,r0,ror#2
- eor r3,r3,r11
- add r8,r8,r2
- eor r2,r6,r7
- eor r0,r5,r5,ror#5
- add r9,r9,r3
- and r2,r2,r5
- eor r3,r0,r5,ror#19
- eor r0,r9,r9,ror#11
- eor r2,r2,r7
- add r8,r8,r3,ror#6
- eor r3,r9,r10
- eor r0,r0,r9,ror#20
- add r8,r8,r2
- ldr r2,[sp,#16]
- and r12,r12,r3
- add r4,r4,r8
- add r8,r8,r0,ror#2
- eor r12,r12,r10
- vst1.32 {q8},[r1,:128]!
- add r7,r7,r2
- eor r2,r5,r6
- eor r0,r4,r4,ror#5
- add r8,r8,r12
- vld1.32 {q8},[r14,:128]!
- and r2,r2,r4
- eor r12,r0,r4,ror#19
- eor r0,r8,r8,ror#11
- eor r2,r2,r6
- vrev32.8 q1,q1
- add r7,r7,r12,ror#6
- eor r12,r8,r9
- eor r0,r0,r8,ror#20
- add r7,r7,r2
- vadd.i32 q8,q8,q1
- ldr r2,[sp,#20]
- and r3,r3,r12
- add r11,r11,r7
- add r7,r7,r0,ror#2
- eor r3,r3,r9
- add r6,r6,r2
- eor r2,r4,r5
- eor r0,r11,r11,ror#5
- add r7,r7,r3
- and r2,r2,r11
- eor r3,r0,r11,ror#19
- eor r0,r7,r7,ror#11
- eor r2,r2,r5
- add r6,r6,r3,ror#6
- eor r3,r7,r8
- eor r0,r0,r7,ror#20
- add r6,r6,r2
- ldr r2,[sp,#24]
- and r12,r12,r3
- add r10,r10,r6
- add r6,r6,r0,ror#2
- eor r12,r12,r8
- add r5,r5,r2
- eor r2,r11,r4
- eor r0,r10,r10,ror#5
- add r6,r6,r12
- and r2,r2,r10
- eor r12,r0,r10,ror#19
- eor r0,r6,r6,ror#11
- eor r2,r2,r4
- add r5,r5,r12,ror#6
- eor r12,r6,r7
- eor r0,r0,r6,ror#20
- add r5,r5,r2
- ldr r2,[sp,#28]
- and r3,r3,r12
- add r9,r9,r5
- add r5,r5,r0,ror#2
- eor r3,r3,r7
- add r4,r4,r2
- eor r2,r10,r11
- eor r0,r9,r9,ror#5
- add r5,r5,r3
- and r2,r2,r9
- eor r3,r0,r9,ror#19
- eor r0,r5,r5,ror#11
- eor r2,r2,r11
- add r4,r4,r3,ror#6
- eor r3,r5,r6
- eor r0,r0,r5,ror#20
- add r4,r4,r2
- ldr r2,[sp,#32]
- and r12,r12,r3
- add r8,r8,r4
- add r4,r4,r0,ror#2
- eor r12,r12,r6
- vst1.32 {q8},[r1,:128]!
- add r11,r11,r2
- eor r2,r9,r10
- eor r0,r8,r8,ror#5
- add r4,r4,r12
- vld1.32 {q8},[r14,:128]!
- and r2,r2,r8
- eor r12,r0,r8,ror#19
- eor r0,r4,r4,ror#11
- eor r2,r2,r10
- vrev32.8 q2,q2
- add r11,r11,r12,ror#6
- eor r12,r4,r5
- eor r0,r0,r4,ror#20
- add r11,r11,r2
- vadd.i32 q8,q8,q2
- ldr r2,[sp,#36]
- and r3,r3,r12
- add r7,r7,r11
- add r11,r11,r0,ror#2
- eor r3,r3,r5
- add r10,r10,r2
- eor r2,r8,r9
- eor r0,r7,r7,ror#5
- add r11,r11,r3
- and r2,r2,r7
- eor r3,r0,r7,ror#19
- eor r0,r11,r11,ror#11
- eor r2,r2,r9
- add r10,r10,r3,ror#6
- eor r3,r11,r4
- eor r0,r0,r11,ror#20
- add r10,r10,r2
- ldr r2,[sp,#40]
- and r12,r12,r3
- add r6,r6,r10
- add r10,r10,r0,ror#2
- eor r12,r12,r4
- add r9,r9,r2
- eor r2,r7,r8
- eor r0,r6,r6,ror#5
- add r10,r10,r12
- and r2,r2,r6
- eor r12,r0,r6,ror#19
- eor r0,r10,r10,ror#11
- eor r2,r2,r8
- add r9,r9,r12,ror#6
- eor r12,r10,r11
- eor r0,r0,r10,ror#20
- add r9,r9,r2
- ldr r2,[sp,#44]
- and r3,r3,r12
- add r5,r5,r9
- add r9,r9,r0,ror#2
- eor r3,r3,r11
- add r8,r8,r2
- eor r2,r6,r7
- eor r0,r5,r5,ror#5
- add r9,r9,r3
- and r2,r2,r5
- eor r3,r0,r5,ror#19
- eor r0,r9,r9,ror#11
- eor r2,r2,r7
- add r8,r8,r3,ror#6
- eor r3,r9,r10
- eor r0,r0,r9,ror#20
- add r8,r8,r2
- ldr r2,[sp,#48]
- and r12,r12,r3
- add r4,r4,r8
- add r8,r8,r0,ror#2
- eor r12,r12,r10
- vst1.32 {q8},[r1,:128]!
- add r7,r7,r2
- eor r2,r5,r6
- eor r0,r4,r4,ror#5
- add r8,r8,r12
- vld1.32 {q8},[r14,:128]!
- and r2,r2,r4
- eor r12,r0,r4,ror#19
- eor r0,r8,r8,ror#11
- eor r2,r2,r6
- vrev32.8 q3,q3
- add r7,r7,r12,ror#6
- eor r12,r8,r9
- eor r0,r0,r8,ror#20
- add r7,r7,r2
- vadd.i32 q8,q8,q3
- ldr r2,[sp,#52]
- and r3,r3,r12
- add r11,r11,r7
- add r7,r7,r0,ror#2
- eor r3,r3,r9
- add r6,r6,r2
- eor r2,r4,r5
- eor r0,r11,r11,ror#5
- add r7,r7,r3
- and r2,r2,r11
- eor r3,r0,r11,ror#19
- eor r0,r7,r7,ror#11
- eor r2,r2,r5
- add r6,r6,r3,ror#6
- eor r3,r7,r8
- eor r0,r0,r7,ror#20
- add r6,r6,r2
- ldr r2,[sp,#56]
- and r12,r12,r3
- add r10,r10,r6
- add r6,r6,r0,ror#2
- eor r12,r12,r8
- add r5,r5,r2
- eor r2,r11,r4
- eor r0,r10,r10,ror#5
- add r6,r6,r12
- and r2,r2,r10
- eor r12,r0,r10,ror#19
- eor r0,r6,r6,ror#11
- eor r2,r2,r4
- add r5,r5,r12,ror#6
- eor r12,r6,r7
- eor r0,r0,r6,ror#20
- add r5,r5,r2
- ldr r2,[sp,#60]
- and r3,r3,r12
- add r9,r9,r5
- add r5,r5,r0,ror#2
- eor r3,r3,r7
- add r4,r4,r2
- eor r2,r10,r11
- eor r0,r9,r9,ror#5
- add r5,r5,r3
- and r2,r2,r9
- eor r3,r0,r9,ror#19
- eor r0,r5,r5,ror#11
- eor r2,r2,r11
- add r4,r4,r3,ror#6
- eor r3,r5,r6
- eor r0,r0,r5,ror#20
- add r4,r4,r2
- ldr r2,[sp,#64]
- and r12,r12,r3
- add r8,r8,r4
- add r4,r4,r0,ror#2
- eor r12,r12,r6
- vst1.32 {q8},[r1,:128]!
- ldr r0,[r2,#0]
- add r4,r4,r12 @ h+=Maj(a,b,c) from the past
- ldr r12,[r2,#4]
- ldr r3,[r2,#8]
- ldr r1,[r2,#12]
- add r4,r4,r0 @ accumulate
- ldr r0,[r2,#16]
- add r5,r5,r12
- ldr r12,[r2,#20]
- add r6,r6,r3
- ldr r3,[r2,#24]
- add r7,r7,r1
- ldr r1,[r2,#28]
- add r8,r8,r0
- str r4,[r2],#4
- add r9,r9,r12
- str r5,[r2],#4
- add r10,r10,r3
- str r6,[r2],#4
- add r11,r11,r1
- str r7,[r2],#4
- stmia r2,{r8-r11}
-
- ittte ne
- movne r1,sp
- ldrne r2,[sp,#0]
- eorne r12,r12,r12
- ldreq sp,[sp,#76] @ restore original sp
- itt ne
- eorne r3,r5,r6
- bne .L_00_48
-
- ldmia sp!,{r4-r12,pc}
-.size sha256_block_data_order_neon,.-sha256_block_data_order_neon
-#endif
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-
-# ifdef __thumb2__
-# define INST(a,b,c,d) .byte c,d|0xc,a,b
-# else
-# define INST(a,b,c,d) .byte a,b,c,d
-# endif
-
-.type sha256_block_data_order_armv8,%function
-.align 5
-sha256_block_data_order_armv8:
-.LARMv8:
- vld1.32 {q0,q1},[r0]
-# ifdef __thumb2__
- adr r3,.LARMv8
- sub r3,r3,#.LARMv8-K256
-# else
- adrl r3,K256
-# endif
- add r2,r1,r2,lsl#6 @ len to point at the end of inp
-
-.Loop_v8:
- vld1.8 {q8-q9},[r1]!
- vld1.8 {q10-q11},[r1]!
- vld1.32 {q12},[r3]!
- vrev32.8 q8,q8
- vrev32.8 q9,q9
- vrev32.8 q10,q10
- vrev32.8 q11,q11
- vmov q14,q0 @ offload
- vmov q15,q1
- teq r1,r2
- vld1.32 {q13},[r3]!
- vadd.i32 q12,q12,q8
- INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
- vmov q2,q0
- INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
- INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
- INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
- vld1.32 {q12},[r3]!
- vadd.i32 q13,q13,q9
- INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
- vmov q2,q0
- INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
- INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
- INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
- vld1.32 {q13},[r3]!
- vadd.i32 q12,q12,q10
- INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
- vmov q2,q0
- INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
- INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
- INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
- vld1.32 {q12},[r3]!
- vadd.i32 q13,q13,q11
- INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
- vmov q2,q0
- INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
- INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
- INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
- vld1.32 {q13},[r3]!
- vadd.i32 q12,q12,q8
- INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
- vmov q2,q0
- INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
- INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
- INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
- vld1.32 {q12},[r3]!
- vadd.i32 q13,q13,q9
- INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
- vmov q2,q0
- INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
- INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
- INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
- vld1.32 {q13},[r3]!
- vadd.i32 q12,q12,q10
- INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
- vmov q2,q0
- INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
- INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
- INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
- vld1.32 {q12},[r3]!
- vadd.i32 q13,q13,q11
- INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
- vmov q2,q0
- INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
- INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
- INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
- vld1.32 {q13},[r3]!
- vadd.i32 q12,q12,q8
- INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9
- vmov q2,q0
- INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
- INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
- INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11
- vld1.32 {q12},[r3]!
- vadd.i32 q13,q13,q9
- INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10
- vmov q2,q0
- INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
- INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
- INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8
- vld1.32 {q13},[r3]!
- vadd.i32 q12,q12,q10
- INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11
- vmov q2,q0
- INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
- INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
- INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9
- vld1.32 {q12},[r3]!
- vadd.i32 q13,q13,q11
- INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8
- vmov q2,q0
- INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
- INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
- INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10
- vld1.32 {q13},[r3]!
- vadd.i32 q12,q12,q8
- vmov q2,q0
- INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
- INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
-
- vld1.32 {q12},[r3]!
- vadd.i32 q13,q13,q9
- vmov q2,q0
- INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
- INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
-
- vld1.32 {q13},[r3]
- vadd.i32 q12,q12,q10
- sub r3,r3,#256-16 @ rewind
- vmov q2,q0
- INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12
- INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12
-
- vadd.i32 q13,q13,q11
- vmov q2,q0
- INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13
- INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13
-
- vadd.i32 q0,q0,q14
- vadd.i32 q1,q1,q15
- it ne
- bne .Loop_v8
-
- vst1.32 {q0,q1},[r0]
-
- bx lr @ bx lr
-.size sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
-#endif
-.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"
-.align 2
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-.comm OPENSSL_armcap_P,4,4
-#endif
diff --git a/arch/arm/crypto/sha512-core.S_shipped b/arch/arm/crypto/sha512-core.S_shipped
deleted file mode 100644
index 03014624f2ab..000000000000
--- a/arch/arm/crypto/sha512-core.S_shipped
+++ /dev/null
@@ -1,1869 +0,0 @@
-@ SPDX-License-Identifier: GPL-2.0
-
-@ This code is taken from the OpenSSL project but the author (Andy Polyakov)
-@ has relicensed it under the GPLv2. Therefore this program is free software;
-@ you can redistribute it and/or modify it under the terms of the GNU General
-@ Public License version 2 as published by the Free Software Foundation.
-@
-@ The original headers, including the original license headers, are
-@ included below for completeness.
-
-@ ====================================================================
-@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-@ project. The module is, however, dual licensed under OpenSSL and
-@ CRYPTOGAMS licenses depending on where you obtain it. For further
-@ details see https://www.openssl.org/~appro/cryptogams/.
-@ ====================================================================
-
-@ SHA512 block procedure for ARMv4. September 2007.
-
-@ This code is ~4.5 (four and a half) times faster than code generated
-@ by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
-@ Xscale PXA250 core].
-@
-@ July 2010.
-@
-@ Rescheduling for dual-issue pipeline resulted in 6% improvement on
-@ Cortex A8 core and ~40 cycles per processed byte.
-
-@ February 2011.
-@
-@ Profiler-assisted and platform-specific optimization resulted in 7%
-@ improvement on Coxtex A8 core and ~38 cycles per byte.
-
-@ March 2011.
-@
-@ Add NEON implementation. On Cortex A8 it was measured to process
-@ one byte in 23.3 cycles or ~60% faster than integer-only code.
-
-@ August 2012.
-@
-@ Improve NEON performance by 12% on Snapdragon S4. In absolute
-@ terms it's 22.6 cycles per byte, which is disappointing result.
-@ Technical writers asserted that 3-way S4 pipeline can sustain
-@ multiple NEON instructions per cycle, but dual NEON issue could
-@ not be observed, see https://www.openssl.org/~appro/Snapdragon-S4.html
-@ for further details. On side note Cortex-A15 processes one byte in
-@ 16 cycles.
-
-@ Byte order [in]dependence. =========================================
-@
-@ Originally caller was expected to maintain specific *dword* order in
-@ h[0-7], namely with most significant dword at *lower* address, which
-@ was reflected in below two parameters as 0 and 4. Now caller is
-@ expected to maintain native byte order for whole 64-bit values.
-#ifndef __KERNEL__
-# include "arm_arch.h"
-# define VFP_ABI_PUSH vstmdb sp!,{d8-d15}
-# define VFP_ABI_POP vldmia sp!,{d8-d15}
-#else
-# define __ARM_ARCH__ __LINUX_ARM_ARCH__
-# define __ARM_MAX_ARCH__ 7
-# define VFP_ABI_PUSH
-# define VFP_ABI_POP
-#endif
-
-#ifdef __ARMEL__
-# define LO 0
-# define HI 4
-# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1
-#else
-# define HI 0
-# define LO 4
-# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1
-#endif
-
-.text
-#if __ARM_ARCH__<7
-.code 32
-#else
-.syntax unified
-# ifdef __thumb2__
-.thumb
-# else
-.code 32
-# endif
-#endif
-
-.type K512,%object
-.align 5
-K512:
-WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd)
-WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc)
-WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019)
-WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118)
-WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe)
-WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2)
-WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1)
-WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694)
-WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3)
-WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65)
-WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483)
-WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5)
-WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210)
-WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4)
-WORD64(0xc6e00bf3,0x3da88fc2, 0xd5a79147,0x930aa725)
-WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70)
-WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926)
-WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df)
-WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8)
-WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b)
-WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001)
-WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30)
-WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910)
-WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8)
-WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53)
-WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8)
-WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb)
-WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3)
-WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60)
-WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec)
-WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9)
-WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b)
-WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207)
-WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178)
-WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6)
-WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b)
-WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493)
-WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
-WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
-WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
-.size K512,.-K512
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-.LOPENSSL_armcap:
-.word OPENSSL_armcap_P-sha512_block_data_order
-.skip 32-4
-#else
-.skip 32
-#endif
-
-.global sha512_block_data_order
-.type sha512_block_data_order,%function
-sha512_block_data_order:
-.Lsha512_block_data_order:
-#if __ARM_ARCH__<7
- sub r3,pc,#8 @ sha512_block_data_order
-#else
- adr r3,.Lsha512_block_data_order
-#endif
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
- ldr r12,.LOPENSSL_armcap
- ldr r12,[r3,r12] @ OPENSSL_armcap_P
- tst r12,#1
- bne .LNEON
-#endif
- add r2,r1,r2,lsl#7 @ len to point at the end of inp
- stmdb sp!,{r4-r12,lr}
- sub r14,r3,#672 @ K512
- sub sp,sp,#9*8
-
- ldr r7,[r0,#32+LO]
- ldr r8,[r0,#32+HI]
- ldr r9, [r0,#48+LO]
- ldr r10, [r0,#48+HI]
- ldr r11, [r0,#56+LO]
- ldr r12, [r0,#56+HI]
-.Loop:
- str r9, [sp,#48+0]
- str r10, [sp,#48+4]
- str r11, [sp,#56+0]
- str r12, [sp,#56+4]
- ldr r5,[r0,#0+LO]
- ldr r6,[r0,#0+HI]
- ldr r3,[r0,#8+LO]
- ldr r4,[r0,#8+HI]
- ldr r9, [r0,#16+LO]
- ldr r10, [r0,#16+HI]
- ldr r11, [r0,#24+LO]
- ldr r12, [r0,#24+HI]
- str r3,[sp,#8+0]
- str r4,[sp,#8+4]
- str r9, [sp,#16+0]
- str r10, [sp,#16+4]
- str r11, [sp,#24+0]
- str r12, [sp,#24+4]
- ldr r3,[r0,#40+LO]
- ldr r4,[r0,#40+HI]
- str r3,[sp,#40+0]
- str r4,[sp,#40+4]
-
-.L00_15:
-#if __ARM_ARCH__<7
- ldrb r3,[r1,#7]
- ldrb r9, [r1,#6]
- ldrb r10, [r1,#5]
- ldrb r11, [r1,#4]
- ldrb r4,[r1,#3]
- ldrb r12, [r1,#2]
- orr r3,r3,r9,lsl#8
- ldrb r9, [r1,#1]
- orr r3,r3,r10,lsl#16
- ldrb r10, [r1],#8
- orr r3,r3,r11,lsl#24
- orr r4,r4,r12,lsl#8
- orr r4,r4,r9,lsl#16
- orr r4,r4,r10,lsl#24
-#else
- ldr r3,[r1,#4]
- ldr r4,[r1],#8
-#ifdef __ARMEL__
- rev r3,r3
- rev r4,r4
-#endif
-#endif
- @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
- @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
- @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
- mov r9,r7,lsr#14
- str r3,[sp,#64+0]
- mov r10,r8,lsr#14
- str r4,[sp,#64+4]
- eor r9,r9,r8,lsl#18
- ldr r11,[sp,#56+0] @ h.lo
- eor r10,r10,r7,lsl#18
- ldr r12,[sp,#56+4] @ h.hi
- eor r9,r9,r7,lsr#18
- eor r10,r10,r8,lsr#18
- eor r9,r9,r8,lsl#14
- eor r10,r10,r7,lsl#14
- eor r9,r9,r8,lsr#9
- eor r10,r10,r7,lsr#9
- eor r9,r9,r7,lsl#23
- eor r10,r10,r8,lsl#23 @ Sigma1(e)
- adds r3,r3,r9
- ldr r9,[sp,#40+0] @ f.lo
- adc r4,r4,r10 @ T += Sigma1(e)
- ldr r10,[sp,#40+4] @ f.hi
- adds r3,r3,r11
- ldr r11,[sp,#48+0] @ g.lo
- adc r4,r4,r12 @ T += h
- ldr r12,[sp,#48+4] @ g.hi
-
- eor r9,r9,r11
- str r7,[sp,#32+0]
- eor r10,r10,r12
- str r8,[sp,#32+4]
- and r9,r9,r7
- str r5,[sp,#0+0]
- and r10,r10,r8
- str r6,[sp,#0+4]
- eor r9,r9,r11
- ldr r11,[r14,#LO] @ K[i].lo
- eor r10,r10,r12 @ Ch(e,f,g)
- ldr r12,[r14,#HI] @ K[i].hi
-
- adds r3,r3,r9
- ldr r7,[sp,#24+0] @ d.lo
- adc r4,r4,r10 @ T += Ch(e,f,g)
- ldr r8,[sp,#24+4] @ d.hi
- adds r3,r3,r11
- and r9,r11,#0xff
- adc r4,r4,r12 @ T += K[i]
- adds r7,r7,r3
- ldr r11,[sp,#8+0] @ b.lo
- adc r8,r8,r4 @ d += T
- teq r9,#148
-
- ldr r12,[sp,#16+0] @ c.lo
-#if __ARM_ARCH__>=7
- it eq @ Thumb2 thing, sanity check in ARM
-#endif
- orreq r14,r14,#1
- @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
- @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
- @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
- mov r9,r5,lsr#28
- mov r10,r6,lsr#28
- eor r9,r9,r6,lsl#4
- eor r10,r10,r5,lsl#4
- eor r9,r9,r6,lsr#2
- eor r10,r10,r5,lsr#2
- eor r9,r9,r5,lsl#30
- eor r10,r10,r6,lsl#30
- eor r9,r9,r6,lsr#7
- eor r10,r10,r5,lsr#7
- eor r9,r9,r5,lsl#25
- eor r10,r10,r6,lsl#25 @ Sigma0(a)
- adds r3,r3,r9
- and r9,r5,r11
- adc r4,r4,r10 @ T += Sigma0(a)
-
- ldr r10,[sp,#8+4] @ b.hi
- orr r5,r5,r11
- ldr r11,[sp,#16+4] @ c.hi
- and r5,r5,r12
- and r12,r6,r10
- orr r6,r6,r10
- orr r5,r5,r9 @ Maj(a,b,c).lo
- and r6,r6,r11
- adds r5,r5,r3
- orr r6,r6,r12 @ Maj(a,b,c).hi
- sub sp,sp,#8
- adc r6,r6,r4 @ h += T
- tst r14,#1
- add r14,r14,#8
- tst r14,#1
- beq .L00_15
- ldr r9,[sp,#184+0]
- ldr r10,[sp,#184+4]
- bic r14,r14,#1
-.L16_79:
- @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
- @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
- @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7
- mov r3,r9,lsr#1
- ldr r11,[sp,#80+0]
- mov r4,r10,lsr#1
- ldr r12,[sp,#80+4]
- eor r3,r3,r10,lsl#31
- eor r4,r4,r9,lsl#31
- eor r3,r3,r9,lsr#8
- eor r4,r4,r10,lsr#8
- eor r3,r3,r10,lsl#24
- eor r4,r4,r9,lsl#24
- eor r3,r3,r9,lsr#7
- eor r4,r4,r10,lsr#7
- eor r3,r3,r10,lsl#25
-
- @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
- @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
- @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
- mov r9,r11,lsr#19
- mov r10,r12,lsr#19
- eor r9,r9,r12,lsl#13
- eor r10,r10,r11,lsl#13
- eor r9,r9,r12,lsr#29
- eor r10,r10,r11,lsr#29
- eor r9,r9,r11,lsl#3
- eor r10,r10,r12,lsl#3
- eor r9,r9,r11,lsr#6
- eor r10,r10,r12,lsr#6
- ldr r11,[sp,#120+0]
- eor r9,r9,r12,lsl#26
-
- ldr r12,[sp,#120+4]
- adds r3,r3,r9
- ldr r9,[sp,#192+0]
- adc r4,r4,r10
-
- ldr r10,[sp,#192+4]
- adds r3,r3,r11
- adc r4,r4,r12
- adds r3,r3,r9
- adc r4,r4,r10
- @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41))
- @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
- @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
- mov r9,r7,lsr#14
- str r3,[sp,#64+0]
- mov r10,r8,lsr#14
- str r4,[sp,#64+4]
- eor r9,r9,r8,lsl#18
- ldr r11,[sp,#56+0] @ h.lo
- eor r10,r10,r7,lsl#18
- ldr r12,[sp,#56+4] @ h.hi
- eor r9,r9,r7,lsr#18
- eor r10,r10,r8,lsr#18
- eor r9,r9,r8,lsl#14
- eor r10,r10,r7,lsl#14
- eor r9,r9,r8,lsr#9
- eor r10,r10,r7,lsr#9
- eor r9,r9,r7,lsl#23
- eor r10,r10,r8,lsl#23 @ Sigma1(e)
- adds r3,r3,r9
- ldr r9,[sp,#40+0] @ f.lo
- adc r4,r4,r10 @ T += Sigma1(e)
- ldr r10,[sp,#40+4] @ f.hi
- adds r3,r3,r11
- ldr r11,[sp,#48+0] @ g.lo
- adc r4,r4,r12 @ T += h
- ldr r12,[sp,#48+4] @ g.hi
-
- eor r9,r9,r11
- str r7,[sp,#32+0]
- eor r10,r10,r12
- str r8,[sp,#32+4]
- and r9,r9,r7
- str r5,[sp,#0+0]
- and r10,r10,r8
- str r6,[sp,#0+4]
- eor r9,r9,r11
- ldr r11,[r14,#LO] @ K[i].lo
- eor r10,r10,r12 @ Ch(e,f,g)
- ldr r12,[r14,#HI] @ K[i].hi
-
- adds r3,r3,r9
- ldr r7,[sp,#24+0] @ d.lo
- adc r4,r4,r10 @ T += Ch(e,f,g)
- ldr r8,[sp,#24+4] @ d.hi
- adds r3,r3,r11
- and r9,r11,#0xff
- adc r4,r4,r12 @ T += K[i]
- adds r7,r7,r3
- ldr r11,[sp,#8+0] @ b.lo
- adc r8,r8,r4 @ d += T
- teq r9,#23
-
- ldr r12,[sp,#16+0] @ c.lo
-#if __ARM_ARCH__>=7
- it eq @ Thumb2 thing, sanity check in ARM
-#endif
- orreq r14,r14,#1
- @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
- @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
- @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
- mov r9,r5,lsr#28
- mov r10,r6,lsr#28
- eor r9,r9,r6,lsl#4
- eor r10,r10,r5,lsl#4
- eor r9,r9,r6,lsr#2
- eor r10,r10,r5,lsr#2
- eor r9,r9,r5,lsl#30
- eor r10,r10,r6,lsl#30
- eor r9,r9,r6,lsr#7
- eor r10,r10,r5,lsr#7
- eor r9,r9,r5,lsl#25
- eor r10,r10,r6,lsl#25 @ Sigma0(a)
- adds r3,r3,r9
- and r9,r5,r11
- adc r4,r4,r10 @ T += Sigma0(a)
-
- ldr r10,[sp,#8+4] @ b.hi
- orr r5,r5,r11
- ldr r11,[sp,#16+4] @ c.hi
- and r5,r5,r12
- and r12,r6,r10
- orr r6,r6,r10
- orr r5,r5,r9 @ Maj(a,b,c).lo
- and r6,r6,r11
- adds r5,r5,r3
- orr r6,r6,r12 @ Maj(a,b,c).hi
- sub sp,sp,#8
- adc r6,r6,r4 @ h += T
- tst r14,#1
- add r14,r14,#8
-#if __ARM_ARCH__>=7
- ittt eq @ Thumb2 thing, sanity check in ARM
-#endif
- ldreq r9,[sp,#184+0]
- ldreq r10,[sp,#184+4]
- beq .L16_79
- bic r14,r14,#1
-
- ldr r3,[sp,#8+0]
- ldr r4,[sp,#8+4]
- ldr r9, [r0,#0+LO]
- ldr r10, [r0,#0+HI]
- ldr r11, [r0,#8+LO]
- ldr r12, [r0,#8+HI]
- adds r9,r5,r9
- str r9, [r0,#0+LO]
- adc r10,r6,r10
- str r10, [r0,#0+HI]
- adds r11,r3,r11
- str r11, [r0,#8+LO]
- adc r12,r4,r12
- str r12, [r0,#8+HI]
-
- ldr r5,[sp,#16+0]
- ldr r6,[sp,#16+4]
- ldr r3,[sp,#24+0]
- ldr r4,[sp,#24+4]
- ldr r9, [r0,#16+LO]
- ldr r10, [r0,#16+HI]
- ldr r11, [r0,#24+LO]
- ldr r12, [r0,#24+HI]
- adds r9,r5,r9
- str r9, [r0,#16+LO]
- adc r10,r6,r10
- str r10, [r0,#16+HI]
- adds r11,r3,r11
- str r11, [r0,#24+LO]
- adc r12,r4,r12
- str r12, [r0,#24+HI]
-
- ldr r3,[sp,#40+0]
- ldr r4,[sp,#40+4]
- ldr r9, [r0,#32+LO]
- ldr r10, [r0,#32+HI]
- ldr r11, [r0,#40+LO]
- ldr r12, [r0,#40+HI]
- adds r7,r7,r9
- str r7,[r0,#32+LO]
- adc r8,r8,r10
- str r8,[r0,#32+HI]
- adds r11,r3,r11
- str r11, [r0,#40+LO]
- adc r12,r4,r12
- str r12, [r0,#40+HI]
-
- ldr r5,[sp,#48+0]
- ldr r6,[sp,#48+4]
- ldr r3,[sp,#56+0]
- ldr r4,[sp,#56+4]
- ldr r9, [r0,#48+LO]
- ldr r10, [r0,#48+HI]
- ldr r11, [r0,#56+LO]
- ldr r12, [r0,#56+HI]
- adds r9,r5,r9
- str r9, [r0,#48+LO]
- adc r10,r6,r10
- str r10, [r0,#48+HI]
- adds r11,r3,r11
- str r11, [r0,#56+LO]
- adc r12,r4,r12
- str r12, [r0,#56+HI]
-
- add sp,sp,#640
- sub r14,r14,#640
-
- teq r1,r2
- bne .Loop
-
- add sp,sp,#8*9 @ destroy frame
-#if __ARM_ARCH__>=5
- ldmia sp!,{r4-r12,pc}
-#else
- ldmia sp!,{r4-r12,lr}
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- .word 0xe12fff1e @ interoperable with Thumb ISA:-)
-#endif
-.size sha512_block_data_order,.-sha512_block_data_order
-#if __ARM_MAX_ARCH__>=7
-.arch armv7-a
-.fpu neon
-
-.global sha512_block_data_order_neon
-.type sha512_block_data_order_neon,%function
-.align 4
-sha512_block_data_order_neon:
-.LNEON:
- dmb @ errata #451034 on early Cortex A8
- add r2,r1,r2,lsl#7 @ len to point at the end of inp
- VFP_ABI_PUSH
- adr r3,.Lsha512_block_data_order
- sub r3,r3,.Lsha512_block_data_order-K512
- vldmia r0,{d16-d23} @ load context
-.Loop_neon:
- vshr.u64 d24,d20,#14 @ 0
-#if 0<16
- vld1.64 {d0},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d20,#18
-#if 0>0
- vadd.i64 d16,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d20,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d20,#50
- vsli.64 d25,d20,#46
- vmov d29,d20
- vsli.64 d26,d20,#23
-#if 0<16 && defined(__ARMEL__)
- vrev64.8 d0,d0
-#endif
- veor d25,d24
- vbsl d29,d21,d22 @ Ch(e,f,g)
- vshr.u64 d24,d16,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d23
- vshr.u64 d25,d16,#34
- vsli.64 d24,d16,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d16,#39
- vadd.i64 d28,d0
- vsli.64 d25,d16,#30
- veor d30,d16,d17
- vsli.64 d26,d16,#25
- veor d23,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d18,d17 @ Maj(a,b,c)
- veor d23,d26 @ Sigma0(a)
- vadd.i64 d19,d27
- vadd.i64 d30,d27
- @ vadd.i64 d23,d30
- vshr.u64 d24,d19,#14 @ 1
-#if 1<16
- vld1.64 {d1},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d19,#18
-#if 1>0
- vadd.i64 d23,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d19,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d19,#50
- vsli.64 d25,d19,#46
- vmov d29,d19
- vsli.64 d26,d19,#23
-#if 1<16 && defined(__ARMEL__)
- vrev64.8 d1,d1
-#endif
- veor d25,d24
- vbsl d29,d20,d21 @ Ch(e,f,g)
- vshr.u64 d24,d23,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d22
- vshr.u64 d25,d23,#34
- vsli.64 d24,d23,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d23,#39
- vadd.i64 d28,d1
- vsli.64 d25,d23,#30
- veor d30,d23,d16
- vsli.64 d26,d23,#25
- veor d22,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d17,d16 @ Maj(a,b,c)
- veor d22,d26 @ Sigma0(a)
- vadd.i64 d18,d27
- vadd.i64 d30,d27
- @ vadd.i64 d22,d30
- vshr.u64 d24,d18,#14 @ 2
-#if 2<16
- vld1.64 {d2},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d18,#18
-#if 2>0
- vadd.i64 d22,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d18,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d18,#50
- vsli.64 d25,d18,#46
- vmov d29,d18
- vsli.64 d26,d18,#23
-#if 2<16 && defined(__ARMEL__)
- vrev64.8 d2,d2
-#endif
- veor d25,d24
- vbsl d29,d19,d20 @ Ch(e,f,g)
- vshr.u64 d24,d22,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d21
- vshr.u64 d25,d22,#34
- vsli.64 d24,d22,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d22,#39
- vadd.i64 d28,d2
- vsli.64 d25,d22,#30
- veor d30,d22,d23
- vsli.64 d26,d22,#25
- veor d21,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d16,d23 @ Maj(a,b,c)
- veor d21,d26 @ Sigma0(a)
- vadd.i64 d17,d27
- vadd.i64 d30,d27
- @ vadd.i64 d21,d30
- vshr.u64 d24,d17,#14 @ 3
-#if 3<16
- vld1.64 {d3},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d17,#18
-#if 3>0
- vadd.i64 d21,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d17,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d17,#50
- vsli.64 d25,d17,#46
- vmov d29,d17
- vsli.64 d26,d17,#23
-#if 3<16 && defined(__ARMEL__)
- vrev64.8 d3,d3
-#endif
- veor d25,d24
- vbsl d29,d18,d19 @ Ch(e,f,g)
- vshr.u64 d24,d21,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d20
- vshr.u64 d25,d21,#34
- vsli.64 d24,d21,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d21,#39
- vadd.i64 d28,d3
- vsli.64 d25,d21,#30
- veor d30,d21,d22
- vsli.64 d26,d21,#25
- veor d20,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d23,d22 @ Maj(a,b,c)
- veor d20,d26 @ Sigma0(a)
- vadd.i64 d16,d27
- vadd.i64 d30,d27
- @ vadd.i64 d20,d30
- vshr.u64 d24,d16,#14 @ 4
-#if 4<16
- vld1.64 {d4},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d16,#18
-#if 4>0
- vadd.i64 d20,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d16,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d16,#50
- vsli.64 d25,d16,#46
- vmov d29,d16
- vsli.64 d26,d16,#23
-#if 4<16 && defined(__ARMEL__)
- vrev64.8 d4,d4
-#endif
- veor d25,d24
- vbsl d29,d17,d18 @ Ch(e,f,g)
- vshr.u64 d24,d20,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d19
- vshr.u64 d25,d20,#34
- vsli.64 d24,d20,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d20,#39
- vadd.i64 d28,d4
- vsli.64 d25,d20,#30
- veor d30,d20,d21
- vsli.64 d26,d20,#25
- veor d19,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d22,d21 @ Maj(a,b,c)
- veor d19,d26 @ Sigma0(a)
- vadd.i64 d23,d27
- vadd.i64 d30,d27
- @ vadd.i64 d19,d30
- vshr.u64 d24,d23,#14 @ 5
-#if 5<16
- vld1.64 {d5},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d23,#18
-#if 5>0
- vadd.i64 d19,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d23,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d23,#50
- vsli.64 d25,d23,#46
- vmov d29,d23
- vsli.64 d26,d23,#23
-#if 5<16 && defined(__ARMEL__)
- vrev64.8 d5,d5
-#endif
- veor d25,d24
- vbsl d29,d16,d17 @ Ch(e,f,g)
- vshr.u64 d24,d19,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d18
- vshr.u64 d25,d19,#34
- vsli.64 d24,d19,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d19,#39
- vadd.i64 d28,d5
- vsli.64 d25,d19,#30
- veor d30,d19,d20
- vsli.64 d26,d19,#25
- veor d18,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d21,d20 @ Maj(a,b,c)
- veor d18,d26 @ Sigma0(a)
- vadd.i64 d22,d27
- vadd.i64 d30,d27
- @ vadd.i64 d18,d30
- vshr.u64 d24,d22,#14 @ 6
-#if 6<16
- vld1.64 {d6},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d22,#18
-#if 6>0
- vadd.i64 d18,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d22,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d22,#50
- vsli.64 d25,d22,#46
- vmov d29,d22
- vsli.64 d26,d22,#23
-#if 6<16 && defined(__ARMEL__)
- vrev64.8 d6,d6
-#endif
- veor d25,d24
- vbsl d29,d23,d16 @ Ch(e,f,g)
- vshr.u64 d24,d18,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d17
- vshr.u64 d25,d18,#34
- vsli.64 d24,d18,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d18,#39
- vadd.i64 d28,d6
- vsli.64 d25,d18,#30
- veor d30,d18,d19
- vsli.64 d26,d18,#25
- veor d17,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d20,d19 @ Maj(a,b,c)
- veor d17,d26 @ Sigma0(a)
- vadd.i64 d21,d27
- vadd.i64 d30,d27
- @ vadd.i64 d17,d30
- vshr.u64 d24,d21,#14 @ 7
-#if 7<16
- vld1.64 {d7},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d21,#18
-#if 7>0
- vadd.i64 d17,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d21,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d21,#50
- vsli.64 d25,d21,#46
- vmov d29,d21
- vsli.64 d26,d21,#23
-#if 7<16 && defined(__ARMEL__)
- vrev64.8 d7,d7
-#endif
- veor d25,d24
- vbsl d29,d22,d23 @ Ch(e,f,g)
- vshr.u64 d24,d17,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d16
- vshr.u64 d25,d17,#34
- vsli.64 d24,d17,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d17,#39
- vadd.i64 d28,d7
- vsli.64 d25,d17,#30
- veor d30,d17,d18
- vsli.64 d26,d17,#25
- veor d16,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d19,d18 @ Maj(a,b,c)
- veor d16,d26 @ Sigma0(a)
- vadd.i64 d20,d27
- vadd.i64 d30,d27
- @ vadd.i64 d16,d30
- vshr.u64 d24,d20,#14 @ 8
-#if 8<16
- vld1.64 {d8},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d20,#18
-#if 8>0
- vadd.i64 d16,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d20,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d20,#50
- vsli.64 d25,d20,#46
- vmov d29,d20
- vsli.64 d26,d20,#23
-#if 8<16 && defined(__ARMEL__)
- vrev64.8 d8,d8
-#endif
- veor d25,d24
- vbsl d29,d21,d22 @ Ch(e,f,g)
- vshr.u64 d24,d16,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d23
- vshr.u64 d25,d16,#34
- vsli.64 d24,d16,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d16,#39
- vadd.i64 d28,d8
- vsli.64 d25,d16,#30
- veor d30,d16,d17
- vsli.64 d26,d16,#25
- veor d23,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d18,d17 @ Maj(a,b,c)
- veor d23,d26 @ Sigma0(a)
- vadd.i64 d19,d27
- vadd.i64 d30,d27
- @ vadd.i64 d23,d30
- vshr.u64 d24,d19,#14 @ 9
-#if 9<16
- vld1.64 {d9},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d19,#18
-#if 9>0
- vadd.i64 d23,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d19,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d19,#50
- vsli.64 d25,d19,#46
- vmov d29,d19
- vsli.64 d26,d19,#23
-#if 9<16 && defined(__ARMEL__)
- vrev64.8 d9,d9
-#endif
- veor d25,d24
- vbsl d29,d20,d21 @ Ch(e,f,g)
- vshr.u64 d24,d23,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d22
- vshr.u64 d25,d23,#34
- vsli.64 d24,d23,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d23,#39
- vadd.i64 d28,d9
- vsli.64 d25,d23,#30
- veor d30,d23,d16
- vsli.64 d26,d23,#25
- veor d22,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d17,d16 @ Maj(a,b,c)
- veor d22,d26 @ Sigma0(a)
- vadd.i64 d18,d27
- vadd.i64 d30,d27
- @ vadd.i64 d22,d30
- vshr.u64 d24,d18,#14 @ 10
-#if 10<16
- vld1.64 {d10},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d18,#18
-#if 10>0
- vadd.i64 d22,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d18,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d18,#50
- vsli.64 d25,d18,#46
- vmov d29,d18
- vsli.64 d26,d18,#23
-#if 10<16 && defined(__ARMEL__)
- vrev64.8 d10,d10
-#endif
- veor d25,d24
- vbsl d29,d19,d20 @ Ch(e,f,g)
- vshr.u64 d24,d22,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d21
- vshr.u64 d25,d22,#34
- vsli.64 d24,d22,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d22,#39
- vadd.i64 d28,d10
- vsli.64 d25,d22,#30
- veor d30,d22,d23
- vsli.64 d26,d22,#25
- veor d21,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d16,d23 @ Maj(a,b,c)
- veor d21,d26 @ Sigma0(a)
- vadd.i64 d17,d27
- vadd.i64 d30,d27
- @ vadd.i64 d21,d30
- vshr.u64 d24,d17,#14 @ 11
-#if 11<16
- vld1.64 {d11},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d17,#18
-#if 11>0
- vadd.i64 d21,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d17,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d17,#50
- vsli.64 d25,d17,#46
- vmov d29,d17
- vsli.64 d26,d17,#23
-#if 11<16 && defined(__ARMEL__)
- vrev64.8 d11,d11
-#endif
- veor d25,d24
- vbsl d29,d18,d19 @ Ch(e,f,g)
- vshr.u64 d24,d21,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d20
- vshr.u64 d25,d21,#34
- vsli.64 d24,d21,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d21,#39
- vadd.i64 d28,d11
- vsli.64 d25,d21,#30
- veor d30,d21,d22
- vsli.64 d26,d21,#25
- veor d20,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d23,d22 @ Maj(a,b,c)
- veor d20,d26 @ Sigma0(a)
- vadd.i64 d16,d27
- vadd.i64 d30,d27
- @ vadd.i64 d20,d30
- vshr.u64 d24,d16,#14 @ 12
-#if 12<16
- vld1.64 {d12},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d16,#18
-#if 12>0
- vadd.i64 d20,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d16,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d16,#50
- vsli.64 d25,d16,#46
- vmov d29,d16
- vsli.64 d26,d16,#23
-#if 12<16 && defined(__ARMEL__)
- vrev64.8 d12,d12
-#endif
- veor d25,d24
- vbsl d29,d17,d18 @ Ch(e,f,g)
- vshr.u64 d24,d20,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d19
- vshr.u64 d25,d20,#34
- vsli.64 d24,d20,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d20,#39
- vadd.i64 d28,d12
- vsli.64 d25,d20,#30
- veor d30,d20,d21
- vsli.64 d26,d20,#25
- veor d19,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d22,d21 @ Maj(a,b,c)
- veor d19,d26 @ Sigma0(a)
- vadd.i64 d23,d27
- vadd.i64 d30,d27
- @ vadd.i64 d19,d30
- vshr.u64 d24,d23,#14 @ 13
-#if 13<16
- vld1.64 {d13},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d23,#18
-#if 13>0
- vadd.i64 d19,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d23,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d23,#50
- vsli.64 d25,d23,#46
- vmov d29,d23
- vsli.64 d26,d23,#23
-#if 13<16 && defined(__ARMEL__)
- vrev64.8 d13,d13
-#endif
- veor d25,d24
- vbsl d29,d16,d17 @ Ch(e,f,g)
- vshr.u64 d24,d19,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d18
- vshr.u64 d25,d19,#34
- vsli.64 d24,d19,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d19,#39
- vadd.i64 d28,d13
- vsli.64 d25,d19,#30
- veor d30,d19,d20
- vsli.64 d26,d19,#25
- veor d18,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d21,d20 @ Maj(a,b,c)
- veor d18,d26 @ Sigma0(a)
- vadd.i64 d22,d27
- vadd.i64 d30,d27
- @ vadd.i64 d18,d30
- vshr.u64 d24,d22,#14 @ 14
-#if 14<16
- vld1.64 {d14},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d22,#18
-#if 14>0
- vadd.i64 d18,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d22,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d22,#50
- vsli.64 d25,d22,#46
- vmov d29,d22
- vsli.64 d26,d22,#23
-#if 14<16 && defined(__ARMEL__)
- vrev64.8 d14,d14
-#endif
- veor d25,d24
- vbsl d29,d23,d16 @ Ch(e,f,g)
- vshr.u64 d24,d18,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d17
- vshr.u64 d25,d18,#34
- vsli.64 d24,d18,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d18,#39
- vadd.i64 d28,d14
- vsli.64 d25,d18,#30
- veor d30,d18,d19
- vsli.64 d26,d18,#25
- veor d17,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d20,d19 @ Maj(a,b,c)
- veor d17,d26 @ Sigma0(a)
- vadd.i64 d21,d27
- vadd.i64 d30,d27
- @ vadd.i64 d17,d30
- vshr.u64 d24,d21,#14 @ 15
-#if 15<16
- vld1.64 {d15},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d21,#18
-#if 15>0
- vadd.i64 d17,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d21,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d21,#50
- vsli.64 d25,d21,#46
- vmov d29,d21
- vsli.64 d26,d21,#23
-#if 15<16 && defined(__ARMEL__)
- vrev64.8 d15,d15
-#endif
- veor d25,d24
- vbsl d29,d22,d23 @ Ch(e,f,g)
- vshr.u64 d24,d17,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d16
- vshr.u64 d25,d17,#34
- vsli.64 d24,d17,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d17,#39
- vadd.i64 d28,d15
- vsli.64 d25,d17,#30
- veor d30,d17,d18
- vsli.64 d26,d17,#25
- veor d16,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d19,d18 @ Maj(a,b,c)
- veor d16,d26 @ Sigma0(a)
- vadd.i64 d20,d27
- vadd.i64 d30,d27
- @ vadd.i64 d16,d30
- mov r12,#4
-.L16_79_neon:
- subs r12,#1
- vshr.u64 q12,q7,#19
- vshr.u64 q13,q7,#61
- vadd.i64 d16,d30 @ h+=Maj from the past
- vshr.u64 q15,q7,#6
- vsli.64 q12,q7,#45
- vext.8 q14,q0,q1,#8 @ X[i+1]
- vsli.64 q13,q7,#3
- veor q15,q12
- vshr.u64 q12,q14,#1
- veor q15,q13 @ sigma1(X[i+14])
- vshr.u64 q13,q14,#8
- vadd.i64 q0,q15
- vshr.u64 q15,q14,#7
- vsli.64 q12,q14,#63
- vsli.64 q13,q14,#56
- vext.8 q14,q4,q5,#8 @ X[i+9]
- veor q15,q12
- vshr.u64 d24,d20,#14 @ from NEON_00_15
- vadd.i64 q0,q14
- vshr.u64 d25,d20,#18 @ from NEON_00_15
- veor q15,q13 @ sigma0(X[i+1])
- vshr.u64 d26,d20,#41 @ from NEON_00_15
- vadd.i64 q0,q15
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d20,#50
- vsli.64 d25,d20,#46
- vmov d29,d20
- vsli.64 d26,d20,#23
-#if 16<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- veor d25,d24
- vbsl d29,d21,d22 @ Ch(e,f,g)
- vshr.u64 d24,d16,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d23
- vshr.u64 d25,d16,#34
- vsli.64 d24,d16,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d16,#39
- vadd.i64 d28,d0
- vsli.64 d25,d16,#30
- veor d30,d16,d17
- vsli.64 d26,d16,#25
- veor d23,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d18,d17 @ Maj(a,b,c)
- veor d23,d26 @ Sigma0(a)
- vadd.i64 d19,d27
- vadd.i64 d30,d27
- @ vadd.i64 d23,d30
- vshr.u64 d24,d19,#14 @ 17
-#if 17<16
- vld1.64 {d1},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d19,#18
-#if 17>0
- vadd.i64 d23,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d19,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d19,#50
- vsli.64 d25,d19,#46
- vmov d29,d19
- vsli.64 d26,d19,#23
-#if 17<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- veor d25,d24
- vbsl d29,d20,d21 @ Ch(e,f,g)
- vshr.u64 d24,d23,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d22
- vshr.u64 d25,d23,#34
- vsli.64 d24,d23,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d23,#39
- vadd.i64 d28,d1
- vsli.64 d25,d23,#30
- veor d30,d23,d16
- vsli.64 d26,d23,#25
- veor d22,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d17,d16 @ Maj(a,b,c)
- veor d22,d26 @ Sigma0(a)
- vadd.i64 d18,d27
- vadd.i64 d30,d27
- @ vadd.i64 d22,d30
- vshr.u64 q12,q0,#19
- vshr.u64 q13,q0,#61
- vadd.i64 d22,d30 @ h+=Maj from the past
- vshr.u64 q15,q0,#6
- vsli.64 q12,q0,#45
- vext.8 q14,q1,q2,#8 @ X[i+1]
- vsli.64 q13,q0,#3
- veor q15,q12
- vshr.u64 q12,q14,#1
- veor q15,q13 @ sigma1(X[i+14])
- vshr.u64 q13,q14,#8
- vadd.i64 q1,q15
- vshr.u64 q15,q14,#7
- vsli.64 q12,q14,#63
- vsli.64 q13,q14,#56
- vext.8 q14,q5,q6,#8 @ X[i+9]
- veor q15,q12
- vshr.u64 d24,d18,#14 @ from NEON_00_15
- vadd.i64 q1,q14
- vshr.u64 d25,d18,#18 @ from NEON_00_15
- veor q15,q13 @ sigma0(X[i+1])
- vshr.u64 d26,d18,#41 @ from NEON_00_15
- vadd.i64 q1,q15
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d18,#50
- vsli.64 d25,d18,#46
- vmov d29,d18
- vsli.64 d26,d18,#23
-#if 18<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- veor d25,d24
- vbsl d29,d19,d20 @ Ch(e,f,g)
- vshr.u64 d24,d22,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d21
- vshr.u64 d25,d22,#34
- vsli.64 d24,d22,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d22,#39
- vadd.i64 d28,d2
- vsli.64 d25,d22,#30
- veor d30,d22,d23
- vsli.64 d26,d22,#25
- veor d21,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d16,d23 @ Maj(a,b,c)
- veor d21,d26 @ Sigma0(a)
- vadd.i64 d17,d27
- vadd.i64 d30,d27
- @ vadd.i64 d21,d30
- vshr.u64 d24,d17,#14 @ 19
-#if 19<16
- vld1.64 {d3},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d17,#18
-#if 19>0
- vadd.i64 d21,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d17,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d17,#50
- vsli.64 d25,d17,#46
- vmov d29,d17
- vsli.64 d26,d17,#23
-#if 19<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- veor d25,d24
- vbsl d29,d18,d19 @ Ch(e,f,g)
- vshr.u64 d24,d21,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d20
- vshr.u64 d25,d21,#34
- vsli.64 d24,d21,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d21,#39
- vadd.i64 d28,d3
- vsli.64 d25,d21,#30
- veor d30,d21,d22
- vsli.64 d26,d21,#25
- veor d20,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d23,d22 @ Maj(a,b,c)
- veor d20,d26 @ Sigma0(a)
- vadd.i64 d16,d27
- vadd.i64 d30,d27
- @ vadd.i64 d20,d30
- vshr.u64 q12,q1,#19
- vshr.u64 q13,q1,#61
- vadd.i64 d20,d30 @ h+=Maj from the past
- vshr.u64 q15,q1,#6
- vsli.64 q12,q1,#45
- vext.8 q14,q2,q3,#8 @ X[i+1]
- vsli.64 q13,q1,#3
- veor q15,q12
- vshr.u64 q12,q14,#1
- veor q15,q13 @ sigma1(X[i+14])
- vshr.u64 q13,q14,#8
- vadd.i64 q2,q15
- vshr.u64 q15,q14,#7
- vsli.64 q12,q14,#63
- vsli.64 q13,q14,#56
- vext.8 q14,q6,q7,#8 @ X[i+9]
- veor q15,q12
- vshr.u64 d24,d16,#14 @ from NEON_00_15
- vadd.i64 q2,q14
- vshr.u64 d25,d16,#18 @ from NEON_00_15
- veor q15,q13 @ sigma0(X[i+1])
- vshr.u64 d26,d16,#41 @ from NEON_00_15
- vadd.i64 q2,q15
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d16,#50
- vsli.64 d25,d16,#46
- vmov d29,d16
- vsli.64 d26,d16,#23
-#if 20<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- veor d25,d24
- vbsl d29,d17,d18 @ Ch(e,f,g)
- vshr.u64 d24,d20,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d19
- vshr.u64 d25,d20,#34
- vsli.64 d24,d20,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d20,#39
- vadd.i64 d28,d4
- vsli.64 d25,d20,#30
- veor d30,d20,d21
- vsli.64 d26,d20,#25
- veor d19,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d22,d21 @ Maj(a,b,c)
- veor d19,d26 @ Sigma0(a)
- vadd.i64 d23,d27
- vadd.i64 d30,d27
- @ vadd.i64 d19,d30
- vshr.u64 d24,d23,#14 @ 21
-#if 21<16
- vld1.64 {d5},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d23,#18
-#if 21>0
- vadd.i64 d19,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d23,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d23,#50
- vsli.64 d25,d23,#46
- vmov d29,d23
- vsli.64 d26,d23,#23
-#if 21<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- veor d25,d24
- vbsl d29,d16,d17 @ Ch(e,f,g)
- vshr.u64 d24,d19,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d18
- vshr.u64 d25,d19,#34
- vsli.64 d24,d19,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d19,#39
- vadd.i64 d28,d5
- vsli.64 d25,d19,#30
- veor d30,d19,d20
- vsli.64 d26,d19,#25
- veor d18,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d21,d20 @ Maj(a,b,c)
- veor d18,d26 @ Sigma0(a)
- vadd.i64 d22,d27
- vadd.i64 d30,d27
- @ vadd.i64 d18,d30
- vshr.u64 q12,q2,#19
- vshr.u64 q13,q2,#61
- vadd.i64 d18,d30 @ h+=Maj from the past
- vshr.u64 q15,q2,#6
- vsli.64 q12,q2,#45
- vext.8 q14,q3,q4,#8 @ X[i+1]
- vsli.64 q13,q2,#3
- veor q15,q12
- vshr.u64 q12,q14,#1
- veor q15,q13 @ sigma1(X[i+14])
- vshr.u64 q13,q14,#8
- vadd.i64 q3,q15
- vshr.u64 q15,q14,#7
- vsli.64 q12,q14,#63
- vsli.64 q13,q14,#56
- vext.8 q14,q7,q0,#8 @ X[i+9]
- veor q15,q12
- vshr.u64 d24,d22,#14 @ from NEON_00_15
- vadd.i64 q3,q14
- vshr.u64 d25,d22,#18 @ from NEON_00_15
- veor q15,q13 @ sigma0(X[i+1])
- vshr.u64 d26,d22,#41 @ from NEON_00_15
- vadd.i64 q3,q15
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d22,#50
- vsli.64 d25,d22,#46
- vmov d29,d22
- vsli.64 d26,d22,#23
-#if 22<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- veor d25,d24
- vbsl d29,d23,d16 @ Ch(e,f,g)
- vshr.u64 d24,d18,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d17
- vshr.u64 d25,d18,#34
- vsli.64 d24,d18,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d18,#39
- vadd.i64 d28,d6
- vsli.64 d25,d18,#30
- veor d30,d18,d19
- vsli.64 d26,d18,#25
- veor d17,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d20,d19 @ Maj(a,b,c)
- veor d17,d26 @ Sigma0(a)
- vadd.i64 d21,d27
- vadd.i64 d30,d27
- @ vadd.i64 d17,d30
- vshr.u64 d24,d21,#14 @ 23
-#if 23<16
- vld1.64 {d7},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d21,#18
-#if 23>0
- vadd.i64 d17,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d21,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d21,#50
- vsli.64 d25,d21,#46
- vmov d29,d21
- vsli.64 d26,d21,#23
-#if 23<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- veor d25,d24
- vbsl d29,d22,d23 @ Ch(e,f,g)
- vshr.u64 d24,d17,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d16
- vshr.u64 d25,d17,#34
- vsli.64 d24,d17,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d17,#39
- vadd.i64 d28,d7
- vsli.64 d25,d17,#30
- veor d30,d17,d18
- vsli.64 d26,d17,#25
- veor d16,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d19,d18 @ Maj(a,b,c)
- veor d16,d26 @ Sigma0(a)
- vadd.i64 d20,d27
- vadd.i64 d30,d27
- @ vadd.i64 d16,d30
- vshr.u64 q12,q3,#19
- vshr.u64 q13,q3,#61
- vadd.i64 d16,d30 @ h+=Maj from the past
- vshr.u64 q15,q3,#6
- vsli.64 q12,q3,#45
- vext.8 q14,q4,q5,#8 @ X[i+1]
- vsli.64 q13,q3,#3
- veor q15,q12
- vshr.u64 q12,q14,#1
- veor q15,q13 @ sigma1(X[i+14])
- vshr.u64 q13,q14,#8
- vadd.i64 q4,q15
- vshr.u64 q15,q14,#7
- vsli.64 q12,q14,#63
- vsli.64 q13,q14,#56
- vext.8 q14,q0,q1,#8 @ X[i+9]
- veor q15,q12
- vshr.u64 d24,d20,#14 @ from NEON_00_15
- vadd.i64 q4,q14
- vshr.u64 d25,d20,#18 @ from NEON_00_15
- veor q15,q13 @ sigma0(X[i+1])
- vshr.u64 d26,d20,#41 @ from NEON_00_15
- vadd.i64 q4,q15
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d20,#50
- vsli.64 d25,d20,#46
- vmov d29,d20
- vsli.64 d26,d20,#23
-#if 24<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- veor d25,d24
- vbsl d29,d21,d22 @ Ch(e,f,g)
- vshr.u64 d24,d16,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d23
- vshr.u64 d25,d16,#34
- vsli.64 d24,d16,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d16,#39
- vadd.i64 d28,d8
- vsli.64 d25,d16,#30
- veor d30,d16,d17
- vsli.64 d26,d16,#25
- veor d23,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d18,d17 @ Maj(a,b,c)
- veor d23,d26 @ Sigma0(a)
- vadd.i64 d19,d27
- vadd.i64 d30,d27
- @ vadd.i64 d23,d30
- vshr.u64 d24,d19,#14 @ 25
-#if 25<16
- vld1.64 {d9},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d19,#18
-#if 25>0
- vadd.i64 d23,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d19,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d19,#50
- vsli.64 d25,d19,#46
- vmov d29,d19
- vsli.64 d26,d19,#23
-#if 25<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- veor d25,d24
- vbsl d29,d20,d21 @ Ch(e,f,g)
- vshr.u64 d24,d23,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d22
- vshr.u64 d25,d23,#34
- vsli.64 d24,d23,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d23,#39
- vadd.i64 d28,d9
- vsli.64 d25,d23,#30
- veor d30,d23,d16
- vsli.64 d26,d23,#25
- veor d22,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d17,d16 @ Maj(a,b,c)
- veor d22,d26 @ Sigma0(a)
- vadd.i64 d18,d27
- vadd.i64 d30,d27
- @ vadd.i64 d22,d30
- vshr.u64 q12,q4,#19
- vshr.u64 q13,q4,#61
- vadd.i64 d22,d30 @ h+=Maj from the past
- vshr.u64 q15,q4,#6
- vsli.64 q12,q4,#45
- vext.8 q14,q5,q6,#8 @ X[i+1]
- vsli.64 q13,q4,#3
- veor q15,q12
- vshr.u64 q12,q14,#1
- veor q15,q13 @ sigma1(X[i+14])
- vshr.u64 q13,q14,#8
- vadd.i64 q5,q15
- vshr.u64 q15,q14,#7
- vsli.64 q12,q14,#63
- vsli.64 q13,q14,#56
- vext.8 q14,q1,q2,#8 @ X[i+9]
- veor q15,q12
- vshr.u64 d24,d18,#14 @ from NEON_00_15
- vadd.i64 q5,q14
- vshr.u64 d25,d18,#18 @ from NEON_00_15
- veor q15,q13 @ sigma0(X[i+1])
- vshr.u64 d26,d18,#41 @ from NEON_00_15
- vadd.i64 q5,q15
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d18,#50
- vsli.64 d25,d18,#46
- vmov d29,d18
- vsli.64 d26,d18,#23
-#if 26<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- veor d25,d24
- vbsl d29,d19,d20 @ Ch(e,f,g)
- vshr.u64 d24,d22,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d21
- vshr.u64 d25,d22,#34
- vsli.64 d24,d22,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d22,#39
- vadd.i64 d28,d10
- vsli.64 d25,d22,#30
- veor d30,d22,d23
- vsli.64 d26,d22,#25
- veor d21,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d16,d23 @ Maj(a,b,c)
- veor d21,d26 @ Sigma0(a)
- vadd.i64 d17,d27
- vadd.i64 d30,d27
- @ vadd.i64 d21,d30
- vshr.u64 d24,d17,#14 @ 27
-#if 27<16
- vld1.64 {d11},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d17,#18
-#if 27>0
- vadd.i64 d21,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d17,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d17,#50
- vsli.64 d25,d17,#46
- vmov d29,d17
- vsli.64 d26,d17,#23
-#if 27<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- veor d25,d24
- vbsl d29,d18,d19 @ Ch(e,f,g)
- vshr.u64 d24,d21,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d20
- vshr.u64 d25,d21,#34
- vsli.64 d24,d21,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d21,#39
- vadd.i64 d28,d11
- vsli.64 d25,d21,#30
- veor d30,d21,d22
- vsli.64 d26,d21,#25
- veor d20,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d23,d22 @ Maj(a,b,c)
- veor d20,d26 @ Sigma0(a)
- vadd.i64 d16,d27
- vadd.i64 d30,d27
- @ vadd.i64 d20,d30
- vshr.u64 q12,q5,#19
- vshr.u64 q13,q5,#61
- vadd.i64 d20,d30 @ h+=Maj from the past
- vshr.u64 q15,q5,#6
- vsli.64 q12,q5,#45
- vext.8 q14,q6,q7,#8 @ X[i+1]
- vsli.64 q13,q5,#3
- veor q15,q12
- vshr.u64 q12,q14,#1
- veor q15,q13 @ sigma1(X[i+14])
- vshr.u64 q13,q14,#8
- vadd.i64 q6,q15
- vshr.u64 q15,q14,#7
- vsli.64 q12,q14,#63
- vsli.64 q13,q14,#56
- vext.8 q14,q2,q3,#8 @ X[i+9]
- veor q15,q12
- vshr.u64 d24,d16,#14 @ from NEON_00_15
- vadd.i64 q6,q14
- vshr.u64 d25,d16,#18 @ from NEON_00_15
- veor q15,q13 @ sigma0(X[i+1])
- vshr.u64 d26,d16,#41 @ from NEON_00_15
- vadd.i64 q6,q15
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d16,#50
- vsli.64 d25,d16,#46
- vmov d29,d16
- vsli.64 d26,d16,#23
-#if 28<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- veor d25,d24
- vbsl d29,d17,d18 @ Ch(e,f,g)
- vshr.u64 d24,d20,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d19
- vshr.u64 d25,d20,#34
- vsli.64 d24,d20,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d20,#39
- vadd.i64 d28,d12
- vsli.64 d25,d20,#30
- veor d30,d20,d21
- vsli.64 d26,d20,#25
- veor d19,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d22,d21 @ Maj(a,b,c)
- veor d19,d26 @ Sigma0(a)
- vadd.i64 d23,d27
- vadd.i64 d30,d27
- @ vadd.i64 d19,d30
- vshr.u64 d24,d23,#14 @ 29
-#if 29<16
- vld1.64 {d13},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d23,#18
-#if 29>0
- vadd.i64 d19,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d23,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d23,#50
- vsli.64 d25,d23,#46
- vmov d29,d23
- vsli.64 d26,d23,#23
-#if 29<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- veor d25,d24
- vbsl d29,d16,d17 @ Ch(e,f,g)
- vshr.u64 d24,d19,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d18
- vshr.u64 d25,d19,#34
- vsli.64 d24,d19,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d19,#39
- vadd.i64 d28,d13
- vsli.64 d25,d19,#30
- veor d30,d19,d20
- vsli.64 d26,d19,#25
- veor d18,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d21,d20 @ Maj(a,b,c)
- veor d18,d26 @ Sigma0(a)
- vadd.i64 d22,d27
- vadd.i64 d30,d27
- @ vadd.i64 d18,d30
- vshr.u64 q12,q6,#19
- vshr.u64 q13,q6,#61
- vadd.i64 d18,d30 @ h+=Maj from the past
- vshr.u64 q15,q6,#6
- vsli.64 q12,q6,#45
- vext.8 q14,q7,q0,#8 @ X[i+1]
- vsli.64 q13,q6,#3
- veor q15,q12
- vshr.u64 q12,q14,#1
- veor q15,q13 @ sigma1(X[i+14])
- vshr.u64 q13,q14,#8
- vadd.i64 q7,q15
- vshr.u64 q15,q14,#7
- vsli.64 q12,q14,#63
- vsli.64 q13,q14,#56
- vext.8 q14,q3,q4,#8 @ X[i+9]
- veor q15,q12
- vshr.u64 d24,d22,#14 @ from NEON_00_15
- vadd.i64 q7,q14
- vshr.u64 d25,d22,#18 @ from NEON_00_15
- veor q15,q13 @ sigma0(X[i+1])
- vshr.u64 d26,d22,#41 @ from NEON_00_15
- vadd.i64 q7,q15
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d22,#50
- vsli.64 d25,d22,#46
- vmov d29,d22
- vsli.64 d26,d22,#23
-#if 30<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- veor d25,d24
- vbsl d29,d23,d16 @ Ch(e,f,g)
- vshr.u64 d24,d18,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d17
- vshr.u64 d25,d18,#34
- vsli.64 d24,d18,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d18,#39
- vadd.i64 d28,d14
- vsli.64 d25,d18,#30
- veor d30,d18,d19
- vsli.64 d26,d18,#25
- veor d17,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d20,d19 @ Maj(a,b,c)
- veor d17,d26 @ Sigma0(a)
- vadd.i64 d21,d27
- vadd.i64 d30,d27
- @ vadd.i64 d17,d30
- vshr.u64 d24,d21,#14 @ 31
-#if 31<16
- vld1.64 {d15},[r1]! @ handles unaligned
-#endif
- vshr.u64 d25,d21,#18
-#if 31>0
- vadd.i64 d17,d30 @ h+=Maj from the past
-#endif
- vshr.u64 d26,d21,#41
- vld1.64 {d28},[r3,:64]! @ K[i++]
- vsli.64 d24,d21,#50
- vsli.64 d25,d21,#46
- vmov d29,d21
- vsli.64 d26,d21,#23
-#if 31<16 && defined(__ARMEL__)
- vrev64.8 ,
-#endif
- veor d25,d24
- vbsl d29,d22,d23 @ Ch(e,f,g)
- vshr.u64 d24,d17,#28
- veor d26,d25 @ Sigma1(e)
- vadd.i64 d27,d29,d16
- vshr.u64 d25,d17,#34
- vsli.64 d24,d17,#36
- vadd.i64 d27,d26
- vshr.u64 d26,d17,#39
- vadd.i64 d28,d15
- vsli.64 d25,d17,#30
- veor d30,d17,d18
- vsli.64 d26,d17,#25
- veor d16,d24,d25
- vadd.i64 d27,d28
- vbsl d30,d19,d18 @ Maj(a,b,c)
- veor d16,d26 @ Sigma0(a)
- vadd.i64 d20,d27
- vadd.i64 d30,d27
- @ vadd.i64 d16,d30
- bne .L16_79_neon
-
- vadd.i64 d16,d30 @ h+=Maj from the past
- vldmia r0,{d24-d31} @ load context to temp
- vadd.i64 q8,q12 @ vectorized accumulate
- vadd.i64 q9,q13
- vadd.i64 q10,q14
- vadd.i64 q11,q15
- vstmia r0,{d16-d23} @ save context
- teq r1,r2
- sub r3,#640 @ rewind K512
- bne .Loop_neon
-
- VFP_ABI_POP
- bx lr @ .word 0xe12fff1e
-.size sha512_block_data_order_neon,.-sha512_block_data_order_neon
-#endif
-.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"
-.align 2
-#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
-.comm OPENSSL_armcap_P,4,4
-#endif
diff --git a/arch/arm/mach-ixp4xx/common.c b/arch/arm/mach-ixp4xx/common.c
index 000f672a94c9..007a44412e24 100644
--- a/arch/arm/mach-ixp4xx/common.c
+++ b/arch/arm/mach-ixp4xx/common.c
@@ -233,12 +233,38 @@ static struct platform_device *ixp46x_devices[] __initdata = {
unsigned long ixp4xx_exp_bus_size;
EXPORT_SYMBOL(ixp4xx_exp_bus_size);
+static struct platform_device_info ixp_dev_info __initdata = {
+ .name = "ixp4xx_crypto",
+ .id = 0,
+ .dma_mask = DMA_BIT_MASK(32),
+};
+
+/* Register the "ixp4xx_crypto" platform device unless every crypto feature bit is set. */
+static int __init ixp_crypto_register(void)
+{
+	const u32 crypto_units = IXP4XX_FEATURE_HASH | IXP4XX_FEATURE_AES |
+				 IXP4XX_FEATURE_DES;
+	struct platform_device *pdev;
+
+	/* HASH, AES and DES bits all set in EXP_CFG2: nothing usable, bail out. */
+	if ((*IXP4XX_EXP_CFG2 & crypto_units) == crypto_units) {
+		printk(KERN_ERR "ixp_crypto: No HW crypto available\n");
+		return -ENODEV;
+	}
+
+	pdev = platform_device_register_full(&ixp_dev_info);
+	return PTR_ERR_OR_ZERO(pdev);	/* 0 on success, -errno otherwise */
+}
+
void __init ixp4xx_sys_init(void)
{
ixp4xx_exp_bus_size = SZ_16M;
platform_add_devices(ixp4xx_devices, ARRAY_SIZE(ixp4xx_devices));
+ if (IS_ENABLED(CONFIG_CRYPTO_DEV_IXP4XX))
+ ixp_crypto_register();
+
if (cpu_is_ixp46x()) {
int region;
diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile
index d0901e610df3..09a805cc32d7 100644
--- a/arch/arm64/crypto/Makefile
+++ b/arch/arm64/crypto/Makefile
@@ -68,19 +68,13 @@ CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS
$(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE
$(call if_changed_rule,cc_o_c)
-ifdef REGENERATE_ARM64_CRYPTO
quiet_cmd_perlasm = PERLASM $@
cmd_perlasm = $(PERL) $(<) void $(@)
-$(src)/poly1305-core.S_shipped: $(src)/poly1305-armv8.pl
+$(obj)/%-core.S: $(src)/%-armv8.pl
$(call cmd,perlasm)
-$(src)/sha256-core.S_shipped: $(src)/sha512-armv8.pl
+$(obj)/sha256-core.S: $(src)/sha512-armv8.pl
$(call cmd,perlasm)
-$(src)/sha512-core.S_shipped: $(src)/sha512-armv8.pl
- $(call cmd,perlasm)
-
-endif
-
clean-files += poly1305-core.S sha256-core.S sha512-core.S
diff --git a/arch/arm64/crypto/poly1305-core.S_shipped b/arch/arm64/crypto/poly1305-core.S_shipped
deleted file mode 100644
index fb2822abf63a..000000000000
--- a/arch/arm64/crypto/poly1305-core.S_shipped
+++ /dev/null
@@ -1,835 +0,0 @@
-#ifndef __KERNEL__
-# include "arm_arch.h"
-.extern OPENSSL_armcap_P
-#endif
-
-.text
-
-// forward "declarations" are required for Apple
-.globl poly1305_blocks
-.globl poly1305_emit
-
-.globl poly1305_init
-.type poly1305_init,%function
-.align 5
-poly1305_init:
- cmp x1,xzr
- stp xzr,xzr,[x0] // zero hash value
- stp xzr,xzr,[x0,#16] // [along with is_base2_26]
-
- csel x0,xzr,x0,eq
- b.eq .Lno_key
-
-#ifndef __KERNEL__
- adrp x17,OPENSSL_armcap_P
- ldr w17,[x17,#:lo12:OPENSSL_armcap_P]
-#endif
-
- ldp x7,x8,[x1] // load key
- mov x9,#0xfffffffc0fffffff
- movk x9,#0x0fff,lsl#48
-#ifdef __AARCH64EB__
- rev x7,x7 // flip bytes
- rev x8,x8
-#endif
- and x7,x7,x9 // &=0ffffffc0fffffff
- and x9,x9,#-4
- and x8,x8,x9 // &=0ffffffc0ffffffc
- mov w9,#-1
- stp x7,x8,[x0,#32] // save key value
- str w9,[x0,#48] // impossible key power value
-
-#ifndef __KERNEL__
- tst w17,#ARMV7_NEON
-
- adr x12,.Lpoly1305_blocks
- adr x7,.Lpoly1305_blocks_neon
- adr x13,.Lpoly1305_emit
-
- csel x12,x12,x7,eq
-
-# ifdef __ILP32__
- stp w12,w13,[x2]
-# else
- stp x12,x13,[x2]
-# endif
-#endif
- mov x0,#1
-.Lno_key:
- ret
-.size poly1305_init,.-poly1305_init
-
-.type poly1305_blocks,%function
-.align 5
-poly1305_blocks:
-.Lpoly1305_blocks:
- ands x2,x2,#-16
- b.eq .Lno_data
-
- ldp x4,x5,[x0] // load hash value
- ldp x6,x17,[x0,#16] // [along with is_base2_26]
- ldp x7,x8,[x0,#32] // load key value
-
-#ifdef __AARCH64EB__
- lsr x12,x4,#32
- mov w13,w4
- lsr x14,x5,#32
- mov w15,w5
- lsr x16,x6,#32
-#else
- mov w12,w4
- lsr x13,x4,#32
- mov w14,w5
- lsr x15,x5,#32
- mov w16,w6
-#endif
-
- add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64
- lsr x13,x14,#12
- adds x12,x12,x14,lsl#52
- add x13,x13,x15,lsl#14
- adc x13,x13,xzr
- lsr x14,x16,#24
- adds x13,x13,x16,lsl#40
- adc x14,x14,xzr
-
- cmp x17,#0 // is_base2_26?
- add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
- csel x4,x4,x12,eq // choose between radixes
- csel x5,x5,x13,eq
- csel x6,x6,x14,eq
-
-.Loop:
- ldp x10,x11,[x1],#16 // load input
- sub x2,x2,#16
-#ifdef __AARCH64EB__
- rev x10,x10
- rev x11,x11
-#endif
- adds x4,x4,x10 // accumulate input
- adcs x5,x5,x11
-
- mul x12,x4,x7 // h0*r0
- adc x6,x6,x3
- umulh x13,x4,x7
-
- mul x10,x5,x9 // h1*5*r1
- umulh x11,x5,x9
-
- adds x12,x12,x10
- mul x10,x4,x8 // h0*r1
- adc x13,x13,x11
- umulh x14,x4,x8
-
- adds x13,x13,x10
- mul x10,x5,x7 // h1*r0
- adc x14,x14,xzr
- umulh x11,x5,x7
-
- adds x13,x13,x10
- mul x10,x6,x9 // h2*5*r1
- adc x14,x14,x11
- mul x11,x6,x7 // h2*r0
-
- adds x13,x13,x10
- adc x14,x14,x11
-
- and x10,x14,#-4 // final reduction
- and x6,x14,#3
- add x10,x10,x14,lsr#2
- adds x4,x12,x10
- adcs x5,x13,xzr
- adc x6,x6,xzr
-
- cbnz x2,.Loop
-
- stp x4,x5,[x0] // store hash value
- stp x6,xzr,[x0,#16] // [and clear is_base2_26]
-
-.Lno_data:
- ret
-.size poly1305_blocks,.-poly1305_blocks
-
-.type poly1305_emit,%function
-.align 5
-poly1305_emit:
-.Lpoly1305_emit:
- ldp x4,x5,[x0] // load hash base 2^64
- ldp x6,x7,[x0,#16] // [along with is_base2_26]
- ldp x10,x11,[x2] // load nonce
-
-#ifdef __AARCH64EB__
- lsr x12,x4,#32
- mov w13,w4
- lsr x14,x5,#32
- mov w15,w5
- lsr x16,x6,#32
-#else
- mov w12,w4
- lsr x13,x4,#32
- mov w14,w5
- lsr x15,x5,#32
- mov w16,w6
-#endif
-
- add x12,x12,x13,lsl#26 // base 2^26 -> base 2^64
- lsr x13,x14,#12
- adds x12,x12,x14,lsl#52
- add x13,x13,x15,lsl#14
- adc x13,x13,xzr
- lsr x14,x16,#24
- adds x13,x13,x16,lsl#40
- adc x14,x14,xzr
-
- cmp x7,#0 // is_base2_26?
- csel x4,x4,x12,eq // choose between radixes
- csel x5,x5,x13,eq
- csel x6,x6,x14,eq
-
- adds x12,x4,#5 // compare to modulus
- adcs x13,x5,xzr
- adc x14,x6,xzr
-
- tst x14,#-4 // see if it's carried/borrowed
-
- csel x4,x4,x12,eq
- csel x5,x5,x13,eq
-
-#ifdef __AARCH64EB__
- ror x10,x10,#32 // flip nonce words
- ror x11,x11,#32
-#endif
- adds x4,x4,x10 // accumulate nonce
- adc x5,x5,x11
-#ifdef __AARCH64EB__
- rev x4,x4 // flip output bytes
- rev x5,x5
-#endif
- stp x4,x5,[x1] // write result
-
- ret
-.size poly1305_emit,.-poly1305_emit
-.type poly1305_mult,%function
-.align 5
-poly1305_mult:
- mul x12,x4,x7 // h0*r0
- umulh x13,x4,x7
-
- mul x10,x5,x9 // h1*5*r1
- umulh x11,x5,x9
-
- adds x12,x12,x10
- mul x10,x4,x8 // h0*r1
- adc x13,x13,x11
- umulh x14,x4,x8
-
- adds x13,x13,x10
- mul x10,x5,x7 // h1*r0
- adc x14,x14,xzr
- umulh x11,x5,x7
-
- adds x13,x13,x10
- mul x10,x6,x9 // h2*5*r1
- adc x14,x14,x11
- mul x11,x6,x7 // h2*r0
-
- adds x13,x13,x10
- adc x14,x14,x11
-
- and x10,x14,#-4 // final reduction
- and x6,x14,#3
- add x10,x10,x14,lsr#2
- adds x4,x12,x10
- adcs x5,x13,xzr
- adc x6,x6,xzr
-
- ret
-.size poly1305_mult,.-poly1305_mult
-
-.type poly1305_splat,%function
-.align 4
-poly1305_splat:
- and x12,x4,#0x03ffffff // base 2^64 -> base 2^26
- ubfx x13,x4,#26,#26
- extr x14,x5,x4,#52
- and x14,x14,#0x03ffffff
- ubfx x15,x5,#14,#26
- extr x16,x6,x5,#40
-
- str w12,[x0,#16*0] // r0
- add w12,w13,w13,lsl#2 // r1*5
- str w13,[x0,#16*1] // r1
- add w13,w14,w14,lsl#2 // r2*5
- str w12,[x0,#16*2] // s1
- str w14,[x0,#16*3] // r2
- add w14,w15,w15,lsl#2 // r3*5
- str w13,[x0,#16*4] // s2
- str w15,[x0,#16*5] // r3
- add w15,w16,w16,lsl#2 // r4*5
- str w14,[x0,#16*6] // s3
- str w16,[x0,#16*7] // r4
- str w15,[x0,#16*8] // s4
-
- ret
-.size poly1305_splat,.-poly1305_splat
-
-#ifdef __KERNEL__
-.globl poly1305_blocks_neon
-#endif
-.type poly1305_blocks_neon,%function
-.align 5
-poly1305_blocks_neon:
-.Lpoly1305_blocks_neon:
- ldr x17,[x0,#24]
- cmp x2,#128
- b.lo .Lpoly1305_blocks
-
- .inst 0xd503233f // paciasp
- stp x29,x30,[sp,#-80]!
- add x29,sp,#0
-
- stp d8,d9,[sp,#16] // meet ABI requirements
- stp d10,d11,[sp,#32]
- stp d12,d13,[sp,#48]
- stp d14,d15,[sp,#64]
-
- cbz x17,.Lbase2_64_neon
-
- ldp w10,w11,[x0] // load hash value base 2^26
- ldp w12,w13,[x0,#8]
- ldr w14,[x0,#16]
-
- tst x2,#31
- b.eq .Leven_neon
-
- ldp x7,x8,[x0,#32] // load key value
-
- add x4,x10,x11,lsl#26 // base 2^26 -> base 2^64
- lsr x5,x12,#12
- adds x4,x4,x12,lsl#52
- add x5,x5,x13,lsl#14
- adc x5,x5,xzr
- lsr x6,x14,#24
- adds x5,x5,x14,lsl#40
- adc x14,x6,xzr // can be partially reduced...
-
- ldp x12,x13,[x1],#16 // load input
- sub x2,x2,#16
- add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
-
-#ifdef __AARCH64EB__
- rev x12,x12
- rev x13,x13
-#endif
- adds x4,x4,x12 // accumulate input
- adcs x5,x5,x13
- adc x6,x6,x3
-
- bl poly1305_mult
-
- and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
- ubfx x11,x4,#26,#26
- extr x12,x5,x4,#52
- and x12,x12,#0x03ffffff
- ubfx x13,x5,#14,#26
- extr x14,x6,x5,#40
-
- b .Leven_neon
-
-.align 4
-.Lbase2_64_neon:
- ldp x7,x8,[x0,#32] // load key value
-
- ldp x4,x5,[x0] // load hash value base 2^64
- ldr x6,[x0,#16]
-
- tst x2,#31
- b.eq .Linit_neon
-
- ldp x12,x13,[x1],#16 // load input
- sub x2,x2,#16
- add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
-#ifdef __AARCH64EB__
- rev x12,x12
- rev x13,x13
-#endif
- adds x4,x4,x12 // accumulate input
- adcs x5,x5,x13
- adc x6,x6,x3
-
- bl poly1305_mult
-
-.Linit_neon:
- ldr w17,[x0,#48] // first table element
- and x10,x4,#0x03ffffff // base 2^64 -> base 2^26
- ubfx x11,x4,#26,#26
- extr x12,x5,x4,#52
- and x12,x12,#0x03ffffff
- ubfx x13,x5,#14,#26
- extr x14,x6,x5,#40
-
- cmp w17,#-1 // is value impossible?
- b.ne .Leven_neon
-
- fmov d24,x10
- fmov d25,x11
- fmov d26,x12
- fmov d27,x13
- fmov d28,x14
-
- ////////////////////////////////// initialize r^n table
- mov x4,x7 // r^1
- add x9,x8,x8,lsr#2 // s1 = r1 + (r1 >> 2)
- mov x5,x8
- mov x6,xzr
- add x0,x0,#48+12
- bl poly1305_splat
-
- bl poly1305_mult // r^2
- sub x0,x0,#4
- bl poly1305_splat
-
- bl poly1305_mult // r^3
- sub x0,x0,#4
- bl poly1305_splat
-
- bl poly1305_mult // r^4
- sub x0,x0,#4
- bl poly1305_splat
- sub x0,x0,#48 // restore original x0
- b .Ldo_neon
-
-.align 4
-.Leven_neon:
- fmov d24,x10
- fmov d25,x11
- fmov d26,x12
- fmov d27,x13
- fmov d28,x14
-
-.Ldo_neon:
- ldp x8,x12,[x1,#32] // inp[2:3]
- subs x2,x2,#64
- ldp x9,x13,[x1,#48]
- add x16,x1,#96
- adr x17,.Lzeros
-
- lsl x3,x3,#24
- add x15,x0,#48
-
-#ifdef __AARCH64EB__
- rev x8,x8
- rev x12,x12
- rev x9,x9
- rev x13,x13
-#endif
- and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
- and x5,x9,#0x03ffffff
- ubfx x6,x8,#26,#26
- ubfx x7,x9,#26,#26
- add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
- extr x8,x12,x8,#52
- extr x9,x13,x9,#52
- add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
- fmov d14,x4
- and x8,x8,#0x03ffffff
- and x9,x9,#0x03ffffff
- ubfx x10,x12,#14,#26
- ubfx x11,x13,#14,#26
- add x12,x3,x12,lsr#40
- add x13,x3,x13,lsr#40
- add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
- fmov d15,x6
- add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
- add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
- fmov d16,x8
- fmov d17,x10
- fmov d18,x12
-
- ldp x8,x12,[x1],#16 // inp[0:1]
- ldp x9,x13,[x1],#48
-
- ld1 {v0.4s,v1.4s,v2.4s,v3.4s},[x15],#64
- ld1 {v4.4s,v5.4s,v6.4s,v7.4s},[x15],#64
- ld1 {v8.4s},[x15]
-
-#ifdef __AARCH64EB__
- rev x8,x8
- rev x12,x12
- rev x9,x9
- rev x13,x13
-#endif
- and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
- and x5,x9,#0x03ffffff
- ubfx x6,x8,#26,#26
- ubfx x7,x9,#26,#26
- add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
- extr x8,x12,x8,#52
- extr x9,x13,x9,#52
- add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
- fmov d9,x4
- and x8,x8,#0x03ffffff
- and x9,x9,#0x03ffffff
- ubfx x10,x12,#14,#26
- ubfx x11,x13,#14,#26
- add x12,x3,x12,lsr#40
- add x13,x3,x13,lsr#40
- add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
- fmov d10,x6
- add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
- add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
- movi v31.2d,#-1
- fmov d11,x8
- fmov d12,x10
- fmov d13,x12
- ushr v31.2d,v31.2d,#38
-
- b.ls .Lskip_loop
-
-.align 4
-.Loop_neon:
- ////////////////////////////////////////////////////////////////
- // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2
- // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^3+inp[7]*r
- // ___________________/
- // ((inp[0]*r^4+inp[2]*r^2+inp[4])*r^4+inp[6]*r^2+inp[8])*r^2
- // ((inp[1]*r^4+inp[3]*r^2+inp[5])*r^4+inp[7]*r^2+inp[9])*r
- // ___________________/ ____________________/
- //
- // Note that we start with inp[2:3]*r^2. This is because it
- // doesn't depend on reduction in previous iteration.
- ////////////////////////////////////////////////////////////////
- // d4 = h0*r4 + h1*r3 + h2*r2 + h3*r1 + h4*r0
- // d3 = h0*r3 + h1*r2 + h2*r1 + h3*r0 + h4*5*r4
- // d2 = h0*r2 + h1*r1 + h2*r0 + h3*5*r4 + h4*5*r3
- // d1 = h0*r1 + h1*r0 + h2*5*r4 + h3*5*r3 + h4*5*r2
- // d0 = h0*r0 + h1*5*r4 + h2*5*r3 + h3*5*r2 + h4*5*r1
-
- subs x2,x2,#64
- umull v23.2d,v14.2s,v7.s[2]
- csel x16,x17,x16,lo
- umull v22.2d,v14.2s,v5.s[2]
- umull v21.2d,v14.2s,v3.s[2]
- ldp x8,x12,[x16],#16 // inp[2:3] (or zero)
- umull v20.2d,v14.2s,v1.s[2]
- ldp x9,x13,[x16],#48
- umull v19.2d,v14.2s,v0.s[2]
-#ifdef __AARCH64EB__
- rev x8,x8
- rev x12,x12
- rev x9,x9
- rev x13,x13
-#endif
-
- umlal v23.2d,v15.2s,v5.s[2]
- and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
- umlal v22.2d,v15.2s,v3.s[2]
- and x5,x9,#0x03ffffff
- umlal v21.2d,v15.2s,v1.s[2]
- ubfx x6,x8,#26,#26
- umlal v20.2d,v15.2s,v0.s[2]
- ubfx x7,x9,#26,#26
- umlal v19.2d,v15.2s,v8.s[2]
- add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
-
- umlal v23.2d,v16.2s,v3.s[2]
- extr x8,x12,x8,#52
- umlal v22.2d,v16.2s,v1.s[2]
- extr x9,x13,x9,#52
- umlal v21.2d,v16.2s,v0.s[2]
- add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
- umlal v20.2d,v16.2s,v8.s[2]
- fmov d14,x4
- umlal v19.2d,v16.2s,v6.s[2]
- and x8,x8,#0x03ffffff
-
- umlal v23.2d,v17.2s,v1.s[2]
- and x9,x9,#0x03ffffff
- umlal v22.2d,v17.2s,v0.s[2]
- ubfx x10,x12,#14,#26
- umlal v21.2d,v17.2s,v8.s[2]
- ubfx x11,x13,#14,#26
- umlal v20.2d,v17.2s,v6.s[2]
- add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
- umlal v19.2d,v17.2s,v4.s[2]
- fmov d15,x6
-
- add v11.2s,v11.2s,v26.2s
- add x12,x3,x12,lsr#40
- umlal v23.2d,v18.2s,v0.s[2]
- add x13,x3,x13,lsr#40
- umlal v22.2d,v18.2s,v8.s[2]
- add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
- umlal v21.2d,v18.2s,v6.s[2]
- add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
- umlal v20.2d,v18.2s,v4.s[2]
- fmov d16,x8
- umlal v19.2d,v18.2s,v2.s[2]
- fmov d17,x10
-
- ////////////////////////////////////////////////////////////////
- // (hash+inp[0:1])*r^4 and accumulate
-
- add v9.2s,v9.2s,v24.2s
- fmov d18,x12
- umlal v22.2d,v11.2s,v1.s[0]
- ldp x8,x12,[x1],#16 // inp[0:1]
- umlal v19.2d,v11.2s,v6.s[0]
- ldp x9,x13,[x1],#48
- umlal v23.2d,v11.2s,v3.s[0]
- umlal v20.2d,v11.2s,v8.s[0]
- umlal v21.2d,v11.2s,v0.s[0]
-#ifdef __AARCH64EB__
- rev x8,x8
- rev x12,x12
- rev x9,x9
- rev x13,x13
-#endif
-
- add v10.2s,v10.2s,v25.2s
- umlal v22.2d,v9.2s,v5.s[0]
- umlal v23.2d,v9.2s,v7.s[0]
- and x4,x8,#0x03ffffff // base 2^64 -> base 2^26
- umlal v21.2d,v9.2s,v3.s[0]
- and x5,x9,#0x03ffffff
- umlal v19.2d,v9.2s,v0.s[0]
- ubfx x6,x8,#26,#26
- umlal v20.2d,v9.2s,v1.s[0]
- ubfx x7,x9,#26,#26
-
- add v12.2s,v12.2s,v27.2s
- add x4,x4,x5,lsl#32 // bfi x4,x5,#32,#32
- umlal v22.2d,v10.2s,v3.s[0]
- extr x8,x12,x8,#52
- umlal v23.2d,v10.2s,v5.s[0]
- extr x9,x13,x9,#52
- umlal v19.2d,v10.2s,v8.s[0]
- add x6,x6,x7,lsl#32 // bfi x6,x7,#32,#32
- umlal v21.2d,v10.2s,v1.s[0]
- fmov d9,x4
- umlal v20.2d,v10.2s,v0.s[0]
- and x8,x8,#0x03ffffff
-
- add v13.2s,v13.2s,v28.2s
- and x9,x9,#0x03ffffff
- umlal v22.2d,v12.2s,v0.s[0]
- ubfx x10,x12,#14,#26
- umlal v19.2d,v12.2s,v4.s[0]
- ubfx x11,x13,#14,#26
- umlal v23.2d,v12.2s,v1.s[0]
- add x8,x8,x9,lsl#32 // bfi x8,x9,#32,#32
- umlal v20.2d,v12.2s,v6.s[0]
- fmov d10,x6
- umlal v21.2d,v12.2s,v8.s[0]
- add x12,x3,x12,lsr#40
-
- umlal v22.2d,v13.2s,v8.s[0]
- add x13,x3,x13,lsr#40
- umlal v19.2d,v13.2s,v2.s[0]
- add x10,x10,x11,lsl#32 // bfi x10,x11,#32,#32
- umlal v23.2d,v13.2s,v0.s[0]
- add x12,x12,x13,lsl#32 // bfi x12,x13,#32,#32
- umlal v20.2d,v13.2s,v4.s[0]
- fmov d11,x8
- umlal v21.2d,v13.2s,v6.s[0]
- fmov d12,x10
- fmov d13,x12
-
- /////////////////////////////////////////////////////////////////
- // lazy reduction as discussed in "NEON crypto" by D.J. Bernstein
- // and P. Schwabe
- //
- // [see discussion in poly1305-armv4 module]
-
- ushr v29.2d,v22.2d,#26
- xtn v27.2s,v22.2d
- ushr v30.2d,v19.2d,#26
- and v19.16b,v19.16b,v31.16b
- add v23.2d,v23.2d,v29.2d // h3 -> h4
- bic v27.2s,#0xfc,lsl#24 // &=0x03ffffff
- add v20.2d,v20.2d,v30.2d // h0 -> h1
-
- ushr v29.2d,v23.2d,#26
- xtn v28.2s,v23.2d
- ushr v30.2d,v20.2d,#26
- xtn v25.2s,v20.2d
- bic v28.2s,#0xfc,lsl#24
- add v21.2d,v21.2d,v30.2d // h1 -> h2
-
- add v19.2d,v19.2d,v29.2d
- shl v29.2d,v29.2d,#2
- shrn v30.2s,v21.2d,#26
- xtn v26.2s,v21.2d
- add v19.2d,v19.2d,v29.2d // h4 -> h0
- bic v25.2s,#0xfc,lsl#24
- add v27.2s,v27.2s,v30.2s // h2 -> h3
- bic v26.2s,#0xfc,lsl#24
-
- shrn v29.2s,v19.2d,#26
- xtn v24.2s,v19.2d
- ushr v30.2s,v27.2s,#26
- bic v27.2s,#0xfc,lsl#24
- bic v24.2s,#0xfc,lsl#24
- add v25.2s,v25.2s,v29.2s // h0 -> h1
- add v28.2s,v28.2s,v30.2s // h3 -> h4
-
- b.hi .Loop_neon
-
-.Lskip_loop:
- dup v16.2d,v16.d[0]
- add v11.2s,v11.2s,v26.2s
-
- ////////////////////////////////////////////////////////////////
- // multiply (inp[0:1]+hash) or inp[2:3] by r^2:r^1
-
- adds x2,x2,#32
- b.ne .Long_tail
-
- dup v16.2d,v11.d[0]
- add v14.2s,v9.2s,v24.2s
- add v17.2s,v12.2s,v27.2s
- add v15.2s,v10.2s,v25.2s
- add v18.2s,v13.2s,v28.2s
-
-.Long_tail:
- dup v14.2d,v14.d[0]
- umull2 v19.2d,v16.4s,v6.4s
- umull2 v22.2d,v16.4s,v1.4s
- umull2 v23.2d,v16.4s,v3.4s
- umull2 v21.2d,v16.4s,v0.4s
- umull2 v20.2d,v16.4s,v8.4s
-
- dup v15.2d,v15.d[0]
- umlal2 v19.2d,v14.4s,v0.4s
- umlal2 v21.2d,v14.4s,v3.4s
- umlal2 v22.2d,v14.4s,v5.4s
- umlal2 v23.2d,v14.4s,v7.4s
- umlal2 v20.2d,v14.4s,v1.4s
-
- dup v17.2d,v17.d[0]
- umlal2 v19.2d,v15.4s,v8.4s
- umlal2 v22.2d,v15.4s,v3.4s
- umlal2 v21.2d,v15.4s,v1.4s
- umlal2 v23.2d,v15.4s,v5.4s
- umlal2 v20.2d,v15.4s,v0.4s
-
- dup v18.2d,v18.d[0]
- umlal2 v22.2d,v17.4s,v0.4s
- umlal2 v23.2d,v17.4s,v1.4s
- umlal2 v19.2d,v17.4s,v4.4s
- umlal2 v20.2d,v17.4s,v6.4s
- umlal2 v21.2d,v17.4s,v8.4s
-
- umlal2 v22.2d,v18.4s,v8.4s
- umlal2 v19.2d,v18.4s,v2.4s
- umlal2 v23.2d,v18.4s,v0.4s
- umlal2 v20.2d,v18.4s,v4.4s
- umlal2 v21.2d,v18.4s,v6.4s
-
- b.eq .Lshort_tail
-
- ////////////////////////////////////////////////////////////////
- // (hash+inp[0:1])*r^4:r^3 and accumulate
-
- add v9.2s,v9.2s,v24.2s
- umlal v22.2d,v11.2s,v1.2s
- umlal v19.2d,v11.2s,v6.2s
- umlal v23.2d,v11.2s,v3.2s
- umlal v20.2d,v11.2s,v8.2s
- umlal v21.2d,v11.2s,v0.2s
-
- add v10.2s,v10.2s,v25.2s
- umlal v22.2d,v9.2s,v5.2s
- umlal v19.2d,v9.2s,v0.2s
- umlal v23.2d,v9.2s,v7.2s
- umlal v20.2d,v9.2s,v1.2s
- umlal v21.2d,v9.2s,v3.2s
-
- add v12.2s,v12.2s,v27.2s
- umlal v22.2d,v10.2s,v3.2s
- umlal v19.2d,v10.2s,v8.2s
- umlal v23.2d,v10.2s,v5.2s
- umlal v20.2d,v10.2s,v0.2s
- umlal v21.2d,v10.2s,v1.2s
-
- add v13.2s,v13.2s,v28.2s
- umlal v22.2d,v12.2s,v0.2s
- umlal v19.2d,v12.2s,v4.2s
- umlal v23.2d,v12.2s,v1.2s
- umlal v20.2d,v12.2s,v6.2s
- umlal v21.2d,v12.2s,v8.2s
-
- umlal v22.2d,v13.2s,v8.2s
- umlal v19.2d,v13.2s,v2.2s
- umlal v23.2d,v13.2s,v0.2s
- umlal v20.2d,v13.2s,v4.2s
- umlal v21.2d,v13.2s,v6.2s
-
-.Lshort_tail:
- ////////////////////////////////////////////////////////////////
- // horizontal add
-
- addp v22.2d,v22.2d,v22.2d
- ldp d8,d9,[sp,#16] // meet ABI requirements
- addp v19.2d,v19.2d,v19.2d
- ldp d10,d11,[sp,#32]
- addp v23.2d,v23.2d,v23.2d
- ldp d12,d13,[sp,#48]
- addp v20.2d,v20.2d,v20.2d
- ldp d14,d15,[sp,#64]
- addp v21.2d,v21.2d,v21.2d
- ldr x30,[sp,#8]
-
- ////////////////////////////////////////////////////////////////
- // lazy reduction, but without narrowing
-
- ushr v29.2d,v22.2d,#26
- and v22.16b,v22.16b,v31.16b
- ushr v30.2d,v19.2d,#26
- and v19.16b,v19.16b,v31.16b
-
- add v23.2d,v23.2d,v29.2d // h3 -> h4
- add v20.2d,v20.2d,v30.2d // h0 -> h1
-
- ushr v29.2d,v23.2d,#26
- and v23.16b,v23.16b,v31.16b
- ushr v30.2d,v20.2d,#26
- and v20.16b,v20.16b,v31.16b
- add v21.2d,v21.2d,v30.2d // h1 -> h2
-
- add v19.2d,v19.2d,v29.2d
- shl v29.2d,v29.2d,#2
- ushr v30.2d,v21.2d,#26
- and v21.16b,v21.16b,v31.16b
- add v19.2d,v19.2d,v29.2d // h4 -> h0
- add v22.2d,v22.2d,v30.2d // h2 -> h3
-
- ushr v29.2d,v19.2d,#26
- and v19.16b,v19.16b,v31.16b
- ushr v30.2d,v22.2d,#26
- and v22.16b,v22.16b,v31.16b
- add v20.2d,v20.2d,v29.2d // h0 -> h1
- add v23.2d,v23.2d,v30.2d // h3 -> h4
-
- ////////////////////////////////////////////////////////////////
- // write the result, can be partially reduced
-
- st4 {v19.s,v20.s,v21.s,v22.s}[0],[x0],#16
- mov x4,#1
- st1 {v23.s}[0],[x0]
- str x4,[x0,#8] // set is_base2_26
-
- ldr x29,[sp],#80
- .inst 0xd50323bf // autiasp
- ret
-.size poly1305_blocks_neon,.-poly1305_blocks_neon
-
-.align 5
-.Lzeros:
-.long 0,0,0,0,0,0,0,0
-.asciz "Poly1305 for ARMv8, CRYPTOGAMS by @dot-asm"
-.align 2
-#if !defined(__KERNEL__) && !defined(_WIN64)
-.comm OPENSSL_armcap_P,4,4
-.hidden OPENSSL_armcap_P
-#endif
diff --git a/arch/arm64/crypto/sha256-core.S_shipped b/arch/arm64/crypto/sha256-core.S_shipped
deleted file mode 100644
index 7c7ce2e3bad6..000000000000
--- a/arch/arm64/crypto/sha256-core.S_shipped
+++ /dev/null
@@ -1,2069 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-// This code is taken from the OpenSSL project but the author (Andy Polyakov)
-// has relicensed it under the GPLv2. Therefore this program is free software;
-// you can redistribute it and/or modify it under the terms of the GNU General
-// Public License version 2 as published by the Free Software Foundation.
-//
-// The original headers, including the original license headers, are
-// included below for completeness.
-
-// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
-//
-// Licensed under the OpenSSL license (the "License"). You may not use
-// this file except in compliance with the License. You can obtain a copy
-// in the file LICENSE in the source distribution or at
-// https://www.openssl.org/source/license.html
-
-// ====================================================================
-// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-// project. The module is, however, dual licensed under OpenSSL and
-// CRYPTOGAMS licenses depending on where you obtain it. For further
-// details see http://www.openssl.org/~appro/cryptogams/.
-// ====================================================================
-//
-// SHA256/512 for ARMv8.
-//
-// Performance in cycles per processed byte and improvement coefficient
-// over code generated with "default" compiler:
-//
-// SHA256-hw SHA256(*) SHA512
-// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**))
-// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***))
-// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***))
-// Denver 2.01 10.5 (+26%) 6.70 (+8%)
-// X-Gene 20.0 (+100%) 12.8 (+300%(***))
-// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
-//
-// (*) Software SHA256 results are of lesser relevance, presented
-// mostly for informational purposes.
-// (**) The result is a trade-off: it's possible to improve it by
-// 10% (or by 1 cycle per round), but at the cost of 20% loss
-// on Cortex-A53 (or by 4 cycles per round).
-// (***) Super-impressive coefficients over gcc-generated code are
-// indication of some compiler "pathology", most notably code
-// generated with -mgeneral-regs-only is significanty faster
-// and the gap is only 40-90%.
-//
-// October 2016.
-//
-// Originally it was reckoned that it makes no sense to implement NEON
-// version of SHA256 for 64-bit processors. This is because performance
-// improvement on most wide-spread Cortex-A5x processors was observed
-// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
-// observed that 32-bit NEON SHA256 performs significantly better than
-// 64-bit scalar version on *some* of the more recent processors. As
-// result 64-bit NEON version of SHA256 was added to provide best
-// all-round performance. For example it executes ~30% faster on X-Gene
-// and Mongoose. [For reference, NEON version of SHA512 is bound to
-// deliver much less improvement, likely *negative* on Cortex-A5x.
-// Which is why NEON support is limited to SHA256.]
-
-#ifndef __KERNEL__
-# include "arm_arch.h"
-#endif
-
-.text
-
-.extern OPENSSL_armcap_P
-.globl sha256_block_data_order
-.type sha256_block_data_order,%function
-.align 6
-sha256_block_data_order:
-#ifndef __KERNEL__
-# ifdef __ILP32__
- ldrsw x16,.LOPENSSL_armcap_P
-# else
- ldr x16,.LOPENSSL_armcap_P
-# endif
- adr x17,.LOPENSSL_armcap_P
- add x16,x16,x17
- ldr w16,[x16]
- tst w16,#ARMV8_SHA256
- b.ne .Lv8_entry
- tst w16,#ARMV7_NEON
- b.ne .Lneon_entry
-#endif
- stp x29,x30,[sp,#-128]!
- add x29,sp,#0
-
- stp x19,x20,[sp,#16]
- stp x21,x22,[sp,#32]
- stp x23,x24,[sp,#48]
- stp x25,x26,[sp,#64]
- stp x27,x28,[sp,#80]
- sub sp,sp,#4*4
-
- ldp w20,w21,[x0] // load context
- ldp w22,w23,[x0,#2*4]
- ldp w24,w25,[x0,#4*4]
- add x2,x1,x2,lsl#6 // end of input
- ldp w26,w27,[x0,#6*4]
- adr x30,.LK256
- stp x0,x2,[x29,#96]
-
-.Loop:
- ldp w3,w4,[x1],#2*4
- ldr w19,[x30],#4 // *K++
- eor w28,w21,w22 // magic seed
- str x1,[x29,#112]
-#ifndef __AARCH64EB__
- rev w3,w3 // 0
-#endif
- ror w16,w24,#6
- add w27,w27,w19 // h+=K[i]
- eor w6,w24,w24,ror#14
- and w17,w25,w24
- bic w19,w26,w24
- add w27,w27,w3 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w20,w21 // a^b, b^c in next round
- eor w16,w16,w6,ror#11 // Sigma1(e)
- ror w6,w20,#2
- add w27,w27,w17 // h+=Ch(e,f,g)
- eor w17,w20,w20,ror#9
- add w27,w27,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w23,w23,w27 // d+=h
- eor w28,w28,w21 // Maj(a,b,c)
- eor w17,w6,w17,ror#13 // Sigma0(a)
- add w27,w27,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w27,w27,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w4,w4 // 1
-#endif
- ldp w5,w6,[x1],#2*4
- add w27,w27,w17 // h+=Sigma0(a)
- ror w16,w23,#6
- add w26,w26,w28 // h+=K[i]
- eor w7,w23,w23,ror#14
- and w17,w24,w23
- bic w28,w25,w23
- add w26,w26,w4 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w27,w20 // a^b, b^c in next round
- eor w16,w16,w7,ror#11 // Sigma1(e)
- ror w7,w27,#2
- add w26,w26,w17 // h+=Ch(e,f,g)
- eor w17,w27,w27,ror#9
- add w26,w26,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w22,w22,w26 // d+=h
- eor w19,w19,w20 // Maj(a,b,c)
- eor w17,w7,w17,ror#13 // Sigma0(a)
- add w26,w26,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w26,w26,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w5,w5 // 2
-#endif
- add w26,w26,w17 // h+=Sigma0(a)
- ror w16,w22,#6
- add w25,w25,w19 // h+=K[i]
- eor w8,w22,w22,ror#14
- and w17,w23,w22
- bic w19,w24,w22
- add w25,w25,w5 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w26,w27 // a^b, b^c in next round
- eor w16,w16,w8,ror#11 // Sigma1(e)
- ror w8,w26,#2
- add w25,w25,w17 // h+=Ch(e,f,g)
- eor w17,w26,w26,ror#9
- add w25,w25,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w21,w21,w25 // d+=h
- eor w28,w28,w27 // Maj(a,b,c)
- eor w17,w8,w17,ror#13 // Sigma0(a)
- add w25,w25,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w25,w25,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w6,w6 // 3
-#endif
- ldp w7,w8,[x1],#2*4
- add w25,w25,w17 // h+=Sigma0(a)
- ror w16,w21,#6
- add w24,w24,w28 // h+=K[i]
- eor w9,w21,w21,ror#14
- and w17,w22,w21
- bic w28,w23,w21
- add w24,w24,w6 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w25,w26 // a^b, b^c in next round
- eor w16,w16,w9,ror#11 // Sigma1(e)
- ror w9,w25,#2
- add w24,w24,w17 // h+=Ch(e,f,g)
- eor w17,w25,w25,ror#9
- add w24,w24,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w20,w20,w24 // d+=h
- eor w19,w19,w26 // Maj(a,b,c)
- eor w17,w9,w17,ror#13 // Sigma0(a)
- add w24,w24,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w24,w24,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w7,w7 // 4
-#endif
- add w24,w24,w17 // h+=Sigma0(a)
- ror w16,w20,#6
- add w23,w23,w19 // h+=K[i]
- eor w10,w20,w20,ror#14
- and w17,w21,w20
- bic w19,w22,w20
- add w23,w23,w7 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w24,w25 // a^b, b^c in next round
- eor w16,w16,w10,ror#11 // Sigma1(e)
- ror w10,w24,#2
- add w23,w23,w17 // h+=Ch(e,f,g)
- eor w17,w24,w24,ror#9
- add w23,w23,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w27,w27,w23 // d+=h
- eor w28,w28,w25 // Maj(a,b,c)
- eor w17,w10,w17,ror#13 // Sigma0(a)
- add w23,w23,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w23,w23,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w8,w8 // 5
-#endif
- ldp w9,w10,[x1],#2*4
- add w23,w23,w17 // h+=Sigma0(a)
- ror w16,w27,#6
- add w22,w22,w28 // h+=K[i]
- eor w11,w27,w27,ror#14
- and w17,w20,w27
- bic w28,w21,w27
- add w22,w22,w8 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w23,w24 // a^b, b^c in next round
- eor w16,w16,w11,ror#11 // Sigma1(e)
- ror w11,w23,#2
- add w22,w22,w17 // h+=Ch(e,f,g)
- eor w17,w23,w23,ror#9
- add w22,w22,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w26,w26,w22 // d+=h
- eor w19,w19,w24 // Maj(a,b,c)
- eor w17,w11,w17,ror#13 // Sigma0(a)
- add w22,w22,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w22,w22,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w9,w9 // 6
-#endif
- add w22,w22,w17 // h+=Sigma0(a)
- ror w16,w26,#6
- add w21,w21,w19 // h+=K[i]
- eor w12,w26,w26,ror#14
- and w17,w27,w26
- bic w19,w20,w26
- add w21,w21,w9 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w22,w23 // a^b, b^c in next round
- eor w16,w16,w12,ror#11 // Sigma1(e)
- ror w12,w22,#2
- add w21,w21,w17 // h+=Ch(e,f,g)
- eor w17,w22,w22,ror#9
- add w21,w21,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w25,w25,w21 // d+=h
- eor w28,w28,w23 // Maj(a,b,c)
- eor w17,w12,w17,ror#13 // Sigma0(a)
- add w21,w21,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w21,w21,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w10,w10 // 7
-#endif
- ldp w11,w12,[x1],#2*4
- add w21,w21,w17 // h+=Sigma0(a)
- ror w16,w25,#6
- add w20,w20,w28 // h+=K[i]
- eor w13,w25,w25,ror#14
- and w17,w26,w25
- bic w28,w27,w25
- add w20,w20,w10 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w21,w22 // a^b, b^c in next round
- eor w16,w16,w13,ror#11 // Sigma1(e)
- ror w13,w21,#2
- add w20,w20,w17 // h+=Ch(e,f,g)
- eor w17,w21,w21,ror#9
- add w20,w20,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w24,w24,w20 // d+=h
- eor w19,w19,w22 // Maj(a,b,c)
- eor w17,w13,w17,ror#13 // Sigma0(a)
- add w20,w20,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w20,w20,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w11,w11 // 8
-#endif
- add w20,w20,w17 // h+=Sigma0(a)
- ror w16,w24,#6
- add w27,w27,w19 // h+=K[i]
- eor w14,w24,w24,ror#14
- and w17,w25,w24
- bic w19,w26,w24
- add w27,w27,w11 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w20,w21 // a^b, b^c in next round
- eor w16,w16,w14,ror#11 // Sigma1(e)
- ror w14,w20,#2
- add w27,w27,w17 // h+=Ch(e,f,g)
- eor w17,w20,w20,ror#9
- add w27,w27,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w23,w23,w27 // d+=h
- eor w28,w28,w21 // Maj(a,b,c)
- eor w17,w14,w17,ror#13 // Sigma0(a)
- add w27,w27,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w27,w27,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w12,w12 // 9
-#endif
- ldp w13,w14,[x1],#2*4
- add w27,w27,w17 // h+=Sigma0(a)
- ror w16,w23,#6
- add w26,w26,w28 // h+=K[i]
- eor w15,w23,w23,ror#14
- and w17,w24,w23
- bic w28,w25,w23
- add w26,w26,w12 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w27,w20 // a^b, b^c in next round
- eor w16,w16,w15,ror#11 // Sigma1(e)
- ror w15,w27,#2
- add w26,w26,w17 // h+=Ch(e,f,g)
- eor w17,w27,w27,ror#9
- add w26,w26,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w22,w22,w26 // d+=h
- eor w19,w19,w20 // Maj(a,b,c)
- eor w17,w15,w17,ror#13 // Sigma0(a)
- add w26,w26,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w26,w26,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w13,w13 // 10
-#endif
- add w26,w26,w17 // h+=Sigma0(a)
- ror w16,w22,#6
- add w25,w25,w19 // h+=K[i]
- eor w0,w22,w22,ror#14
- and w17,w23,w22
- bic w19,w24,w22
- add w25,w25,w13 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w26,w27 // a^b, b^c in next round
- eor w16,w16,w0,ror#11 // Sigma1(e)
- ror w0,w26,#2
- add w25,w25,w17 // h+=Ch(e,f,g)
- eor w17,w26,w26,ror#9
- add w25,w25,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w21,w21,w25 // d+=h
- eor w28,w28,w27 // Maj(a,b,c)
- eor w17,w0,w17,ror#13 // Sigma0(a)
- add w25,w25,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w25,w25,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w14,w14 // 11
-#endif
- ldp w15,w0,[x1],#2*4
- add w25,w25,w17 // h+=Sigma0(a)
- str w6,[sp,#12]
- ror w16,w21,#6
- add w24,w24,w28 // h+=K[i]
- eor w6,w21,w21,ror#14
- and w17,w22,w21
- bic w28,w23,w21
- add w24,w24,w14 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w25,w26 // a^b, b^c in next round
- eor w16,w16,w6,ror#11 // Sigma1(e)
- ror w6,w25,#2
- add w24,w24,w17 // h+=Ch(e,f,g)
- eor w17,w25,w25,ror#9
- add w24,w24,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w20,w20,w24 // d+=h
- eor w19,w19,w26 // Maj(a,b,c)
- eor w17,w6,w17,ror#13 // Sigma0(a)
- add w24,w24,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w24,w24,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w15,w15 // 12
-#endif
- add w24,w24,w17 // h+=Sigma0(a)
- str w7,[sp,#0]
- ror w16,w20,#6
- add w23,w23,w19 // h+=K[i]
- eor w7,w20,w20,ror#14
- and w17,w21,w20
- bic w19,w22,w20
- add w23,w23,w15 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w24,w25 // a^b, b^c in next round
- eor w16,w16,w7,ror#11 // Sigma1(e)
- ror w7,w24,#2
- add w23,w23,w17 // h+=Ch(e,f,g)
- eor w17,w24,w24,ror#9
- add w23,w23,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w27,w27,w23 // d+=h
- eor w28,w28,w25 // Maj(a,b,c)
- eor w17,w7,w17,ror#13 // Sigma0(a)
- add w23,w23,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w23,w23,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w0,w0 // 13
-#endif
- ldp w1,w2,[x1]
- add w23,w23,w17 // h+=Sigma0(a)
- str w8,[sp,#4]
- ror w16,w27,#6
- add w22,w22,w28 // h+=K[i]
- eor w8,w27,w27,ror#14
- and w17,w20,w27
- bic w28,w21,w27
- add w22,w22,w0 // h+=X[i]
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w23,w24 // a^b, b^c in next round
- eor w16,w16,w8,ror#11 // Sigma1(e)
- ror w8,w23,#2
- add w22,w22,w17 // h+=Ch(e,f,g)
- eor w17,w23,w23,ror#9
- add w22,w22,w16 // h+=Sigma1(e)
- and w19,w19,w28 // (b^c)&=(a^b)
- add w26,w26,w22 // d+=h
- eor w19,w19,w24 // Maj(a,b,c)
- eor w17,w8,w17,ror#13 // Sigma0(a)
- add w22,w22,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- //add w22,w22,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w1,w1 // 14
-#endif
- ldr w6,[sp,#12]
- add w22,w22,w17 // h+=Sigma0(a)
- str w9,[sp,#8]
- ror w16,w26,#6
- add w21,w21,w19 // h+=K[i]
- eor w9,w26,w26,ror#14
- and w17,w27,w26
- bic w19,w20,w26
- add w21,w21,w1 // h+=X[i]
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w22,w23 // a^b, b^c in next round
- eor w16,w16,w9,ror#11 // Sigma1(e)
- ror w9,w22,#2
- add w21,w21,w17 // h+=Ch(e,f,g)
- eor w17,w22,w22,ror#9
- add w21,w21,w16 // h+=Sigma1(e)
- and w28,w28,w19 // (b^c)&=(a^b)
- add w25,w25,w21 // d+=h
- eor w28,w28,w23 // Maj(a,b,c)
- eor w17,w9,w17,ror#13 // Sigma0(a)
- add w21,w21,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- //add w21,w21,w17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev w2,w2 // 15
-#endif
- ldr w7,[sp,#0]
- add w21,w21,w17 // h+=Sigma0(a)
- str w10,[sp,#12]
- ror w16,w25,#6
- add w20,w20,w28 // h+=K[i]
- ror w9,w4,#7
- and w17,w26,w25
- ror w8,w1,#17
- bic w28,w27,w25
- ror w10,w21,#2
- add w20,w20,w2 // h+=X[i]
- eor w16,w16,w25,ror#11
- eor w9,w9,w4,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w21,w22 // a^b, b^c in next round
- eor w16,w16,w25,ror#25 // Sigma1(e)
- eor w10,w10,w21,ror#13
- add w20,w20,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w8,w8,w1,ror#19
- eor w9,w9,w4,lsr#3 // sigma0(X[i+1])
- add w20,w20,w16 // h+=Sigma1(e)
- eor w19,w19,w22 // Maj(a,b,c)
- eor w17,w10,w21,ror#22 // Sigma0(a)
- eor w8,w8,w1,lsr#10 // sigma1(X[i+14])
- add w3,w3,w12
- add w24,w24,w20 // d+=h
- add w20,w20,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w3,w3,w9
- add w20,w20,w17 // h+=Sigma0(a)
- add w3,w3,w8
-.Loop_16_xx:
- ldr w8,[sp,#4]
- str w11,[sp,#0]
- ror w16,w24,#6
- add w27,w27,w19 // h+=K[i]
- ror w10,w5,#7
- and w17,w25,w24
- ror w9,w2,#17
- bic w19,w26,w24
- ror w11,w20,#2
- add w27,w27,w3 // h+=X[i]
- eor w16,w16,w24,ror#11
- eor w10,w10,w5,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w20,w21 // a^b, b^c in next round
- eor w16,w16,w24,ror#25 // Sigma1(e)
- eor w11,w11,w20,ror#13
- add w27,w27,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w9,w9,w2,ror#19
- eor w10,w10,w5,lsr#3 // sigma0(X[i+1])
- add w27,w27,w16 // h+=Sigma1(e)
- eor w28,w28,w21 // Maj(a,b,c)
- eor w17,w11,w20,ror#22 // Sigma0(a)
- eor w9,w9,w2,lsr#10 // sigma1(X[i+14])
- add w4,w4,w13
- add w23,w23,w27 // d+=h
- add w27,w27,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w4,w4,w10
- add w27,w27,w17 // h+=Sigma0(a)
- add w4,w4,w9
- ldr w9,[sp,#8]
- str w12,[sp,#4]
- ror w16,w23,#6
- add w26,w26,w28 // h+=K[i]
- ror w11,w6,#7
- and w17,w24,w23
- ror w10,w3,#17
- bic w28,w25,w23
- ror w12,w27,#2
- add w26,w26,w4 // h+=X[i]
- eor w16,w16,w23,ror#11
- eor w11,w11,w6,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w27,w20 // a^b, b^c in next round
- eor w16,w16,w23,ror#25 // Sigma1(e)
- eor w12,w12,w27,ror#13
- add w26,w26,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w10,w10,w3,ror#19
- eor w11,w11,w6,lsr#3 // sigma0(X[i+1])
- add w26,w26,w16 // h+=Sigma1(e)
- eor w19,w19,w20 // Maj(a,b,c)
- eor w17,w12,w27,ror#22 // Sigma0(a)
- eor w10,w10,w3,lsr#10 // sigma1(X[i+14])
- add w5,w5,w14
- add w22,w22,w26 // d+=h
- add w26,w26,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w5,w5,w11
- add w26,w26,w17 // h+=Sigma0(a)
- add w5,w5,w10
- ldr w10,[sp,#12]
- str w13,[sp,#8]
- ror w16,w22,#6
- add w25,w25,w19 // h+=K[i]
- ror w12,w7,#7
- and w17,w23,w22
- ror w11,w4,#17
- bic w19,w24,w22
- ror w13,w26,#2
- add w25,w25,w5 // h+=X[i]
- eor w16,w16,w22,ror#11
- eor w12,w12,w7,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w26,w27 // a^b, b^c in next round
- eor w16,w16,w22,ror#25 // Sigma1(e)
- eor w13,w13,w26,ror#13
- add w25,w25,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w11,w11,w4,ror#19
- eor w12,w12,w7,lsr#3 // sigma0(X[i+1])
- add w25,w25,w16 // h+=Sigma1(e)
- eor w28,w28,w27 // Maj(a,b,c)
- eor w17,w13,w26,ror#22 // Sigma0(a)
- eor w11,w11,w4,lsr#10 // sigma1(X[i+14])
- add w6,w6,w15
- add w21,w21,w25 // d+=h
- add w25,w25,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w6,w6,w12
- add w25,w25,w17 // h+=Sigma0(a)
- add w6,w6,w11
- ldr w11,[sp,#0]
- str w14,[sp,#12]
- ror w16,w21,#6
- add w24,w24,w28 // h+=K[i]
- ror w13,w8,#7
- and w17,w22,w21
- ror w12,w5,#17
- bic w28,w23,w21
- ror w14,w25,#2
- add w24,w24,w6 // h+=X[i]
- eor w16,w16,w21,ror#11
- eor w13,w13,w8,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w25,w26 // a^b, b^c in next round
- eor w16,w16,w21,ror#25 // Sigma1(e)
- eor w14,w14,w25,ror#13
- add w24,w24,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w12,w12,w5,ror#19
- eor w13,w13,w8,lsr#3 // sigma0(X[i+1])
- add w24,w24,w16 // h+=Sigma1(e)
- eor w19,w19,w26 // Maj(a,b,c)
- eor w17,w14,w25,ror#22 // Sigma0(a)
- eor w12,w12,w5,lsr#10 // sigma1(X[i+14])
- add w7,w7,w0
- add w20,w20,w24 // d+=h
- add w24,w24,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w7,w7,w13
- add w24,w24,w17 // h+=Sigma0(a)
- add w7,w7,w12
- ldr w12,[sp,#4]
- str w15,[sp,#0]
- ror w16,w20,#6
- add w23,w23,w19 // h+=K[i]
- ror w14,w9,#7
- and w17,w21,w20
- ror w13,w6,#17
- bic w19,w22,w20
- ror w15,w24,#2
- add w23,w23,w7 // h+=X[i]
- eor w16,w16,w20,ror#11
- eor w14,w14,w9,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w24,w25 // a^b, b^c in next round
- eor w16,w16,w20,ror#25 // Sigma1(e)
- eor w15,w15,w24,ror#13
- add w23,w23,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w13,w13,w6,ror#19
- eor w14,w14,w9,lsr#3 // sigma0(X[i+1])
- add w23,w23,w16 // h+=Sigma1(e)
- eor w28,w28,w25 // Maj(a,b,c)
- eor w17,w15,w24,ror#22 // Sigma0(a)
- eor w13,w13,w6,lsr#10 // sigma1(X[i+14])
- add w8,w8,w1
- add w27,w27,w23 // d+=h
- add w23,w23,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w8,w8,w14
- add w23,w23,w17 // h+=Sigma0(a)
- add w8,w8,w13
- ldr w13,[sp,#8]
- str w0,[sp,#4]
- ror w16,w27,#6
- add w22,w22,w28 // h+=K[i]
- ror w15,w10,#7
- and w17,w20,w27
- ror w14,w7,#17
- bic w28,w21,w27
- ror w0,w23,#2
- add w22,w22,w8 // h+=X[i]
- eor w16,w16,w27,ror#11
- eor w15,w15,w10,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w23,w24 // a^b, b^c in next round
- eor w16,w16,w27,ror#25 // Sigma1(e)
- eor w0,w0,w23,ror#13
- add w22,w22,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w14,w14,w7,ror#19
- eor w15,w15,w10,lsr#3 // sigma0(X[i+1])
- add w22,w22,w16 // h+=Sigma1(e)
- eor w19,w19,w24 // Maj(a,b,c)
- eor w17,w0,w23,ror#22 // Sigma0(a)
- eor w14,w14,w7,lsr#10 // sigma1(X[i+14])
- add w9,w9,w2
- add w26,w26,w22 // d+=h
- add w22,w22,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w9,w9,w15
- add w22,w22,w17 // h+=Sigma0(a)
- add w9,w9,w14
- ldr w14,[sp,#12]
- str w1,[sp,#8]
- ror w16,w26,#6
- add w21,w21,w19 // h+=K[i]
- ror w0,w11,#7
- and w17,w27,w26
- ror w15,w8,#17
- bic w19,w20,w26
- ror w1,w22,#2
- add w21,w21,w9 // h+=X[i]
- eor w16,w16,w26,ror#11
- eor w0,w0,w11,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w22,w23 // a^b, b^c in next round
- eor w16,w16,w26,ror#25 // Sigma1(e)
- eor w1,w1,w22,ror#13
- add w21,w21,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w15,w15,w8,ror#19
- eor w0,w0,w11,lsr#3 // sigma0(X[i+1])
- add w21,w21,w16 // h+=Sigma1(e)
- eor w28,w28,w23 // Maj(a,b,c)
- eor w17,w1,w22,ror#22 // Sigma0(a)
- eor w15,w15,w8,lsr#10 // sigma1(X[i+14])
- add w10,w10,w3
- add w25,w25,w21 // d+=h
- add w21,w21,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w10,w10,w0
- add w21,w21,w17 // h+=Sigma0(a)
- add w10,w10,w15
- ldr w15,[sp,#0]
- str w2,[sp,#12]
- ror w16,w25,#6
- add w20,w20,w28 // h+=K[i]
- ror w1,w12,#7
- and w17,w26,w25
- ror w0,w9,#17
- bic w28,w27,w25
- ror w2,w21,#2
- add w20,w20,w10 // h+=X[i]
- eor w16,w16,w25,ror#11
- eor w1,w1,w12,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w21,w22 // a^b, b^c in next round
- eor w16,w16,w25,ror#25 // Sigma1(e)
- eor w2,w2,w21,ror#13
- add w20,w20,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w0,w0,w9,ror#19
- eor w1,w1,w12,lsr#3 // sigma0(X[i+1])
- add w20,w20,w16 // h+=Sigma1(e)
- eor w19,w19,w22 // Maj(a,b,c)
- eor w17,w2,w21,ror#22 // Sigma0(a)
- eor w0,w0,w9,lsr#10 // sigma1(X[i+14])
- add w11,w11,w4
- add w24,w24,w20 // d+=h
- add w20,w20,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w11,w11,w1
- add w20,w20,w17 // h+=Sigma0(a)
- add w11,w11,w0
- ldr w0,[sp,#4]
- str w3,[sp,#0]
- ror w16,w24,#6
- add w27,w27,w19 // h+=K[i]
- ror w2,w13,#7
- and w17,w25,w24
- ror w1,w10,#17
- bic w19,w26,w24
- ror w3,w20,#2
- add w27,w27,w11 // h+=X[i]
- eor w16,w16,w24,ror#11
- eor w2,w2,w13,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w20,w21 // a^b, b^c in next round
- eor w16,w16,w24,ror#25 // Sigma1(e)
- eor w3,w3,w20,ror#13
- add w27,w27,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w1,w1,w10,ror#19
- eor w2,w2,w13,lsr#3 // sigma0(X[i+1])
- add w27,w27,w16 // h+=Sigma1(e)
- eor w28,w28,w21 // Maj(a,b,c)
- eor w17,w3,w20,ror#22 // Sigma0(a)
- eor w1,w1,w10,lsr#10 // sigma1(X[i+14])
- add w12,w12,w5
- add w23,w23,w27 // d+=h
- add w27,w27,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w12,w12,w2
- add w27,w27,w17 // h+=Sigma0(a)
- add w12,w12,w1
- ldr w1,[sp,#8]
- str w4,[sp,#4]
- ror w16,w23,#6
- add w26,w26,w28 // h+=K[i]
- ror w3,w14,#7
- and w17,w24,w23
- ror w2,w11,#17
- bic w28,w25,w23
- ror w4,w27,#2
- add w26,w26,w12 // h+=X[i]
- eor w16,w16,w23,ror#11
- eor w3,w3,w14,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w27,w20 // a^b, b^c in next round
- eor w16,w16,w23,ror#25 // Sigma1(e)
- eor w4,w4,w27,ror#13
- add w26,w26,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w2,w2,w11,ror#19
- eor w3,w3,w14,lsr#3 // sigma0(X[i+1])
- add w26,w26,w16 // h+=Sigma1(e)
- eor w19,w19,w20 // Maj(a,b,c)
- eor w17,w4,w27,ror#22 // Sigma0(a)
- eor w2,w2,w11,lsr#10 // sigma1(X[i+14])
- add w13,w13,w6
- add w22,w22,w26 // d+=h
- add w26,w26,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w13,w13,w3
- add w26,w26,w17 // h+=Sigma0(a)
- add w13,w13,w2
- ldr w2,[sp,#12]
- str w5,[sp,#8]
- ror w16,w22,#6
- add w25,w25,w19 // h+=K[i]
- ror w4,w15,#7
- and w17,w23,w22
- ror w3,w12,#17
- bic w19,w24,w22
- ror w5,w26,#2
- add w25,w25,w13 // h+=X[i]
- eor w16,w16,w22,ror#11
- eor w4,w4,w15,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w26,w27 // a^b, b^c in next round
- eor w16,w16,w22,ror#25 // Sigma1(e)
- eor w5,w5,w26,ror#13
- add w25,w25,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w3,w3,w12,ror#19
- eor w4,w4,w15,lsr#3 // sigma0(X[i+1])
- add w25,w25,w16 // h+=Sigma1(e)
- eor w28,w28,w27 // Maj(a,b,c)
- eor w17,w5,w26,ror#22 // Sigma0(a)
- eor w3,w3,w12,lsr#10 // sigma1(X[i+14])
- add w14,w14,w7
- add w21,w21,w25 // d+=h
- add w25,w25,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w14,w14,w4
- add w25,w25,w17 // h+=Sigma0(a)
- add w14,w14,w3
- ldr w3,[sp,#0]
- str w6,[sp,#12]
- ror w16,w21,#6
- add w24,w24,w28 // h+=K[i]
- ror w5,w0,#7
- and w17,w22,w21
- ror w4,w13,#17
- bic w28,w23,w21
- ror w6,w25,#2
- add w24,w24,w14 // h+=X[i]
- eor w16,w16,w21,ror#11
- eor w5,w5,w0,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w25,w26 // a^b, b^c in next round
- eor w16,w16,w21,ror#25 // Sigma1(e)
- eor w6,w6,w25,ror#13
- add w24,w24,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w4,w4,w13,ror#19
- eor w5,w5,w0,lsr#3 // sigma0(X[i+1])
- add w24,w24,w16 // h+=Sigma1(e)
- eor w19,w19,w26 // Maj(a,b,c)
- eor w17,w6,w25,ror#22 // Sigma0(a)
- eor w4,w4,w13,lsr#10 // sigma1(X[i+14])
- add w15,w15,w8
- add w20,w20,w24 // d+=h
- add w24,w24,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w15,w15,w5
- add w24,w24,w17 // h+=Sigma0(a)
- add w15,w15,w4
- ldr w4,[sp,#4]
- str w7,[sp,#0]
- ror w16,w20,#6
- add w23,w23,w19 // h+=K[i]
- ror w6,w1,#7
- and w17,w21,w20
- ror w5,w14,#17
- bic w19,w22,w20
- ror w7,w24,#2
- add w23,w23,w15 // h+=X[i]
- eor w16,w16,w20,ror#11
- eor w6,w6,w1,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w24,w25 // a^b, b^c in next round
- eor w16,w16,w20,ror#25 // Sigma1(e)
- eor w7,w7,w24,ror#13
- add w23,w23,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w5,w5,w14,ror#19
- eor w6,w6,w1,lsr#3 // sigma0(X[i+1])
- add w23,w23,w16 // h+=Sigma1(e)
- eor w28,w28,w25 // Maj(a,b,c)
- eor w17,w7,w24,ror#22 // Sigma0(a)
- eor w5,w5,w14,lsr#10 // sigma1(X[i+14])
- add w0,w0,w9
- add w27,w27,w23 // d+=h
- add w23,w23,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w0,w0,w6
- add w23,w23,w17 // h+=Sigma0(a)
- add w0,w0,w5
- ldr w5,[sp,#8]
- str w8,[sp,#4]
- ror w16,w27,#6
- add w22,w22,w28 // h+=K[i]
- ror w7,w2,#7
- and w17,w20,w27
- ror w6,w15,#17
- bic w28,w21,w27
- ror w8,w23,#2
- add w22,w22,w0 // h+=X[i]
- eor w16,w16,w27,ror#11
- eor w7,w7,w2,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w23,w24 // a^b, b^c in next round
- eor w16,w16,w27,ror#25 // Sigma1(e)
- eor w8,w8,w23,ror#13
- add w22,w22,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w6,w6,w15,ror#19
- eor w7,w7,w2,lsr#3 // sigma0(X[i+1])
- add w22,w22,w16 // h+=Sigma1(e)
- eor w19,w19,w24 // Maj(a,b,c)
- eor w17,w8,w23,ror#22 // Sigma0(a)
- eor w6,w6,w15,lsr#10 // sigma1(X[i+14])
- add w1,w1,w10
- add w26,w26,w22 // d+=h
- add w22,w22,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w1,w1,w7
- add w22,w22,w17 // h+=Sigma0(a)
- add w1,w1,w6
- ldr w6,[sp,#12]
- str w9,[sp,#8]
- ror w16,w26,#6
- add w21,w21,w19 // h+=K[i]
- ror w8,w3,#7
- and w17,w27,w26
- ror w7,w0,#17
- bic w19,w20,w26
- ror w9,w22,#2
- add w21,w21,w1 // h+=X[i]
- eor w16,w16,w26,ror#11
- eor w8,w8,w3,ror#18
- orr w17,w17,w19 // Ch(e,f,g)
- eor w19,w22,w23 // a^b, b^c in next round
- eor w16,w16,w26,ror#25 // Sigma1(e)
- eor w9,w9,w22,ror#13
- add w21,w21,w17 // h+=Ch(e,f,g)
- and w28,w28,w19 // (b^c)&=(a^b)
- eor w7,w7,w0,ror#19
- eor w8,w8,w3,lsr#3 // sigma0(X[i+1])
- add w21,w21,w16 // h+=Sigma1(e)
- eor w28,w28,w23 // Maj(a,b,c)
- eor w17,w9,w22,ror#22 // Sigma0(a)
- eor w7,w7,w0,lsr#10 // sigma1(X[i+14])
- add w2,w2,w11
- add w25,w25,w21 // d+=h
- add w21,w21,w28 // h+=Maj(a,b,c)
- ldr w28,[x30],#4 // *K++, w19 in next round
- add w2,w2,w8
- add w21,w21,w17 // h+=Sigma0(a)
- add w2,w2,w7
- ldr w7,[sp,#0]
- str w10,[sp,#12]
- ror w16,w25,#6
- add w20,w20,w28 // h+=K[i]
- ror w9,w4,#7
- and w17,w26,w25
- ror w8,w1,#17
- bic w28,w27,w25
- ror w10,w21,#2
- add w20,w20,w2 // h+=X[i]
- eor w16,w16,w25,ror#11
- eor w9,w9,w4,ror#18
- orr w17,w17,w28 // Ch(e,f,g)
- eor w28,w21,w22 // a^b, b^c in next round
- eor w16,w16,w25,ror#25 // Sigma1(e)
- eor w10,w10,w21,ror#13
- add w20,w20,w17 // h+=Ch(e,f,g)
- and w19,w19,w28 // (b^c)&=(a^b)
- eor w8,w8,w1,ror#19
- eor w9,w9,w4,lsr#3 // sigma0(X[i+1])
- add w20,w20,w16 // h+=Sigma1(e)
- eor w19,w19,w22 // Maj(a,b,c)
- eor w17,w10,w21,ror#22 // Sigma0(a)
- eor w8,w8,w1,lsr#10 // sigma1(X[i+14])
- add w3,w3,w12
- add w24,w24,w20 // d+=h
- add w20,w20,w19 // h+=Maj(a,b,c)
- ldr w19,[x30],#4 // *K++, w28 in next round
- add w3,w3,w9
- add w20,w20,w17 // h+=Sigma0(a)
- add w3,w3,w8
- cbnz w19,.Loop_16_xx
-
- ldp x0,x2,[x29,#96]
- ldr x1,[x29,#112]
- sub x30,x30,#260 // rewind
-
- ldp w3,w4,[x0]
- ldp w5,w6,[x0,#2*4]
- add x1,x1,#14*4 // advance input pointer
- ldp w7,w8,[x0,#4*4]
- add w20,w20,w3
- ldp w9,w10,[x0,#6*4]
- add w21,w21,w4
- add w22,w22,w5
- add w23,w23,w6
- stp w20,w21,[x0]
- add w24,w24,w7
- add w25,w25,w8
- stp w22,w23,[x0,#2*4]
- add w26,w26,w9
- add w27,w27,w10
- cmp x1,x2
- stp w24,w25,[x0,#4*4]
- stp w26,w27,[x0,#6*4]
- b.ne .Loop
-
- ldp x19,x20,[x29,#16]
- add sp,sp,#4*4
- ldp x21,x22,[x29,#32]
- ldp x23,x24,[x29,#48]
- ldp x25,x26,[x29,#64]
- ldp x27,x28,[x29,#80]
- ldp x29,x30,[sp],#128
- ret
-.size sha256_block_data_order,.-sha256_block_data_order
-
-.align 6
-.type .LK256,%object
-.LK256:
- .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
- .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
- .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
- .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
- .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
- .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
- .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
- .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
- .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
- .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
- .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
- .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070
- .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
- .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
- .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
- .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
- .long 0 //terminator
-.size .LK256,.-.LK256
-#ifndef __KERNEL__
-.align 3
-.LOPENSSL_armcap_P:
-# ifdef __ILP32__
- .long OPENSSL_armcap_P-.
-# else
- .quad OPENSSL_armcap_P-.
-# endif
-#endif
-.asciz "SHA256 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
-.align 2
-#ifndef __KERNEL__
-.type sha256_block_armv8,%function
-.align 6
-sha256_block_armv8:
-.Lv8_entry:
- stp x29,x30,[sp,#-16]!
- add x29,sp,#0
-
- ld1 {v0.4s,v1.4s},[x0]
- adr x3,.LK256
-
-.Loop_hw:
- ld1 {v4.16b-v7.16b},[x1],#64
- sub x2,x2,#1
- ld1 {v16.4s},[x3],#16
- rev32 v4.16b,v4.16b
- rev32 v5.16b,v5.16b
- rev32 v6.16b,v6.16b
- rev32 v7.16b,v7.16b
- orr v18.16b,v0.16b,v0.16b // offload
- orr v19.16b,v1.16b,v1.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v4.4s
- .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v5.4s
- .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v6.4s
- .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v7.4s
- .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v4.4s
- .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v5.4s
- .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v6.4s
- .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v7.4s
- .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v4.4s
- .inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v5.4s
- .inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v6.4s
- .inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
- .inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v7.4s
- .inst 0x5e282887 //sha256su0 v7.16b,v4.16b
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
- .inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
- ld1 {v17.4s},[x3],#16
- add v16.4s,v16.4s,v4.4s
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
-
- ld1 {v16.4s},[x3],#16
- add v17.4s,v17.4s,v5.4s
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
-
- ld1 {v17.4s},[x3]
- add v16.4s,v16.4s,v6.4s
- sub x3,x3,#64*4-16 // rewind
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
- .inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
-
- add v17.4s,v17.4s,v7.4s
- orr v2.16b,v0.16b,v0.16b
- .inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
- .inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
-
- add v0.4s,v0.4s,v18.4s
- add v1.4s,v1.4s,v19.4s
-
- cbnz x2,.Loop_hw
-
- st1 {v0.4s,v1.4s},[x0]
-
- ldr x29,[sp],#16
- ret
-.size sha256_block_armv8,.-sha256_block_armv8
-#endif
-#ifdef __KERNEL__
-.globl sha256_block_neon
-#endif
-.type sha256_block_neon,%function
-.align 4
-sha256_block_neon:
-.Lneon_entry:
- stp x29, x30, [sp, #-16]!
- mov x29, sp
- sub sp,sp,#16*4
-
- adr x16,.LK256
- add x2,x1,x2,lsl#6 // len to point at the end of inp
-
- ld1 {v0.16b},[x1], #16
- ld1 {v1.16b},[x1], #16
- ld1 {v2.16b},[x1], #16
- ld1 {v3.16b},[x1], #16
- ld1 {v4.4s},[x16], #16
- ld1 {v5.4s},[x16], #16
- ld1 {v6.4s},[x16], #16
- ld1 {v7.4s},[x16], #16
- rev32 v0.16b,v0.16b // yes, even on
- rev32 v1.16b,v1.16b // big-endian
- rev32 v2.16b,v2.16b
- rev32 v3.16b,v3.16b
- mov x17,sp
- add v4.4s,v4.4s,v0.4s
- add v5.4s,v5.4s,v1.4s
- add v6.4s,v6.4s,v2.4s
- st1 {v4.4s-v5.4s},[x17], #32
- add v7.4s,v7.4s,v3.4s
- st1 {v6.4s-v7.4s},[x17]
- sub x17,x17,#32
-
- ldp w3,w4,[x0]
- ldp w5,w6,[x0,#8]
- ldp w7,w8,[x0,#16]
- ldp w9,w10,[x0,#24]
- ldr w12,[sp,#0]
- mov w13,wzr
- eor w14,w4,w5
- mov w15,wzr
- b .L_00_48
-
-.align 4
-.L_00_48:
- ext v4.16b,v0.16b,v1.16b,#4
- add w10,w10,w12
- add w3,w3,w15
- and w12,w8,w7
- bic w15,w9,w7
- ext v7.16b,v2.16b,v3.16b,#4
- eor w11,w7,w7,ror#5
- add w3,w3,w13
- mov d19,v3.d[1]
- orr w12,w12,w15
- eor w11,w11,w7,ror#19
- ushr v6.4s,v4.4s,#7
- eor w15,w3,w3,ror#11
- ushr v5.4s,v4.4s,#3
- add w10,w10,w12
- add v0.4s,v0.4s,v7.4s
- ror w11,w11,#6
- sli v6.4s,v4.4s,#25
- eor w13,w3,w4
- eor w15,w15,w3,ror#20
- ushr v7.4s,v4.4s,#18
- add w10,w10,w11
- ldr w12,[sp,#4]
- and w14,w14,w13
- eor v5.16b,v5.16b,v6.16b
- ror w15,w15,#2
- add w6,w6,w10
- sli v7.4s,v4.4s,#14
- eor w14,w14,w4
- ushr v16.4s,v19.4s,#17
- add w9,w9,w12
- add w10,w10,w15
- and w12,w7,w6
- eor v5.16b,v5.16b,v7.16b
- bic w15,w8,w6
- eor w11,w6,w6,ror#5
- sli v16.4s,v19.4s,#15
- add w10,w10,w14
- orr w12,w12,w15
- ushr v17.4s,v19.4s,#10
- eor w11,w11,w6,ror#19
- eor w15,w10,w10,ror#11
- ushr v7.4s,v19.4s,#19
- add w9,w9,w12
- ror w11,w11,#6
- add v0.4s,v0.4s,v5.4s
- eor w14,w10,w3
- eor w15,w15,w10,ror#20
- sli v7.4s,v19.4s,#13
- add w9,w9,w11
- ldr w12,[sp,#8]
- and w13,w13,w14
- eor v17.16b,v17.16b,v16.16b
- ror w15,w15,#2
- add w5,w5,w9
- eor w13,w13,w3
- eor v17.16b,v17.16b,v7.16b
- add w8,w8,w12
- add w9,w9,w15
- and w12,w6,w5
- add v0.4s,v0.4s,v17.4s
- bic w15,w7,w5
- eor w11,w5,w5,ror#5
- add w9,w9,w13
- ushr v18.4s,v0.4s,#17
- orr w12,w12,w15
- ushr v19.4s,v0.4s,#10
- eor w11,w11,w5,ror#19
- eor w15,w9,w9,ror#11
- sli v18.4s,v0.4s,#15
- add w8,w8,w12
- ushr v17.4s,v0.4s,#19
- ror w11,w11,#6
- eor w13,w9,w10
- eor v19.16b,v19.16b,v18.16b
- eor w15,w15,w9,ror#20
- add w8,w8,w11
- sli v17.4s,v0.4s,#13
- ldr w12,[sp,#12]
- and w14,w14,w13
- ror w15,w15,#2
- ld1 {v4.4s},[x16], #16
- add w4,w4,w8
- eor v19.16b,v19.16b,v17.16b
- eor w14,w14,w10
- eor v17.16b,v17.16b,v17.16b
- add w7,w7,w12
- add w8,w8,w15
- and w12,w5,w4
- mov v17.d[1],v19.d[0]
- bic w15,w6,w4
- eor w11,w4,w4,ror#5
- add w8,w8,w14
- add v0.4s,v0.4s,v17.4s
- orr w12,w12,w15
- eor w11,w11,w4,ror#19
- eor w15,w8,w8,ror#11
- add v4.4s,v4.4s,v0.4s
- add w7,w7,w12
- ror w11,w11,#6
- eor w14,w8,w9
- eor w15,w15,w8,ror#20
- add w7,w7,w11
- ldr w12,[sp,#16]
- and w13,w13,w14
- ror w15,w15,#2
- add w3,w3,w7
- eor w13,w13,w9
- st1 {v4.4s},[x17], #16
- ext v4.16b,v1.16b,v2.16b,#4
- add w6,w6,w12
- add w7,w7,w15
- and w12,w4,w3
- bic w15,w5,w3
- ext v7.16b,v3.16b,v0.16b,#4
- eor w11,w3,w3,ror#5
- add w7,w7,w13
- mov d19,v0.d[1]
- orr w12,w12,w15
- eor w11,w11,w3,ror#19
- ushr v6.4s,v4.4s,#7
- eor w15,w7,w7,ror#11
- ushr v5.4s,v4.4s,#3
- add w6,w6,w12
- add v1.4s,v1.4s,v7.4s
- ror w11,w11,#6
- sli v6.4s,v4.4s,#25
- eor w13,w7,w8
- eor w15,w15,w7,ror#20
- ushr v7.4s,v4.4s,#18
- add w6,w6,w11
- ldr w12,[sp,#20]
- and w14,w14,w13
- eor v5.16b,v5.16b,v6.16b
- ror w15,w15,#2
- add w10,w10,w6
- sli v7.4s,v4.4s,#14
- eor w14,w14,w8
- ushr v16.4s,v19.4s,#17
- add w5,w5,w12
- add w6,w6,w15
- and w12,w3,w10
- eor v5.16b,v5.16b,v7.16b
- bic w15,w4,w10
- eor w11,w10,w10,ror#5
- sli v16.4s,v19.4s,#15
- add w6,w6,w14
- orr w12,w12,w15
- ushr v17.4s,v19.4s,#10
- eor w11,w11,w10,ror#19
- eor w15,w6,w6,ror#11
- ushr v7.4s,v19.4s,#19
- add w5,w5,w12
- ror w11,w11,#6
- add v1.4s,v1.4s,v5.4s
- eor w14,w6,w7
- eor w15,w15,w6,ror#20
- sli v7.4s,v19.4s,#13
- add w5,w5,w11
- ldr w12,[sp,#24]
- and w13,w13,w14
- eor v17.16b,v17.16b,v16.16b
- ror w15,w15,#2
- add w9,w9,w5
- eor w13,w13,w7
- eor v17.16b,v17.16b,v7.16b
- add w4,w4,w12
- add w5,w5,w15
- and w12,w10,w9
- add v1.4s,v1.4s,v17.4s
- bic w15,w3,w9
- eor w11,w9,w9,ror#5
- add w5,w5,w13
- ushr v18.4s,v1.4s,#17
- orr w12,w12,w15
- ushr v19.4s,v1.4s,#10
- eor w11,w11,w9,ror#19
- eor w15,w5,w5,ror#11
- sli v18.4s,v1.4s,#15
- add w4,w4,w12
- ushr v17.4s,v1.4s,#19
- ror w11,w11,#6
- eor w13,w5,w6
- eor v19.16b,v19.16b,v18.16b
- eor w15,w15,w5,ror#20
- add w4,w4,w11
- sli v17.4s,v1.4s,#13
- ldr w12,[sp,#28]
- and w14,w14,w13
- ror w15,w15,#2
- ld1 {v4.4s},[x16], #16
- add w8,w8,w4
- eor v19.16b,v19.16b,v17.16b
- eor w14,w14,w6
- eor v17.16b,v17.16b,v17.16b
- add w3,w3,w12
- add w4,w4,w15
- and w12,w9,w8
- mov v17.d[1],v19.d[0]
- bic w15,w10,w8
- eor w11,w8,w8,ror#5
- add w4,w4,w14
- add v1.4s,v1.4s,v17.4s
- orr w12,w12,w15
- eor w11,w11,w8,ror#19
- eor w15,w4,w4,ror#11
- add v4.4s,v4.4s,v1.4s
- add w3,w3,w12
- ror w11,w11,#6
- eor w14,w4,w5
- eor w15,w15,w4,ror#20
- add w3,w3,w11
- ldr w12,[sp,#32]
- and w13,w13,w14
- ror w15,w15,#2
- add w7,w7,w3
- eor w13,w13,w5
- st1 {v4.4s},[x17], #16
- ext v4.16b,v2.16b,v3.16b,#4
- add w10,w10,w12
- add w3,w3,w15
- and w12,w8,w7
- bic w15,w9,w7
- ext v7.16b,v0.16b,v1.16b,#4
- eor w11,w7,w7,ror#5
- add w3,w3,w13
- mov d19,v1.d[1]
- orr w12,w12,w15
- eor w11,w11,w7,ror#19
- ushr v6.4s,v4.4s,#7
- eor w15,w3,w3,ror#11
- ushr v5.4s,v4.4s,#3
- add w10,w10,w12
- add v2.4s,v2.4s,v7.4s
- ror w11,w11,#6
- sli v6.4s,v4.4s,#25
- eor w13,w3,w4
- eor w15,w15,w3,ror#20
- ushr v7.4s,v4.4s,#18
- add w10,w10,w11
- ldr w12,[sp,#36]
- and w14,w14,w13
- eor v5.16b,v5.16b,v6.16b
- ror w15,w15,#2
- add w6,w6,w10
- sli v7.4s,v4.4s,#14
- eor w14,w14,w4
- ushr v16.4s,v19.4s,#17
- add w9,w9,w12
- add w10,w10,w15
- and w12,w7,w6
- eor v5.16b,v5.16b,v7.16b
- bic w15,w8,w6
- eor w11,w6,w6,ror#5
- sli v16.4s,v19.4s,#15
- add w10,w10,w14
- orr w12,w12,w15
- ushr v17.4s,v19.4s,#10
- eor w11,w11,w6,ror#19
- eor w15,w10,w10,ror#11
- ushr v7.4s,v19.4s,#19
- add w9,w9,w12
- ror w11,w11,#6
- add v2.4s,v2.4s,v5.4s
- eor w14,w10,w3
- eor w15,w15,w10,ror#20
- sli v7.4s,v19.4s,#13
- add w9,w9,w11
- ldr w12,[sp,#40]
- and w13,w13,w14
- eor v17.16b,v17.16b,v16.16b
- ror w15,w15,#2
- add w5,w5,w9
- eor w13,w13,w3
- eor v17.16b,v17.16b,v7.16b
- add w8,w8,w12
- add w9,w9,w15
- and w12,w6,w5
- add v2.4s,v2.4s,v17.4s
- bic w15,w7,w5
- eor w11,w5,w5,ror#5
- add w9,w9,w13
- ushr v18.4s,v2.4s,#17
- orr w12,w12,w15
- ushr v19.4s,v2.4s,#10
- eor w11,w11,w5,ror#19
- eor w15,w9,w9,ror#11
- sli v18.4s,v2.4s,#15
- add w8,w8,w12
- ushr v17.4s,v2.4s,#19
- ror w11,w11,#6
- eor w13,w9,w10
- eor v19.16b,v19.16b,v18.16b
- eor w15,w15,w9,ror#20
- add w8,w8,w11
- sli v17.4s,v2.4s,#13
- ldr w12,[sp,#44]
- and w14,w14,w13
- ror w15,w15,#2
- ld1 {v4.4s},[x16], #16
- add w4,w4,w8
- eor v19.16b,v19.16b,v17.16b
- eor w14,w14,w10
- eor v17.16b,v17.16b,v17.16b
- add w7,w7,w12
- add w8,w8,w15
- and w12,w5,w4
- mov v17.d[1],v19.d[0]
- bic w15,w6,w4
- eor w11,w4,w4,ror#5
- add w8,w8,w14
- add v2.4s,v2.4s,v17.4s
- orr w12,w12,w15
- eor w11,w11,w4,ror#19
- eor w15,w8,w8,ror#11
- add v4.4s,v4.4s,v2.4s
- add w7,w7,w12
- ror w11,w11,#6
- eor w14,w8,w9
- eor w15,w15,w8,ror#20
- add w7,w7,w11
- ldr w12,[sp,#48]
- and w13,w13,w14
- ror w15,w15,#2
- add w3,w3,w7
- eor w13,w13,w9
- st1 {v4.4s},[x17], #16
- ext v4.16b,v3.16b,v0.16b,#4
- add w6,w6,w12
- add w7,w7,w15
- and w12,w4,w3
- bic w15,w5,w3
- ext v7.16b,v1.16b,v2.16b,#4
- eor w11,w3,w3,ror#5
- add w7,w7,w13
- mov d19,v2.d[1]
- orr w12,w12,w15
- eor w11,w11,w3,ror#19
- ushr v6.4s,v4.4s,#7
- eor w15,w7,w7,ror#11
- ushr v5.4s,v4.4s,#3
- add w6,w6,w12
- add v3.4s,v3.4s,v7.4s
- ror w11,w11,#6
- sli v6.4s,v4.4s,#25
- eor w13,w7,w8
- eor w15,w15,w7,ror#20
- ushr v7.4s,v4.4s,#18
- add w6,w6,w11
- ldr w12,[sp,#52]
- and w14,w14,w13
- eor v5.16b,v5.16b,v6.16b
- ror w15,w15,#2
- add w10,w10,w6
- sli v7.4s,v4.4s,#14
- eor w14,w14,w8
- ushr v16.4s,v19.4s,#17
- add w5,w5,w12
- add w6,w6,w15
- and w12,w3,w10
- eor v5.16b,v5.16b,v7.16b
- bic w15,w4,w10
- eor w11,w10,w10,ror#5
- sli v16.4s,v19.4s,#15
- add w6,w6,w14
- orr w12,w12,w15
- ushr v17.4s,v19.4s,#10
- eor w11,w11,w10,ror#19
- eor w15,w6,w6,ror#11
- ushr v7.4s,v19.4s,#19
- add w5,w5,w12
- ror w11,w11,#6
- add v3.4s,v3.4s,v5.4s
- eor w14,w6,w7
- eor w15,w15,w6,ror#20
- sli v7.4s,v19.4s,#13
- add w5,w5,w11
- ldr w12,[sp,#56]
- and w13,w13,w14
- eor v17.16b,v17.16b,v16.16b
- ror w15,w15,#2
- add w9,w9,w5
- eor w13,w13,w7
- eor v17.16b,v17.16b,v7.16b
- add w4,w4,w12
- add w5,w5,w15
- and w12,w10,w9
- add v3.4s,v3.4s,v17.4s
- bic w15,w3,w9
- eor w11,w9,w9,ror#5
- add w5,w5,w13
- ushr v18.4s,v3.4s,#17
- orr w12,w12,w15
- ushr v19.4s,v3.4s,#10
- eor w11,w11,w9,ror#19
- eor w15,w5,w5,ror#11
- sli v18.4s,v3.4s,#15
- add w4,w4,w12
- ushr v17.4s,v3.4s,#19
- ror w11,w11,#6
- eor w13,w5,w6
- eor v19.16b,v19.16b,v18.16b
- eor w15,w15,w5,ror#20
- add w4,w4,w11
- sli v17.4s,v3.4s,#13
- ldr w12,[sp,#60]
- and w14,w14,w13
- ror w15,w15,#2
- ld1 {v4.4s},[x16], #16
- add w8,w8,w4
- eor v19.16b,v19.16b,v17.16b
- eor w14,w14,w6
- eor v17.16b,v17.16b,v17.16b
- add w3,w3,w12
- add w4,w4,w15
- and w12,w9,w8
- mov v17.d[1],v19.d[0]
- bic w15,w10,w8
- eor w11,w8,w8,ror#5
- add w4,w4,w14
- add v3.4s,v3.4s,v17.4s
- orr w12,w12,w15
- eor w11,w11,w8,ror#19
- eor w15,w4,w4,ror#11
- add v4.4s,v4.4s,v3.4s
- add w3,w3,w12
- ror w11,w11,#6
- eor w14,w4,w5
- eor w15,w15,w4,ror#20
- add w3,w3,w11
- ldr w12,[x16]
- and w13,w13,w14
- ror w15,w15,#2
- add w7,w7,w3
- eor w13,w13,w5
- st1 {v4.4s},[x17], #16
- cmp w12,#0 // check for K256 terminator
- ldr w12,[sp,#0]
- sub x17,x17,#64
- bne .L_00_48
-
- sub x16,x16,#256 // rewind x16
- cmp x1,x2
- mov x17, #64
- csel x17, x17, xzr, eq
- sub x1,x1,x17 // avoid SEGV
- mov x17,sp
- add w10,w10,w12
- add w3,w3,w15
- and w12,w8,w7
- ld1 {v0.16b},[x1],#16
- bic w15,w9,w7
- eor w11,w7,w7,ror#5
- ld1 {v4.4s},[x16],#16
- add w3,w3,w13
- orr w12,w12,w15
- eor w11,w11,w7,ror#19
- eor w15,w3,w3,ror#11
- rev32 v0.16b,v0.16b
- add w10,w10,w12
- ror w11,w11,#6
- eor w13,w3,w4
- eor w15,w15,w3,ror#20
- add v4.4s,v4.4s,v0.4s
- add w10,w10,w11
- ldr w12,[sp,#4]
- and w14,w14,w13
- ror w15,w15,#2
- add w6,w6,w10
- eor w14,w14,w4
- add w9,w9,w12
- add w10,w10,w15
- and w12,w7,w6
- bic w15,w8,w6
- eor w11,w6,w6,ror#5
- add w10,w10,w14
- orr w12,w12,w15
- eor w11,w11,w6,ror#19
- eor w15,w10,w10,ror#11
- add w9,w9,w12
- ror w11,w11,#6
- eor w14,w10,w3
- eor w15,w15,w10,ror#20
- add w9,w9,w11
- ldr w12,[sp,#8]
- and w13,w13,w14
- ror w15,w15,#2
- add w5,w5,w9
- eor w13,w13,w3
- add w8,w8,w12
- add w9,w9,w15
- and w12,w6,w5
- bic w15,w7,w5
- eor w11,w5,w5,ror#5
- add w9,w9,w13
- orr w12,w12,w15
- eor w11,w11,w5,ror#19
- eor w15,w9,w9,ror#11
- add w8,w8,w12
- ror w11,w11,#6
- eor w13,w9,w10
- eor w15,w15,w9,ror#20
- add w8,w8,w11
- ldr w12,[sp,#12]
- and w14,w14,w13
- ror w15,w15,#2
- add w4,w4,w8
- eor w14,w14,w10
- add w7,w7,w12
- add w8,w8,w15
- and w12,w5,w4
- bic w15,w6,w4
- eor w11,w4,w4,ror#5
- add w8,w8,w14
- orr w12,w12,w15
- eor w11,w11,w4,ror#19
- eor w15,w8,w8,ror#11
- add w7,w7,w12
- ror w11,w11,#6
- eor w14,w8,w9
- eor w15,w15,w8,ror#20
- add w7,w7,w11
- ldr w12,[sp,#16]
- and w13,w13,w14
- ror w15,w15,#2
- add w3,w3,w7
- eor w13,w13,w9
- st1 {v4.4s},[x17], #16
- add w6,w6,w12
- add w7,w7,w15
- and w12,w4,w3
- ld1 {v1.16b},[x1],#16
- bic w15,w5,w3
- eor w11,w3,w3,ror#5
- ld1 {v4.4s},[x16],#16
- add w7,w7,w13
- orr w12,w12,w15
- eor w11,w11,w3,ror#19
- eor w15,w7,w7,ror#11
- rev32 v1.16b,v1.16b
- add w6,w6,w12
- ror w11,w11,#6
- eor w13,w7,w8
- eor w15,w15,w7,ror#20
- add v4.4s,v4.4s,v1.4s
- add w6,w6,w11
- ldr w12,[sp,#20]
- and w14,w14,w13
- ror w15,w15,#2
- add w10,w10,w6
- eor w14,w14,w8
- add w5,w5,w12
- add w6,w6,w15
- and w12,w3,w10
- bic w15,w4,w10
- eor w11,w10,w10,ror#5
- add w6,w6,w14
- orr w12,w12,w15
- eor w11,w11,w10,ror#19
- eor w15,w6,w6,ror#11
- add w5,w5,w12
- ror w11,w11,#6
- eor w14,w6,w7
- eor w15,w15,w6,ror#20
- add w5,w5,w11
- ldr w12,[sp,#24]
- and w13,w13,w14
- ror w15,w15,#2
- add w9,w9,w5
- eor w13,w13,w7
- add w4,w4,w12
- add w5,w5,w15
- and w12,w10,w9
- bic w15,w3,w9
- eor w11,w9,w9,ror#5
- add w5,w5,w13
- orr w12,w12,w15
- eor w11,w11,w9,ror#19
- eor w15,w5,w5,ror#11
- add w4,w4,w12
- ror w11,w11,#6
- eor w13,w5,w6
- eor w15,w15,w5,ror#20
- add w4,w4,w11
- ldr w12,[sp,#28]
- and w14,w14,w13
- ror w15,w15,#2
- add w8,w8,w4
- eor w14,w14,w6
- add w3,w3,w12
- add w4,w4,w15
- and w12,w9,w8
- bic w15,w10,w8
- eor w11,w8,w8,ror#5
- add w4,w4,w14
- orr w12,w12,w15
- eor w11,w11,w8,ror#19
- eor w15,w4,w4,ror#11
- add w3,w3,w12
- ror w11,w11,#6
- eor w14,w4,w5
- eor w15,w15,w4,ror#20
- add w3,w3,w11
- ldr w12,[sp,#32]
- and w13,w13,w14
- ror w15,w15,#2
- add w7,w7,w3
- eor w13,w13,w5
- st1 {v4.4s},[x17], #16
- add w10,w10,w12
- add w3,w3,w15
- and w12,w8,w7
- ld1 {v2.16b},[x1],#16
- bic w15,w9,w7
- eor w11,w7,w7,ror#5
- ld1 {v4.4s},[x16],#16
- add w3,w3,w13
- orr w12,w12,w15
- eor w11,w11,w7,ror#19
- eor w15,w3,w3,ror#11
- rev32 v2.16b,v2.16b
- add w10,w10,w12
- ror w11,w11,#6
- eor w13,w3,w4
- eor w15,w15,w3,ror#20
- add v4.4s,v4.4s,v2.4s
- add w10,w10,w11
- ldr w12,[sp,#36]
- and w14,w14,w13
- ror w15,w15,#2
- add w6,w6,w10
- eor w14,w14,w4
- add w9,w9,w12
- add w10,w10,w15
- and w12,w7,w6
- bic w15,w8,w6
- eor w11,w6,w6,ror#5
- add w10,w10,w14
- orr w12,w12,w15
- eor w11,w11,w6,ror#19
- eor w15,w10,w10,ror#11
- add w9,w9,w12
- ror w11,w11,#6
- eor w14,w10,w3
- eor w15,w15,w10,ror#20
- add w9,w9,w11
- ldr w12,[sp,#40]
- and w13,w13,w14
- ror w15,w15,#2
- add w5,w5,w9
- eor w13,w13,w3
- add w8,w8,w12
- add w9,w9,w15
- and w12,w6,w5
- bic w15,w7,w5
- eor w11,w5,w5,ror#5
- add w9,w9,w13
- orr w12,w12,w15
- eor w11,w11,w5,ror#19
- eor w15,w9,w9,ror#11
- add w8,w8,w12
- ror w11,w11,#6
- eor w13,w9,w10
- eor w15,w15,w9,ror#20
- add w8,w8,w11
- ldr w12,[sp,#44]
- and w14,w14,w13
- ror w15,w15,#2
- add w4,w4,w8
- eor w14,w14,w10
- add w7,w7,w12
- add w8,w8,w15
- and w12,w5,w4
- bic w15,w6,w4
- eor w11,w4,w4,ror#5
- add w8,w8,w14
- orr w12,w12,w15
- eor w11,w11,w4,ror#19
- eor w15,w8,w8,ror#11
- add w7,w7,w12
- ror w11,w11,#6
- eor w14,w8,w9
- eor w15,w15,w8,ror#20
- add w7,w7,w11
- ldr w12,[sp,#48]
- and w13,w13,w14
- ror w15,w15,#2
- add w3,w3,w7
- eor w13,w13,w9
- st1 {v4.4s},[x17], #16
- add w6,w6,w12
- add w7,w7,w15
- and w12,w4,w3
- ld1 {v3.16b},[x1],#16
- bic w15,w5,w3
- eor w11,w3,w3,ror#5
- ld1 {v4.4s},[x16],#16
- add w7,w7,w13
- orr w12,w12,w15
- eor w11,w11,w3,ror#19
- eor w15,w7,w7,ror#11
- rev32 v3.16b,v3.16b
- add w6,w6,w12
- ror w11,w11,#6
- eor w13,w7,w8
- eor w15,w15,w7,ror#20
- add v4.4s,v4.4s,v3.4s
- add w6,w6,w11
- ldr w12,[sp,#52]
- and w14,w14,w13
- ror w15,w15,#2
- add w10,w10,w6
- eor w14,w14,w8
- add w5,w5,w12
- add w6,w6,w15
- and w12,w3,w10
- bic w15,w4,w10
- eor w11,w10,w10,ror#5
- add w6,w6,w14
- orr w12,w12,w15
- eor w11,w11,w10,ror#19
- eor w15,w6,w6,ror#11
- add w5,w5,w12
- ror w11,w11,#6
- eor w14,w6,w7
- eor w15,w15,w6,ror#20
- add w5,w5,w11
- ldr w12,[sp,#56]
- and w13,w13,w14
- ror w15,w15,#2
- add w9,w9,w5
- eor w13,w13,w7
- add w4,w4,w12
- add w5,w5,w15
- and w12,w10,w9
- bic w15,w3,w9
- eor w11,w9,w9,ror#5
- add w5,w5,w13
- orr w12,w12,w15
- eor w11,w11,w9,ror#19
- eor w15,w5,w5,ror#11
- add w4,w4,w12
- ror w11,w11,#6
- eor w13,w5,w6
- eor w15,w15,w5,ror#20
- add w4,w4,w11
- ldr w12,[sp,#60]
- and w14,w14,w13
- ror w15,w15,#2
- add w8,w8,w4
- eor w14,w14,w6
- add w3,w3,w12
- add w4,w4,w15
- and w12,w9,w8
- bic w15,w10,w8
- eor w11,w8,w8,ror#5
- add w4,w4,w14
- orr w12,w12,w15
- eor w11,w11,w8,ror#19
- eor w15,w4,w4,ror#11
- add w3,w3,w12
- ror w11,w11,#6
- eor w14,w4,w5
- eor w15,w15,w4,ror#20
- add w3,w3,w11
- and w13,w13,w14
- ror w15,w15,#2
- add w7,w7,w3
- eor w13,w13,w5
- st1 {v4.4s},[x17], #16
- add w3,w3,w15 // h+=Sigma0(a) from the past
- ldp w11,w12,[x0,#0]
- add w3,w3,w13 // h+=Maj(a,b,c) from the past
- ldp w13,w14,[x0,#8]
- add w3,w3,w11 // accumulate
- add w4,w4,w12
- ldp w11,w12,[x0,#16]
- add w5,w5,w13
- add w6,w6,w14
- ldp w13,w14,[x0,#24]
- add w7,w7,w11
- add w8,w8,w12
- ldr w12,[sp,#0]
- stp w3,w4,[x0,#0]
- add w9,w9,w13
- mov w13,wzr
- stp w5,w6,[x0,#8]
- add w10,w10,w14
- stp w7,w8,[x0,#16]
- eor w14,w4,w5
- stp w9,w10,[x0,#24]
- mov w15,wzr
- mov x17,sp
- b.ne .L_00_48
-
- ldr x29,[x29]
- add sp,sp,#16*4+16
- ret
-.size sha256_block_neon,.-sha256_block_neon
-#ifndef __KERNEL__
-.comm OPENSSL_armcap_P,4,4
-#endif
diff --git a/arch/arm64/crypto/sha512-core.S_shipped b/arch/arm64/crypto/sha512-core.S_shipped
deleted file mode 100644
index e063a6106720..000000000000
--- a/arch/arm64/crypto/sha512-core.S_shipped
+++ /dev/null
@@ -1,1093 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-// This code is taken from the OpenSSL project but the author (Andy Polyakov)
-// has relicensed it under the GPLv2. Therefore this program is free software;
-// you can redistribute it and/or modify it under the terms of the GNU General
-// Public License version 2 as published by the Free Software Foundation.
-//
-// The original headers, including the original license headers, are
-// included below for completeness.
-
-// Copyright 2014-2016 The OpenSSL Project Authors. All Rights Reserved.
-//
-// Licensed under the OpenSSL license (the "License"). You may not use
-// this file except in compliance with the License. You can obtain a copy
-// in the file LICENSE in the source distribution or at
-// https://www.openssl.org/source/license.html
-
-// ====================================================================
-// Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
-// project. The module is, however, dual licensed under OpenSSL and
-// CRYPTOGAMS licenses depending on where you obtain it. For further
-// details see http://www.openssl.org/~appro/cryptogams/.
-// ====================================================================
-//
-// SHA256/512 for ARMv8.
-//
-// Performance in cycles per processed byte and improvement coefficient
-// over code generated with "default" compiler:
-//
-// SHA256-hw SHA256(*) SHA512
-// Apple A7 1.97 10.5 (+33%) 6.73 (-1%(**))
-// Cortex-A53 2.38 15.5 (+115%) 10.0 (+150%(***))
-// Cortex-A57 2.31 11.6 (+86%) 7.51 (+260%(***))
-// Denver 2.01 10.5 (+26%) 6.70 (+8%)
-// X-Gene 20.0 (+100%) 12.8 (+300%(***))
-// Mongoose 2.36 13.0 (+50%) 8.36 (+33%)
-//
-// (*) Software SHA256 results are of lesser relevance, presented
-// mostly for informational purposes.
-// (**) The result is a trade-off: it's possible to improve it by
-// 10% (or by 1 cycle per round), but at the cost of 20% loss
-// on Cortex-A53 (or by 4 cycles per round).
-// (***) Super-impressive coefficients over gcc-generated code are
-// indication of some compiler "pathology", most notably code
-// generated with -mgeneral-regs-only is significanty faster
-// and the gap is only 40-90%.
-//
-// October 2016.
-//
-// Originally it was reckoned that it makes no sense to implement NEON
-// version of SHA256 for 64-bit processors. This is because performance
-// improvement on most wide-spread Cortex-A5x processors was observed
-// to be marginal, same on Cortex-A53 and ~10% on A57. But then it was
-// observed that 32-bit NEON SHA256 performs significantly better than
-// 64-bit scalar version on *some* of the more recent processors. As
-// result 64-bit NEON version of SHA256 was added to provide best
-// all-round performance. For example it executes ~30% faster on X-Gene
-// and Mongoose. [For reference, NEON version of SHA512 is bound to
-// deliver much less improvement, likely *negative* on Cortex-A5x.
-// Which is why NEON support is limited to SHA256.]
-
-#ifndef __KERNEL__
-# include "arm_arch.h"
-#endif
-
-.text
-
-.extern OPENSSL_armcap_P
-.globl sha512_block_data_order
-.type sha512_block_data_order,%function
-.align 6
-sha512_block_data_order:
- stp x29,x30,[sp,#-128]!
- add x29,sp,#0
-
- stp x19,x20,[sp,#16]
- stp x21,x22,[sp,#32]
- stp x23,x24,[sp,#48]
- stp x25,x26,[sp,#64]
- stp x27,x28,[sp,#80]
- sub sp,sp,#4*8
-
- ldp x20,x21,[x0] // load context
- ldp x22,x23,[x0,#2*8]
- ldp x24,x25,[x0,#4*8]
- add x2,x1,x2,lsl#7 // end of input
- ldp x26,x27,[x0,#6*8]
- adr x30,.LK512
- stp x0,x2,[x29,#96]
-
-.Loop:
- ldp x3,x4,[x1],#2*8
- ldr x19,[x30],#8 // *K++
- eor x28,x21,x22 // magic seed
- str x1,[x29,#112]
-#ifndef __AARCH64EB__
- rev x3,x3 // 0
-#endif
- ror x16,x24,#14
- add x27,x27,x19 // h+=K[i]
- eor x6,x24,x24,ror#23
- and x17,x25,x24
- bic x19,x26,x24
- add x27,x27,x3 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x20,x21 // a^b, b^c in next round
- eor x16,x16,x6,ror#18 // Sigma1(e)
- ror x6,x20,#28
- add x27,x27,x17 // h+=Ch(e,f,g)
- eor x17,x20,x20,ror#5
- add x27,x27,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x23,x23,x27 // d+=h
- eor x28,x28,x21 // Maj(a,b,c)
- eor x17,x6,x17,ror#34 // Sigma0(a)
- add x27,x27,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x27,x27,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x4,x4 // 1
-#endif
- ldp x5,x6,[x1],#2*8
- add x27,x27,x17 // h+=Sigma0(a)
- ror x16,x23,#14
- add x26,x26,x28 // h+=K[i]
- eor x7,x23,x23,ror#23
- and x17,x24,x23
- bic x28,x25,x23
- add x26,x26,x4 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x27,x20 // a^b, b^c in next round
- eor x16,x16,x7,ror#18 // Sigma1(e)
- ror x7,x27,#28
- add x26,x26,x17 // h+=Ch(e,f,g)
- eor x17,x27,x27,ror#5
- add x26,x26,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x22,x22,x26 // d+=h
- eor x19,x19,x20 // Maj(a,b,c)
- eor x17,x7,x17,ror#34 // Sigma0(a)
- add x26,x26,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x26,x26,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x5,x5 // 2
-#endif
- add x26,x26,x17 // h+=Sigma0(a)
- ror x16,x22,#14
- add x25,x25,x19 // h+=K[i]
- eor x8,x22,x22,ror#23
- and x17,x23,x22
- bic x19,x24,x22
- add x25,x25,x5 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x26,x27 // a^b, b^c in next round
- eor x16,x16,x8,ror#18 // Sigma1(e)
- ror x8,x26,#28
- add x25,x25,x17 // h+=Ch(e,f,g)
- eor x17,x26,x26,ror#5
- add x25,x25,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x21,x21,x25 // d+=h
- eor x28,x28,x27 // Maj(a,b,c)
- eor x17,x8,x17,ror#34 // Sigma0(a)
- add x25,x25,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x25,x25,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x6,x6 // 3
-#endif
- ldp x7,x8,[x1],#2*8
- add x25,x25,x17 // h+=Sigma0(a)
- ror x16,x21,#14
- add x24,x24,x28 // h+=K[i]
- eor x9,x21,x21,ror#23
- and x17,x22,x21
- bic x28,x23,x21
- add x24,x24,x6 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x25,x26 // a^b, b^c in next round
- eor x16,x16,x9,ror#18 // Sigma1(e)
- ror x9,x25,#28
- add x24,x24,x17 // h+=Ch(e,f,g)
- eor x17,x25,x25,ror#5
- add x24,x24,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x20,x20,x24 // d+=h
- eor x19,x19,x26 // Maj(a,b,c)
- eor x17,x9,x17,ror#34 // Sigma0(a)
- add x24,x24,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x24,x24,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x7,x7 // 4
-#endif
- add x24,x24,x17 // h+=Sigma0(a)
- ror x16,x20,#14
- add x23,x23,x19 // h+=K[i]
- eor x10,x20,x20,ror#23
- and x17,x21,x20
- bic x19,x22,x20
- add x23,x23,x7 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x24,x25 // a^b, b^c in next round
- eor x16,x16,x10,ror#18 // Sigma1(e)
- ror x10,x24,#28
- add x23,x23,x17 // h+=Ch(e,f,g)
- eor x17,x24,x24,ror#5
- add x23,x23,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x27,x27,x23 // d+=h
- eor x28,x28,x25 // Maj(a,b,c)
- eor x17,x10,x17,ror#34 // Sigma0(a)
- add x23,x23,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x23,x23,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x8,x8 // 5
-#endif
- ldp x9,x10,[x1],#2*8
- add x23,x23,x17 // h+=Sigma0(a)
- ror x16,x27,#14
- add x22,x22,x28 // h+=K[i]
- eor x11,x27,x27,ror#23
- and x17,x20,x27
- bic x28,x21,x27
- add x22,x22,x8 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x23,x24 // a^b, b^c in next round
- eor x16,x16,x11,ror#18 // Sigma1(e)
- ror x11,x23,#28
- add x22,x22,x17 // h+=Ch(e,f,g)
- eor x17,x23,x23,ror#5
- add x22,x22,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x26,x26,x22 // d+=h
- eor x19,x19,x24 // Maj(a,b,c)
- eor x17,x11,x17,ror#34 // Sigma0(a)
- add x22,x22,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x22,x22,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x9,x9 // 6
-#endif
- add x22,x22,x17 // h+=Sigma0(a)
- ror x16,x26,#14
- add x21,x21,x19 // h+=K[i]
- eor x12,x26,x26,ror#23
- and x17,x27,x26
- bic x19,x20,x26
- add x21,x21,x9 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x22,x23 // a^b, b^c in next round
- eor x16,x16,x12,ror#18 // Sigma1(e)
- ror x12,x22,#28
- add x21,x21,x17 // h+=Ch(e,f,g)
- eor x17,x22,x22,ror#5
- add x21,x21,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x25,x25,x21 // d+=h
- eor x28,x28,x23 // Maj(a,b,c)
- eor x17,x12,x17,ror#34 // Sigma0(a)
- add x21,x21,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x21,x21,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x10,x10 // 7
-#endif
- ldp x11,x12,[x1],#2*8
- add x21,x21,x17 // h+=Sigma0(a)
- ror x16,x25,#14
- add x20,x20,x28 // h+=K[i]
- eor x13,x25,x25,ror#23
- and x17,x26,x25
- bic x28,x27,x25
- add x20,x20,x10 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x21,x22 // a^b, b^c in next round
- eor x16,x16,x13,ror#18 // Sigma1(e)
- ror x13,x21,#28
- add x20,x20,x17 // h+=Ch(e,f,g)
- eor x17,x21,x21,ror#5
- add x20,x20,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x24,x24,x20 // d+=h
- eor x19,x19,x22 // Maj(a,b,c)
- eor x17,x13,x17,ror#34 // Sigma0(a)
- add x20,x20,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x20,x20,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x11,x11 // 8
-#endif
- add x20,x20,x17 // h+=Sigma0(a)
- ror x16,x24,#14
- add x27,x27,x19 // h+=K[i]
- eor x14,x24,x24,ror#23
- and x17,x25,x24
- bic x19,x26,x24
- add x27,x27,x11 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x20,x21 // a^b, b^c in next round
- eor x16,x16,x14,ror#18 // Sigma1(e)
- ror x14,x20,#28
- add x27,x27,x17 // h+=Ch(e,f,g)
- eor x17,x20,x20,ror#5
- add x27,x27,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x23,x23,x27 // d+=h
- eor x28,x28,x21 // Maj(a,b,c)
- eor x17,x14,x17,ror#34 // Sigma0(a)
- add x27,x27,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x27,x27,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x12,x12 // 9
-#endif
- ldp x13,x14,[x1],#2*8
- add x27,x27,x17 // h+=Sigma0(a)
- ror x16,x23,#14
- add x26,x26,x28 // h+=K[i]
- eor x15,x23,x23,ror#23
- and x17,x24,x23
- bic x28,x25,x23
- add x26,x26,x12 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x27,x20 // a^b, b^c in next round
- eor x16,x16,x15,ror#18 // Sigma1(e)
- ror x15,x27,#28
- add x26,x26,x17 // h+=Ch(e,f,g)
- eor x17,x27,x27,ror#5
- add x26,x26,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x22,x22,x26 // d+=h
- eor x19,x19,x20 // Maj(a,b,c)
- eor x17,x15,x17,ror#34 // Sigma0(a)
- add x26,x26,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x26,x26,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x13,x13 // 10
-#endif
- add x26,x26,x17 // h+=Sigma0(a)
- ror x16,x22,#14
- add x25,x25,x19 // h+=K[i]
- eor x0,x22,x22,ror#23
- and x17,x23,x22
- bic x19,x24,x22
- add x25,x25,x13 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x26,x27 // a^b, b^c in next round
- eor x16,x16,x0,ror#18 // Sigma1(e)
- ror x0,x26,#28
- add x25,x25,x17 // h+=Ch(e,f,g)
- eor x17,x26,x26,ror#5
- add x25,x25,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x21,x21,x25 // d+=h
- eor x28,x28,x27 // Maj(a,b,c)
- eor x17,x0,x17,ror#34 // Sigma0(a)
- add x25,x25,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x25,x25,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x14,x14 // 11
-#endif
- ldp x15,x0,[x1],#2*8
- add x25,x25,x17 // h+=Sigma0(a)
- str x6,[sp,#24]
- ror x16,x21,#14
- add x24,x24,x28 // h+=K[i]
- eor x6,x21,x21,ror#23
- and x17,x22,x21
- bic x28,x23,x21
- add x24,x24,x14 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x25,x26 // a^b, b^c in next round
- eor x16,x16,x6,ror#18 // Sigma1(e)
- ror x6,x25,#28
- add x24,x24,x17 // h+=Ch(e,f,g)
- eor x17,x25,x25,ror#5
- add x24,x24,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x20,x20,x24 // d+=h
- eor x19,x19,x26 // Maj(a,b,c)
- eor x17,x6,x17,ror#34 // Sigma0(a)
- add x24,x24,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x24,x24,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x15,x15 // 12
-#endif
- add x24,x24,x17 // h+=Sigma0(a)
- str x7,[sp,#0]
- ror x16,x20,#14
- add x23,x23,x19 // h+=K[i]
- eor x7,x20,x20,ror#23
- and x17,x21,x20
- bic x19,x22,x20
- add x23,x23,x15 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x24,x25 // a^b, b^c in next round
- eor x16,x16,x7,ror#18 // Sigma1(e)
- ror x7,x24,#28
- add x23,x23,x17 // h+=Ch(e,f,g)
- eor x17,x24,x24,ror#5
- add x23,x23,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x27,x27,x23 // d+=h
- eor x28,x28,x25 // Maj(a,b,c)
- eor x17,x7,x17,ror#34 // Sigma0(a)
- add x23,x23,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x23,x23,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x0,x0 // 13
-#endif
- ldp x1,x2,[x1]
- add x23,x23,x17 // h+=Sigma0(a)
- str x8,[sp,#8]
- ror x16,x27,#14
- add x22,x22,x28 // h+=K[i]
- eor x8,x27,x27,ror#23
- and x17,x20,x27
- bic x28,x21,x27
- add x22,x22,x0 // h+=X[i]
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x23,x24 // a^b, b^c in next round
- eor x16,x16,x8,ror#18 // Sigma1(e)
- ror x8,x23,#28
- add x22,x22,x17 // h+=Ch(e,f,g)
- eor x17,x23,x23,ror#5
- add x22,x22,x16 // h+=Sigma1(e)
- and x19,x19,x28 // (b^c)&=(a^b)
- add x26,x26,x22 // d+=h
- eor x19,x19,x24 // Maj(a,b,c)
- eor x17,x8,x17,ror#34 // Sigma0(a)
- add x22,x22,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- //add x22,x22,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x1,x1 // 14
-#endif
- ldr x6,[sp,#24]
- add x22,x22,x17 // h+=Sigma0(a)
- str x9,[sp,#16]
- ror x16,x26,#14
- add x21,x21,x19 // h+=K[i]
- eor x9,x26,x26,ror#23
- and x17,x27,x26
- bic x19,x20,x26
- add x21,x21,x1 // h+=X[i]
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x22,x23 // a^b, b^c in next round
- eor x16,x16,x9,ror#18 // Sigma1(e)
- ror x9,x22,#28
- add x21,x21,x17 // h+=Ch(e,f,g)
- eor x17,x22,x22,ror#5
- add x21,x21,x16 // h+=Sigma1(e)
- and x28,x28,x19 // (b^c)&=(a^b)
- add x25,x25,x21 // d+=h
- eor x28,x28,x23 // Maj(a,b,c)
- eor x17,x9,x17,ror#34 // Sigma0(a)
- add x21,x21,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- //add x21,x21,x17 // h+=Sigma0(a)
-#ifndef __AARCH64EB__
- rev x2,x2 // 15
-#endif
- ldr x7,[sp,#0]
- add x21,x21,x17 // h+=Sigma0(a)
- str x10,[sp,#24]
- ror x16,x25,#14
- add x20,x20,x28 // h+=K[i]
- ror x9,x4,#1
- and x17,x26,x25
- ror x8,x1,#19
- bic x28,x27,x25
- ror x10,x21,#28
- add x20,x20,x2 // h+=X[i]
- eor x16,x16,x25,ror#18
- eor x9,x9,x4,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x21,x22 // a^b, b^c in next round
- eor x16,x16,x25,ror#41 // Sigma1(e)
- eor x10,x10,x21,ror#34
- add x20,x20,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x8,x8,x1,ror#61
- eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
- add x20,x20,x16 // h+=Sigma1(e)
- eor x19,x19,x22 // Maj(a,b,c)
- eor x17,x10,x21,ror#39 // Sigma0(a)
- eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
- add x3,x3,x12
- add x24,x24,x20 // d+=h
- add x20,x20,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x3,x3,x9
- add x20,x20,x17 // h+=Sigma0(a)
- add x3,x3,x8
-.Loop_16_xx:
- ldr x8,[sp,#8]
- str x11,[sp,#0]
- ror x16,x24,#14
- add x27,x27,x19 // h+=K[i]
- ror x10,x5,#1
- and x17,x25,x24
- ror x9,x2,#19
- bic x19,x26,x24
- ror x11,x20,#28
- add x27,x27,x3 // h+=X[i]
- eor x16,x16,x24,ror#18
- eor x10,x10,x5,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x20,x21 // a^b, b^c in next round
- eor x16,x16,x24,ror#41 // Sigma1(e)
- eor x11,x11,x20,ror#34
- add x27,x27,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x9,x9,x2,ror#61
- eor x10,x10,x5,lsr#7 // sigma0(X[i+1])
- add x27,x27,x16 // h+=Sigma1(e)
- eor x28,x28,x21 // Maj(a,b,c)
- eor x17,x11,x20,ror#39 // Sigma0(a)
- eor x9,x9,x2,lsr#6 // sigma1(X[i+14])
- add x4,x4,x13
- add x23,x23,x27 // d+=h
- add x27,x27,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x4,x4,x10
- add x27,x27,x17 // h+=Sigma0(a)
- add x4,x4,x9
- ldr x9,[sp,#16]
- str x12,[sp,#8]
- ror x16,x23,#14
- add x26,x26,x28 // h+=K[i]
- ror x11,x6,#1
- and x17,x24,x23
- ror x10,x3,#19
- bic x28,x25,x23
- ror x12,x27,#28
- add x26,x26,x4 // h+=X[i]
- eor x16,x16,x23,ror#18
- eor x11,x11,x6,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x27,x20 // a^b, b^c in next round
- eor x16,x16,x23,ror#41 // Sigma1(e)
- eor x12,x12,x27,ror#34
- add x26,x26,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x10,x10,x3,ror#61
- eor x11,x11,x6,lsr#7 // sigma0(X[i+1])
- add x26,x26,x16 // h+=Sigma1(e)
- eor x19,x19,x20 // Maj(a,b,c)
- eor x17,x12,x27,ror#39 // Sigma0(a)
- eor x10,x10,x3,lsr#6 // sigma1(X[i+14])
- add x5,x5,x14
- add x22,x22,x26 // d+=h
- add x26,x26,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x5,x5,x11
- add x26,x26,x17 // h+=Sigma0(a)
- add x5,x5,x10
- ldr x10,[sp,#24]
- str x13,[sp,#16]
- ror x16,x22,#14
- add x25,x25,x19 // h+=K[i]
- ror x12,x7,#1
- and x17,x23,x22
- ror x11,x4,#19
- bic x19,x24,x22
- ror x13,x26,#28
- add x25,x25,x5 // h+=X[i]
- eor x16,x16,x22,ror#18
- eor x12,x12,x7,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x26,x27 // a^b, b^c in next round
- eor x16,x16,x22,ror#41 // Sigma1(e)
- eor x13,x13,x26,ror#34
- add x25,x25,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x11,x11,x4,ror#61
- eor x12,x12,x7,lsr#7 // sigma0(X[i+1])
- add x25,x25,x16 // h+=Sigma1(e)
- eor x28,x28,x27 // Maj(a,b,c)
- eor x17,x13,x26,ror#39 // Sigma0(a)
- eor x11,x11,x4,lsr#6 // sigma1(X[i+14])
- add x6,x6,x15
- add x21,x21,x25 // d+=h
- add x25,x25,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x6,x6,x12
- add x25,x25,x17 // h+=Sigma0(a)
- add x6,x6,x11
- ldr x11,[sp,#0]
- str x14,[sp,#24]
- ror x16,x21,#14
- add x24,x24,x28 // h+=K[i]
- ror x13,x8,#1
- and x17,x22,x21
- ror x12,x5,#19
- bic x28,x23,x21
- ror x14,x25,#28
- add x24,x24,x6 // h+=X[i]
- eor x16,x16,x21,ror#18
- eor x13,x13,x8,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x25,x26 // a^b, b^c in next round
- eor x16,x16,x21,ror#41 // Sigma1(e)
- eor x14,x14,x25,ror#34
- add x24,x24,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x12,x12,x5,ror#61
- eor x13,x13,x8,lsr#7 // sigma0(X[i+1])
- add x24,x24,x16 // h+=Sigma1(e)
- eor x19,x19,x26 // Maj(a,b,c)
- eor x17,x14,x25,ror#39 // Sigma0(a)
- eor x12,x12,x5,lsr#6 // sigma1(X[i+14])
- add x7,x7,x0
- add x20,x20,x24 // d+=h
- add x24,x24,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x7,x7,x13
- add x24,x24,x17 // h+=Sigma0(a)
- add x7,x7,x12
- ldr x12,[sp,#8]
- str x15,[sp,#0]
- ror x16,x20,#14
- add x23,x23,x19 // h+=K[i]
- ror x14,x9,#1
- and x17,x21,x20
- ror x13,x6,#19
- bic x19,x22,x20
- ror x15,x24,#28
- add x23,x23,x7 // h+=X[i]
- eor x16,x16,x20,ror#18
- eor x14,x14,x9,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x24,x25 // a^b, b^c in next round
- eor x16,x16,x20,ror#41 // Sigma1(e)
- eor x15,x15,x24,ror#34
- add x23,x23,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x13,x13,x6,ror#61
- eor x14,x14,x9,lsr#7 // sigma0(X[i+1])
- add x23,x23,x16 // h+=Sigma1(e)
- eor x28,x28,x25 // Maj(a,b,c)
- eor x17,x15,x24,ror#39 // Sigma0(a)
- eor x13,x13,x6,lsr#6 // sigma1(X[i+14])
- add x8,x8,x1
- add x27,x27,x23 // d+=h
- add x23,x23,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x8,x8,x14
- add x23,x23,x17 // h+=Sigma0(a)
- add x8,x8,x13
- ldr x13,[sp,#16]
- str x0,[sp,#8]
- ror x16,x27,#14
- add x22,x22,x28 // h+=K[i]
- ror x15,x10,#1
- and x17,x20,x27
- ror x14,x7,#19
- bic x28,x21,x27
- ror x0,x23,#28
- add x22,x22,x8 // h+=X[i]
- eor x16,x16,x27,ror#18
- eor x15,x15,x10,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x23,x24 // a^b, b^c in next round
- eor x16,x16,x27,ror#41 // Sigma1(e)
- eor x0,x0,x23,ror#34
- add x22,x22,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x14,x14,x7,ror#61
- eor x15,x15,x10,lsr#7 // sigma0(X[i+1])
- add x22,x22,x16 // h+=Sigma1(e)
- eor x19,x19,x24 // Maj(a,b,c)
- eor x17,x0,x23,ror#39 // Sigma0(a)
- eor x14,x14,x7,lsr#6 // sigma1(X[i+14])
- add x9,x9,x2
- add x26,x26,x22 // d+=h
- add x22,x22,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x9,x9,x15
- add x22,x22,x17 // h+=Sigma0(a)
- add x9,x9,x14
- ldr x14,[sp,#24]
- str x1,[sp,#16]
- ror x16,x26,#14
- add x21,x21,x19 // h+=K[i]
- ror x0,x11,#1
- and x17,x27,x26
- ror x15,x8,#19
- bic x19,x20,x26
- ror x1,x22,#28
- add x21,x21,x9 // h+=X[i]
- eor x16,x16,x26,ror#18
- eor x0,x0,x11,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x22,x23 // a^b, b^c in next round
- eor x16,x16,x26,ror#41 // Sigma1(e)
- eor x1,x1,x22,ror#34
- add x21,x21,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x15,x15,x8,ror#61
- eor x0,x0,x11,lsr#7 // sigma0(X[i+1])
- add x21,x21,x16 // h+=Sigma1(e)
- eor x28,x28,x23 // Maj(a,b,c)
- eor x17,x1,x22,ror#39 // Sigma0(a)
- eor x15,x15,x8,lsr#6 // sigma1(X[i+14])
- add x10,x10,x3
- add x25,x25,x21 // d+=h
- add x21,x21,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x10,x10,x0
- add x21,x21,x17 // h+=Sigma0(a)
- add x10,x10,x15
- ldr x15,[sp,#0]
- str x2,[sp,#24]
- ror x16,x25,#14
- add x20,x20,x28 // h+=K[i]
- ror x1,x12,#1
- and x17,x26,x25
- ror x0,x9,#19
- bic x28,x27,x25
- ror x2,x21,#28
- add x20,x20,x10 // h+=X[i]
- eor x16,x16,x25,ror#18
- eor x1,x1,x12,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x21,x22 // a^b, b^c in next round
- eor x16,x16,x25,ror#41 // Sigma1(e)
- eor x2,x2,x21,ror#34
- add x20,x20,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x0,x0,x9,ror#61
- eor x1,x1,x12,lsr#7 // sigma0(X[i+1])
- add x20,x20,x16 // h+=Sigma1(e)
- eor x19,x19,x22 // Maj(a,b,c)
- eor x17,x2,x21,ror#39 // Sigma0(a)
- eor x0,x0,x9,lsr#6 // sigma1(X[i+14])
- add x11,x11,x4
- add x24,x24,x20 // d+=h
- add x20,x20,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x11,x11,x1
- add x20,x20,x17 // h+=Sigma0(a)
- add x11,x11,x0
- ldr x0,[sp,#8]
- str x3,[sp,#0]
- ror x16,x24,#14
- add x27,x27,x19 // h+=K[i]
- ror x2,x13,#1
- and x17,x25,x24
- ror x1,x10,#19
- bic x19,x26,x24
- ror x3,x20,#28
- add x27,x27,x11 // h+=X[i]
- eor x16,x16,x24,ror#18
- eor x2,x2,x13,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x20,x21 // a^b, b^c in next round
- eor x16,x16,x24,ror#41 // Sigma1(e)
- eor x3,x3,x20,ror#34
- add x27,x27,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x1,x1,x10,ror#61
- eor x2,x2,x13,lsr#7 // sigma0(X[i+1])
- add x27,x27,x16 // h+=Sigma1(e)
- eor x28,x28,x21 // Maj(a,b,c)
- eor x17,x3,x20,ror#39 // Sigma0(a)
- eor x1,x1,x10,lsr#6 // sigma1(X[i+14])
- add x12,x12,x5
- add x23,x23,x27 // d+=h
- add x27,x27,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x12,x12,x2
- add x27,x27,x17 // h+=Sigma0(a)
- add x12,x12,x1
- ldr x1,[sp,#16]
- str x4,[sp,#8]
- ror x16,x23,#14
- add x26,x26,x28 // h+=K[i]
- ror x3,x14,#1
- and x17,x24,x23
- ror x2,x11,#19
- bic x28,x25,x23
- ror x4,x27,#28
- add x26,x26,x12 // h+=X[i]
- eor x16,x16,x23,ror#18
- eor x3,x3,x14,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x27,x20 // a^b, b^c in next round
- eor x16,x16,x23,ror#41 // Sigma1(e)
- eor x4,x4,x27,ror#34
- add x26,x26,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x2,x2,x11,ror#61
- eor x3,x3,x14,lsr#7 // sigma0(X[i+1])
- add x26,x26,x16 // h+=Sigma1(e)
- eor x19,x19,x20 // Maj(a,b,c)
- eor x17,x4,x27,ror#39 // Sigma0(a)
- eor x2,x2,x11,lsr#6 // sigma1(X[i+14])
- add x13,x13,x6
- add x22,x22,x26 // d+=h
- add x26,x26,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x13,x13,x3
- add x26,x26,x17 // h+=Sigma0(a)
- add x13,x13,x2
- ldr x2,[sp,#24]
- str x5,[sp,#16]
- ror x16,x22,#14
- add x25,x25,x19 // h+=K[i]
- ror x4,x15,#1
- and x17,x23,x22
- ror x3,x12,#19
- bic x19,x24,x22
- ror x5,x26,#28
- add x25,x25,x13 // h+=X[i]
- eor x16,x16,x22,ror#18
- eor x4,x4,x15,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x26,x27 // a^b, b^c in next round
- eor x16,x16,x22,ror#41 // Sigma1(e)
- eor x5,x5,x26,ror#34
- add x25,x25,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x3,x3,x12,ror#61
- eor x4,x4,x15,lsr#7 // sigma0(X[i+1])
- add x25,x25,x16 // h+=Sigma1(e)
- eor x28,x28,x27 // Maj(a,b,c)
- eor x17,x5,x26,ror#39 // Sigma0(a)
- eor x3,x3,x12,lsr#6 // sigma1(X[i+14])
- add x14,x14,x7
- add x21,x21,x25 // d+=h
- add x25,x25,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x14,x14,x4
- add x25,x25,x17 // h+=Sigma0(a)
- add x14,x14,x3
- ldr x3,[sp,#0]
- str x6,[sp,#24]
- ror x16,x21,#14
- add x24,x24,x28 // h+=K[i]
- ror x5,x0,#1
- and x17,x22,x21
- ror x4,x13,#19
- bic x28,x23,x21
- ror x6,x25,#28
- add x24,x24,x14 // h+=X[i]
- eor x16,x16,x21,ror#18
- eor x5,x5,x0,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x25,x26 // a^b, b^c in next round
- eor x16,x16,x21,ror#41 // Sigma1(e)
- eor x6,x6,x25,ror#34
- add x24,x24,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x4,x4,x13,ror#61
- eor x5,x5,x0,lsr#7 // sigma0(X[i+1])
- add x24,x24,x16 // h+=Sigma1(e)
- eor x19,x19,x26 // Maj(a,b,c)
- eor x17,x6,x25,ror#39 // Sigma0(a)
- eor x4,x4,x13,lsr#6 // sigma1(X[i+14])
- add x15,x15,x8
- add x20,x20,x24 // d+=h
- add x24,x24,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x15,x15,x5
- add x24,x24,x17 // h+=Sigma0(a)
- add x15,x15,x4
- ldr x4,[sp,#8]
- str x7,[sp,#0]
- ror x16,x20,#14
- add x23,x23,x19 // h+=K[i]
- ror x6,x1,#1
- and x17,x21,x20
- ror x5,x14,#19
- bic x19,x22,x20
- ror x7,x24,#28
- add x23,x23,x15 // h+=X[i]
- eor x16,x16,x20,ror#18
- eor x6,x6,x1,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x24,x25 // a^b, b^c in next round
- eor x16,x16,x20,ror#41 // Sigma1(e)
- eor x7,x7,x24,ror#34
- add x23,x23,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x5,x5,x14,ror#61
- eor x6,x6,x1,lsr#7 // sigma0(X[i+1])
- add x23,x23,x16 // h+=Sigma1(e)
- eor x28,x28,x25 // Maj(a,b,c)
- eor x17,x7,x24,ror#39 // Sigma0(a)
- eor x5,x5,x14,lsr#6 // sigma1(X[i+14])
- add x0,x0,x9
- add x27,x27,x23 // d+=h
- add x23,x23,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x0,x0,x6
- add x23,x23,x17 // h+=Sigma0(a)
- add x0,x0,x5
- ldr x5,[sp,#16]
- str x8,[sp,#8]
- ror x16,x27,#14
- add x22,x22,x28 // h+=K[i]
- ror x7,x2,#1
- and x17,x20,x27
- ror x6,x15,#19
- bic x28,x21,x27
- ror x8,x23,#28
- add x22,x22,x0 // h+=X[i]
- eor x16,x16,x27,ror#18
- eor x7,x7,x2,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x23,x24 // a^b, b^c in next round
- eor x16,x16,x27,ror#41 // Sigma1(e)
- eor x8,x8,x23,ror#34
- add x22,x22,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x6,x6,x15,ror#61
- eor x7,x7,x2,lsr#7 // sigma0(X[i+1])
- add x22,x22,x16 // h+=Sigma1(e)
- eor x19,x19,x24 // Maj(a,b,c)
- eor x17,x8,x23,ror#39 // Sigma0(a)
- eor x6,x6,x15,lsr#6 // sigma1(X[i+14])
- add x1,x1,x10
- add x26,x26,x22 // d+=h
- add x22,x22,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x1,x1,x7
- add x22,x22,x17 // h+=Sigma0(a)
- add x1,x1,x6
- ldr x6,[sp,#24]
- str x9,[sp,#16]
- ror x16,x26,#14
- add x21,x21,x19 // h+=K[i]
- ror x8,x3,#1
- and x17,x27,x26
- ror x7,x0,#19
- bic x19,x20,x26
- ror x9,x22,#28
- add x21,x21,x1 // h+=X[i]
- eor x16,x16,x26,ror#18
- eor x8,x8,x3,ror#8
- orr x17,x17,x19 // Ch(e,f,g)
- eor x19,x22,x23 // a^b, b^c in next round
- eor x16,x16,x26,ror#41 // Sigma1(e)
- eor x9,x9,x22,ror#34
- add x21,x21,x17 // h+=Ch(e,f,g)
- and x28,x28,x19 // (b^c)&=(a^b)
- eor x7,x7,x0,ror#61
- eor x8,x8,x3,lsr#7 // sigma0(X[i+1])
- add x21,x21,x16 // h+=Sigma1(e)
- eor x28,x28,x23 // Maj(a,b,c)
- eor x17,x9,x22,ror#39 // Sigma0(a)
- eor x7,x7,x0,lsr#6 // sigma1(X[i+14])
- add x2,x2,x11
- add x25,x25,x21 // d+=h
- add x21,x21,x28 // h+=Maj(a,b,c)
- ldr x28,[x30],#8 // *K++, x19 in next round
- add x2,x2,x8
- add x21,x21,x17 // h+=Sigma0(a)
- add x2,x2,x7
- ldr x7,[sp,#0]
- str x10,[sp,#24]
- ror x16,x25,#14
- add x20,x20,x28 // h+=K[i]
- ror x9,x4,#1
- and x17,x26,x25
- ror x8,x1,#19
- bic x28,x27,x25
- ror x10,x21,#28
- add x20,x20,x2 // h+=X[i]
- eor x16,x16,x25,ror#18
- eor x9,x9,x4,ror#8
- orr x17,x17,x28 // Ch(e,f,g)
- eor x28,x21,x22 // a^b, b^c in next round
- eor x16,x16,x25,ror#41 // Sigma1(e)
- eor x10,x10,x21,ror#34
- add x20,x20,x17 // h+=Ch(e,f,g)
- and x19,x19,x28 // (b^c)&=(a^b)
- eor x8,x8,x1,ror#61
- eor x9,x9,x4,lsr#7 // sigma0(X[i+1])
- add x20,x20,x16 // h+=Sigma1(e)
- eor x19,x19,x22 // Maj(a,b,c)
- eor x17,x10,x21,ror#39 // Sigma0(a)
- eor x8,x8,x1,lsr#6 // sigma1(X[i+14])
- add x3,x3,x12
- add x24,x24,x20 // d+=h
- add x20,x20,x19 // h+=Maj(a,b,c)
- ldr x19,[x30],#8 // *K++, x28 in next round
- add x3,x3,x9
- add x20,x20,x17 // h+=Sigma0(a)
- add x3,x3,x8
- cbnz x19,.Loop_16_xx
-
- ldp x0,x2,[x29,#96]
- ldr x1,[x29,#112]
- sub x30,x30,#648 // rewind
-
- ldp x3,x4,[x0]
- ldp x5,x6,[x0,#2*8]
- add x1,x1,#14*8 // advance input pointer
- ldp x7,x8,[x0,#4*8]
- add x20,x20,x3
- ldp x9,x10,[x0,#6*8]
- add x21,x21,x4
- add x22,x22,x5
- add x23,x23,x6
- stp x20,x21,[x0]
- add x24,x24,x7
- add x25,x25,x8
- stp x22,x23,[x0,#2*8]
- add x26,x26,x9
- add x27,x27,x10
- cmp x1,x2
- stp x24,x25,[x0,#4*8]
- stp x26,x27,[x0,#6*8]
- b.ne .Loop
-
- ldp x19,x20,[x29,#16]
- add sp,sp,#4*8
- ldp x21,x22,[x29,#32]
- ldp x23,x24,[x29,#48]
- ldp x25,x26,[x29,#64]
- ldp x27,x28,[x29,#80]
- ldp x29,x30,[sp],#128
- ret
-.size sha512_block_data_order,.-sha512_block_data_order
-
-.align 6
-.type .LK512,%object
-.LK512:
- .quad 0x428a2f98d728ae22,0x7137449123ef65cd
- .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc
- .quad 0x3956c25bf348b538,0x59f111f1b605d019
- .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118
- .quad 0xd807aa98a3030242,0x12835b0145706fbe
- .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2
- .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1
- .quad 0x9bdc06a725c71235,0xc19bf174cf692694
- .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3
- .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65
- .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483
- .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5
- .quad 0x983e5152ee66dfab,0xa831c66d2db43210
- .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4
- .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725
- .quad 0x06ca6351e003826f,0x142929670a0e6e70
- .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926
- .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df
- .quad 0x650a73548baf63de,0x766a0abb3c77b2a8
- .quad 0x81c2c92e47edaee6,0x92722c851482353b
- .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001
- .quad 0xc24b8b70d0f89791,0xc76c51a30654be30
- .quad 0xd192e819d6ef5218,0xd69906245565a910
- .quad 0xf40e35855771202a,0x106aa07032bbd1b8
- .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53
- .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8
- .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb
- .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3
- .quad 0x748f82ee5defb2fc,0x78a5636f43172f60
- .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec
- .quad 0x90befffa23631e28,0xa4506cebde82bde9
- .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b
- .quad 0xca273eceea26619c,0xd186b8c721c0c207
- .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178
- .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6
- .quad 0x113f9804bef90dae,0x1b710b35131c471b
- .quad 0x28db77f523047d84,0x32caab7b40c72493
- .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c
- .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a
- .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817
- .quad 0 // terminator
-.size .LK512,.-.LK512
-#ifndef __KERNEL__
-.align 3
-.LOPENSSL_armcap_P:
-# ifdef __ILP32__
- .long OPENSSL_armcap_P-.
-# else
- .quad OPENSSL_armcap_P-.
-# endif
-#endif
-.asciz "SHA512 block transform for ARMv8, CRYPTOGAMS by <appro@openssl.org>"
-.align 2
-#ifndef __KERNEL__
-.comm OPENSSL_armcap_P,4,4
-#endif
diff --git a/arch/x86/crypto/curve25519-x86_64.c b/arch/x86/crypto/curve25519-x86_64.c
index 6706b6cb1d0f..38caf61cd5b7 100644
--- a/arch/x86/crypto/curve25519-x86_64.c
+++ b/arch/x86/crypto/curve25519-x86_64.c
@@ -1500,7 +1500,7 @@ static int __init curve25519_mod_init(void)
static void __exit curve25519_mod_exit(void)
{
if (IS_REACHABLE(CONFIG_CRYPTO_KPP) &&
- (boot_cpu_has(X86_FEATURE_BMI2) || boot_cpu_has(X86_FEATURE_ADX)))
+ static_branch_likely(&curve25519_use_bmi2_adx))
crypto_unregister_kpp(&curve25519_alg);
}
diff --git a/crypto/af_alg.c b/crypto/af_alg.c
index 18cc82dc4a42..8bd288d2b089 100644
--- a/crypto/af_alg.c
+++ b/crypto/af_alg.c
@@ -411,7 +411,7 @@ int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len)
if (n < 0)
return n;
- npages = (off + n + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ npages = DIV_ROUND_UP(off + n, PAGE_SIZE);
if (WARN_ON(npages == 0))
return -EINVAL;
/* Add one extra for linking */
diff --git a/crypto/algapi.c b/crypto/algapi.c
index fdabf2675b63..43f999dba4dc 100644
--- a/crypto/algapi.c
+++ b/crypto/algapi.c
@@ -868,24 +868,6 @@ const char *crypto_attr_alg_name(struct rtattr *rta)
}
EXPORT_SYMBOL_GPL(crypto_attr_alg_name);
-int crypto_attr_u32(struct rtattr *rta, u32 *num)
-{
- struct crypto_attr_u32 *nu32;
-
- if (!rta)
- return -ENOENT;
- if (RTA_PAYLOAD(rta) < sizeof(*nu32))
- return -EINVAL;
- if (rta->rta_type != CRYPTOA_U32)
- return -EINVAL;
-
- nu32 = RTA_DATA(rta);
- *num = nu32->num;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(crypto_attr_u32);
-
int crypto_inst_setname(struct crypto_instance *inst, const char *name,
struct crypto_alg *alg)
{
diff --git a/crypto/algboss.c b/crypto/algboss.c
index 5ebccbd6b74e..1814d2c5188a 100644
--- a/crypto/algboss.c
+++ b/crypto/algboss.c
@@ -28,16 +28,9 @@ struct cryptomgr_param {
struct crypto_attr_type data;
} type;
- union {
+ struct {
struct rtattr attr;
- struct {
- struct rtattr attr;
- struct crypto_attr_alg data;
- } alg;
- struct {
- struct rtattr attr;
- struct crypto_attr_u32 data;
- } nu32;
+ struct crypto_attr_alg data;
} attrs[CRYPTO_MAX_ATTRS];
char template[CRYPTO_MAX_ALG_NAME];
@@ -104,12 +97,10 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval)
i = 0;
for (;;) {
- int notnum = 0;
-
name = ++p;
for (; isalnum(*p) || *p == '-' || *p == '_'; p++)
- notnum |= !isdigit(*p);
+ ;
if (*p == '(') {
int recursion = 0;
@@ -123,7 +114,6 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval)
break;
}
- notnum = 1;
p++;
}
@@ -131,18 +121,9 @@ static int cryptomgr_schedule_probe(struct crypto_larval *larval)
if (!len)
goto err_free_param;
- if (notnum) {
- param->attrs[i].alg.attr.rta_len =
- sizeof(param->attrs[i].alg);
- param->attrs[i].alg.attr.rta_type = CRYPTOA_ALG;
- memcpy(param->attrs[i].alg.data.name, name, len);
- } else {
- param->attrs[i].nu32.attr.rta_len =
- sizeof(param->attrs[i].nu32);
- param->attrs[i].nu32.attr.rta_type = CRYPTOA_U32;
- param->attrs[i].nu32.data.num =
- simple_strtol(name, NULL, 0);
- }
+ param->attrs[i].attr.rta_len = sizeof(param->attrs[i]);
+ param->attrs[i].attr.rta_type = CRYPTOA_ALG;
+ memcpy(param->attrs[i].data.name, name, len);
param->tb[i + 1] = &param->attrs[i].attr;
i++;
diff --git a/crypto/drbg.c b/crypto/drbg.c
index 1b4587e0ddad..ea85d4a0fe9e 100644
--- a/crypto/drbg.c
+++ b/crypto/drbg.c
@@ -178,16 +178,16 @@ static const struct drbg_core drbg_cores[] = {
.backend_cra_name = "hmac(sha384)",
}, {
.flags = DRBG_HMAC | DRBG_STRENGTH256,
- .statelen = 64, /* block length of cipher */
- .blocklen_bytes = 64,
- .cra_name = "hmac_sha512",
- .backend_cra_name = "hmac(sha512)",
- }, {
- .flags = DRBG_HMAC | DRBG_STRENGTH256,
.statelen = 32, /* block length of cipher */
.blocklen_bytes = 32,
.cra_name = "hmac_sha256",
.backend_cra_name = "hmac(sha256)",
+ }, {
+ .flags = DRBG_HMAC | DRBG_STRENGTH256,
+ .statelen = 64, /* block length of cipher */
+ .blocklen_bytes = 64,
+ .cra_name = "hmac_sha512",
+ .backend_cra_name = "hmac(sha512)",
},
#endif /* CONFIG_CRYPTO_DRBG_HMAC */
};
diff --git a/crypto/ecdh.c b/crypto/ecdh.c
index 04a427b8c956..c6f61c2211dc 100644
--- a/crypto/ecdh.c
+++ b/crypto/ecdh.c
@@ -141,7 +141,7 @@ static struct kpp_alg ecdh_nist_p192 = {
.init = ecdh_nist_p192_init_tfm,
.base = {
.cra_name = "ecdh-nist-p192",
- .cra_driver_name = "ecdh-generic",
+ .cra_driver_name = "ecdh-nist-p192-generic",
.cra_priority = 100,
.cra_module = THIS_MODULE,
.cra_ctxsize = sizeof(struct ecdh_ctx),
@@ -166,7 +166,32 @@ static struct kpp_alg ecdh_nist_p256 = {
.init = ecdh_nist_p256_init_tfm,
.base = {
.cra_name = "ecdh-nist-p256",
- .cra_driver_name = "ecdh-generic",
+ .cra_driver_name = "ecdh-nist-p256-generic",
+ .cra_priority = 100,
+ .cra_module = THIS_MODULE,
+ .cra_ctxsize = sizeof(struct ecdh_ctx),
+ },
+};
+
+static int ecdh_nist_p384_init_tfm(struct crypto_kpp *tfm)
+{
+ struct ecdh_ctx *ctx = ecdh_get_ctx(tfm);
+
+ ctx->curve_id = ECC_CURVE_NIST_P384;
+ ctx->ndigits = ECC_CURVE_NIST_P384_DIGITS;
+
+ return 0;
+}
+
+static struct kpp_alg ecdh_nist_p384 = {
+ .set_secret = ecdh_set_secret,
+ .generate_public_key = ecdh_compute_value,
+ .compute_shared_secret = ecdh_compute_value,
+ .max_size = ecdh_max_size,
+ .init = ecdh_nist_p384_init_tfm,
+ .base = {
+ .cra_name = "ecdh-nist-p384",
+ .cra_driver_name = "ecdh-nist-p384-generic",
.cra_priority = 100,
.cra_module = THIS_MODULE,
.cra_ctxsize = sizeof(struct ecdh_ctx),
@@ -179,10 +204,27 @@ static int ecdh_init(void)
{
int ret;
+ /* NIST p192 will fail to register in FIPS mode */
ret = crypto_register_kpp(&ecdh_nist_p192);
ecdh_nist_p192_registered = ret == 0;
- return crypto_register_kpp(&ecdh_nist_p256);
+ ret = crypto_register_kpp(&ecdh_nist_p256);
+ if (ret)
+ goto nist_p256_error;
+
+ ret = crypto_register_kpp(&ecdh_nist_p384);
+ if (ret)
+ goto nist_p384_error;
+
+ return 0;
+
+nist_p384_error:
+ crypto_unregister_kpp(&ecdh_nist_p256);
+
+nist_p256_error:
+ if (ecdh_nist_p192_registered)
+ crypto_unregister_kpp(&ecdh_nist_p192);
+ return ret;
}
static void ecdh_exit(void)
@@ -190,6 +232,7 @@ static void ecdh_exit(void)
if (ecdh_nist_p192_registered)
crypto_unregister_kpp(&ecdh_nist_p192);
crypto_unregister_kpp(&ecdh_nist_p256);
+ crypto_unregister_kpp(&ecdh_nist_p384);
}
subsys_initcall(ecdh_init);
diff --git a/crypto/internal.h b/crypto/internal.h
index 976ec9dfc76d..f00869af689f 100644
--- a/crypto/internal.h
+++ b/crypto/internal.h
@@ -29,6 +29,18 @@ struct crypto_larval {
u32 mask;
};
+enum {
+ CRYPTOA_UNSPEC,
+ CRYPTOA_ALG,
+ CRYPTOA_TYPE,
+ __CRYPTOA_MAX,
+};
+
+#define CRYPTOA_MAX (__CRYPTOA_MAX - 1)
+
+/* Maximum number of (rtattr) parameters for each template. */
+#define CRYPTO_MAX_ATTRS 32
+
extern struct list_head crypto_alg_list;
extern struct rw_semaphore crypto_alg_sem;
extern struct blocking_notifier_head crypto_chain;
diff --git a/crypto/khazad.c b/crypto/khazad.c
index 14ca7f1631c7..f19339954c89 100644
--- a/crypto/khazad.c
+++ b/crypto/khazad.c
@@ -819,7 +819,7 @@ static void khazad_crypt(const u64 roundKey[KHAZAD_ROUNDS + 1],
T6[(int)(state >> 8) & 0xff] ^
T7[(int)(state ) & 0xff] ^
roundKey[r];
- }
+ }
state = (T0[(int)(state >> 56) ] & 0xff00000000000000ULL) ^
(T1[(int)(state >> 48) & 0xff] & 0x00ff000000000000ULL) ^
diff --git a/crypto/shash.c b/crypto/shash.c
index 2e3433ad9762..0a0a50cb694f 100644
--- a/crypto/shash.c
+++ b/crypto/shash.c
@@ -20,12 +20,24 @@
static const struct crypto_type crypto_shash_type;
-int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
- unsigned int keylen)
+static int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
+ unsigned int keylen)
{
return -ENOSYS;
}
-EXPORT_SYMBOL_GPL(shash_no_setkey);
+
+/*
+ * Check whether an shash algorithm has a setkey function.
+ *
+ * For CFI compatibility, this must not be an inline function. This is because
+ * when CFI is enabled, modules won't get the same address for shash_no_setkey
+ * (if it were exported, which inlining would require) as the core kernel will.
+ */
+bool crypto_shash_alg_has_setkey(struct shash_alg *alg)
+{
+ return alg->setkey != shash_no_setkey;
+}
+EXPORT_SYMBOL_GPL(crypto_shash_alg_has_setkey);
static int shash_setkey_unaligned(struct crypto_shash *tfm, const u8 *key,
unsigned int keylen)
diff --git a/crypto/sm2.c b/crypto/sm2.c
index b21addc3ac06..db8a4a265669 100644
--- a/crypto/sm2.c
+++ b/crypto/sm2.c
@@ -79,10 +79,17 @@ static int sm2_ec_ctx_init(struct mpi_ec_ctx *ec)
goto free;
rc = -ENOMEM;
+
+ ec->Q = mpi_point_new(0);
+ if (!ec->Q)
+ goto free;
+
/* mpi_ec_setup_elliptic_curve */
ec->G = mpi_point_new(0);
- if (!ec->G)
+ if (!ec->G) {
+ mpi_point_release(ec->Q);
goto free;
+ }
mpi_set(ec->G->x, x);
mpi_set(ec->G->y, y);
@@ -91,6 +98,7 @@ static int sm2_ec_ctx_init(struct mpi_ec_ctx *ec)
rc = -EINVAL;
ec->n = mpi_scanval(ecp->n);
if (!ec->n) {
+ mpi_point_release(ec->Q);
mpi_point_release(ec->G);
goto free;
}
@@ -386,27 +394,15 @@ static int sm2_set_pub_key(struct crypto_akcipher *tfm,
MPI a;
int rc;
- ec->Q = mpi_point_new(0);
- if (!ec->Q)
- return -ENOMEM;
-
/* include the uncompressed flag '0x04' */
- rc = -ENOMEM;
a = mpi_read_raw_data(key, keylen);
if (!a)
- goto error;
+ return -ENOMEM;
mpi_normalize(a);
rc = sm2_ecc_os2ec(ec->Q, a);
mpi_free(a);
- if (rc)
- goto error;
-
- return 0;
-error:
- mpi_point_release(ec->Q);
- ec->Q = NULL;
return rc;
}
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index 6b7c158dc508..f8d06da78e4f 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1847,10 +1847,22 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
ret += tcrypt_test("cts(cbc(aes))");
break;
+ case 39:
+ ret += tcrypt_test("xxhash64");
+ break;
+
case 40:
ret += tcrypt_test("rmd160");
break;
+ case 41:
+ ret += tcrypt_test("blake2s-256");
+ break;
+
+ case 42:
+ ret += tcrypt_test("blake2b-512");
+ break;
+
case 43:
ret += tcrypt_test("ecb(seed)");
break;
@@ -2356,10 +2368,22 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
test_hash_speed("sha224", sec, generic_hash_speed_template);
if (mode > 300 && mode < 400) break;
fallthrough;
+ case 314:
+ test_hash_speed("xxhash64", sec, generic_hash_speed_template);
+ if (mode > 300 && mode < 400) break;
+ fallthrough;
case 315:
test_hash_speed("rmd160", sec, generic_hash_speed_template);
if (mode > 300 && mode < 400) break;
fallthrough;
+ case 316:
+ test_hash_speed("blake2s-256", sec, generic_hash_speed_template);
+ if (mode > 300 && mode < 400) break;
+ fallthrough;
+ case 317:
+ test_hash_speed("blake2b-512", sec, generic_hash_speed_template);
+ if (mode > 300 && mode < 400) break;
+ fallthrough;
case 318:
klen = 16;
test_hash_speed("ghash", sec, generic_hash_speed_template);
@@ -2456,10 +2480,22 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
test_ahash_speed("sha224", sec, generic_hash_speed_template);
if (mode > 400 && mode < 500) break;
fallthrough;
+ case 414:
+ test_ahash_speed("xxhash64", sec, generic_hash_speed_template);
+ if (mode > 400 && mode < 500) break;
+ fallthrough;
case 415:
test_ahash_speed("rmd160", sec, generic_hash_speed_template);
if (mode > 400 && mode < 500) break;
fallthrough;
+ case 416:
+ test_ahash_speed("blake2s-256", sec, generic_hash_speed_template);
+ if (mode > 400 && mode < 500) break;
+ fallthrough;
+ case 417:
+ test_ahash_speed("blake2b-512", sec, generic_hash_speed_template);
+ if (mode > 400 && mode < 500) break;
+ fallthrough;
case 418:
test_ahash_speed("sha3-224", sec, generic_hash_speed_template);
if (mode > 400 && mode < 500) break;
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 10c5b3b01ec4..1f7f63e836ae 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -4899,15 +4899,12 @@ static const struct alg_test_desc alg_test_descs[] = {
}
}, {
#endif
-#ifndef CONFIG_CRYPTO_FIPS
.alg = "ecdh-nist-p192",
.test = alg_test_kpp,
- .fips_allowed = 1,
.suite = {
.kpp = __VECS(ecdh_p192_tv_template)
}
}, {
-#endif
.alg = "ecdh-nist-p256",
.test = alg_test_kpp,
.fips_allowed = 1,
@@ -4915,6 +4912,13 @@ static const struct alg_test_desc alg_test_descs[] = {
.kpp = __VECS(ecdh_p256_tv_template)
}
}, {
+ .alg = "ecdh-nist-p384",
+ .test = alg_test_kpp,
+ .fips_allowed = 1,
+ .suite = {
+ .kpp = __VECS(ecdh_p384_tv_template)
+ }
+ }, {
.alg = "ecdsa-nist-p192",
.test = alg_test_akcipher,
.suite = {
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index 34e4a3db3991..96eb7ce9f81b 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -2685,7 +2685,6 @@ static const struct kpp_testvec curve25519_tv_template[] = {
}
};
-#ifndef CONFIG_CRYPTO_FIPS
static const struct kpp_testvec ecdh_p192_tv_template[] = {
{
.secret =
@@ -2719,13 +2718,12 @@ static const struct kpp_testvec ecdh_p192_tv_template[] = {
"\xf4\x57\xcc\x4f\x1f\x4e\x31\xcc"
"\xe3\x40\x60\xc8\x06\x93\xc6\x2e"
"\x99\x80\x81\x28\xaf\xc5\x51\x74",
- .secret_size = 32,
+ .secret_size = 30,
.b_public_size = 48,
.expected_a_public_size = 48,
.expected_ss_size = 24
}
};
-#endif
static const struct kpp_testvec ecdh_p256_tv_template[] = {
{
@@ -2766,7 +2764,7 @@ static const struct kpp_testvec ecdh_p256_tv_template[] = {
"\x9f\x4a\x38\xcc\xc0\x2c\x49\x2f"
"\xb1\x32\xbb\xaf\x22\x61\xda\xcb"
"\x6f\xdb\xa9\xaa\xfc\x77\x81\xf3",
- .secret_size = 40,
+ .secret_size = 38,
.b_public_size = 64,
.expected_a_public_size = 64,
.expected_ss_size = 32
@@ -2804,8 +2802,8 @@ static const struct kpp_testvec ecdh_p256_tv_template[] = {
"\x37\x08\xcc\x40\x5e\x7a\xfd\x6a"
"\x6a\x02\x6e\x41\x87\x68\x38\x77"
"\xfa\xa9\x44\x43\x2d\xef\x09\xdf",
- .secret_size = 8,
- .b_secret_size = 40,
+ .secret_size = 6,
+ .b_secret_size = 38,
.b_public_size = 64,
.expected_a_public_size = 64,
.expected_ss_size = 32,
@@ -2814,6 +2812,67 @@ static const struct kpp_testvec ecdh_p256_tv_template[] = {
};
/*
+ * NIST P384 test vectors from RFC5903
+ */
+static const struct kpp_testvec ecdh_p384_tv_template[] = {
+ {
+ .secret =
+#ifdef __LITTLE_ENDIAN
+ "\x02\x00" /* type */
+ "\x36\x00" /* len */
+ "\x30\x00" /* key_size */
+#else
+ "\x00\x02" /* type */
+ "\x00\x36" /* len */
+ "\x00\x30" /* key_size */
+#endif
+ "\x09\x9F\x3C\x70\x34\xD4\xA2\xC6"
+ "\x99\x88\x4D\x73\xA3\x75\xA6\x7F"
+ "\x76\x24\xEF\x7C\x6B\x3C\x0F\x16"
+ "\x06\x47\xB6\x74\x14\xDC\xE6\x55"
+ "\xE3\x5B\x53\x80\x41\xE6\x49\xEE"
+ "\x3F\xAE\xF8\x96\x78\x3A\xB1\x94",
+ .b_public =
+ "\xE5\x58\xDB\xEF\x53\xEE\xCD\xE3"
+ "\xD3\xFC\xCF\xC1\xAE\xA0\x8A\x89"
+ "\xA9\x87\x47\x5D\x12\xFD\x95\x0D"
+ "\x83\xCF\xA4\x17\x32\xBC\x50\x9D"
+ "\x0D\x1A\xC4\x3A\x03\x36\xDE\xF9"
+ "\x6F\xDA\x41\xD0\x77\x4A\x35\x71"
+ "\xDC\xFB\xEC\x7A\xAC\xF3\x19\x64"
+ "\x72\x16\x9E\x83\x84\x30\x36\x7F"
+ "\x66\xEE\xBE\x3C\x6E\x70\xC4\x16"
+ "\xDD\x5F\x0C\x68\x75\x9D\xD1\xFF"
+ "\xF8\x3F\xA4\x01\x42\x20\x9D\xFF"
+ "\x5E\xAA\xD9\x6D\xB9\xE6\x38\x6C",
+ .expected_a_public =
+ "\x66\x78\x42\xD7\xD1\x80\xAC\x2C"
+ "\xDE\x6F\x74\xF3\x75\x51\xF5\x57"
+ "\x55\xC7\x64\x5C\x20\xEF\x73\xE3"
+ "\x16\x34\xFE\x72\xB4\xC5\x5E\xE6"
+ "\xDE\x3A\xC8\x08\xAC\xB4\xBD\xB4"
+ "\xC8\x87\x32\xAE\xE9\x5F\x41\xAA"
+ "\x94\x82\xED\x1F\xC0\xEE\xB9\xCA"
+ "\xFC\x49\x84\x62\x5C\xCF\xC2\x3F"
+ "\x65\x03\x21\x49\xE0\xE1\x44\xAD"
+ "\xA0\x24\x18\x15\x35\xA0\xF3\x8E"
+ "\xEB\x9F\xCF\xF3\xC2\xC9\x47\xDA"
+ "\xE6\x9B\x4C\x63\x45\x73\xA8\x1C",
+ .expected_ss =
+ "\x11\x18\x73\x31\xC2\x79\x96\x2D"
+ "\x93\xD6\x04\x24\x3F\xD5\x92\xCB"
+ "\x9D\x0A\x92\x6F\x42\x2E\x47\x18"
+ "\x75\x21\x28\x7E\x71\x56\xC5\xC4"
+ "\xD6\x03\x13\x55\x69\xB9\xE9\xD0"
+ "\x9C\xF5\xD4\xA2\x70\xF5\x97\x46",
+ .secret_size = 54,
+ .b_public_size = 96,
+ .expected_a_public_size = 96,
+ .expected_ss_size = 48
+ }
+};
+
+/*
* MD4 test vectors from RFC1320
*/
static const struct hash_testvec md4_tv_template[] = {
diff --git a/crypto/wp512.c b/crypto/wp512.c
index feadc13ccae0..bf79fbb2340f 100644
--- a/crypto/wp512.c
+++ b/crypto/wp512.c
@@ -1066,33 +1066,31 @@ static int wp512_final(struct shash_desc *desc, u8 *out)
{
struct wp512_ctx *wctx = shash_desc_ctx(desc);
int i;
- u8 *buffer = wctx->buffer;
- u8 *bitLength = wctx->bitLength;
- int bufferBits = wctx->bufferBits;
- int bufferPos = wctx->bufferPos;
+ u8 *buffer = wctx->buffer;
+ u8 *bitLength = wctx->bitLength;
+ int bufferBits = wctx->bufferBits;
+ int bufferPos = wctx->bufferPos;
__be64 *digest = (__be64 *)out;
- buffer[bufferPos] |= 0x80U >> (bufferBits & 7);
- bufferPos++;
- if (bufferPos > WP512_BLOCK_SIZE - WP512_LENGTHBYTES) {
- if (bufferPos < WP512_BLOCK_SIZE) {
- memset(&buffer[bufferPos], 0, WP512_BLOCK_SIZE - bufferPos);
- }
- wp512_process_buffer(wctx);
- bufferPos = 0;
- }
- if (bufferPos < WP512_BLOCK_SIZE - WP512_LENGTHBYTES) {
- memset(&buffer[bufferPos], 0,
+ buffer[bufferPos] |= 0x80U >> (bufferBits & 7);
+ bufferPos++;
+ if (bufferPos > WP512_BLOCK_SIZE - WP512_LENGTHBYTES) {
+ if (bufferPos < WP512_BLOCK_SIZE)
+ memset(&buffer[bufferPos], 0, WP512_BLOCK_SIZE - bufferPos);
+ wp512_process_buffer(wctx);
+ bufferPos = 0;
+ }
+ if (bufferPos < WP512_BLOCK_SIZE - WP512_LENGTHBYTES)
+ memset(&buffer[bufferPos], 0,
(WP512_BLOCK_SIZE - WP512_LENGTHBYTES) - bufferPos);
- }
- bufferPos = WP512_BLOCK_SIZE - WP512_LENGTHBYTES;
- memcpy(&buffer[WP512_BLOCK_SIZE - WP512_LENGTHBYTES],
+ bufferPos = WP512_BLOCK_SIZE - WP512_LENGTHBYTES;
+ memcpy(&buffer[WP512_BLOCK_SIZE - WP512_LENGTHBYTES],
bitLength, WP512_LENGTHBYTES);
- wp512_process_buffer(wctx);
+ wp512_process_buffer(wctx);
for (i = 0; i < WP512_DIGEST_SIZE/8; i++)
digest[i] = cpu_to_be64(wctx->hash[i]);
- wctx->bufferBits = bufferBits;
- wctx->bufferPos = bufferPos;
+ wctx->bufferBits = bufferBits;
+ wctx->bufferPos = bufferPos;
return 0;
}
diff --git a/drivers/char/hw_random/Kconfig b/drivers/char/hw_random/Kconfig
index 1fe006f3f12f..c11f12d4ab53 100644
--- a/drivers/char/hw_random/Kconfig
+++ b/drivers/char/hw_random/Kconfig
@@ -165,17 +165,17 @@ config HW_RANDOM_IXP4XX
config HW_RANDOM_OMAP
tristate "OMAP Random Number Generator support"
- depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS || ARCH_MVEBU
+ depends on ARCH_OMAP16XX || ARCH_OMAP2PLUS || ARCH_MVEBU || ARCH_K3
default HW_RANDOM
help
- This driver provides kernel-side support for the Random Number
+ This driver provides kernel-side support for the Random Number
Generator hardware found on OMAP16xx, OMAP2/3/4/5, AM33xx/AM43xx
multimedia processors, and Marvell Armada 7k/8k SoCs.
To compile this driver as a module, choose M here: the
module will be called omap-rng.
- If unsure, say Y.
+ If unsure, say Y.
config HW_RANDOM_OMAP3_ROM
tristate "OMAP3 ROM Random Number Generator support"
@@ -485,13 +485,13 @@ config HW_RANDOM_NPCM
depends on ARCH_NPCM || COMPILE_TEST
default HW_RANDOM
help
- This driver provides support for the Random Number
+ This driver provides support for the Random Number
Generator hardware available in Nuvoton NPCM SoCs.
To compile this driver as a module, choose M here: the
module will be called npcm-rng.
- If unsure, say Y.
+ If unsure, say Y.
config HW_RANDOM_KEYSTONE
depends on ARCH_KEYSTONE || COMPILE_TEST
diff --git a/drivers/char/hw_random/amd-rng.c b/drivers/char/hw_random/amd-rng.c
index 9959c762da2f..d8d4ef5214a1 100644
--- a/drivers/char/hw_random/amd-rng.c
+++ b/drivers/char/hw_random/amd-rng.c
@@ -126,7 +126,7 @@ static struct hwrng amd_rng = {
static int __init mod_init(void)
{
- int err = -ENODEV;
+ int err;
struct pci_dev *pdev = NULL;
const struct pci_device_id *ent;
u32 pmbase;
diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c
index adb3c2bd7783..a3db27916256 100644
--- a/drivers/char/hw_random/core.c
+++ b/drivers/char/hw_random/core.c
@@ -319,11 +319,11 @@ static int enable_best_rng(void)
return ret;
}
-static ssize_t hwrng_attr_current_store(struct device *dev,
- struct device_attribute *attr,
- const char *buf, size_t len)
+static ssize_t rng_current_store(struct device *dev,
+ struct device_attribute *attr,
+ const char *buf, size_t len)
{
- int err = -ENODEV;
+ int err;
struct hwrng *rng, *old_rng, *new_rng;
err = mutex_lock_interruptible(&rng_mutex);
@@ -354,9 +354,9 @@ static ssize_t hwrng_attr_current_store(struct device *dev,
return err ? : len;
}
-static ssize_t hwrng_attr_current_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t rng_current_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
ssize_t ret;
struct hwrng *rng;
@@ -371,9 +371,9 @@ static ssize_t hwrng_attr_current_show(struct device *dev,
return ret;
}
-static ssize_t hwrng_attr_available_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t rng_available_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
int err;
struct hwrng *rng;
@@ -392,22 +392,16 @@ static ssize_t hwrng_attr_available_show(struct device *dev,
return strlen(buf);
}
-static ssize_t hwrng_attr_selected_show(struct device *dev,
- struct device_attribute *attr,
- char *buf)
+static ssize_t rng_selected_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
{
return sysfs_emit(buf, "%d\n", cur_rng_set_by_user);
}
-static DEVICE_ATTR(rng_current, S_IRUGO | S_IWUSR,
- hwrng_attr_current_show,
- hwrng_attr_current_store);
-static DEVICE_ATTR(rng_available, S_IRUGO,
- hwrng_attr_available_show,
- NULL);
-static DEVICE_ATTR(rng_selected, S_IRUGO,
- hwrng_attr_selected_show,
- NULL);
+static DEVICE_ATTR_RW(rng_current);
+static DEVICE_ATTR_RO(rng_available);
+static DEVICE_ATTR_RO(rng_selected);
static struct attribute *rng_dev_attrs[] = {
&dev_attr_rng_current.attr,
diff --git a/drivers/char/hw_random/exynos-trng.c b/drivers/char/hw_random/exynos-trng.c
index 8e1fe3f8dd2d..9cc3d542dd0f 100644
--- a/drivers/char/hw_random/exynos-trng.c
+++ b/drivers/char/hw_random/exynos-trng.c
@@ -132,7 +132,7 @@ static int exynos_trng_probe(struct platform_device *pdev)
return PTR_ERR(trng->mem);
pm_runtime_enable(&pdev->dev);
- ret = pm_runtime_get_sync(&pdev->dev);
+ ret = pm_runtime_resume_and_get(&pdev->dev);
if (ret < 0) {
dev_err(&pdev->dev, "Could not get runtime PM.\n");
goto err_pm_get;
@@ -165,7 +165,7 @@ err_register:
clk_disable_unprepare(trng->clk);
err_clock:
- pm_runtime_put_sync(&pdev->dev);
+ pm_runtime_put_noidle(&pdev->dev);
err_pm_get:
pm_runtime_disable(&pdev->dev);
@@ -196,10 +196,9 @@ static int __maybe_unused exynos_trng_resume(struct device *dev)
{
int ret;
- ret = pm_runtime_get_sync(dev);
+ ret = pm_runtime_resume_and_get(dev);
if (ret < 0) {
dev_err(dev, "Could not get runtime PM.\n");
- pm_runtime_put_noidle(dev);
return ret;
}
diff --git a/drivers/char/hw_random/ks-sa-rng.c b/drivers/char/hw_random/ks-sa-rng.c
index 8f1d47ff9799..2f2f21f1b659 100644
--- a/drivers/char/hw_random/ks-sa-rng.c
+++ b/drivers/char/hw_random/ks-sa-rng.c
@@ -241,10 +241,9 @@ static int ks_sa_rng_probe(struct platform_device *pdev)
}
pm_runtime_enable(dev);
- ret = pm_runtime_get_sync(dev);
+ ret = pm_runtime_resume_and_get(dev);
if (ret < 0) {
dev_err(dev, "Failed to enable SA power-domain\n");
- pm_runtime_put_noidle(dev);
pm_runtime_disable(dev);
return ret;
}
diff --git a/drivers/char/hw_random/omap-rng.c b/drivers/char/hw_random/omap-rng.c
index cede9f159102..00ff96703dd2 100644
--- a/drivers/char/hw_random/omap-rng.c
+++ b/drivers/char/hw_random/omap-rng.c
@@ -454,10 +454,9 @@ static int omap_rng_probe(struct platform_device *pdev)
}
pm_runtime_enable(&pdev->dev);
- ret = pm_runtime_get_sync(&pdev->dev);
+ ret = pm_runtime_resume_and_get(&pdev->dev);
if (ret < 0) {
dev_err(&pdev->dev, "Failed to runtime_get device: %d\n", ret);
- pm_runtime_put_noidle(&pdev->dev);
goto err_ioremap;
}
@@ -543,10 +542,9 @@ static int __maybe_unused omap_rng_resume(struct device *dev)
struct omap_rng_dev *priv = dev_get_drvdata(dev);
int ret;
- ret = pm_runtime_get_sync(dev);
+ ret = pm_runtime_resume_and_get(dev);
if (ret < 0) {
dev_err(dev, "Failed to runtime_get device: %d\n", ret);
- pm_runtime_put_noidle(dev);
return ret;
}
diff --git a/drivers/crypto/Kconfig b/drivers/crypto/Kconfig
index 9a4c275a1335..ebcec460c045 100644
--- a/drivers/crypto/Kconfig
+++ b/drivers/crypto/Kconfig
@@ -266,6 +266,27 @@ config CRYPTO_DEV_NIAGARA2
Group, which can perform encryption, decryption, hashing,
checksumming, and raw copies.
+config CRYPTO_DEV_SL3516
+ tristate "Stormlink SL3516 crypto offloader"
+ depends on HAS_IOMEM
+ select CRYPTO_SKCIPHER
+ select CRYPTO_ENGINE
+ select CRYPTO_ECB
+ select CRYPTO_AES
+ select HW_RANDOM
+ depends on PM
+ help
+ This option allows you to have support for SL3516 crypto offloader.
+
+config CRYPTO_DEV_SL3516_DEBUG
+ bool "Enable SL3516 stats"
+ depends on CRYPTO_DEV_SL3516
+ depends on DEBUG_FS
+ help
+ Say y to enable SL3516 debug stats.
+ This will create /sys/kernel/debug/sl3516/stats for displaying
+ the number of requests per algorithm and other internal stats.
+
config CRYPTO_DEV_HIFN_795X
tristate "Driver HIFN 795x crypto accelerator chips"
select CRYPTO_LIB_DES
@@ -325,6 +346,11 @@ config CRYPTO_DEV_TALITOS2
config CRYPTO_DEV_IXP4XX
tristate "Driver for IXP4xx crypto hardware acceleration"
depends on ARCH_IXP4XX && IXP4XX_QMGR && IXP4XX_NPE
+ select CRYPTO_AES
+ select CRYPTO_DES
+ select CRYPTO_ECB
+ select CRYPTO_CBC
+ select CRYPTO_CTR
select CRYPTO_LIB_DES
select CRYPTO_AEAD
select CRYPTO_AUTHENC
@@ -627,6 +653,12 @@ config CRYPTO_DEV_QCE_SHA
select CRYPTO_SHA1
select CRYPTO_SHA256
+config CRYPTO_DEV_QCE_AEAD
+ bool
+ depends on CRYPTO_DEV_QCE
+ select CRYPTO_AUTHENC
+ select CRYPTO_LIB_DES
+
choice
prompt "Algorithms enabled for QCE acceleration"
default CRYPTO_DEV_QCE_ENABLE_ALL
@@ -647,6 +679,7 @@ choice
bool "All supported algorithms"
select CRYPTO_DEV_QCE_SKCIPHER
select CRYPTO_DEV_QCE_SHA
+ select CRYPTO_DEV_QCE_AEAD
help
Enable all supported algorithms:
- AES (CBC, CTR, ECB, XTS)
@@ -672,6 +705,14 @@ choice
- SHA1, HMAC-SHA1
- SHA256, HMAC-SHA256
+ config CRYPTO_DEV_QCE_ENABLE_AEAD
+ bool "AEAD algorithms only"
+ select CRYPTO_DEV_QCE_AEAD
+ help
+ Enable AEAD algorithms only:
+ - authenc()
+ - ccm(aes)
+ - rfc4309(ccm(aes))
endchoice
config CRYPTO_DEV_QCE_SW_MAX_LEN
diff --git a/drivers/crypto/Makefile b/drivers/crypto/Makefile
index fa22cb19e242..1fe5120eb966 100644
--- a/drivers/crypto/Makefile
+++ b/drivers/crypto/Makefile
@@ -38,6 +38,7 @@ obj-$(CONFIG_CRYPTO_DEV_ROCKCHIP) += rockchip/
obj-$(CONFIG_CRYPTO_DEV_S5P) += s5p-sss.o
obj-$(CONFIG_CRYPTO_DEV_SA2UL) += sa2ul.o
obj-$(CONFIG_CRYPTO_DEV_SAHARA) += sahara.o
+obj-$(CONFIG_CRYPTO_DEV_SL3516) += gemini/
obj-$(CONFIG_ARCH_STM32) += stm32/
obj-$(CONFIG_CRYPTO_DEV_TALITOS) += talitos.o
obj-$(CONFIG_CRYPTO_DEV_UX500) += ux500/
diff --git a/drivers/crypto/cavium/cpt/cptpf_main.c b/drivers/crypto/cavium/cpt/cptpf_main.c
index 06ee42e8a245..8c32d0eb8fcf 100644
--- a/drivers/crypto/cavium/cpt/cptpf_main.c
+++ b/drivers/crypto/cavium/cpt/cptpf_main.c
@@ -401,7 +401,7 @@ static void cpt_disable_all_cores(struct cpt_device *cpt)
cpt_write_csr64(cpt->reg_base, CPTX_PF_EXE_CTL(0), 0);
}
-/**
+/*
* Ensure all cores are disengaged from all groups by
* calling cpt_disable_all_cores() before calling this
* function.
diff --git a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
index 4fe7898c8561..153004bdfb5c 100644
--- a/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
+++ b/drivers/crypto/cavium/cpt/cptvf_reqmanager.c
@@ -9,8 +9,8 @@
/**
* get_free_pending_entry - get free entry from pending queue
- * @param pqinfo: pending_qinfo structure
- * @param qno: queue number
+ * @q: pending queue
+ * @qlen: queue length
*/
static struct pending_entry *get_free_pending_entry(struct pending_queue *q,
int qlen)
@@ -244,11 +244,7 @@ static int send_cpt_command(struct cpt_vf *cptvf, union cpt_inst_s *cmd,
memcpy(ent, (void *)cmd, qinfo->cmd_size);
if (++queue->idx >= queue->qhead->size / 64) {
- struct hlist_node *node;
-
- hlist_for_each(node, &queue->chead) {
- chunk = hlist_entry(node, struct command_chunk,
- nextchunk);
+ hlist_for_each_entry(chunk, &queue->chead, nextchunk) {
if (chunk == queue->qhead) {
continue;
} else {
diff --git a/drivers/crypto/cavium/nitrox/nitrox_isr.c b/drivers/crypto/cavium/nitrox/nitrox_isr.c
index c288c4b51783..f19e520da6d0 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_isr.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_isr.c
@@ -307,6 +307,10 @@ int nitrox_register_interrupts(struct nitrox_device *ndev)
* Entry 192: NPS_CORE_INT_ACTIVE
*/
nr_vecs = pci_msix_vec_count(pdev);
+ if (nr_vecs < 0) {
+ dev_err(DEV(ndev), "Error in getting vec count %d\n", nr_vecs);
+ return nr_vecs;
+ }
/* Enable MSI-X */
ret = pci_alloc_irq_vectors(pdev, nr_vecs, nr_vecs, PCI_IRQ_MSIX);
diff --git a/drivers/crypto/cavium/nitrox/nitrox_main.c b/drivers/crypto/cavium/nitrox/nitrox_main.c
index d385daf2c71c..96bc7b5c6532 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_main.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_main.c
@@ -35,7 +35,7 @@ static LIST_HEAD(ndevlist);
static DEFINE_MUTEX(devlist_lock);
static unsigned int num_devices;
-/**
+/*
* nitrox_pci_tbl - PCI Device ID Table
*/
static const struct pci_device_id nitrox_pci_tbl[] = {
@@ -65,7 +65,7 @@ struct ucode {
u64 code[];
};
-/**
+/*
* write_to_ucd_unit - Write Firmware to NITROX UCD unit
*/
static void write_to_ucd_unit(struct nitrox_device *ndev, u32 ucode_size,
@@ -424,8 +424,7 @@ static int nitrox_probe(struct pci_dev *pdev,
err = nitrox_device_flr(pdev);
if (err) {
dev_err(&pdev->dev, "FLR failed\n");
- pci_disable_device(pdev);
- return err;
+ goto flr_fail;
}
if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
@@ -434,16 +433,13 @@ static int nitrox_probe(struct pci_dev *pdev,
err = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
if (err) {
dev_err(&pdev->dev, "DMA configuration failed\n");
- pci_disable_device(pdev);
- return err;
+ goto flr_fail;
}
}
err = pci_request_mem_regions(pdev, nitrox_driver_name);
- if (err) {
- pci_disable_device(pdev);
- return err;
- }
+ if (err)
+ goto flr_fail;
pci_set_master(pdev);
ndev = kzalloc(sizeof(*ndev), GFP_KERNEL);
@@ -479,7 +475,7 @@ static int nitrox_probe(struct pci_dev *pdev,
err = nitrox_pf_sw_init(ndev);
if (err)
- goto ioremap_err;
+ goto pf_sw_fail;
err = nitrox_pf_hw_init(ndev);
if (err)
@@ -509,12 +505,15 @@ crypto_fail:
smp_mb__after_atomic();
pf_hw_fail:
nitrox_pf_sw_cleanup(ndev);
+pf_sw_fail:
+ iounmap(ndev->bar_addr);
ioremap_err:
nitrox_remove_from_devlist(ndev);
kfree(ndev);
pci_set_drvdata(pdev, NULL);
ndev_fail:
pci_release_mem_regions(pdev);
+flr_fail:
pci_disable_device(pdev);
return err;
}
diff --git a/drivers/crypto/cavium/nitrox/nitrox_mbx.c b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
index c1af9d4fca6e..2e9c0d214363 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_mbx.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_mbx.c
@@ -8,7 +8,7 @@
#define RING_TO_VFNO(_x, _y) ((_x) / (_y))
-/**
+/*
* mbx_msg_type - Mailbox message types
*/
enum mbx_msg_type {
@@ -18,7 +18,7 @@ enum mbx_msg_type {
MBX_MSG_TYPE_NACK,
};
-/**
+/*
* mbx_msg_opcode - Mailbox message opcodes
*/
enum mbx_msg_opcode {
diff --git a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
index df95ba26b414..55c18da4a500 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_reqmgr.c
@@ -19,7 +19,7 @@
#define REQ_BACKLOG 2
#define REQ_POSTED 3
-/**
+/*
* Response codes from SE microcode
* 0x00 - Success
* Completion with no error
@@ -159,7 +159,7 @@ static int dma_map_inbufs(struct nitrox_softreq *sr,
struct se_crypto_request *req)
{
struct device *dev = DEV(sr->ndev);
- struct scatterlist *sg = req->src;
+ struct scatterlist *sg;
int i, nents, ret = 0;
nents = dma_map_sg(dev, req->src, sg_nents(req->src),
@@ -279,6 +279,7 @@ static inline bool cmdq_full(struct nitrox_cmdq *cmdq, int qlen)
/**
* post_se_instr - Post SE instruction to Packet Input ring
* @sr: Request structure
+ * @cmdq: Command queue structure
*
* Returns 0 if successful or a negative error code,
* if no space in ring.
@@ -369,9 +370,11 @@ static int nitrox_enqueue_request(struct nitrox_softreq *sr)
}
/**
- * nitrox_se_request - Send request to SE core
+ * nitrox_process_se_request - Send request to SE core
* @ndev: NITROX device
* @req: Crypto request
+ * @callback: Completion callback
+ * @cb_arg: Completion callback arguments
*
* Returns 0 on success, or a negative error code.
*/
@@ -526,9 +529,8 @@ static bool sr_completed(struct nitrox_softreq *sr)
}
/**
- * process_request_list - process completed requests
- * @ndev: N5 device
- * @qno: queue to operate
+ * process_response_list - process completed requests
+ * @cmdq: Command queue structure
*
* Returns the number of responses processed.
*/
@@ -578,7 +580,7 @@ static void process_response_list(struct nitrox_cmdq *cmdq)
}
}
-/**
+/*
* pkt_slc_resp_tasklet - post processing of SE responses
*/
void pkt_slc_resp_tasklet(unsigned long data)
diff --git a/drivers/crypto/cavium/nitrox/nitrox_skcipher.c b/drivers/crypto/cavium/nitrox/nitrox_skcipher.c
index a553ac65f324..248b4fff1c72 100644
--- a/drivers/crypto/cavium/nitrox/nitrox_skcipher.c
+++ b/drivers/crypto/cavium/nitrox/nitrox_skcipher.c
@@ -20,7 +20,7 @@ struct nitrox_cipher {
enum flexi_cipher value;
};
-/**
+/*
* supported cipher list
*/
static const struct nitrox_cipher flexi_cipher_table[] = {
diff --git a/drivers/crypto/ccp/ccp-dev.c b/drivers/crypto/ccp/ccp-dev.c
index 6777582aa1ce..9ce4b68e9c48 100644
--- a/drivers/crypto/ccp/ccp-dev.c
+++ b/drivers/crypto/ccp/ccp-dev.c
@@ -470,7 +470,7 @@ int ccp_cmd_queue_thread(void *data)
/**
* ccp_alloc_struct - allocate and initialize the ccp_device struct
*
- * @dev: device struct of the CCP
+ * @sp: sp_device struct of the CCP
*/
struct ccp_device *ccp_alloc_struct(struct sp_device *sp)
{
diff --git a/drivers/crypto/ccp/ccp-dmaengine.c b/drivers/crypto/ccp/ccp-dmaengine.c
index 0770a83bf1a5..d718db224be4 100644
--- a/drivers/crypto/ccp/ccp-dmaengine.c
+++ b/drivers/crypto/ccp/ccp-dmaengine.c
@@ -307,8 +307,7 @@ static dma_cookie_t ccp_tx_submit(struct dma_async_tx_descriptor *tx_desc)
spin_lock_irqsave(&chan->lock, flags);
cookie = dma_cookie_assign(tx_desc);
- list_del(&desc->entry);
- list_add_tail(&desc->entry, &chan->pending);
+ list_move_tail(&desc->entry, &chan->pending);
spin_unlock_irqrestore(&chan->lock, flags);
diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c
index 3506b2050fb8..91808402e0bf 100644
--- a/drivers/crypto/ccp/sev-dev.c
+++ b/drivers/crypto/ccp/sev-dev.c
@@ -43,6 +43,10 @@ static int psp_probe_timeout = 5;
module_param(psp_probe_timeout, int, 0644);
MODULE_PARM_DESC(psp_probe_timeout, " default timeout value, in seconds, during PSP device probe");
+MODULE_FIRMWARE("amd/amd_sev_fam17h_model0xh.sbin"); /* 1st gen EPYC */
+MODULE_FIRMWARE("amd/amd_sev_fam17h_model3xh.sbin"); /* 2nd gen EPYC */
+MODULE_FIRMWARE("amd/amd_sev_fam19h_model0xh.sbin"); /* 3rd gen EPYC */
+
static bool psp_dead;
static int psp_timeout;
diff --git a/drivers/crypto/ccp/sp-pci.c b/drivers/crypto/ccp/sp-pci.c
index f468594ef8af..6fb6ba35f89d 100644
--- a/drivers/crypto/ccp/sp-pci.c
+++ b/drivers/crypto/ccp/sp-pci.c
@@ -222,7 +222,7 @@ static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
if (ret) {
dev_err(dev, "dma_set_mask_and_coherent failed (%d)\n",
ret);
- goto e_err;
+ goto free_irqs;
}
}
@@ -230,10 +230,12 @@ static int sp_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
ret = sp_init(sp);
if (ret)
- goto e_err;
+ goto free_irqs;
return 0;
+free_irqs:
+ sp_free_irqs(sp);
e_err:
dev_notice(dev, "initialization failed\n");
return ret;
diff --git a/drivers/crypto/gemini/Makefile b/drivers/crypto/gemini/Makefile
new file mode 100644
index 000000000000..c73c8b69260d
--- /dev/null
+++ b/drivers/crypto/gemini/Makefile
@@ -0,0 +1,2 @@
+obj-$(CONFIG_CRYPTO_DEV_SL3516) += sl3516-ce.o
+sl3516-ce-y += sl3516-ce-core.o sl3516-ce-cipher.o sl3516-ce-rng.o
diff --git a/drivers/crypto/gemini/sl3516-ce-cipher.c b/drivers/crypto/gemini/sl3516-ce-cipher.c
new file mode 100644
index 000000000000..b41c2f5fc495
--- /dev/null
+++ b/drivers/crypto/gemini/sl3516-ce-cipher.c
@@ -0,0 +1,387 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * sl3516-ce-cipher.c - hardware cryptographic offloader for Storlink SL3516 SoC
+ *
+ * Copyright (C) 2021 Corentin LABBE <clabbe@baylibre.com>
+ *
+ * This file adds support for AES cipher with 128,192,256 bits keysize in
+ * ECB mode.
+ */
+
+#include <linux/bottom_half.h>
+#include <linux/crypto.h>
+#include <linux/dma-mapping.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/pm_runtime.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/internal/skcipher.h>
+#include "sl3516-ce.h"
+
+/*
+ * sl3516_ce_sg_unsuitable - check one scatterlist entry against the 16-byte
+ * length and offset constraints, bumping the matching fallback counter.
+ *
+ * Returns true when the entry cannot be given to the CE.
+ */
+static bool sl3516_ce_sg_unsuitable(struct sl3516_ce_dev *ce,
+				    struct scatterlist *sg)
+{
+	if ((sg->length % 16) != 0 || (sg_dma_len(sg) % 16) != 0) {
+		ce->fallback_mod16++;
+		return true;
+	}
+	if (!IS_ALIGNED(sg->offset, 16)) {
+		ce->fallback_align16++;
+		return true;
+	}
+	return false;
+}
+
+/*
+ * sl3516_ce_need_fallback - check if a request can be handled by the CE
+ *
+ * Returns true when the request must go to the software fallback: empty or
+ * non multiple-of-16 length, too many SG entries for the descriptor rings,
+ * an entry whose length/offset is not a multiple of 16, or source and
+ * destination lists that do not have the same layout.
+ */
+static bool sl3516_ce_need_fallback(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sl3516_ce_dev *ce = op->ce;
+	struct scatterlist *src, *dst;
+
+	if (areq->cryptlen == 0 || areq->cryptlen % 16) {
+		ce->fallback_mod16++;
+		return true;
+	}
+
+	/*
+	 * check if we have enough descriptors for TX
+	 * Note: TX need one control desc for each SG
+	 */
+	if (sg_nents(areq->src) > MAXDESC / 2) {
+		ce->fallback_sg_count_tx++;
+		return true;
+	}
+	/* check if we have enough descriptors for RX */
+	if (sg_nents(areq->dst) > MAXDESC) {
+		ce->fallback_sg_count_rx++;
+		return true;
+	}
+
+	for (src = areq->src; src; src = sg_next(src)) {
+		if (sl3516_ce_sg_unsuitable(ce, src))
+			return true;
+	}
+	for (dst = areq->dst; dst; dst = sg_next(dst)) {
+		if (sl3516_ce_sg_unsuitable(ce, dst))
+			return true;
+	}
+
+	/* need same numbers of SG (with same length) for source and destination */
+	src = areq->src;
+	dst = areq->dst;
+	for (; src && dst; src = sg_next(src), dst = sg_next(dst)) {
+		if (src->length != dst->length) {
+			ce->fallback_not_same_len++;
+			return true;
+		}
+	}
+
+	/* one list being longer than the other also forces the fallback */
+	return src || dst;
+}
+
+/*
+ * sl3516_ce_cipher_fallback - hand a request over to the fallback tfm
+ *
+ * Counts the fallback in the algorithm template, mirrors the caller's
+ * callback, buffers, length and IV into the sub-request embedded in the
+ * request context, and runs it in the direction recorded in rctx->op_dir.
+ *
+ * Returns whatever the fallback encrypt/decrypt call returns.
+ */
+static int sl3516_ce_cipher_fallback(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sl3516_ce_cipher_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct sl3516_ce_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct sl3516_ce_alg_template *tmpl;
+
+	tmpl = container_of(alg, struct sl3516_ce_alg_template, alg.skcipher);
+	tmpl->stat_fb++;
+
+	skcipher_request_set_tfm(&rctx->fallback_req, tctx->fallback_tfm);
+	skcipher_request_set_callback(&rctx->fallback_req, areq->base.flags,
+				      areq->base.complete, areq->base.data);
+	skcipher_request_set_crypt(&rctx->fallback_req, areq->src, areq->dst,
+				   areq->cryptlen, areq->iv);
+
+	if (rctx->op_dir == CE_DECRYPTION)
+		return crypto_skcipher_decrypt(&rctx->fallback_req);
+
+	return crypto_skcipher_encrypt(&rctx->fallback_req);
+}
+
+/*
+ * sl3516_ce_cipher - run one skcipher request on the crypto engine
+ * @areq: request already vetted by sl3516_ce_need_fallback()
+ *
+ * DMA-maps the source and destination scatterlists, records each mapped
+ * entry's address/length in the request context, fills the shared control
+ * packet (ce->pctrl) and calls sl3516_ce_run_task().
+ *
+ * Every error path undoes exactly the mappings that succeeded: a failed
+ * dma_map_sg() leaves nothing to unmap, while a mapping that succeeded but
+ * yields too many entries is unmapped before returning.
+ *
+ * Returns 0 on success, -EINVAL on mapping/length problems, or the error
+ * returned by sl3516_ce_run_task().
+ */
+static int sl3516_ce_cipher(struct skcipher_request *areq)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sl3516_ce_dev *ce = op->ce;
+	struct sl3516_ce_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+	struct skcipher_alg *alg = crypto_skcipher_alg(tfm);
+	struct sl3516_ce_alg_template *algt;
+	struct scatterlist *sg;
+	unsigned int todo, len;
+	struct pkt_control_ecb *ecb;
+	/* in-place requests map the single list once, bidirectionally */
+	bool inplace = areq->src == areq->dst;
+	int nr_sgs = 0;
+	int nr_sgd = 0;
+	int err = 0;
+	int i;
+
+	algt = container_of(alg, struct sl3516_ce_alg_template, alg.skcipher);
+
+	dev_dbg(ce->dev, "%s %s %u %x IV(%p %u) key=%u\n", __func__,
+		crypto_tfm_alg_name(areq->base.tfm),
+		areq->cryptlen,
+		rctx->op_dir, areq->iv, crypto_skcipher_ivsize(tfm),
+		op->keylen);
+
+	algt->stat_req++;
+
+	nr_sgs = dma_map_sg(ce->dev, areq->src, sg_nents(areq->src),
+			    inplace ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
+	if (nr_sgs <= 0 || nr_sgs > MAXDESC / 2) {
+		dev_err(ce->dev, "Invalid sg number %d\n", nr_sgs);
+		err = -EINVAL;
+		/* mapping failed: nothing to undo */
+		if (nr_sgs <= 0)
+			goto theend;
+		/* mapping succeeded but is too large: release it */
+		goto theend_src;
+	}
+
+	if (inplace) {
+		nr_sgd = nr_sgs;
+	} else {
+		nr_sgd = dma_map_sg(ce->dev, areq->dst, sg_nents(areq->dst),
+				    DMA_FROM_DEVICE);
+		if (nr_sgd <= 0 || nr_sgd > MAXDESC) {
+			dev_err(ce->dev, "Invalid sg number %d\n", nr_sgd);
+			err = -EINVAL;
+			/* dst not mapped: only the source must be released */
+			if (nr_sgd <= 0)
+				goto theend_src;
+			goto theend_sgs;
+		}
+	}
+
+	/* collect the DMA address/length of each non-empty source entry */
+	len = areq->cryptlen;
+	i = 0;
+	for (sg = areq->src; i < nr_sgs && sg && len; sg = sg_next(sg)) {
+		if (sg_dma_len(sg) == 0)
+			continue;
+		rctx->t_src[i].addr = sg_dma_address(sg);
+		todo = min(len, sg_dma_len(sg));
+		rctx->t_src[i].len = todo;
+		dev_dbg(ce->dev, "%s total=%u SGS(%d %u off=%d) todo=%u\n", __func__,
+			areq->cryptlen, i, rctx->t_src[i].len, sg->offset, todo);
+		len -= todo;
+		i++;
+	}
+	if (len > 0) {
+		dev_err(ce->dev, "remaining len %u/%u nr_sgs=%d\n", len, areq->cryptlen, nr_sgs);
+		err = -EINVAL;
+		goto theend_sgs;
+	}
+
+	/* same walk for the destination entries */
+	len = areq->cryptlen;
+	i = 0;
+	for (sg = areq->dst; i < nr_sgd && sg && len; sg = sg_next(sg)) {
+		if (sg_dma_len(sg) == 0)
+			continue;
+		rctx->t_dst[i].addr = sg_dma_address(sg);
+		todo = min(len, sg_dma_len(sg));
+		rctx->t_dst[i].len = todo;
+		dev_dbg(ce->dev, "%s total=%u SGD(%d %u off=%d) todo=%u\n", __func__,
+			areq->cryptlen, i, rctx->t_dst[i].len, sg->offset, todo);
+		len -= todo;
+		i++;
+	}
+	if (len > 0) {
+		dev_err(ce->dev, "remaining len %u\n", len);
+		err = -EINVAL;
+		goto theend_sgs;
+	}
+
+	switch (algt->mode) {
+	case ECB_AES:
+		rctx->pctrllen = sizeof(struct pkt_control_ecb);
+		ecb = (struct pkt_control_ecb *)ce->pctrl;
+
+		rctx->tqflag = TQ0_TYPE_CTRL;
+		rctx->tqflag |= TQ1_CIPHER;
+		ecb->control.op_mode = rctx->op_dir;
+		ecb->control.cipher_algorithm = ECB_AES;
+		ecb->cipher.header_len = 0;
+		ecb->cipher.algorithm_len = areq->cryptlen;
+		cpu_to_be32_array((__be32 *)ecb->key, (u32 *)op->key, op->keylen / 4);
+		rctx->h = &ecb->cipher;
+
+		rctx->tqflag |= TQ4_KEY0;
+		rctx->tqflag |= TQ5_KEY4;
+		rctx->tqflag |= TQ6_KEY6;
+		ecb->control.aesnk = op->keylen / 4;
+		break;
+	default:
+		/* rctx->h would stay unset and be dereferenced by run_task */
+		err = -EINVAL;
+		goto theend_sgs;
+	}
+
+	rctx->nr_sgs = nr_sgs;
+	rctx->nr_sgd = nr_sgd;
+	err = sl3516_ce_run_task(ce, rctx, crypto_tfm_alg_name(areq->base.tfm));
+
+theend_sgs:
+	if (!inplace)
+		dma_unmap_sg(ce->dev, areq->dst, sg_nents(areq->dst),
+			     DMA_FROM_DEVICE);
+theend_src:
+	dma_unmap_sg(ce->dev, areq->src, sg_nents(areq->src),
+		     inplace ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
+theend:
+	return err;
+}
+
+/*
+ * sl3516_ce_handle_cipher_request - crypto engine do_one_request callback
+ * @engine: engine that dequeued the request
+ * @areq: the async request, embedded in a struct skcipher_request
+ *
+ * Runs the request on the hardware and reports the result to the engine.
+ * The completion is finalized with bottom halves disabled because the
+ * request's completion callback may expect to run in that context.
+ *
+ * Always returns 0; the cipher result is passed through the finalize call.
+ */
+static int sl3516_ce_handle_cipher_request(struct crypto_engine *engine, void *areq)
+{
+	int err;
+	struct skcipher_request *breq = container_of(areq, struct skcipher_request, base);
+
+	err = sl3516_ce_cipher(breq);
+	local_bh_disable();
+	crypto_finalize_skcipher_request(engine, breq, err);
+	local_bh_enable();
+
+	return 0;
+}
+
+/*
+ * sl3516_ce_skcrypt - common body of the encrypt/decrypt entry points
+ * @areq: the request
+ * @op_dir: CE_ENCRYPTION or CE_DECRYPTION
+ *
+ * Clears the request context, records the direction, then either runs the
+ * software fallback or queues the request on the crypto engine.
+ */
+static int sl3516_ce_skcrypt(struct skcipher_request *areq, u32 op_dir)
+{
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct sl3516_ce_cipher_tfm_ctx *tctx = crypto_skcipher_ctx(tfm);
+	struct sl3516_ce_cipher_req_ctx *rctx = skcipher_request_ctx(areq);
+
+	memset(rctx, 0, sizeof(struct sl3516_ce_cipher_req_ctx));
+	rctx->op_dir = op_dir;
+
+	if (sl3516_ce_need_fallback(areq))
+		return sl3516_ce_cipher_fallback(areq);
+
+	return crypto_transfer_skcipher_request_to_engine(tctx->ce->engine, areq);
+}
+
+/* sl3516_ce_skdecrypt - skcipher .decrypt entry point */
+int sl3516_ce_skdecrypt(struct skcipher_request *areq)
+{
+	return sl3516_ce_skcrypt(areq, CE_DECRYPTION);
+}
+
+/* sl3516_ce_skencrypt - skcipher .encrypt entry point */
+int sl3516_ce_skencrypt(struct skcipher_request *areq)
+{
+	return sl3516_ce_skcrypt(areq, CE_ENCRYPTION);
+}
+
+/*
+ * sl3516_ce_cipher_init - per-tfm initialisation (cra_init)
+ *
+ * Zeroes the tfm context, binds it to the device stored in the algorithm
+ * template, allocates the fallback skcipher, sizes the request context so
+ * it can hold the fallback's request, installs the engine callback and
+ * takes a runtime PM reference on the device.
+ *
+ * Returns 0 on success or a negative error code; on runtime PM failure the
+ * fallback tfm is released again.
+ */
+int sl3516_ce_cipher_init(struct crypto_tfm *tfm)
+{
+	struct sl3516_ce_cipher_tfm_ctx *op = crypto_tfm_ctx(tfm);
+	struct crypto_skcipher *sktfm = __crypto_skcipher_cast(tfm);
+	struct skcipher_alg *alg = crypto_skcipher_alg(sktfm);
+	const char *name = crypto_tfm_alg_name(tfm);
+	struct sl3516_ce_alg_template *algt;
+	int err;
+
+	memset(op, 0, sizeof(struct sl3516_ce_cipher_tfm_ctx));
+
+	algt = container_of(alg, struct sl3516_ce_alg_template, alg.skcipher);
+	op->ce = algt->ce;
+
+	op->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(op->fallback_tfm)) {
+		dev_err(op->ce->dev, "ERROR: Cannot allocate fallback for %s %ld\n",
+			name, PTR_ERR(op->fallback_tfm));
+		return PTR_ERR(op->fallback_tfm);
+	}
+
+	/* our context is followed by the fallback's own request context */
+	sktfm->reqsize = sizeof(struct sl3516_ce_cipher_req_ctx) +
+			 crypto_skcipher_reqsize(op->fallback_tfm);
+
+	dev_info(op->ce->dev, "Fallback for %s is %s\n",
+		 crypto_tfm_alg_driver_name(&sktfm->base),
+		 crypto_tfm_alg_driver_name(crypto_skcipher_tfm(op->fallback_tfm)));
+
+	op->enginectx.op.do_one_request = sl3516_ce_handle_cipher_request;
+	op->enginectx.op.prepare_request = NULL;
+	op->enginectx.op.unprepare_request = NULL;
+
+	err = pm_runtime_get_sync(op->ce->dev);
+	if (err >= 0)
+		return 0;
+
+	/* get_sync keeps the usage count raised even when it fails */
+	pm_runtime_put_noidle(op->ce->dev);
+	crypto_free_skcipher(op->fallback_tfm);
+	return err;
+}
+
+/*
+ * sl3516_ce_cipher_exit - per-tfm teardown (cra_exit)
+ *
+ * Wipes and frees the stored key, releases the fallback tfm and drops the
+ * runtime PM reference taken in sl3516_ce_cipher_init().
+ */
+void sl3516_ce_cipher_exit(struct crypto_tfm *tfm)
+{
+	struct sl3516_ce_cipher_tfm_ctx *tctx = crypto_tfm_ctx(tfm);
+
+	kfree_sensitive(tctx->key);
+	crypto_free_skcipher(tctx->fallback_tfm);
+	pm_runtime_put_sync_suspend(tctx->ce->dev);
+}
+
+/*
+ * sl3516_ce_aes_setkey - store an AES key for the CE and the fallback
+ * @tfm: the transform
+ * @key: key material
+ * @keylen: key length in bytes; must be 16, 24 or 32
+ *
+ * The new key is duplicated before the old one is released, so an
+ * allocation failure leaves the previous key/keylen pair intact instead of
+ * a NULL key with a non-zero length.
+ *
+ * Returns -EINVAL for an unsupported length, -ENOMEM if the copy cannot be
+ * allocated, otherwise the result of setting the key on the fallback tfm.
+ */
+int sl3516_ce_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
+			 unsigned int keylen)
+{
+	struct sl3516_ce_cipher_tfm_ctx *op = crypto_skcipher_ctx(tfm);
+	struct sl3516_ce_dev *ce = op->ce;
+	void *newkey;
+
+	switch (keylen) {
+	case 128 / 8:
+	case 192 / 8:
+	case 256 / 8:
+		break;
+	default:
+		dev_dbg(ce->dev, "ERROR: Invalid keylen %u\n", keylen);
+		return -EINVAL;
+	}
+
+	newkey = kmemdup(key, keylen, GFP_KERNEL | GFP_DMA);
+	if (!newkey)
+		return -ENOMEM;
+
+	kfree_sensitive(op->key);
+	op->key = newkey;
+	op->keylen = keylen;
+
+	/* propagate the caller's request flags to the fallback before keying it */
+	crypto_skcipher_clear_flags(op->fallback_tfm, CRYPTO_TFM_REQ_MASK);
+	crypto_skcipher_set_flags(op->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
+
+	return crypto_skcipher_setkey(op->fallback_tfm, key, keylen);
+}
diff --git a/drivers/crypto/gemini/sl3516-ce-core.c b/drivers/crypto/gemini/sl3516-ce-core.c
new file mode 100644
index 000000000000..da6cd529a6c0
--- /dev/null
+++ b/drivers/crypto/gemini/sl3516-ce-core.c
@@ -0,0 +1,535 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * sl3516-ce-core.c - hardware cryptographic offloader for Storlink SL3516 SoC
+ *
+ * Copyright (C) 2021 Corentin Labbe <clabbe@baylibre.com>
+ *
+ * Core file which registers crypto algorithms supported by the CryptoEngine
+ */
+#include <linux/clk.h>
+#include <linux/crypto.h>
+#include <linux/debugfs.h>
+#include <linux/dev_printk.h>
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/reset.h>
+#include <crypto/internal/rng.h>
+#include <crypto/internal/skcipher.h>
+
+#include "sl3516-ce.h"
+
+/*
+ * sl3516_ce_desc_init - allocate the descriptor rings and control packet
+ *
+ * Allocates MAXDESC coherent TX and RX descriptors, marks every descriptor
+ * as owned by the CPU and links each one to the next, the last pointing
+ * back to the first. Also allocates the coherent buffer used for the
+ * control packet.
+ *
+ * Returns 0 on success or -ENOMEM, freeing whatever was already allocated.
+ */
+static int sl3516_ce_desc_init(struct sl3516_ce_dev *ce)
+{
+	const size_t ring_sz = sizeof(struct descriptor) * MAXDESC;
+	int i;
+
+	ce->tx = dma_alloc_coherent(ce->dev, ring_sz, &ce->dtx, GFP_KERNEL);
+	if (!ce->tx)
+		return -ENOMEM;
+
+	ce->rx = dma_alloc_coherent(ce->dev, ring_sz, &ce->drx, GFP_KERNEL);
+	if (!ce->rx)
+		goto free_tx;
+
+	for (i = 0; i < MAXDESC; i++) {
+		/* index of the following descriptor, wrapping to 0 at the end */
+		const int next = (i + 1) % MAXDESC;
+
+		ce->tx[i].frame_ctrl.bits.own = CE_CPU;
+		ce->tx[i].next_desc.next_descriptor = ce->dtx + next * sizeof(struct descriptor);
+
+		ce->rx[i].frame_ctrl.bits.own = CE_CPU;
+		ce->rx[i].next_desc.next_descriptor = ce->drx + next * sizeof(struct descriptor);
+	}
+
+	ce->pctrl = dma_alloc_coherent(ce->dev, sizeof(struct pkt_control_ecb),
+				       &ce->dctrl, GFP_KERNEL);
+	if (!ce->pctrl)
+		goto free_rx;
+
+	return 0;
+
+free_rx:
+	dma_free_coherent(ce->dev, ring_sz, ce->rx, ce->drx);
+free_tx:
+	dma_free_coherent(ce->dev, ring_sz, ce->tx, ce->dtx);
+	return -ENOMEM;
+}
+
+/*
+ * sl3516_ce_free_descs - release everything sl3516_ce_desc_init() allocated:
+ * the TX ring, the RX ring and the control packet buffer.
+ */
+static void sl3516_ce_free_descs(struct sl3516_ce_dev *ce)
+{
+	const size_t ring_sz = sizeof(struct descriptor) * MAXDESC;
+	const size_t ctrl_sz = sizeof(struct pkt_control_ecb);
+
+	dma_free_coherent(ce->dev, ring_sz, ce->tx, ce->dtx);
+	dma_free_coherent(ce->dev, ring_sz, ce->rx, ce->drx);
+	dma_free_coherent(ce->dev, ctrl_sz, ce->pctrl, ce->dctrl);
+}
+
+/* start_dma_tx - write the TX DMA control word that starts the TX engine */
+static void start_dma_tx(struct sl3516_ce_dev *ce)
+{
+	u32 ctrl = TXDMA_CTRL_START;
+
+	ctrl |= TXDMA_CTRL_CHAIN_MODE;
+	ctrl |= TXDMA_CTRL_CONTINUE;
+	ctrl |= TXDMA_CTRL_INT_FAIL;
+	ctrl |= TXDMA_CTRL_INT_PERR;
+	ctrl |= TXDMA_CTRL_BURST_UNK;
+
+	writel(ctrl, ce->base + IPSEC_TXDMA_CTRL);
+}
+
+/* start_dma_rx - write the RX DMA control word that starts the RX engine */
+static void start_dma_rx(struct sl3516_ce_dev *ce)
+{
+	u32 ctrl = RXDMA_CTRL_START;
+
+	ctrl |= RXDMA_CTRL_CHAIN_MODE;
+	ctrl |= RXDMA_CTRL_CONTINUE;
+	ctrl |= RXDMA_CTRL_BURST_UNK;
+	ctrl |= RXDMA_CTRL_INT_FINISH;
+	ctrl |= RXDMA_CTRL_INT_FAIL;
+	ctrl |= RXDMA_CTRL_INT_PERR;
+	ctrl |= RXDMA_CTRL_INT_EOD;
+	ctrl |= RXDMA_CTRL_INT_EOF;
+
+	writel(ctrl, ce->base + IPSEC_RXDMA_CTRL);
+}
+
+/*
+ * get_desc_tx - return the TX descriptor at the current ring position and
+ * advance the position, wrapping to 0 once MAXDESC is reached.
+ */
+static struct descriptor *get_desc_tx(struct sl3516_ce_dev *ce)
+{
+	struct descriptor *cur = &ce->tx[ce->ctx];
+
+	if (++ce->ctx >= MAXDESC)
+		ce->ctx = 0;
+
+	return cur;
+}
+
+/*
+ * get_desc_rx - return the RX descriptor at the current ring position and
+ * advance the position, wrapping to 0 once MAXDESC is reached.
+ */
+static struct descriptor *get_desc_rx(struct sl3516_ce_dev *ce)
+{
+	struct descriptor *cur = &ce->rx[ce->crx];
+
+	if (++ce->crx >= MAXDESC)
+		ce->crx = 0;
+
+	return cur;
+}
+
+/*
+ * sl3516_ce_run_task - program the descriptor rings for one request and wait
+ * @ce: the crypto engine device
+ * @rctx: request context holding the mapped source/destination entries
+ * @name: algorithm name, only used in the timeout message
+ *
+ * Returns 0 on success, or -EFAULT when no completion was signalled or
+ * when IPSEC_STATUS_REG reports any of its low 12 bits set.
+ *
+ * NOTE(review): rdd stays NULL if rctx->nr_sgd is 0 and is dereferenced
+ * right after the first loop; callers presumably guarantee at least one
+ * destination entry - confirm.
+ */
+int sl3516_ce_run_task(struct sl3516_ce_dev *ce, struct sl3516_ce_cipher_req_ctx *rctx,
+ const char *name)
+{
+ struct descriptor *dd, *rdd = NULL;
+ u32 v;
+ int i, err = 0;
+
+ ce->stat_req++;
+
+ reinit_completion(&ce->complete);
+ ce->status = 0;
+
+ /* one RX descriptor per destination entry, handed over to the DMA */
+ for (i = 0; i < rctx->nr_sgd; i++) {
+ dev_dbg(ce->dev, "%s handle DST SG %d/%d len=%d\n", __func__,
+ i, rctx->nr_sgd, rctx->t_dst[i].len);
+ rdd = get_desc_rx(ce);
+ rdd->buf_adr = rctx->t_dst[i].addr;
+ rdd->frame_ctrl.bits.buffer_size = rctx->t_dst[i].len;
+ rdd->frame_ctrl.bits.own = CE_DMA;
+ }
+ /* eofie is set only on the last RX descriptor queued above */
+ rdd->next_desc.bits.eofie = 1;
+
+ /*
+ * Each source entry takes two TX descriptors: one pointing at the
+ * control packet (ce->dctrl), one pointing at the data itself.
+ */
+ for (i = 0; i < rctx->nr_sgs; i++) {
+ dev_dbg(ce->dev, "%s handle SRC SG %d/%d len=%d\n", __func__,
+ i, rctx->nr_sgs, rctx->t_src[i].len);
+ /* the control packet length field is rewritten for every entry */
+ rctx->h->algorithm_len = rctx->t_src[i].len;
+
+ dd = get_desc_tx(ce);
+ dd->frame_ctrl.raw = 0;
+ dd->flag_status.raw = 0;
+ dd->frame_ctrl.bits.buffer_size = rctx->pctrllen;
+ dd->buf_adr = ce->dctrl;
+ dd->flag_status.tx_flag.tqflag = rctx->tqflag;
+ dd->next_desc.bits.eofie = 0;
+ dd->next_desc.bits.dec = 0;
+ dd->next_desc.bits.sof_eof = DESC_FIRST | DESC_LAST;
+ dd->frame_ctrl.bits.own = CE_DMA;
+
+ dd = get_desc_tx(ce);
+ dd->frame_ctrl.raw = 0;
+ dd->flag_status.raw = 0;
+ dd->frame_ctrl.bits.buffer_size = rctx->t_src[i].len;
+ dd->buf_adr = rctx->t_src[i].addr;
+ dd->flag_status.tx_flag.tqflag = 0;
+ dd->next_desc.bits.eofie = 0;
+ dd->next_desc.bits.dec = 0;
+ dd->next_desc.bits.sof_eof = DESC_FIRST | DESC_LAST;
+ dd->frame_ctrl.bits.own = CE_DMA;
+ start_dma_tx(ce);
+ start_dma_rx(ce);
+ }
+ /*
+ * The wait's return value is not used: ce->status, set to 1 by the
+ * IRQ handler on RX end-of-frame, tells whether completion happened.
+ */
+ wait_for_completion_interruptible_timeout(&ce->complete,
+ msecs_to_jiffies(5000));
+ if (ce->status == 0) {
+ dev_err(ce->dev, "DMA timeout for %s\n", name);
+ err = -EFAULT;
+ }
+ v = readl(ce->base + IPSEC_STATUS_REG);
+ if (v & 0xFFF) {
+ dev_err(ce->dev, "IPSEC_STATUS_REG %x\n", v);
+ err = -EFAULT;
+ }
+
+ return err;
+}
+
+/*
+ * ce_irq_handler - crypto engine interrupt handler
+ *
+ * Reads IPSEC_DMA_STATUS and writes the value straight back, logs any
+ * TX/RX bus or protocol error bit, updates the IRQ counters and, on RX
+ * end-of-frame, marks the task done and wakes sl3516_ce_run_task().
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t ce_irq_handler(int irq, void *data)
+{
+	struct sl3516_ce_dev *ce = (struct sl3516_ce_dev *)data;
+	u32 status;
+
+	ce->stat_irq++;
+
+	status = readl(ce->base + IPSEC_DMA_STATUS);
+	writel(status, ce->base + IPSEC_DMA_STATUS);
+
+	if (status & DMA_STATUS_TS_DERR)
+		dev_err(ce->dev, "AHB bus Error While Tx !!!\n");
+	if (status & DMA_STATUS_TS_PERR)
+		dev_err(ce->dev, "Tx Descriptor Protocol Error !!!\n");
+	if (status & DMA_STATUS_RS_DERR)
+		dev_err(ce->dev, "AHB bus Error While Rx !!!\n");
+	if (status & DMA_STATUS_RS_PERR)
+		dev_err(ce->dev, "Rx Descriptor Protocol Error !!!\n");
+
+	if (status & DMA_STATUS_TS_EOFI)
+		ce->stat_irq_tx++;
+
+	if (status & DMA_STATUS_RS_EOFI) {
+		ce->status = 1;
+		complete(&ce->complete);
+		ce->stat_irq_rx++;
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * Algorithms offered by this driver. The table currently holds a single
+ * entry, ecb(aes), registered as an asynchronous skcipher that needs a
+ * fallback. The .ce member of each entry is filled in at registration time
+ * by sl3516_ce_register_algs().
+ */
+static struct sl3516_ce_alg_template ce_algs[] = {
+{
+ .type = CRYPTO_ALG_TYPE_SKCIPHER,
+ .mode = ECB_AES,
+ .alg.skcipher = {
+ .base = {
+ .cra_name = "ecb(aes)",
+ .cra_driver_name = "ecb-aes-sl3516",
+ .cra_priority = 400,
+ .cra_blocksize = AES_BLOCK_SIZE,
+ .cra_flags = CRYPTO_ALG_TYPE_SKCIPHER |
+ CRYPTO_ALG_ASYNC | CRYPTO_ALG_NEED_FALLBACK,
+ .cra_ctxsize = sizeof(struct sl3516_ce_cipher_tfm_ctx),
+ .cra_module = THIS_MODULE,
+ .cra_alignmask = 0xf,
+ .cra_init = sl3516_ce_cipher_init,
+ .cra_exit = sl3516_ce_cipher_exit,
+ },
+ .min_keysize = AES_MIN_KEY_SIZE,
+ .max_keysize = AES_MAX_KEY_SIZE,
+ .setkey = sl3516_ce_aes_setkey,
+ .encrypt = sl3516_ce_skencrypt,
+ .decrypt = sl3516_ce_skdecrypt,
+ }
+},
+};
+
+#ifdef CONFIG_CRYPTO_DEV_SL3516_DEBUG
+/*
+ * sl3516_ce_debugfs_show - dump the driver statistics
+ *
+ * Prints the device-wide counters, then one line per registered skcipher
+ * with its request and fallback counts. Entries whose .ce is unset are
+ * skipped.
+ */
+static int sl3516_ce_debugfs_show(struct seq_file *seq, void *v)
+{
+	struct sl3516_ce_dev *ce = seq->private;
+	unsigned int idx;
+
+	seq_printf(seq, "HWRNG %lu %lu\n",
+		   ce->hwrng_stat_req, ce->hwrng_stat_bytes);
+	seq_printf(seq, "IRQ %lu\n", ce->stat_irq);
+	seq_printf(seq, "IRQ TX %lu\n", ce->stat_irq_tx);
+	seq_printf(seq, "IRQ RX %lu\n", ce->stat_irq_rx);
+	seq_printf(seq, "nreq %lu\n", ce->stat_req);
+	seq_printf(seq, "fallback SG count TX %lu\n", ce->fallback_sg_count_tx);
+	seq_printf(seq, "fallback SG count RX %lu\n", ce->fallback_sg_count_rx);
+	seq_printf(seq, "fallback modulo16 %lu\n", ce->fallback_mod16);
+	seq_printf(seq, "fallback align16 %lu\n", ce->fallback_align16);
+	seq_printf(seq, "fallback not same len %lu\n", ce->fallback_not_same_len);
+
+	for (idx = 0; idx < ARRAY_SIZE(ce_algs); idx++) {
+		struct sl3516_ce_alg_template *tmpl = &ce_algs[idx];
+
+		if (!tmpl->ce)
+			continue;
+		if (tmpl->type != CRYPTO_ALG_TYPE_SKCIPHER)
+			continue;
+
+		seq_printf(seq, "%s %s reqs=%lu fallback=%lu\n",
+			   tmpl->alg.skcipher.base.cra_driver_name,
+			   tmpl->alg.skcipher.base.cra_name,
+			   tmpl->stat_req, tmpl->stat_fb);
+	}
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(sl3516_ce_debugfs);
+#endif
+
+/*
+ * sl3516_ce_register_algs - register every algorithm of ce_algs[]
+ * @ce: device the algorithms are bound to
+ *
+ * Each entry's .ce is set before registration; entries of unknown type get
+ * .ce reset to NULL and are skipped. If one registration fails, the entries
+ * registered earlier in this call are unregistered again, so the caller
+ * sees either all algorithms registered or none.
+ *
+ * Returns 0 on success or the error from crypto_register_skcipher().
+ */
+static int sl3516_ce_register_algs(struct sl3516_ce_dev *ce)
+{
+	int err;
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(ce_algs); i++) {
+		ce_algs[i].ce = ce;
+		switch (ce_algs[i].type) {
+		case CRYPTO_ALG_TYPE_SKCIPHER:
+			dev_info(ce->dev, "DEBUG: Register %s\n",
+				 ce_algs[i].alg.skcipher.base.cra_name);
+			err = crypto_register_skcipher(&ce_algs[i].alg.skcipher);
+			if (err) {
+				dev_err(ce->dev, "Fail to register %s\n",
+					ce_algs[i].alg.skcipher.base.cra_name);
+				ce_algs[i].ce = NULL;
+				goto unwind;
+			}
+			break;
+		default:
+			ce_algs[i].ce = NULL;
+			dev_err(ce->dev, "ERROR: tried to register an unknown algo\n");
+		}
+	}
+	return 0;
+
+unwind:
+	/* roll back the entries [0, i) that were registered in this call */
+	while (i--) {
+		if (!ce_algs[i].ce)
+			continue;
+		if (ce_algs[i].type == CRYPTO_ALG_TYPE_SKCIPHER)
+			crypto_unregister_skcipher(&ce_algs[i].alg.skcipher);
+		ce_algs[i].ce = NULL;
+	}
+	return err;
+}
+
+/*
+ * sl3516_ce_unregister_algs - unregister every skcipher of ce_algs[] whose
+ * .ce is set; other entries are left alone.
+ */
+static void sl3516_ce_unregister_algs(struct sl3516_ce_dev *ce)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(ce_algs); i++) {
+		if (!ce_algs[i].ce)
+			continue;
+		if (ce_algs[i].type != CRYPTO_ALG_TYPE_SKCIPHER)
+			continue;
+
+		dev_info(ce->dev, "Unregister %d %s\n", i,
+			 ce_algs[i].alg.skcipher.base.cra_name);
+		crypto_unregister_skcipher(&ce_algs[i].alg.skcipher);
+	}
+}
+
+/*
+ * sl3516_ce_start - rewind both descriptor rings
+ *
+ * Resets the software ring positions to 0, writes the DMA addresses of the
+ * TX and RX rings into the current-descriptor registers and writes 0 to
+ * the DMA status register.
+ */
+static void sl3516_ce_start(struct sl3516_ce_dev *ce)
+{
+	/* software view of the rings */
+	ce->ctx = 0;
+	ce->crx = 0;
+
+	/* hardware view of the rings */
+	writel(ce->dtx, ce->base + IPSEC_TXDMA_CURR_DESC);
+	writel(ce->drx, ce->base + IPSEC_RXDMA_CURR_DESC);
+
+	writel(0, ce->base + IPSEC_DMA_STATUS);
+}
+
+/*
+ * Power management strategy: The device is suspended unless a TFM exists for
+ * one of the algorithms proposed by this driver.
+ */
+
+/*
+ * sl3516_ce_pm_suspend - runtime suspend: assert the reset line, then gate
+ * the clock. Always returns 0.
+ */
+static int sl3516_ce_pm_suspend(struct device *dev)
+{
+	struct sl3516_ce_dev *cedev = dev_get_drvdata(dev);
+
+	reset_control_assert(cedev->reset);
+	clk_disable_unprepare(cedev->clks);
+
+	return 0;
+}
+
+/*
+ * sl3516_ce_pm_resume - runtime resume: enable the clock, release the reset
+ * and rewind the descriptor rings.
+ *
+ * If the clock cannot be enabled nothing has changed yet, so the error is
+ * returned directly; calling the suspend path there would disable a clock
+ * that was never enabled. If releasing the reset fails, the suspend path is
+ * used to put the reset back and balance the clock enable.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int sl3516_ce_pm_resume(struct device *dev)
+{
+	struct sl3516_ce_dev *ce = dev_get_drvdata(dev);
+	int err;
+
+	err = clk_prepare_enable(ce->clks);
+	if (err) {
+		dev_err(ce->dev, "Cannot prepare_enable\n");
+		return err;
+	}
+
+	err = reset_control_deassert(ce->reset);
+	if (err) {
+		dev_err(ce->dev, "Cannot deassert reset control\n");
+		/* clock is on at this point: the suspend path balances it */
+		sl3516_ce_pm_suspend(dev);
+		return err;
+	}
+
+	sl3516_ce_start(ce);
+
+	return 0;
+}
+
+/* Runtime PM callbacks only: suspend and resume, no idle callback. */
+static const struct dev_pm_ops sl3516_ce_pm_ops = {
+ SET_RUNTIME_PM_OPS(sl3516_ce_pm_suspend, sl3516_ce_pm_resume, NULL)
+};
+
+/*
+ * sl3516_ce_pm_init - set up runtime PM for the device
+ *
+ * Selects autosuspend with a 2000 ms delay, declares the device suspended
+ * and, if that worked, enables runtime PM.
+ *
+ * Returns 0 on success or the error from pm_runtime_set_suspended().
+ */
+static int sl3516_ce_pm_init(struct sl3516_ce_dev *ce)
+{
+	int err;
+
+	pm_runtime_use_autosuspend(ce->dev);
+	pm_runtime_set_autosuspend_delay(ce->dev, 2000);
+
+	err = pm_runtime_set_suspended(ce->dev);
+	if (!err)
+		pm_runtime_enable(ce->dev);
+
+	return err;
+}
+
+/* sl3516_ce_pm_exit - counterpart of sl3516_ce_pm_init(): disable runtime PM */
+static void sl3516_ce_pm_exit(struct sl3516_ce_dev *ce)
+{
+	struct device *dev = ce->dev;
+
+	pm_runtime_disable(dev);
+}
+
+/*
+ * sl3516_ce_probe - bind the driver to a crypto engine platform device
+ *
+ * Acquires the registers, IRQ, reset line and clock, allocates the
+ * descriptor rings, sets up runtime PM and the crypto engine, registers
+ * the algorithms and the RNG, then briefly powers the device to log its ID
+ * registers.
+ *
+ * The completion is initialised before the IRQ is requested so the handler
+ * never sees it uninitialised, and a failure of crypto_engine_start() now
+ * releases the engine that was just allocated.
+ *
+ * Returns 0 on success or a negative error code.
+ */
+static int sl3516_ce_probe(struct platform_device *pdev)
+{
+	struct sl3516_ce_dev *ce;
+	int err, irq;
+	u32 v;
+
+	ce = devm_kzalloc(&pdev->dev, sizeof(*ce), GFP_KERNEL);
+	if (!ce)
+		return -ENOMEM;
+
+	ce->dev = &pdev->dev;
+	platform_set_drvdata(pdev, ce);
+
+	/* the IRQ handler calls complete() on this, so set it up first */
+	init_completion(&ce->complete);
+
+	ce->base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(ce->base))
+		return PTR_ERR(ce->base);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0)
+		return irq;
+
+	err = devm_request_irq(&pdev->dev, irq, ce_irq_handler, 0, "crypto", ce);
+	if (err) {
+		dev_err(ce->dev, "Cannot request Crypto Engine IRQ (err=%d)\n", err);
+		return err;
+	}
+
+	ce->reset = devm_reset_control_get(&pdev->dev, NULL);
+	if (IS_ERR(ce->reset))
+		return dev_err_probe(&pdev->dev, PTR_ERR(ce->reset),
+				     "No reset control found\n");
+	ce->clks = devm_clk_get(ce->dev, NULL);
+	if (IS_ERR(ce->clks)) {
+		err = PTR_ERR(ce->clks);
+		dev_err(ce->dev, "Cannot get clock err=%d\n", err);
+		return err;
+	}
+
+	err = sl3516_ce_desc_init(ce);
+	if (err)
+		return err;
+
+	err = sl3516_ce_pm_init(ce);
+	if (err)
+		goto error_pm;
+
+	ce->engine = crypto_engine_alloc_init(ce->dev, true);
+	if (!ce->engine) {
+		dev_err(ce->dev, "Cannot allocate engine\n");
+		err = -ENOMEM;
+		goto error_engine;
+	}
+
+	err = crypto_engine_start(ce->engine);
+	if (err) {
+		dev_err(ce->dev, "Cannot start engine\n");
+		/* the engine exists at this point and must be torn down */
+		goto error_alg;
+	}
+
+	err = sl3516_ce_register_algs(ce);
+	if (err)
+		goto error_alg;
+
+	err = sl3516_ce_rng_register(ce);
+	if (err)
+		goto error_rng;
+
+	err = pm_runtime_resume_and_get(ce->dev);
+	if (err < 0)
+		goto error_pmuse;
+
+	v = readl(ce->base + IPSEC_ID);
+	dev_info(ce->dev, "SL3516 dev %lx rev %lx\n",
+		 v & GENMASK(31, 4),
+		 v & GENMASK(3, 0));
+	v = readl(ce->base + IPSEC_DMA_DEVICE_ID);
+	dev_info(ce->dev, "SL3516 DMA dev %lx rev %lx\n",
+		 v & GENMASK(15, 4),
+		 v & GENMASK(3, 0));
+
+	pm_runtime_put_sync(ce->dev);
+
+#ifdef CONFIG_CRYPTO_DEV_SL3516_DEBUG
+	/* Ignore error of debugfs */
+	ce->dbgfs_dir = debugfs_create_dir("sl3516", NULL);
+	ce->dbgfs_stats = debugfs_create_file("stats", 0444,
+					      ce->dbgfs_dir, ce,
+					      &sl3516_ce_debugfs_fops);
+#endif
+
+	return 0;
+error_pmuse:
+	sl3516_ce_rng_unregister(ce);
+error_rng:
+	sl3516_ce_unregister_algs(ce);
+error_alg:
+	crypto_engine_exit(ce->engine);
+error_engine:
+	sl3516_ce_pm_exit(ce);
+error_pm:
+	sl3516_ce_free_descs(ce);
+	return err;
+}
+
+/*
+ * sl3516_ce_remove - unbind the driver
+ *
+ * Undoes probe in reverse order: RNG, algorithms, crypto engine, runtime
+ * PM, descriptor memory and, when built with debug support, the debugfs
+ * directory. Always returns 0.
+ */
+static int sl3516_ce_remove(struct platform_device *pdev)
+{
+	struct sl3516_ce_dev *cedev = platform_get_drvdata(pdev);
+
+	/* stop offering services first */
+	sl3516_ce_rng_unregister(cedev);
+	sl3516_ce_unregister_algs(cedev);
+	crypto_engine_exit(cedev->engine);
+
+	/* then release the device resources */
+	sl3516_ce_pm_exit(cedev);
+	sl3516_ce_free_descs(cedev);
+
+#ifdef CONFIG_CRYPTO_DEV_SL3516_DEBUG
+	debugfs_remove_recursive(cedev->dbgfs_dir);
+#endif
+
+	return 0;
+}
+
+/* Device tree match table: the driver binds to "cortina,sl3516-crypto". */
+static const struct of_device_id sl3516_ce_crypto_of_match_table[] = {
+ { .compatible = "cortina,sl3516-crypto"},
+ {}
+};
+MODULE_DEVICE_TABLE(of, sl3516_ce_crypto_of_match_table);
+
+/* Platform driver glue: probe/remove plus the runtime PM operations. */
+static struct platform_driver sl3516_ce_driver = {
+ .probe = sl3516_ce_probe,
+ .remove = sl3516_ce_remove,
+ .driver = {
+ .name = "sl3516-crypto",
+ .pm = &sl3516_ce_pm_ops,
+ .of_match_table = sl3516_ce_crypto_of_match_table,
+ },
+};
+
+module_platform_driver(sl3516_ce_driver);
+
+MODULE_DESCRIPTION("SL3516 cryptographic offloader");
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Corentin Labbe <clabbe@baylibre.com>");
diff --git a/drivers/crypto/gemini/sl3516-ce-rng.c b/drivers/crypto/gemini/sl3516-ce-rng.c
new file mode 100644
index 000000000000..76931ec1cec5
--- /dev/null
+++ b/drivers/crypto/gemini/sl3516-ce-rng.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * sl3516-ce-rng.c - hardware cryptographic offloader for SL3516 SoC.
+ *
+ * Copyright (C) 2021 Corentin Labbe <clabbe@baylibre.com>
+ *
+ * This file handles the RNG found in the SL3516 crypto engine
+ */
+#include "sl3516-ce.h"
+#include <linux/pm_runtime.h>
+#include <linux/hw_random.h>
+
+/*
+ * sl3516_ce_rng_read - hwrng read callback
+ * @rng: the hwrng embedded in the device structure
+ * @buf: destination buffer
+ * @max: number of bytes requested
+ * @wait: unused
+ *
+ * Fills @buf from IPSEC_RAND_NUM_REG, one 32-bit word per register read.
+ * When @max is not a multiple of 4, the last word is copied byte by byte
+ * so that no more than @max bytes are ever written to @buf.
+ *
+ * Returns the number of bytes written (@max) or a negative error code if
+ * the device could not be resumed.
+ */
+static int sl3516_ce_rng_read(struct hwrng *rng, void *buf, size_t max, bool wait)
+{
+	struct sl3516_ce_dev *ce;
+	u32 *data = buf;
+	size_t read = 0;
+	int err;
+
+	ce = container_of(rng, struct sl3516_ce_dev, trng);
+
+#ifdef CONFIG_CRYPTO_DEV_SL3516_DEBUG
+	ce->hwrng_stat_req++;
+	ce->hwrng_stat_bytes += max;
+#endif
+
+	err = pm_runtime_get_sync(ce->dev);
+	if (err < 0) {
+		/* get_sync keeps the usage count raised even when it fails */
+		pm_runtime_put_noidle(ce->dev);
+		return err;
+	}
+
+	/* whole 32-bit words */
+	while (max - read >= sizeof(u32)) {
+		*data = readl(ce->base + IPSEC_RAND_NUM_REG);
+		data++;
+		read += sizeof(u32);
+	}
+
+	/* trailing 1 to 3 bytes, taken from one more register read */
+	if (read < max) {
+		u32 last = readl(ce->base + IPSEC_RAND_NUM_REG);
+		u8 *tail = (u8 *)data;
+
+		while (read < max) {
+			*tail++ = last & 0xff;
+			last >>= 8;
+			read++;
+		}
+	}
+
+	pm_runtime_put(ce->dev);
+
+	return read;
+}
+
+/*
+ * sl3516_ce_rng_register - describe and register the engine's RNG
+ *
+ * Returns 0 on success or the error from hwrng_register(), which is also
+ * logged.
+ */
+int sl3516_ce_rng_register(struct sl3516_ce_dev *ce)
+{
+	struct hwrng *trng = &ce->trng;
+	int ret;
+
+	trng->name = "SL3516 Crypto Engine RNG";
+	trng->read = sl3516_ce_rng_read;
+	trng->quality = 700;
+
+	ret = hwrng_register(trng);
+	if (!ret)
+		return 0;
+
+	dev_err(ce->dev, "Fail to register the RNG\n");
+	return ret;
+}
+
+/* sl3516_ce_rng_unregister - remove the RNG registered by sl3516_ce_rng_register() */
+void sl3516_ce_rng_unregister(struct sl3516_ce_dev *ce)
+{
+	struct hwrng *trng = &ce->trng;
+
+	hwrng_unregister(trng);
+}
diff --git a/drivers/crypto/gemini/sl3516-ce.h b/drivers/crypto/gemini/sl3516-ce.h
new file mode 100644
index 000000000000..4c0ec6c920d1
--- /dev/null
+++ b/drivers/crypto/gemini/sl3516-ce.h
@@ -0,0 +1,347 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * sl3516-ce.h - hardware cryptographic offloader for cortina/gemini SoC
+ *
+ * Copyright (C) 2021 Corentin LABBE <clabbe@baylibre.com>
+ *
+ * General notes on this driver:
+ * Called either Crypto Acceleration Engine Module, Security Acceleration Engine
+ * or IPSEC module in the datasheet, it will be called Crypto Engine for short
+ * in this driver.
+ * The CE was designed to handle IPSEC and wifi(TKIP WEP) protocol.
+ * It can handle AES, DES, 3DES, MD5, WEP, TKIP, SHA1, HMAC(MD5), HMAC(SHA1),
+ * Michael cipher/digest suites.
+ * It acts the same as a network hw, with both RX and TX chained descriptors.
+ */
+#include <crypto/aes.h>
+#include <crypto/engine.h>
+#include <crypto/scatterwalk.h>
+#include <crypto/skcipher.h>
+#include <linux/crypto.h>
+#include <linux/debugfs.h>
+#include <linux/hw_random.h>
+
+#define TQ0_TYPE_DATA 0 /* TQ* values: presumably the bits of the 10-bit descriptor tqflag field -- confirm */
+#define TQ0_TYPE_CTRL BIT(0)
+#define TQ1_CIPHER BIT(1)
+#define TQ2_AUTH BIT(2)
+#define TQ3_IV BIT(3)
+#define TQ4_KEY0 BIT(4)
+#define TQ5_KEY4 BIT(5)
+#define TQ6_KEY6 BIT(6)
+#define TQ7_AKEY0 BIT(7)
+#define TQ8_AKEY2 BIT(8)
+#define TQ9_AKEY2 BIT(9) /* NOTE(review): same AKEY2 suffix as TQ8_AKEY2 -- confirm the intended name */
+
+#define ECB_AES 0x2
+
+#define DESC_LAST 0x01
+#define DESC_FIRST 0x02
+
+#define IPSEC_ID 0x0000
+#define IPSEC_STATUS_REG 0x00a8
+#define IPSEC_RAND_NUM_REG 0x00ac /* read with readl() by the hwrng read callback */
+#define IPSEC_DMA_DEVICE_ID 0xff00
+#define IPSEC_DMA_STATUS 0xff04
+#define IPSEC_TXDMA_CTRL 0xff08
+#define IPSEC_TXDMA_FIRST_DESC 0xff0c
+#define IPSEC_TXDMA_CURR_DESC 0xff10
+#define IPSEC_RXDMA_CTRL 0xff14
+#define IPSEC_RXDMA_FIRST_DESC 0xff18
+#define IPSEC_RXDMA_CURR_DESC 0xff1c
+#define IPSEC_TXDMA_BUF_ADDR 0xff28
+#define IPSEC_RXDMA_BUF_ADDR 0xff38
+#define IPSEC_RXDMA_BUF_SIZE 0xff30
+
+#define CE_ENCRYPTION 0x01
+#define CE_DECRYPTION 0x03
+
+#define MAXDESC 6 /* number of entries in t_src/t_dst of struct sl3516_ce_cipher_req_ctx */
+
+#define DMA_STATUS_RS_EOFI BIT(22)
+#define DMA_STATUS_RS_PERR BIT(24)
+#define DMA_STATUS_RS_DERR BIT(25)
+#define DMA_STATUS_TS_EOFI BIT(27)
+#define DMA_STATUS_TS_PERR BIT(29)
+#define DMA_STATUS_TS_DERR BIT(30)
+
+#define TXDMA_CTRL_START BIT(31)
+#define TXDMA_CTRL_CONTINUE BIT(30)
+#define TXDMA_CTRL_CHAIN_MODE BIT(29)
+/* the burst value is not documented in the datasheet */
+#define TXDMA_CTRL_BURST_UNK BIT(22)
+#define TXDMA_CTRL_INT_FAIL BIT(17)
+#define TXDMA_CTRL_INT_PERR BIT(16)
+
+#define RXDMA_CTRL_START BIT(31)
+#define RXDMA_CTRL_CONTINUE BIT(30)
+#define RXDMA_CTRL_CHAIN_MODE BIT(29)
+/* the burst value is not documented in the datasheet */
+#define RXDMA_CTRL_BURST_UNK BIT(22)
+#define RXDMA_CTRL_INT_FINISH BIT(18)
+#define RXDMA_CTRL_INT_FAIL BIT(17)
+#define RXDMA_CTRL_INT_PERR BIT(16)
+#define RXDMA_CTRL_INT_EOD BIT(15)
+#define RXDMA_CTRL_INT_EOF BIT(14)
+
+#define CE_CPU 0 /* presumably the descriptor @own values (0: CPU, 1: DMA) -- confirm */
+#define CE_DMA 1
+
+/*
+ * struct descriptor - descriptor for CE operations
+ * @frame_ctrl: Information for the current descriptor, accessible either as
+ *		a raw 32-bit word or through the bitfields
+ * @flag_status: For send packet, describe flag of operations.
+ * @buf_adr: pointer to a send/recv buffer for data packet
+ * @next_desc: control linking to other descriptors
+ */
+struct descriptor {
+ union {
+ u32 raw;
+ /*
+ * struct desc_frame_ctrl - Information for the current descriptor
+ * @buffer_size: the size of buffer at buf_adr
+ * @desc_count: Upon completion of a DMA operation, DMA
+ * write the number of descriptors used
+ * for the current frame
+ * @checksum: unknown
+ * @authcomp: unknown
+ * @perr: Protocol error during processing this descriptor
+ * @derr: Data error during processing this descriptor
+ * @own: 0 if owned by CPU, 1 for DMA
+ */
+ struct desc_frame_ctrl {
+ u32 buffer_size :16;
+ u32 desc_count :6;
+ u32 checksum :6;
+ u32 authcomp :1;
+ u32 perr :1;
+ u32 derr :1;
+ u32 own :1;
+ } bits;
+ } frame_ctrl;
+
+ union {
+ u32 raw;
+ /*
+ * struct desc_tx_flag_status - flag for this descriptor
+ * @tqflag: list of flag describing the type of operation
+ * to be performed.
+ * @unused: remaining 22 bits of the word
+ */
+ struct desc_tx_flag_status {
+ u32 tqflag :10;
+ u32 unused :22;
+ } tx_flag;
+ } flag_status;
+
+ u32 buf_adr;
+
+ union {
+ u32 next_descriptor;
+ /*
+ * struct desc_next - describe chaining of descriptors
+ * @sof_eof: tells whether the descriptor is the first of a
+ * chain, the last (0x01), in the middle of a chain
+ * (0x00) or the only one.
+ * NOTE(review): this comment used to give 0x11 for
+ * both "first" and "only one"; this header defines
+ * DESC_FIRST as 0x02 and DESC_LAST as 0x01, so
+ * "only one" is presumably both bits set -- confirm
+ * against the datasheet.
+ * @dec: AHB bus address increase (0), decrease (1)
+ * @eofie: End of frame interrupt enable
+ * @ndar: Next descriptor address
+ */
+ struct desc_next {
+ u32 sof_eof :2;
+ u32 dec :1;
+ u32 eofie :1;
+ u32 ndar :28;
+ } bits;
+ } next_desc;
+};
+
+/*
+ * struct pkt_control_header - The value of this register is used to set the
+ * operation mode of the IPSec Module.
+ * @process_id: Used to identify the process. The number will be copied
+ * to the descriptor status of the received packet.
+ * @auth_check_len: Number of 32-bit words to be checked or appended by the
+ * authentication module
+ * @un1: 1 bit, not described
+ * @auth_algorithm: authentication algorithm selector (values not described
+ * here)
+ * @auth_mode: 0:append 1:Check Authentication Result
+ * @fcs_stream_copy: 0:enable 1:disable authentication stream copy
+ * @un2: 2 bits, not described
+ * @mix_key_sel: 0:use rCipherKey0-3 1:use Key Mixer
+ * @aesnk: AES Key Size
+ * @cipher_algorithm: choice of CBC/ECB and AES/DES/3DES
+ * @un3: 1 bit, not described
+ * @op_mode: Operation Mode for the IPSec Module
+ */
+struct pkt_control_header {
+ u32 process_id :8;
+ u32 auth_check_len :3;
+ u32 un1 :1;
+ u32 auth_algorithm :3;
+ u32 auth_mode :1;
+ u32 fcs_stream_copy :1;
+ u32 un2 :2;
+ u32 mix_key_sel :1;
+ u32 aesnk :4;
+ u32 cipher_algorithm :3;
+ u32 un3 :1;
+ u32 op_mode :4;
+};
+
+struct pkt_control_cipher { /* two 16-bit bitfields declared on u32; used as the cipher member of struct pkt_control_ecb */
+ u32 algorithm_len :16; /* presumably the length handled by the cipher -- confirm */
+ u32 header_len :16; /* presumably the length of a leading header -- confirm */
+};
+
+/*
+ * struct pkt_control_ecb - control packet for ECB
+ * @control: control header (struct pkt_control_header)
+ * @cipher: cipher length fields (struct pkt_control_cipher)
+ * @key: key bytes, sized for the largest AES key (AES_MAX_KEY_SIZE)
+ */
+struct pkt_control_ecb {
+ struct pkt_control_header control;
+ struct pkt_control_cipher cipher;
+ unsigned char key[AES_MAX_KEY_SIZE];
+};
+
+/*
+ * struct sl3516_ce_dev - main container for all this driver information
+ * @base: base address
+ * @clks: clocks used
+ * @reset: pointer to reset controller
+ * @dev: the platform device
+ * @engine: ptr to the crypto/crypto_engine
+ * @complete: completion for the current task on this flow
+ * @status: set to 1 by interrupt if task is done
+ * @dtx: base DMA address for TX descriptors
+ * @tx: base address of TX descriptors
+ * @drx: base DMA address for RX descriptors
+ * @rx: base address of RX descriptors
+ * @ctx: current used TX descriptor
+ * @crx: current used RX descriptor
+ * @trng: hw_random structure for RNG
+ * @hwrng_stat_req: number of HWRNG requests
+ * @hwrng_stat_bytes: total number of bytes requested from the RNG
+ * @stat_irq: number of IRQ handled by CE
+ * @stat_irq_tx: number of TX IRQ handled by CE
+ * @stat_irq_rx: number of RX IRQ handled by CE
+ * @stat_req: number of requests handled by CE
+ * @fallback_sg_count_tx: number of fallback due to destination SG count
+ * @fallback_sg_count_rx: number of fallback due to source SG count
+ * @fallback_not_same_len: number of fallback due to difference in SG length
+ * @fallback_mod16: presumably number of fallback due to a length which
+ * is not a multiple of 16 -- confirm
+ * @fallback_align16: presumably number of fallback due to a 16-byte
+ * alignment problem -- confirm
+ * @dbgfs_dir: Debugfs dentry for statistic directory
+ * (only with CONFIG_CRYPTO_DEV_SL3516_DEBUG)
+ * @dbgfs_stats: Debugfs dentry for statistic counters
+ * (only with CONFIG_CRYPTO_DEV_SL3516_DEBUG)
+ * @pctrl: presumably the buffer holding the control packet -- confirm
+ * @dctrl: presumably the DMA address of @pctrl -- confirm
+ */
+struct sl3516_ce_dev {
+ void __iomem *base;
+ struct clk *clks;
+ struct reset_control *reset;
+ struct device *dev;
+ struct crypto_engine *engine;
+ struct completion complete;
+ int status;
+ dma_addr_t dtx;
+ struct descriptor *tx;
+ dma_addr_t drx;
+ struct descriptor *rx;
+ int ctx;
+ int crx;
+ struct hwrng trng;
+ unsigned long hwrng_stat_req;
+ unsigned long hwrng_stat_bytes;
+ unsigned long stat_irq;
+ unsigned long stat_irq_tx;
+ unsigned long stat_irq_rx;
+ unsigned long stat_req;
+ unsigned long fallback_sg_count_tx;
+ unsigned long fallback_sg_count_rx;
+ unsigned long fallback_not_same_len;
+ unsigned long fallback_mod16;
+ unsigned long fallback_align16;
+#ifdef CONFIG_CRYPTO_DEV_SL3516_DEBUG
+ struct dentry *dbgfs_dir;
+ struct dentry *dbgfs_stats;
+#endif
+ void *pctrl;
+ dma_addr_t dctrl;
+};
+
+struct sginfo { /* element type of t_src/t_dst in struct sl3516_ce_cipher_req_ctx */
+ u32 addr; /* presumably the mapped address of one SG entry -- confirm */
+ u32 len; /* presumably the size of that entry -- confirm */
+};
+
+/*
+ * struct sl3516_ce_cipher_req_ctx - context for a skcipher request
+ * @t_src: list of mapped SGs with their size (MAXDESC entries)
+ * @t_dst: list of mapped SGs with their size (MAXDESC entries)
+ * @op_dir: direction (encrypt vs decrypt) for this request
+ * @pctrllen: the length of the ctrl packet
+ * @tqflag: the TQflag to set in data packet
+ * @h: pointer to the pkt_control_cipher header
+ * @nr_sgs: number of source SG
+ * @nr_sgd: number of destination SG
+ * @fallback_req: request struct for invoking the fallback skcipher TFM,
+ * must stay the last member
+ */
+struct sl3516_ce_cipher_req_ctx {
+ struct sginfo t_src[MAXDESC];
+ struct sginfo t_dst[MAXDESC];
+ u32 op_dir;
+ unsigned int pctrllen;
+ u32 tqflag;
+ struct pkt_control_cipher *h;
+ int nr_sgs;
+ int nr_sgd;
+ struct skcipher_request fallback_req; // keep at the end
+};
+
+/*
+ * struct sl3516_ce_cipher_tfm_ctx - context for a skcipher TFM
+ * @enginectx: crypto_engine used by this TFM
+ * @key: pointer to key data
+ * @keylen: len of the key
+ * @ce: pointer to the private data of driver handling this TFM
+ * @fallback_tfm: pointer to the fallback TFM
+ *
+ * enginectx must be the first element
+ * (presumably because the crypto engine core accesses the TFM context as a
+ * struct crypto_engine_ctx -- confirm)
+ */
+struct sl3516_ce_cipher_tfm_ctx {
+ struct crypto_engine_ctx enginectx;
+ u32 *key;
+ u32 keylen;
+ struct sl3516_ce_dev *ce;
+ struct crypto_skcipher *fallback_tfm;
+};
+
+/*
+ * struct sl3516_ce_alg_template - crypto_alg template
+ * @type: the CRYPTO_ALG_TYPE for this template
+ * @mode: value to be used in control packet for this algorithm
+ * @ce: pointer to the sl3516_ce_dev structure associated with
+ * this template
+ * @alg: algorithm definition; skcipher is currently the only
+ * member of this union
+ * @stat_req: number of requests done on this template
+ * @stat_fb: number of requests which fell back
+ * @stat_bytes: total data size done by this template
+ */
+struct sl3516_ce_alg_template {
+ u32 type;
+ u32 mode;
+ struct sl3516_ce_dev *ce;
+ union {
+ struct skcipher_alg skcipher;
+ } alg;
+ unsigned long stat_req;
+ unsigned long stat_fb;
+ unsigned long stat_bytes;
+};
+
+int sl3516_ce_enqueue(struct crypto_async_request *areq, u32 type);
+
+int sl3516_ce_aes_setkey(struct crypto_skcipher *tfm, const u8 *key,
+ unsigned int keylen);
+int sl3516_ce_cipher_init(struct crypto_tfm *tfm);
+void sl3516_ce_cipher_exit(struct crypto_tfm *tfm);
+int sl3516_ce_skdecrypt(struct skcipher_request *areq);
+int sl3516_ce_skencrypt(struct skcipher_request *areq);
+
+int sl3516_ce_run_task(struct sl3516_ce_dev *ce,
+ struct sl3516_ce_cipher_req_ctx *rctx, const char *name);
+
+int sl3516_ce_rng_register(struct sl3516_ce_dev *ce); /* fills ce->trng and calls hwrng_register(); returns its result */
+void sl3516_ce_rng_unregister(struct sl3516_ce_dev *ce); /* calls hwrng_unregister() on ce->trng */
diff --git a/drivers/crypto/hisilicon/hpre/hpre_crypto.c b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
index a380087c83f7..a032c192ef1d 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_crypto.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_crypto.c
@@ -5,6 +5,7 @@
#include <crypto/dh.h>
#include <crypto/ecc_curve.h>
#include <crypto/ecdh.h>
+#include <crypto/rng.h>
#include <crypto/internal/akcipher.h>
#include <crypto/internal/kpp.h>
#include <crypto/internal/rsa.h>
@@ -30,7 +31,6 @@ struct hpre_ctx;
#define HPRE_DH_G_FLAG 0x02
#define HPRE_TRY_SEND_TIMES 100
#define HPRE_INVLD_REQ_ID (-1)
-#define HPRE_DEV(ctx) (&((ctx)->qp->qm->pdev->dev))
#define HPRE_SQE_ALG_BITS 5
#define HPRE_SQE_DONE_SHIFT 30
@@ -39,12 +39,17 @@ struct hpre_ctx;
#define HPRE_DFX_SEC_TO_US 1000000
#define HPRE_DFX_US_TO_NS 1000
+/* due to nist p521 */
+#define HPRE_ECC_MAX_KSZ 66
+
/* size in bytes of the n prime */
#define HPRE_ECC_NIST_P192_N_SIZE 24
#define HPRE_ECC_NIST_P256_N_SIZE 32
+#define HPRE_ECC_NIST_P384_N_SIZE 48
/* size in bytes */
#define HPRE_ECC_HW256_KSZ_B 32
+#define HPRE_ECC_HW384_KSZ_B 48
typedef void (*hpre_cb)(struct hpre_ctx *ctx, void *sqe);
@@ -102,6 +107,7 @@ struct hpre_curve25519_ctx {
struct hpre_ctx {
struct hisi_qp *qp;
+ struct device *dev;
struct hpre_asym_request **req_list;
struct hpre *hpre;
spinlock_t req_lock;
@@ -214,8 +220,7 @@ static int hpre_get_data_dma_addr(struct hpre_asym_request *hpre_req,
struct scatterlist *data, unsigned int len,
int is_src, dma_addr_t *tmp)
{
- struct hpre_ctx *ctx = hpre_req->ctx;
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = hpre_req->ctx->dev;
enum dma_data_direction dma_dir;
if (is_src) {
@@ -239,7 +244,7 @@ static int hpre_prepare_dma_buf(struct hpre_asym_request *hpre_req,
int is_src, dma_addr_t *tmp)
{
struct hpre_ctx *ctx = hpre_req->ctx;
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = ctx->dev;
void *ptr;
int shift;
@@ -293,11 +298,13 @@ static void hpre_hw_data_clr_all(struct hpre_ctx *ctx,
struct scatterlist *dst,
struct scatterlist *src)
{
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = ctx->dev;
struct hpre_sqe *sqe = &req->req;
dma_addr_t tmp;
tmp = le64_to_cpu(sqe->in);
+ if (unlikely(dma_mapping_error(dev, tmp)))
+ return;
if (src) {
if (req->src)
@@ -307,6 +314,8 @@ static void hpre_hw_data_clr_all(struct hpre_ctx *ctx,
}
tmp = le64_to_cpu(sqe->out);
+ if (unlikely(dma_mapping_error(dev, tmp)))
+ return;
if (req->dst) {
if (dst)
@@ -321,16 +330,15 @@ static void hpre_hw_data_clr_all(struct hpre_ctx *ctx,
static int hpre_alg_res_post_hf(struct hpre_ctx *ctx, struct hpre_sqe *sqe,
void **kreq)
{
- struct device *dev = HPRE_DEV(ctx);
struct hpre_asym_request *req;
unsigned int err, done, alg;
int id;
#define HPRE_NO_HW_ERR 0
#define HPRE_HW_TASK_DONE 3
-#define HREE_HW_ERR_MASK 0x7ff
-#define HREE_SQE_DONE_MASK 0x3
-#define HREE_ALG_TYPE_MASK 0x1f
+#define HREE_HW_ERR_MASK GENMASK(10, 0)
+#define HREE_SQE_DONE_MASK GENMASK(1, 0)
+#define HREE_ALG_TYPE_MASK GENMASK(4, 0)
id = (int)le16_to_cpu(sqe->tag);
req = ctx->req_list[id];
hpre_rm_req_from_ctx(req);
@@ -346,7 +354,7 @@ static int hpre_alg_res_post_hf(struct hpre_ctx *ctx, struct hpre_sqe *sqe,
return 0;
alg = le32_to_cpu(sqe->dw0) & HREE_ALG_TYPE_MASK;
- dev_err_ratelimited(dev, "alg[0x%x] error: done[0x%x], etype[0x%x]\n",
+ dev_err_ratelimited(ctx->dev, "alg[0x%x] error: done[0x%x], etype[0x%x]\n",
alg, done, err);
return -EINVAL;
@@ -361,6 +369,7 @@ static int hpre_ctx_set(struct hpre_ctx *ctx, struct hisi_qp *qp, int qlen)
spin_lock_init(&ctx->req_lock);
ctx->qp = qp;
+ ctx->dev = &qp->qm->pdev->dev;
hpre = container_of(ctx->qp->qm, struct hpre, qm);
ctx->hpre = hpre;
@@ -524,6 +533,8 @@ static int hpre_msg_request_set(struct hpre_ctx *ctx, void *req, bool is_rsa)
msg->key = cpu_to_le64(ctx->dh.dma_xa_p);
}
+ msg->in = cpu_to_le64(DMA_MAPPING_ERROR);
+ msg->out = cpu_to_le64(DMA_MAPPING_ERROR);
msg->dw0 |= cpu_to_le32(0x1 << HPRE_SQE_DONE_SHIFT);
msg->task_len1 = (ctx->key_sz >> HPRE_BITS_2_BYTES_SHIFT) - 1;
h_req->ctx = ctx;
@@ -618,14 +629,14 @@ static int hpre_is_dh_params_length_valid(unsigned int key_sz)
case _HPRE_DH_GRP15:
case _HPRE_DH_GRP16:
return 0;
+ default:
+ return -EINVAL;
}
-
- return -EINVAL;
}
static int hpre_dh_set_params(struct hpre_ctx *ctx, struct dh *params)
{
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = ctx->dev;
unsigned int sz;
if (params->p_size > HPRE_DH_MAX_P_SZ)
@@ -664,7 +675,7 @@ static int hpre_dh_set_params(struct hpre_ctx *ctx, struct dh *params)
static void hpre_dh_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all)
{
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = ctx->dev;
unsigned int sz = ctx->key_sz;
if (is_clear_all)
@@ -877,18 +888,18 @@ static int hpre_rsa_set_n(struct hpre_ctx *ctx, const char *value,
if (!hpre_rsa_key_size_is_support(ctx->key_sz))
return 0;
- ctx->rsa.pubkey = dma_alloc_coherent(HPRE_DEV(ctx), vlen << 1,
+ ctx->rsa.pubkey = dma_alloc_coherent(ctx->dev, vlen << 1,
&ctx->rsa.dma_pubkey,
GFP_KERNEL);
if (!ctx->rsa.pubkey)
return -ENOMEM;
if (private) {
- ctx->rsa.prikey = dma_alloc_coherent(HPRE_DEV(ctx), vlen << 1,
+ ctx->rsa.prikey = dma_alloc_coherent(ctx->dev, vlen << 1,
&ctx->rsa.dma_prikey,
GFP_KERNEL);
if (!ctx->rsa.prikey) {
- dma_free_coherent(HPRE_DEV(ctx), vlen << 1,
+ dma_free_coherent(ctx->dev, vlen << 1,
ctx->rsa.pubkey,
ctx->rsa.dma_pubkey);
ctx->rsa.pubkey = NULL;
@@ -950,7 +961,7 @@ static int hpre_crt_para_get(char *para, size_t para_sz,
static int hpre_rsa_setkey_crt(struct hpre_ctx *ctx, struct rsa_key *rsa_key)
{
unsigned int hlf_ksz = ctx->key_sz >> 1;
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = ctx->dev;
u64 offset;
int ret;
@@ -1008,7 +1019,7 @@ free_key:
static void hpre_rsa_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all)
{
unsigned int half_key_sz = ctx->key_sz >> 1;
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = ctx->dev;
if (is_clear_all)
hisi_qm_stop_qp(ctx->qp);
@@ -1179,7 +1190,7 @@ static void hpre_key_to_big_end(u8 *data, int len)
static void hpre_ecc_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all,
bool is_ecdh)
{
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = ctx->dev;
unsigned int sz = ctx->key_sz;
unsigned int shift = sz << 1;
@@ -1202,12 +1213,21 @@ static void hpre_ecc_clear_ctx(struct hpre_ctx *ctx, bool is_clear_all,
hpre_ctx_clear(ctx, is_clear_all);
}
+/*
+ * HPRE supports curve widths of 192/224/256/384/521 bits.
+ * The bit width is rounded up as follows:
+ * bits<=256, bits=256; 256<bits<=384, bits=384; 384<bits<=576, bits=576;
+ * If a parameter is narrower than the rounded width, its high-order bits
+ * are zero-filled in software, so TASK_LENGTH1 is 0x3/0x5/0x8;
+ */
static unsigned int hpre_ecdh_supported_curve(unsigned short id)
{
switch (id) {
case ECC_CURVE_NIST_P192:
case ECC_CURVE_NIST_P256:
return HPRE_ECC_HW256_KSZ_B;
+ case ECC_CURVE_NIST_P384:
+ return HPRE_ECC_HW384_KSZ_B;
default:
break;
}
@@ -1272,6 +1292,8 @@ static unsigned int hpre_ecdh_get_curvesz(unsigned short id)
return HPRE_ECC_NIST_P192_N_SIZE;
case ECC_CURVE_NIST_P256:
return HPRE_ECC_NIST_P256_N_SIZE;
+ case ECC_CURVE_NIST_P384:
+ return HPRE_ECC_NIST_P384_N_SIZE;
default:
break;
}
@@ -1281,7 +1303,7 @@ static unsigned int hpre_ecdh_get_curvesz(unsigned short id)
static int hpre_ecdh_set_param(struct hpre_ctx *ctx, struct ecdh *params)
{
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = ctx->dev;
unsigned int sz, shift, curve_sz;
int ret;
@@ -1328,11 +1350,32 @@ static bool hpre_key_is_zero(char *key, unsigned short key_sz)
return true;
}
+/*
+ * ecdh_gen_privkey() - fill params->key with params->key_size bytes taken
+ * from the default crypto rng
+ * @ctx: HPRE context, only used to reach the device for error messages
+ * @params: ECDH parameters whose key buffer is written
+ *
+ * Return: 0 on success, otherwise the error returned by
+ * crypto_get_default_rng() or crypto_rng_get_bytes().
+ */
+static int ecdh_gen_privkey(struct hpre_ctx *ctx, struct ecdh *params)
+{
+	int err = crypto_get_default_rng();
+
+	if (err) {
+		dev_err(ctx->dev, "failed to get default rng, ret = %d!\n", err);
+		return err;
+	}
+
+	err = crypto_rng_get_bytes(crypto_default_rng, (u8 *)params->key,
+				   params->key_size);
+
+	/* the default rng is released whether or not bytes were obtained */
+	crypto_put_default_rng();
+	if (!err)
+		return 0;
+
+	dev_err(ctx->dev, "failed to get rng, ret = %d!\n", err);
+	return err;
+}
+
static int hpre_ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
unsigned int len)
{
struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = ctx->dev;
+ char key[HPRE_ECC_MAX_KSZ];
unsigned int sz, sz_shift;
struct ecdh params;
int ret;
@@ -1342,6 +1385,15 @@ static int hpre_ecdh_set_secret(struct crypto_kpp *tfm, const void *buf,
return -EINVAL;
}
+ /* Use stdrng to generate private key */
+ if (!params.key || !params.key_size) {
+ params.key = key;
+ params.key_size = hpre_ecdh_get_curvesz(ctx->curve_id);
+ ret = ecdh_gen_privkey(ctx, &params);
+ if (ret)
+ return ret;
+ }
+
if (hpre_key_is_zero(params.key, params.key_size)) {
dev_err(dev, "Invalid hpre key!\n");
return -EINVAL;
@@ -1367,16 +1419,20 @@ static void hpre_ecdh_hw_data_clr_all(struct hpre_ctx *ctx,
struct scatterlist *dst,
struct scatterlist *src)
{
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = ctx->dev;
struct hpre_sqe *sqe = &req->req;
dma_addr_t dma;
dma = le64_to_cpu(sqe->in);
+ if (unlikely(dma_mapping_error(dev, dma)))
+ return;
if (src && req->src)
dma_free_coherent(dev, ctx->key_sz << 2, req->src, dma);
dma = le64_to_cpu(sqe->out);
+ if (unlikely(dma_mapping_error(dev, dma)))
+ return;
if (req->dst)
dma_free_coherent(dev, ctx->key_sz << 1, req->dst, dma);
@@ -1431,6 +1487,8 @@ static int hpre_ecdh_msg_request_set(struct hpre_ctx *ctx,
h_req->areq.ecdh = req;
msg = &h_req->req;
memset(msg, 0, sizeof(*msg));
+ msg->in = cpu_to_le64(DMA_MAPPING_ERROR);
+ msg->out = cpu_to_le64(DMA_MAPPING_ERROR);
msg->key = cpu_to_le64(ctx->ecdh.dma_p);
msg->dw0 |= cpu_to_le32(0x1U << HPRE_SQE_DONE_SHIFT);
@@ -1450,7 +1508,7 @@ static int hpre_ecdh_src_data_init(struct hpre_asym_request *hpre_req,
{
struct hpre_sqe *msg = &hpre_req->req;
struct hpre_ctx *ctx = hpre_req->ctx;
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = ctx->dev;
unsigned int tmpshift;
dma_addr_t dma = 0;
void *ptr;
@@ -1480,8 +1538,8 @@ static int hpre_ecdh_dst_data_init(struct hpre_asym_request *hpre_req,
{
struct hpre_sqe *msg = &hpre_req->req;
struct hpre_ctx *ctx = hpre_req->ctx;
- struct device *dev = HPRE_DEV(ctx);
- dma_addr_t dma = 0;
+ struct device *dev = ctx->dev;
+ dma_addr_t dma;
if (unlikely(!data || !sg_is_last(data) || len != ctx->key_sz << 1)) {
dev_err(dev, "data or data length is illegal!\n");
@@ -1503,7 +1561,7 @@ static int hpre_ecdh_compute_value(struct kpp_request *req)
{
struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = ctx->dev;
void *tmp = kpp_request_ctx(req);
struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
struct hpre_sqe *msg = &hpre_req->req;
@@ -1568,6 +1626,15 @@ static int hpre_ecdh_nist_p256_init_tfm(struct crypto_kpp *tfm)
return hpre_ctx_init(ctx, HPRE_V3_ECC_ALG_TYPE);
}
+/* Select the NIST P-384 curve for @tfm, then initialise its HPRE context */
+static int hpre_ecdh_nist_p384_init_tfm(struct crypto_kpp *tfm)
+{
+	struct hpre_ctx *hpre = kpp_tfm_ctx(tfm);
+
+	hpre->curve_id = ECC_CURVE_NIST_P384;
+	return hpre_ctx_init(hpre, HPRE_V3_ECC_ALG_TYPE);
+}
+
static void hpre_ecdh_exit_tfm(struct crypto_kpp *tfm)
{
struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
@@ -1609,7 +1676,7 @@ static void hpre_curve25519_fill_curve(struct hpre_ctx *ctx, const void *buf,
static int hpre_curve25519_set_param(struct hpre_ctx *ctx, const void *buf,
unsigned int len)
{
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = ctx->dev;
unsigned int sz = ctx->key_sz;
unsigned int shift = sz << 1;
@@ -1634,7 +1701,7 @@ static int hpre_curve25519_set_secret(struct crypto_kpp *tfm, const void *buf,
unsigned int len)
{
struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = ctx->dev;
int ret = -EINVAL;
if (len != CURVE25519_KEY_SIZE ||
@@ -1662,16 +1729,20 @@ static void hpre_curve25519_hw_data_clr_all(struct hpre_ctx *ctx,
struct scatterlist *dst,
struct scatterlist *src)
{
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = ctx->dev;
struct hpre_sqe *sqe = &req->req;
dma_addr_t dma;
dma = le64_to_cpu(sqe->in);
+ if (unlikely(dma_mapping_error(dev, dma)))
+ return;
if (src && req->src)
dma_free_coherent(dev, ctx->key_sz, req->src, dma);
dma = le64_to_cpu(sqe->out);
+ if (unlikely(dma_mapping_error(dev, dma)))
+ return;
if (req->dst)
dma_free_coherent(dev, ctx->key_sz, req->dst, dma);
@@ -1722,6 +1793,8 @@ static int hpre_curve25519_msg_request_set(struct hpre_ctx *ctx,
h_req->areq.curve25519 = req;
msg = &h_req->req;
memset(msg, 0, sizeof(*msg));
+ msg->in = cpu_to_le64(DMA_MAPPING_ERROR);
+ msg->out = cpu_to_le64(DMA_MAPPING_ERROR);
msg->key = cpu_to_le64(ctx->curve25519.dma_p);
msg->dw0 |= cpu_to_le32(0x1U << HPRE_SQE_DONE_SHIFT);
@@ -1752,7 +1825,7 @@ static int hpre_curve25519_src_init(struct hpre_asym_request *hpre_req,
{
struct hpre_sqe *msg = &hpre_req->req;
struct hpre_ctx *ctx = hpre_req->ctx;
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = ctx->dev;
u8 p[CURVE25519_KEY_SIZE] = { 0 };
const struct ecc_curve *curve;
dma_addr_t dma = 0;
@@ -1790,8 +1863,12 @@ static int hpre_curve25519_src_init(struct hpre_asym_request *hpre_req,
* When src_data equals (2^255 - 19) ~ (2^255 - 1), it is out of p,
* we get its modulus to p, and then use it.
*/
- if (memcmp(ptr, p, ctx->key_sz) >= 0)
+ if (memcmp(ptr, p, ctx->key_sz) == 0) {
+ dev_err(dev, "gx is p!\n");
+ return -EINVAL;
+ } else if (memcmp(ptr, p, ctx->key_sz) > 0) {
hpre_curve25519_src_modulo_p(ptr);
+ }
hpre_req->src = ptr;
msg->in = cpu_to_le64(dma);
@@ -1807,8 +1884,8 @@ static int hpre_curve25519_dst_init(struct hpre_asym_request *hpre_req,
{
struct hpre_sqe *msg = &hpre_req->req;
struct hpre_ctx *ctx = hpre_req->ctx;
- struct device *dev = HPRE_DEV(ctx);
- dma_addr_t dma = 0;
+ struct device *dev = ctx->dev;
+ dma_addr_t dma;
if (!data || !sg_is_last(data) || len != ctx->key_sz) {
dev_err(dev, "data or data length is illegal!\n");
@@ -1830,7 +1907,7 @@ static int hpre_curve25519_compute_value(struct kpp_request *req)
{
struct crypto_kpp *tfm = crypto_kpp_reqtfm(req);
struct hpre_ctx *ctx = kpp_tfm_ctx(tfm);
- struct device *dev = HPRE_DEV(ctx);
+ struct device *dev = ctx->dev;
void *tmp = kpp_request_ctx(req);
struct hpre_asym_request *hpre_req = PTR_ALIGN(tmp, HPRE_ALIGN_SZ);
struct hpre_sqe *msg = &hpre_req->req;
@@ -1940,7 +2017,7 @@ static struct kpp_alg ecdh_nist_p192 = {
.cra_ctxsize = sizeof(struct hpre_ctx),
.cra_priority = HPRE_CRYPTO_ALG_PRI,
.cra_name = "ecdh-nist-p192",
- .cra_driver_name = "hpre-ecdh",
+ .cra_driver_name = "hpre-ecdh-nist-p192",
.cra_module = THIS_MODULE,
},
};
@@ -1957,7 +2034,24 @@ static struct kpp_alg ecdh_nist_p256 = {
.cra_ctxsize = sizeof(struct hpre_ctx),
.cra_priority = HPRE_CRYPTO_ALG_PRI,
.cra_name = "ecdh-nist-p256",
- .cra_driver_name = "hpre-ecdh",
+ .cra_driver_name = "hpre-ecdh-nist-p256",
+ .cra_module = THIS_MODULE,
+ },
+};
+
+static struct kpp_alg ecdh_nist_p384 = {
+ .set_secret = hpre_ecdh_set_secret,
+ .generate_public_key = hpre_ecdh_compute_value,
+ .compute_shared_secret = hpre_ecdh_compute_value,
+ .max_size = hpre_ecdh_max_size,
+ .init = hpre_ecdh_nist_p384_init_tfm,
+ .exit = hpre_ecdh_exit_tfm,
+ .reqsize = sizeof(struct hpre_asym_request) + HPRE_ALIGN_SZ,
+ .base = {
+ .cra_ctxsize = sizeof(struct hpre_ctx),
+ .cra_priority = HPRE_CRYPTO_ALG_PRI,
+ .cra_name = "ecdh-nist-p384",
+ .cra_driver_name = "hpre-ecdh-nist-p384",
.cra_module = THIS_MODULE,
},
};
@@ -1989,16 +2083,25 @@ static int hpre_register_ecdh(void)
return ret;
ret = crypto_register_kpp(&ecdh_nist_p256);
- if (ret) {
- crypto_unregister_kpp(&ecdh_nist_p192);
- return ret;
- }
+ if (ret)
+ goto unregister_ecdh_p192;
+
+ ret = crypto_register_kpp(&ecdh_nist_p384);
+ if (ret)
+ goto unregister_ecdh_p256;
return 0;
+
+unregister_ecdh_p256:
+ crypto_unregister_kpp(&ecdh_nist_p256);
+unregister_ecdh_p192:
+ crypto_unregister_kpp(&ecdh_nist_p192);
+ return ret;
}
static void hpre_unregister_ecdh(void)
{
+ crypto_unregister_kpp(&ecdh_nist_p384);
crypto_unregister_kpp(&ecdh_nist_p256);
crypto_unregister_kpp(&ecdh_nist_p192);
}
diff --git a/drivers/crypto/hisilicon/hpre/hpre_main.c b/drivers/crypto/hisilicon/hpre/hpre_main.c
index 046bc962c8b2..8b0640fb04be 100644
--- a/drivers/crypto/hisilicon/hpre/hpre_main.c
+++ b/drivers/crypto/hisilicon/hpre/hpre_main.c
@@ -36,7 +36,7 @@
#define HPRE_INT_MASK 0x301400
#define HPRE_INT_STATUS 0x301800
#define HPRE_CORE_INT_ENABLE 0
-#define HPRE_CORE_INT_DISABLE 0x003fffff
+#define HPRE_CORE_INT_DISABLE GENMASK(21, 0)
#define HPRE_RDCHN_INI_ST 0x301a00
#define HPRE_CLSTR_BASE 0x302000
#define HPRE_CORE_EN_OFFSET 0x04
@@ -50,6 +50,7 @@
#define HPRE_RAS_NFE_ENB 0x301414
#define HPRE_HAC_RAS_NFE_ENABLE 0x3ffffe
#define HPRE_RAS_FE_ENB 0x301418
+#define HPRE_OOO_SHUTDOWN_SEL 0x301a3c
#define HPRE_HAC_RAS_FE_ENABLE 0
#define HPRE_CORE_ENB (HPRE_CLSTR_BASE + HPRE_CORE_EN_OFFSET)
@@ -57,7 +58,6 @@
#define HPRE_CORE_INI_STATUS (HPRE_CLSTR_BASE + HPRE_CORE_INI_STATUS_OFFSET)
#define HPRE_HAC_ECC1_CNT 0x301a04
#define HPRE_HAC_ECC2_CNT 0x301a08
-#define HPRE_HAC_INT_STATUS 0x301800
#define HPRE_HAC_SOURCE_INT 0x301600
#define HPRE_CLSTR_ADDR_INTRVL 0x1000
#define HPRE_CLUSTER_INQURY 0x100
@@ -69,13 +69,17 @@
#define HPRE_DBGFS_VAL_MAX_LEN 20
#define HPRE_PCI_DEVICE_ID 0xa258
#define HPRE_PCI_VF_DEVICE_ID 0xa259
-#define HPRE_ADDR(qm, offset) ((qm)->io_base + (offset))
-#define HPRE_QM_USR_CFG_MASK 0xfffffffe
-#define HPRE_QM_AXI_CFG_MASK 0xffff
-#define HPRE_QM_VFG_AX_MASK 0xff
-#define HPRE_BD_USR_MASK 0x3
-#define HPRE_CLUSTER_CORE_MASK_V2 0xf
-#define HPRE_CLUSTER_CORE_MASK_V3 0xff
+#define HPRE_QM_USR_CFG_MASK GENMASK(31, 1)
+#define HPRE_QM_AXI_CFG_MASK GENMASK(15, 0)
+#define HPRE_QM_VFG_AX_MASK GENMASK(7, 0)
+#define HPRE_BD_USR_MASK GENMASK(1, 0)
+#define HPRE_CLUSTER_CORE_MASK_V2 GENMASK(3, 0)
+#define HPRE_CLUSTER_CORE_MASK_V3 GENMASK(7, 0)
+#define HPRE_PREFETCH_CFG 0x301130
+#define HPRE_SVA_PREFTCH_DFX 0x30115C
+#define HPRE_PREFETCH_ENABLE (~(BIT(0) | BIT(30)))
+#define HPRE_PREFETCH_DISABLE BIT(30)
+#define HPRE_SVA_DISABLE_READY (BIT(4) | BIT(8))
#define HPRE_AM_OOO_SHUTDOWN_ENB 0x301044
#define HPRE_AM_OOO_SHUTDOWN_ENABLE BIT(0)
@@ -88,11 +92,7 @@
#define HPRE_QM_PM_FLR BIT(11)
#define HPRE_QM_SRIOV_FLR BIT(12)
-#define HPRE_CLUSTERS_NUM(qm) \
- (((qm)->ver >= QM_HW_V3) ? HPRE_CLUSTERS_NUM_V3 : HPRE_CLUSTERS_NUM_V2)
-#define HPRE_CLUSTER_CORE_MASK(qm) \
- (((qm)->ver >= QM_HW_V3) ? HPRE_CLUSTER_CORE_MASK_V3 :\
- HPRE_CLUSTER_CORE_MASK_V2)
+#define HPRE_SHAPER_TYPE_RATE 128
#define HPRE_VIA_MSI_DSM 1
#define HPRE_SQE_MASK_OFFSET 8
#define HPRE_SQE_MASK_LEN 24
@@ -123,21 +123,49 @@ static const char * const hpre_debug_file_name[] = {
};
static const struct hpre_hw_error hpre_hw_errors[] = {
- { .int_msk = BIT(0), .msg = "core_ecc_1bit_err_int_set" },
- { .int_msk = BIT(1), .msg = "core_ecc_2bit_err_int_set" },
- { .int_msk = BIT(2), .msg = "dat_wb_poison_int_set" },
- { .int_msk = BIT(3), .msg = "dat_rd_poison_int_set" },
- { .int_msk = BIT(4), .msg = "bd_rd_poison_int_set" },
- { .int_msk = BIT(5), .msg = "ooo_ecc_2bit_err_int_set" },
- { .int_msk = BIT(6), .msg = "cluster1_shb_timeout_int_set" },
- { .int_msk = BIT(7), .msg = "cluster2_shb_timeout_int_set" },
- { .int_msk = BIT(8), .msg = "cluster3_shb_timeout_int_set" },
- { .int_msk = BIT(9), .msg = "cluster4_shb_timeout_int_set" },
- { .int_msk = GENMASK(15, 10), .msg = "ooo_rdrsp_err_int_set" },
- { .int_msk = GENMASK(21, 16), .msg = "ooo_wrrsp_err_int_set" },
- { .int_msk = BIT(22), .msg = "pt_rng_timeout_int_set"},
- { .int_msk = BIT(23), .msg = "sva_fsm_timeout_int_set"},
{
+ .int_msk = BIT(0),
+ .msg = "core_ecc_1bit_err_int_set"
+ }, {
+ .int_msk = BIT(1),
+ .msg = "core_ecc_2bit_err_int_set"
+ }, {
+ .int_msk = BIT(2),
+ .msg = "dat_wb_poison_int_set"
+ }, {
+ .int_msk = BIT(3),
+ .msg = "dat_rd_poison_int_set"
+ }, {
+ .int_msk = BIT(4),
+ .msg = "bd_rd_poison_int_set"
+ }, {
+ .int_msk = BIT(5),
+ .msg = "ooo_ecc_2bit_err_int_set"
+ }, {
+ .int_msk = BIT(6),
+ .msg = "cluster1_shb_timeout_int_set"
+ }, {
+ .int_msk = BIT(7),
+ .msg = "cluster2_shb_timeout_int_set"
+ }, {
+ .int_msk = BIT(8),
+ .msg = "cluster3_shb_timeout_int_set"
+ }, {
+ .int_msk = BIT(9),
+ .msg = "cluster4_shb_timeout_int_set"
+ }, {
+ .int_msk = GENMASK(15, 10),
+ .msg = "ooo_rdrsp_err_int_set"
+ }, {
+ .int_msk = GENMASK(21, 16),
+ .msg = "ooo_wrrsp_err_int_set"
+ }, {
+ .int_msk = BIT(22),
+ .msg = "pt_rng_timeout_int_set"
+ }, {
+ .int_msk = BIT(23),
+ .msg = "sva_fsm_timeout_int_set"
+ }, {
/* sentinel */
}
};
@@ -224,6 +252,18 @@ static u32 vfs_num;
module_param_cb(vfs_num, &vfs_num_ops, &vfs_num, 0444);
MODULE_PARM_DESC(vfs_num, "Number of VFs to enable(1-63), 0(default)");
+/* Number of HPRE clusters, chosen from the QM hardware version of @qm */
+static inline int hpre_cluster_num(struct hisi_qm *qm)
+{
+	if (qm->ver >= QM_HW_V3)
+		return HPRE_CLUSTERS_NUM_V3;
+
+	return HPRE_CLUSTERS_NUM_V2;
+}
+
+/* Per-cluster core mask, chosen from the QM hardware version of @qm */
+static inline int hpre_cluster_core_mask(struct hisi_qm *qm)
+{
+	if (qm->ver >= QM_HW_V3)
+		return HPRE_CLUSTER_CORE_MASK_V3;
+
+	return HPRE_CLUSTER_CORE_MASK_V2;
+}
+
struct hisi_qp *hpre_create_qp(u8 type)
{
int node = cpu_to_node(smp_processor_id());
@@ -290,8 +330,8 @@ static int hpre_cfg_by_dsm(struct hisi_qm *qm)
static int hpre_set_cluster(struct hisi_qm *qm)
{
- u32 cluster_core_mask = HPRE_CLUSTER_CORE_MASK(qm);
- u8 clusters_num = HPRE_CLUSTERS_NUM(qm);
+ u32 cluster_core_mask = hpre_cluster_core_mask(qm);
+ u8 clusters_num = hpre_cluster_num(qm);
struct device *dev = &qm->pdev->dev;
unsigned long offset;
u32 val = 0;
@@ -302,10 +342,10 @@ static int hpre_set_cluster(struct hisi_qm *qm)
/* clusters initiating */
writel(cluster_core_mask,
- HPRE_ADDR(qm, offset + HPRE_CORE_ENB));
- writel(0x1, HPRE_ADDR(qm, offset + HPRE_CORE_INI_CFG));
- ret = readl_relaxed_poll_timeout(HPRE_ADDR(qm, offset +
- HPRE_CORE_INI_STATUS), val,
+ qm->io_base + offset + HPRE_CORE_ENB);
+ writel(0x1, qm->io_base + offset + HPRE_CORE_INI_CFG);
+ ret = readl_relaxed_poll_timeout(qm->io_base + offset +
+ HPRE_CORE_INI_STATUS, val,
((val & cluster_core_mask) ==
cluster_core_mask),
HPRE_REG_RD_INTVRL_US,
@@ -329,11 +369,52 @@ static void disable_flr_of_bme(struct hisi_qm *qm)
{
u32 val;
- val = readl(HPRE_ADDR(qm, QM_PEH_AXUSER_CFG));
+ val = readl(qm->io_base + QM_PEH_AXUSER_CFG);
val &= ~(HPRE_QM_BME_FLR | HPRE_QM_SRIOV_FLR);
val |= HPRE_QM_PM_FLR;
- writel(val, HPRE_ADDR(qm, QM_PEH_AXUSER_CFG));
- writel(PEH_AXUSER_CFG_ENABLE, HPRE_ADDR(qm, QM_PEH_AXUSER_CFG_ENABLE));
+ writel(val, qm->io_base + QM_PEH_AXUSER_CFG);
+ writel(PEH_AXUSER_CFG_ENABLE, qm->io_base + QM_PEH_AXUSER_CFG_ENABLE);
+}
+
+/*
+ * Turn on SVA prefetch in the HPRE and wait for the hardware to reflect it.
+ *
+ * Does nothing on hardware older than QM_HW_V3. A poll timeout is only
+ * logged; the function has no return value.
+ */
+static void hpre_open_sva_prefetch(struct hisi_qm *qm)
+{
+ u32 val;
+ int ret;
+
+ if (qm->ver < QM_HW_V3)
+ return;
+
+ /* Enable prefetch */
+ val = readl_relaxed(qm->io_base + HPRE_PREFETCH_CFG);
+ /*
+ * NOTE(review): HPRE_PREFETCH_ENABLE is applied as an AND mask, so it
+ * presumably has the disable bit(s) cleared - confirm with its definition.
+ */
+ val &= HPRE_PREFETCH_ENABLE;
+ writel(val, qm->io_base + HPRE_PREFETCH_CFG);
+
+ /* Poll the same register until HPRE_PREFETCH_DISABLE reads back clear */
+ ret = readl_relaxed_poll_timeout(qm->io_base + HPRE_PREFETCH_CFG,
+ val, !(val & HPRE_PREFETCH_DISABLE),
+ HPRE_REG_RD_INTVRL_US,
+ HPRE_REG_RD_TMOUT_US);
+ if (ret)
+ pci_err(qm->pdev, "failed to open sva prefetch\n");
+}
+
+/*
+ * Turn off SVA prefetch in the HPRE and wait for the hardware to confirm.
+ *
+ * Does nothing on hardware older than QM_HW_V3. A poll timeout is only
+ * logged; the function has no return value.
+ */
+static void hpre_close_sva_prefetch(struct hisi_qm *qm)
+{
+ u32 val;
+ int ret;
+
+ if (qm->ver < QM_HW_V3)
+ return;
+
+ /* Set HPRE_PREFETCH_DISABLE in the prefetch configuration register */
+ val = readl_relaxed(qm->io_base + HPRE_PREFETCH_CFG);
+ val |= HPRE_PREFETCH_DISABLE;
+ writel(val, qm->io_base + HPRE_PREFETCH_CFG);
+
+ /*
+ * Poll the DFX register until (val & HPRE_SVA_DISABLE_READY) is zero.
+ * NOTE(review): the exit condition waits for the READY bits to be
+ * clear, not set - confirm this matches the register definition.
+ */
+ ret = readl_relaxed_poll_timeout(qm->io_base + HPRE_SVA_PREFTCH_DFX,
+ val, !(val & HPRE_SVA_DISABLE_READY),
+ HPRE_REG_RD_INTVRL_US,
+ HPRE_REG_RD_TMOUT_US);
+ if (ret)
+ pci_err(qm->pdev, "failed to close sva prefetch\n");
}
static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
@@ -342,33 +423,33 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
u32 val;
int ret;
- writel(HPRE_QM_USR_CFG_MASK, HPRE_ADDR(qm, QM_ARUSER_M_CFG_ENABLE));
- writel(HPRE_QM_USR_CFG_MASK, HPRE_ADDR(qm, QM_AWUSER_M_CFG_ENABLE));
- writel_relaxed(HPRE_QM_AXI_CFG_MASK, HPRE_ADDR(qm, QM_AXI_M_CFG));
+ writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_ARUSER_M_CFG_ENABLE);
+ writel(HPRE_QM_USR_CFG_MASK, qm->io_base + QM_AWUSER_M_CFG_ENABLE);
+ writel_relaxed(HPRE_QM_AXI_CFG_MASK, qm->io_base + QM_AXI_M_CFG);
/* HPRE need more time, we close this interrupt */
- val = readl_relaxed(HPRE_ADDR(qm, HPRE_QM_ABNML_INT_MASK));
+ val = readl_relaxed(qm->io_base + HPRE_QM_ABNML_INT_MASK);
val |= BIT(HPRE_TIMEOUT_ABNML_BIT);
- writel_relaxed(val, HPRE_ADDR(qm, HPRE_QM_ABNML_INT_MASK));
+ writel_relaxed(val, qm->io_base + HPRE_QM_ABNML_INT_MASK);
if (qm->ver >= QM_HW_V3)
writel(HPRE_RSA_ENB | HPRE_ECC_ENB,
- HPRE_ADDR(qm, HPRE_TYPES_ENB));
+ qm->io_base + HPRE_TYPES_ENB);
else
- writel(HPRE_RSA_ENB, HPRE_ADDR(qm, HPRE_TYPES_ENB));
-
- writel(HPRE_QM_VFG_AX_MASK, HPRE_ADDR(qm, HPRE_VFG_AXCACHE));
- writel(0x0, HPRE_ADDR(qm, HPRE_BD_ENDIAN));
- writel(0x0, HPRE_ADDR(qm, HPRE_INT_MASK));
- writel(0x0, HPRE_ADDR(qm, HPRE_POISON_BYPASS));
- writel(0x0, HPRE_ADDR(qm, HPRE_COMM_CNT_CLR_CE));
- writel(0x0, HPRE_ADDR(qm, HPRE_ECC_BYPASS));
-
- writel(HPRE_BD_USR_MASK, HPRE_ADDR(qm, HPRE_BD_ARUSR_CFG));
- writel(HPRE_BD_USR_MASK, HPRE_ADDR(qm, HPRE_BD_AWUSR_CFG));
- writel(0x1, HPRE_ADDR(qm, HPRE_RDCHN_INI_CFG));
- ret = readl_relaxed_poll_timeout(HPRE_ADDR(qm, HPRE_RDCHN_INI_ST), val,
- val & BIT(0),
+ writel(HPRE_RSA_ENB, qm->io_base + HPRE_TYPES_ENB);
+
+ writel(HPRE_QM_VFG_AX_MASK, qm->io_base + HPRE_VFG_AXCACHE);
+ writel(0x0, qm->io_base + HPRE_BD_ENDIAN);
+ writel(0x0, qm->io_base + HPRE_INT_MASK);
+ writel(0x0, qm->io_base + HPRE_POISON_BYPASS);
+ writel(0x0, qm->io_base + HPRE_COMM_CNT_CLR_CE);
+ writel(0x0, qm->io_base + HPRE_ECC_BYPASS);
+
+ writel(HPRE_BD_USR_MASK, qm->io_base + HPRE_BD_ARUSR_CFG);
+ writel(HPRE_BD_USR_MASK, qm->io_base + HPRE_BD_AWUSR_CFG);
+ writel(0x1, qm->io_base + HPRE_RDCHN_INI_CFG);
+ ret = readl_relaxed_poll_timeout(qm->io_base + HPRE_RDCHN_INI_ST, val,
+ val & BIT(0),
HPRE_REG_RD_INTVRL_US,
HPRE_REG_RD_TMOUT_US);
if (ret) {
@@ -397,7 +478,7 @@ static int hpre_set_user_domain_and_cache(struct hisi_qm *qm)
static void hpre_cnt_regs_clear(struct hisi_qm *qm)
{
- u8 clusters_num = HPRE_CLUSTERS_NUM(qm);
+ u8 clusters_num = hpre_cluster_num(qm);
unsigned long offset;
int i;
@@ -413,36 +494,49 @@ static void hpre_cnt_regs_clear(struct hisi_qm *qm)
hisi_qm_debug_regs_clear(qm);
}
-static void hpre_hw_error_disable(struct hisi_qm *qm)
+static void hpre_master_ooo_ctrl(struct hisi_qm *qm, bool enable)
{
- u32 val;
+ u32 val1, val2;
+
+ val1 = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
+ if (enable) {
+ val1 |= HPRE_AM_OOO_SHUTDOWN_ENABLE;
+ val2 = HPRE_HAC_RAS_NFE_ENABLE;
+ } else {
+ val1 &= ~HPRE_AM_OOO_SHUTDOWN_ENABLE;
+ val2 = 0x0;
+ }
+
+ if (qm->ver > QM_HW_V2)
+ writel(val2, qm->io_base + HPRE_OOO_SHUTDOWN_SEL);
+
+ writel(val1, qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
+}
+static void hpre_hw_error_disable(struct hisi_qm *qm)
+{
/* disable hpre hw error interrupts */
writel(HPRE_CORE_INT_DISABLE, qm->io_base + HPRE_INT_MASK);
- /* disable HPRE block master OOO when m-bit error occur */
- val = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
- val &= ~HPRE_AM_OOO_SHUTDOWN_ENABLE;
- writel(val, qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
+ /* disable HPRE block master OOO when nfe occurs on Kunpeng930 */
+ hpre_master_ooo_ctrl(qm, false);
}
static void hpre_hw_error_enable(struct hisi_qm *qm)
{
- u32 val;
-
/* clear HPRE hw error source if having */
writel(HPRE_CORE_INT_DISABLE, qm->io_base + HPRE_HAC_SOURCE_INT);
- /* enable hpre hw error interrupts */
- writel(HPRE_CORE_INT_ENABLE, qm->io_base + HPRE_INT_MASK);
+ /* configure error type */
writel(HPRE_HAC_RAS_CE_ENABLE, qm->io_base + HPRE_RAS_CE_ENB);
writel(HPRE_HAC_RAS_NFE_ENABLE, qm->io_base + HPRE_RAS_NFE_ENB);
writel(HPRE_HAC_RAS_FE_ENABLE, qm->io_base + HPRE_RAS_FE_ENB);
- /* enable HPRE block master OOO when m-bit error occur */
- val = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
- val |= HPRE_AM_OOO_SHUTDOWN_ENABLE;
- writel(val, qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
+ /* enable HPRE block master OOO when nfe occurs on Kunpeng930 */
+ hpre_master_ooo_ctrl(qm, true);
+
+ /* enable hpre hw error interrupts */
+ writel(HPRE_CORE_INT_ENABLE, qm->io_base + HPRE_INT_MASK);
}
static inline struct hisi_qm *hpre_file_to_qm(struct hpre_debugfs_file *file)
@@ -650,7 +744,7 @@ static int hpre_pf_comm_regs_debugfs_init(struct hisi_qm *qm)
static int hpre_cluster_debugfs_init(struct hisi_qm *qm)
{
- u8 clusters_num = HPRE_CLUSTERS_NUM(qm);
+ u8 clusters_num = hpre_cluster_num(qm);
struct device *dev = &qm->pdev->dev;
char buf[HPRE_DBGFS_VAL_MAX_LEN];
struct debugfs_regset32 *regset;
@@ -788,7 +882,7 @@ static void hpre_log_hw_error(struct hisi_qm *qm, u32 err_sts)
static u32 hpre_get_hw_err_status(struct hisi_qm *qm)
{
- return readl(qm->io_base + HPRE_HAC_INT_STATUS);
+ return readl(qm->io_base + HPRE_INT_STATUS);
}
static void hpre_clear_hw_err_status(struct hisi_qm *qm, u32 err_sts)
@@ -802,9 +896,9 @@ static void hpre_open_axi_master_ooo(struct hisi_qm *qm)
value = readl(qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
writel(value & ~HPRE_AM_OOO_SHUTDOWN_ENABLE,
- HPRE_ADDR(qm, HPRE_AM_OOO_SHUTDOWN_ENB));
+ qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
writel(value | HPRE_AM_OOO_SHUTDOWN_ENABLE,
- HPRE_ADDR(qm, HPRE_AM_OOO_SHUTDOWN_ENB));
+ qm->io_base + HPRE_AM_OOO_SHUTDOWN_ENB);
}
static void hpre_err_info_init(struct hisi_qm *qm)
@@ -829,6 +923,8 @@ static const struct hisi_qm_err_ini hpre_err_ini = {
.clear_dev_hw_err_status = hpre_clear_hw_err_status,
.log_dev_hw_err = hpre_log_hw_error,
.open_axi_master_ooo = hpre_open_axi_master_ooo,
+ .open_sva_prefetch = hpre_open_sva_prefetch,
+ .close_sva_prefetch = hpre_close_sva_prefetch,
.err_info_init = hpre_err_info_init,
};
@@ -841,6 +937,8 @@ static int hpre_pf_probe_init(struct hpre *hpre)
if (ret)
return ret;
+ hpre_open_sva_prefetch(qm);
+
qm->err_ini = &hpre_err_ini;
qm->err_ini->err_info_init(qm);
hisi_qm_dev_err_init(qm);
@@ -850,6 +948,7 @@ static int hpre_pf_probe_init(struct hpre *hpre)
static int hpre_probe_init(struct hpre *hpre)
{
+ u32 type_rate = HPRE_SHAPER_TYPE_RATE;
struct hisi_qm *qm = &hpre->qm;
int ret;
@@ -857,6 +956,11 @@ static int hpre_probe_init(struct hpre *hpre)
ret = hpre_pf_probe_init(hpre);
if (ret)
return ret;
+ /* Enable shaper type 0 */
+ if (qm->ver >= QM_HW_V3) {
+ type_rate |= QM_SHAPER_ENABLE;
+ qm->type_rate = type_rate;
+ }
}
return 0;
diff --git a/drivers/crypto/hisilicon/qm.c b/drivers/crypto/hisilicon/qm.c
index ce439a0c66c9..1d67f94a1d56 100644
--- a/drivers/crypto/hisilicon/qm.c
+++ b/drivers/crypto/hisilicon/qm.c
@@ -25,9 +25,11 @@
#define QM_IRQ_NUM_V1 1
#define QM_IRQ_NUM_PF_V2 4
#define QM_IRQ_NUM_VF_V2 2
+#define QM_IRQ_NUM_VF_V3 3
#define QM_EQ_EVENT_IRQ_VECTOR 0
#define QM_AEQ_EVENT_IRQ_VECTOR 1
+#define QM_CMD_EVENT_IRQ_VECTOR 2
#define QM_ABNORMAL_EVENT_IRQ_VECTOR 3
/* mailbox */
@@ -39,6 +41,8 @@
#define QM_MB_CMD_CQC_BT 0x5
#define QM_MB_CMD_SQC_VFT_V2 0x6
#define QM_MB_CMD_STOP_QP 0x8
+#define QM_MB_CMD_SRC 0xc
+#define QM_MB_CMD_DST 0xd
#define QM_MB_CMD_SEND_BASE 0x300
#define QM_MB_EVENT_SHIFT 8
@@ -46,6 +50,9 @@
#define QM_MB_OP_SHIFT 14
#define QM_MB_CMD_DATA_ADDR_L 0x304
#define QM_MB_CMD_DATA_ADDR_H 0x308
+#define QM_MB_PING_ALL_VFS 0xffff
+#define QM_MB_CMD_DATA_SHIFT 32
+#define QM_MB_CMD_DATA_MASK GENMASK(31, 0)
/* sqc shift */
#define QM_SQ_HOP_NUM_SHIFT 0
@@ -95,6 +102,7 @@
#define QM_DOORBELL_SQ_CQ_BASE_V2 0x1000
#define QM_DOORBELL_EQ_AEQ_BASE_V2 0x2000
#define QM_QUE_ISO_CFG_V 0x0030
+#define QM_PAGE_SIZE 0x0034
#define QM_QUE_ISO_EN 0x100154
#define QM_CAPBILITY 0x100158
#define QM_QP_NUN_MASK GENMASK(10, 0)
@@ -155,11 +163,15 @@
#define QM_RAS_CE_THRESHOLD 0x1000f8
#define QM_RAS_CE_TIMES_PER_IRQ 1
#define QM_RAS_MSI_INT_SEL 0x1040f4
+#define QM_OOO_SHUTDOWN_SEL 0x1040f8
#define QM_RESET_WAIT_TIMEOUT 400
#define QM_PEH_VENDOR_ID 0x1000d8
#define ACC_VENDOR_ID_VALUE 0x5a5a
#define QM_PEH_DFX_INFO0 0x1000fc
+#define QM_PEH_DFX_INFO1 0x100100
+#define QM_PEH_DFX_MASK (BIT(0) | BIT(2))
+#define QM_PEH_MSI_FINISH_MASK GENMASK(19, 16)
#define ACC_PEH_SRIOV_CTRL_VF_MSE_SHIFT 3
#define ACC_PEH_MSI_DISABLE GENMASK(31, 0)
#define ACC_MASTER_GLOBAL_CTRL_SHUTDOWN 0x1
@@ -170,6 +182,31 @@
#define QM_RAS_NFE_MBIT_DISABLE ~QM_ECC_MBIT
#define ACC_AM_ROB_ECC_INT_STS 0x300104
#define ACC_ROB_ECC_ERR_MULTPL BIT(1)
+#define QM_MSI_CAP_ENABLE BIT(16)
+
+/* interfunction communication */
+#define QM_IFC_READY_STATUS 0x100128
+#define QM_IFC_C_STS_M 0x10012C
+#define QM_IFC_INT_SET_P 0x100130
+#define QM_IFC_INT_CFG 0x100134
+#define QM_IFC_INT_SOURCE_P 0x100138
+#define QM_IFC_INT_SOURCE_V 0x0020
+#define QM_IFC_INT_MASK 0x0024
+#define QM_IFC_INT_STATUS 0x0028
+#define QM_IFC_INT_SET_V 0x002C
+#define QM_IFC_SEND_ALL_VFS GENMASK(6, 0)
+#define QM_IFC_INT_SOURCE_CLR GENMASK(63, 0)
+#define QM_IFC_INT_SOURCE_MASK BIT(0)
+#define QM_IFC_INT_DISABLE BIT(0)
+#define QM_IFC_INT_STATUS_MASK BIT(0)
+#define QM_IFC_INT_SET_MASK BIT(0)
+#define QM_WAIT_DST_ACK 10
+#define QM_MAX_PF_WAIT_COUNT 10
+#define QM_MAX_VF_WAIT_COUNT 40
+#define QM_VF_RESET_WAIT_US 20000
+#define QM_VF_RESET_WAIT_CNT 3000
+#define QM_VF_RESET_WAIT_TIMEOUT_US \
+ (QM_VF_RESET_WAIT_US * QM_VF_RESET_WAIT_CNT)
#define QM_DFX_MB_CNT_VF 0x104010
#define QM_DFX_DB_CNT_VF 0x104020
@@ -205,6 +242,33 @@
#define QM_DRIVER_REMOVING 0
#define QM_RST_SCHED 1
#define QM_RESETTING 2
+#define QM_QOS_PARAM_NUM 2
+#define QM_QOS_VAL_NUM 1
+#define QM_QOS_BDF_PARAM_NUM 4
+#define QM_QOS_MAX_VAL 1000
+#define QM_QOS_RATE 100
+#define QM_QOS_EXPAND_RATE 1000
+#define QM_SHAPER_CIR_B_MASK GENMASK(7, 0)
+#define QM_SHAPER_CIR_U_MASK GENMASK(10, 8)
+#define QM_SHAPER_CIR_S_MASK GENMASK(14, 11)
+#define QM_SHAPER_FACTOR_CIR_U_SHIFT 8
+#define QM_SHAPER_FACTOR_CIR_S_SHIFT 11
+#define QM_SHAPER_FACTOR_CBS_B_SHIFT 15
+#define QM_SHAPER_FACTOR_CBS_S_SHIFT 19
+#define QM_SHAPER_CBS_B 1
+#define QM_SHAPER_CBS_S 16
+#define QM_SHAPER_VFT_OFFSET 6
+#define WAIT_FOR_QOS_VF 100
+#define QM_QOS_MIN_ERROR_RATE 5
+#define QM_QOS_TYPICAL_NUM 8
+#define QM_SHAPER_MIN_CBS_S 8
+#define QM_QOS_TICK 0x300U
+#define QM_QOS_DIVISOR_CLK 0x1f40U
+#define QM_QOS_MAX_CIR_B 200
+#define QM_QOS_MIN_CIR_B 100
+#define QM_QOS_MAX_CIR_U 6
+#define QM_QOS_MAX_CIR_S 11
+#define QM_QOS_VAL_MAX_LEN 32
#define QM_MK_CQC_DW3_V1(hop_num, pg_sz, buf_sz, cqe_sz) \
(((hop_num) << QM_CQ_HOP_NUM_SHIFT) | \
@@ -245,6 +309,7 @@
enum vft_type {
SQC_VFT = 0,
CQC_VFT,
+ SHAPER_VFT,
};
enum acc_err_result {
@@ -253,6 +318,23 @@ enum acc_err_result {
ACC_ERR_RECOVERED,
};
+/* Algorithm type indices; qm_shaper_init_vft() sets one SHAPER_VFT entry for each. */
+enum qm_alg_type {
+ ALG_TYPE_0,
+ ALG_TYPE_1,
+};
+
+/*
+ * Command codes carried in the mailbox data exchanged between PF and VF.
+ * Values start at 0x01 and increase by one in declaration order.
+ * NOTE(review): the PF_/VF_ prefix presumably names the sender - confirm.
+ */
+enum qm_mb_cmd {
+ QM_PF_FLR_PREPARE = 0x01,
+ QM_PF_SRST_PREPARE,
+ QM_PF_RESET_DONE,
+ QM_VF_PREPARE_DONE,
+ QM_VF_PREPARE_FAIL,
+ QM_VF_START_DONE,
+ QM_VF_START_FAIL,
+ QM_PF_SET_QOS,
+ QM_VF_GET_QOS,
+};
+
struct qm_cqe {
__le32 rsvd0;
__le16 cmd_id;
@@ -351,6 +433,9 @@ struct hisi_qm_hw_ops {
void (*hw_error_uninit)(struct hisi_qm *qm);
enum acc_err_result (*hw_error_handle)(struct hisi_qm *qm);
int (*stop_qp)(struct hisi_qp *qp);
+ int (*set_msi)(struct hisi_qm *qm, bool set);
+ int (*ping_all_vfs)(struct hisi_qm *qm, u64 cmd);
+ int (*ping_pf)(struct hisi_qm *qm, u64 cmd);
};
struct qm_dfx_item {
@@ -412,6 +497,11 @@ static const char * const qp_s[] = {
"none", "init", "start", "stop", "close",
};
+/*
+ * Lookup tables for acc_shaper_calc_cbs_s(): a rate in the interval
+ * [typical_qos_val[i], typical_qos_val[i + 1]) maps to typical_qos_cbs_s[i];
+ * a rate at or above the last threshold maps to the last cbs_s entry.
+ */
+static const u32 typical_qos_val[QM_QOS_TYPICAL_NUM] = {100, 250, 500, 1000,
+ 10000, 25000, 50000, 100000};
+static const u32 typical_qos_cbs_s[QM_QOS_TYPICAL_NUM] = {9, 10, 11, 12, 16,
+ 17, 18, 19};
+
static bool qm_avail_state(struct hisi_qm *qm, enum qm_state new)
{
enum qm_state curr = atomic_read(&qm->status.flags);
@@ -491,6 +581,18 @@ static bool qm_qp_avail_state(struct hisi_qm *qm, struct hisi_qp *qp,
return avail;
}
+/*
+ * Fill in a mailbox descriptor before it is handed to the hardware.
+ *
+ * @mailbox: descriptor to initialise
+ * @cmd: mailbox command code, placed in the low bits of w0
+ * @base: 64-bit payload, split across base_l/base_h
+ * @queue: value stored in queue_num
+ * @op: when true, the bit at QM_MB_OP_SHIFT is set in w0
+ *
+ * The bit at QM_MB_BUSY_SHIFT is always set in w0, and rsvd is cleared.
+ */
+static void qm_mb_pre_init(struct qm_mailbox *mailbox, u8 cmd,
+			    u64 base, u16 queue, bool op)
+{
+	u16 ctrl = cmd | (0x1 << QM_MB_BUSY_SHIFT);
+
+	if (op)
+		ctrl |= 0x1 << QM_MB_OP_SHIFT;
+
+	mailbox->rsvd = 0;
+	mailbox->base_h = cpu_to_le32(upper_32_bits(base));
+	mailbox->base_l = cpu_to_le32(lower_32_bits(base));
+	mailbox->queue_num = cpu_to_le16(queue);
+	mailbox->w0 = cpu_to_le16(ctrl);
+}
+
/* return 0 mailbox ready, -ETIMEDOUT hardware timeout */
static int qm_wait_mb_ready(struct hisi_qm *qm)
{
@@ -523,44 +625,42 @@ static void qm_mb_write(struct hisi_qm *qm, const void *src)
: "memory");
}
-static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
- bool op)
+static int qm_mb_nolock(struct hisi_qm *qm, struct qm_mailbox *mailbox)
{
- struct qm_mailbox mailbox;
- int ret = 0;
-
- dev_dbg(&qm->pdev->dev, "QM mailbox request to q%u: %u-%llx\n",
- queue, cmd, (unsigned long long)dma_addr);
-
- mailbox.w0 = cpu_to_le16(cmd |
- (op ? 0x1 << QM_MB_OP_SHIFT : 0) |
- (0x1 << QM_MB_BUSY_SHIFT));
- mailbox.queue_num = cpu_to_le16(queue);
- mailbox.base_l = cpu_to_le32(lower_32_bits(dma_addr));
- mailbox.base_h = cpu_to_le32(upper_32_bits(dma_addr));
- mailbox.rsvd = 0;
-
- mutex_lock(&qm->mailbox_lock);
-
if (unlikely(qm_wait_mb_ready(qm))) {
- ret = -EBUSY;
dev_err(&qm->pdev->dev, "QM mailbox is busy to start!\n");
- goto busy_unlock;
+ goto mb_busy;
}
- qm_mb_write(qm, &mailbox);
+ qm_mb_write(qm, mailbox);
if (unlikely(qm_wait_mb_ready(qm))) {
- ret = -EBUSY;
dev_err(&qm->pdev->dev, "QM mailbox operation timeout!\n");
- goto busy_unlock;
+ goto mb_busy;
}
-busy_unlock:
+ return 0;
+
+mb_busy:
+ atomic64_inc(&qm->debug.dfx.mb_err_cnt);
+ return -EBUSY;
+}
+
+static int qm_mb(struct hisi_qm *qm, u8 cmd, dma_addr_t dma_addr, u16 queue,
+ bool op)
+{
+ struct qm_mailbox mailbox;
+ int ret;
+
+ dev_dbg(&qm->pdev->dev, "QM mailbox request to q%u: %u-%llx\n",
+ queue, cmd, (unsigned long long)dma_addr);
+
+ qm_mb_pre_init(&mailbox, cmd, dma_addr, queue, op);
+
+ mutex_lock(&qm->mailbox_lock);
+ ret = qm_mb_nolock(qm, &mailbox);
mutex_unlock(&qm->mailbox_lock);
- if (ret)
- atomic64_inc(&qm->debug.dfx.mb_err_cnt);
return ret;
}
@@ -626,6 +726,14 @@ static u32 qm_get_irq_num_v2(struct hisi_qm *qm)
return QM_IRQ_NUM_VF_V2;
}
+/* Number of interrupt vectors: the PF uses the V2 PF count, a VF the V3 VF count. */
+static u32 qm_get_irq_num_v3(struct hisi_qm *qm)
+{
+	return (qm->fun_type == QM_HW_PF) ? QM_IRQ_NUM_PF_V2 :
+					    QM_IRQ_NUM_VF_V3;
+}
+
static struct hisi_qp *qm_to_hisi_qp(struct hisi_qm *qm, struct qm_eqe *eqe)
{
u16 cqn = le32_to_cpu(eqe->dw0) & QM_EQE_CQN_MASK;
@@ -730,6 +838,21 @@ static irqreturn_t qm_irq(int irq, void *data)
return IRQ_NONE;
}
+/*
+ * Interrupt handler for the mailbox command vector.
+ *
+ * Returns IRQ_NONE when the status bit in QM_IFC_INT_STATUS is not set;
+ * otherwise queues qm->cmd_process and returns IRQ_HANDLED.
+ */
+static irqreturn_t qm_mb_cmd_irq(int irq, void *data)
+{
+	struct hisi_qm *qm = data;
+	u32 status = readl(qm->io_base + QM_IFC_INT_STATUS);
+
+	if (!(status & QM_IFC_INT_STATUS_MASK))
+		return IRQ_NONE;
+
+	schedule_work(&qm->cmd_process);
+
+	return IRQ_HANDLED;
+}
+
static irqreturn_t qm_aeq_irq(int irq, void *data)
{
struct hisi_qm *qm = data;
@@ -770,14 +893,16 @@ static void qm_irq_unregister(struct hisi_qm *qm)
free_irq(pci_irq_vector(pdev, QM_EQ_EVENT_IRQ_VECTOR), qm);
- if (qm->ver == QM_HW_V1)
- return;
+ if (qm->ver > QM_HW_V1) {
+ free_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR), qm);
- free_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR), qm);
+ if (qm->fun_type == QM_HW_PF)
+ free_irq(pci_irq_vector(pdev,
+ QM_ABNORMAL_EVENT_IRQ_VECTOR), qm);
+ }
- if (qm->fun_type == QM_HW_PF)
- free_irq(pci_irq_vector(pdev,
- QM_ABNORMAL_EVENT_IRQ_VECTOR), qm);
+ if (qm->ver > QM_HW_V2)
+ free_irq(pci_irq_vector(pdev, QM_CMD_EVENT_IRQ_VECTOR), qm);
}
static void qm_init_qp_status(struct hisi_qp *qp)
@@ -790,8 +915,95 @@ static void qm_init_qp_status(struct hisi_qp *qp)
atomic_set(&qp_status->used, 0);
}
+/*
+ * Tell the QM which page size the system uses by writing a page-type code
+ * to QM_PAGE_SIZE: 0x0 for 4K, 0x1 for 16K, 0x2 for 64K. Any other
+ * PAGE_SIZE is reported and the 4K code is written.
+ *
+ * Does nothing on hardware older than QM_HW_V3.
+ */
+static void qm_init_prefetch(struct hisi_qm *qm)
+{
+	struct device *dev = &qm->pdev->dev;
+	u32 page_type = 0x0;
+
+	if (qm->ver < QM_HW_V3)
+		return;
+
+	switch (PAGE_SIZE) {
+	case SZ_4K:
+		page_type = 0x0;
+		break;
+	case SZ_16K:
+		page_type = 0x1;
+		break;
+	case SZ_64K:
+		page_type = 0x2;
+		break;
+	default:
+		/*
+		 * page_type keeps its initial 0x0 (4KB) value. The message is
+		 * newline-terminated so it is emitted as a complete log line.
+		 */
+		dev_err(dev, "system page size is not support: %lu, default set to 4KB\n",
+			PAGE_SIZE);
+	}
+
+	writel(page_type, qm->io_base + QM_PAGE_SIZE);
+}
+
+/*
+ * Evaluate the shaper rate for one (cir_b, cir_u, cir_s) triple.
+ *
+ * Underlying formula (IR = X Mbps; ir = 1 means IR = 100 Mbps, ir = 10000
+ * means 10Gbps):
+ *
+ *                        IR_b * (2 ^ IR_u) * 8
+ * IR(Mbps) * 10 ^ -3 = -------------------------
+ *                        Tick * (2 ^ IR_s)
+ *
+ * which is computed here as
+ * cir_b * QM_QOS_DIVISOR_CLK * 2^cir_u / (QM_QOS_TICK * 2^cir_s),
+ * truncated to 32 bits on return.
+ */
+static u32 acc_shaper_para_calc(u64 cir_b, u64 cir_u, u64 cir_s)
+{
+	u64 numerator = cir_b * QM_QOS_DIVISOR_CLK * (1 << cir_u);
+	u32 denominator = QM_QOS_TICK * (1 << cir_s);
+
+	return numerator / denominator;
+}
+
+/*
+ * Map a rate @ir to a cbs_s value using the typical_qos_* tables.
+ *
+ * Rates below the first threshold yield QM_SHAPER_MIN_CBS_S. A rate inside
+ * [typical_qos_val[k], typical_qos_val[k + 1]) yields typical_qos_cbs_s[k].
+ * Anything else yields the last table entry.
+ */
+static u32 acc_shaper_calc_cbs_s(u32 ir)
+{
+	int k;
+
+	if (ir < typical_qos_val[0])
+		return QM_SHAPER_MIN_CBS_S;
+
+	for (k = 0; k + 1 < QM_QOS_TYPICAL_NUM; k++) {
+		if (ir < typical_qos_val[k] || ir >= typical_qos_val[k + 1])
+			continue;
+
+		return typical_qos_cbs_s[k];
+	}
+
+	return typical_qos_cbs_s[QM_QOS_TYPICAL_NUM - 1];
+}
+
+/*
+ * Search for shaper factors whose computed rate is close enough to @ir.
+ *
+ * factor->cbs_s is taken from the typical-value tables. cir_b, cir_u and
+ * cir_s are then searched exhaustively, and the first combination whose
+ * relative error against @ir is at most QM_QOS_MIN_ERROR_RATE parts in
+ * QM_QOS_EXPAND_RATE is stored in @factor.
+ *
+ * Return: 0 when a combination was found; -EINVAL when @ir is 0 or when
+ * no combination is within the tolerance.
+ */
+static int qm_get_shaper_para(u32 ir, struct qm_shaper_factor *factor)
+{
+	u32 cir_b, cir_u, cir_s, ir_calc;
+	u32 error_rate;
+
+	/* @ir is the divisor of the error-rate computation below */
+	if (!ir)
+		return -EINVAL;
+
+	factor->cbs_s = acc_shaper_calc_cbs_s(ir);
+
+	for (cir_b = QM_QOS_MIN_CIR_B; cir_b <= QM_QOS_MAX_CIR_B; cir_b++) {
+		for (cir_u = 0; cir_u <= QM_QOS_MAX_CIR_U; cir_u++) {
+			for (cir_s = 0; cir_s <= QM_QOS_MAX_CIR_S; cir_s++) {
+				/*
+				 * the formula is changed to:
+				 *             IR_b * (2 ^ IR_u) * DIVISOR_CLK
+				 * IR(Mbps) = ---------------------------------
+				 *                  768 * (2 ^ IR_s)
+				 */
+				ir_calc = acc_shaper_para_calc(cir_b, cir_u,
+							       cir_s);
+				/* relative error, scaled by QM_QOS_EXPAND_RATE */
+				error_rate = QM_QOS_EXPAND_RATE * (u32)abs(ir_calc - ir) / ir;
+				if (error_rate <= QM_QOS_MIN_ERROR_RATE) {
+					factor->cir_b = cir_b;
+					factor->cir_u = cir_u;
+					factor->cir_s = cir_s;
+
+					return 0;
+				}
+			}
+		}
+	}
+
+	return -EINVAL;
+}
+
static void qm_vft_data_cfg(struct hisi_qm *qm, enum vft_type type, u32 base,
- u32 number)
+ u32 number, struct qm_shaper_factor *factor)
{
u64 tmp = 0;
@@ -820,6 +1032,15 @@ static void qm_vft_data_cfg(struct hisi_qm *qm, enum vft_type type, u32 base,
tmp = QM_CQC_VFT_VALID;
}
break;
+ case SHAPER_VFT:
+ if (qm->ver >= QM_HW_V3) {
+ tmp = factor->cir_b |
+ (factor->cir_u << QM_SHAPER_FACTOR_CIR_U_SHIFT) |
+ (factor->cir_s << QM_SHAPER_FACTOR_CIR_S_SHIFT) |
+ (QM_SHAPER_CBS_B << QM_SHAPER_FACTOR_CBS_B_SHIFT) |
+ (factor->cbs_s << QM_SHAPER_FACTOR_CBS_S_SHIFT);
+ }
+ break;
}
}
@@ -830,6 +1051,7 @@ static void qm_vft_data_cfg(struct hisi_qm *qm, enum vft_type type, u32 base,
static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type,
u32 fun_num, u32 base, u32 number)
{
+ struct qm_shaper_factor *factor = &qm->factor[fun_num];
unsigned int val;
int ret;
@@ -841,9 +1063,12 @@ static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type,
writel(0x0, qm->io_base + QM_VFT_CFG_OP_WR);
writel(type, qm->io_base + QM_VFT_CFG_TYPE);
+ if (type == SHAPER_VFT)
+ fun_num |= base << QM_SHAPER_VFT_OFFSET;
+
writel(fun_num, qm->io_base + QM_VFT_CFG);
- qm_vft_data_cfg(qm, type, base, number);
+ qm_vft_data_cfg(qm, type, base, number, factor);
writel(0x0, qm->io_base + QM_VFT_CFG_RDY);
writel(0x1, qm->io_base + QM_VFT_CFG_OP_ENABLE);
@@ -853,6 +1078,27 @@ static int qm_set_vft_common(struct hisi_qm *qm, enum vft_type type,
POLL_TIMEOUT);
}
+/*
+ * Program the default shaper (QoS) settings for function @fun_num.
+ *
+ * Stores QM_QOS_MAX_VAL as the function's QoS, derives the shaper factors
+ * for it, writes qm->type_rate to QM_SHAPER_CFG and then sets one
+ * SHAPER_VFT entry per algorithm type.
+ *
+ * Return: 0 on success, otherwise the error from qm_get_shaper_para() or
+ * qm_set_vft_common().
+ */
+static int qm_shaper_init_vft(struct hisi_qm *qm, u32 fun_num)
+{
+ int ret, i;
+
+ qm->factor[fun_num].func_qos = QM_QOS_MAX_VAL;
+ ret = qm_get_shaper_para(QM_QOS_MAX_VAL * QM_QOS_RATE, &qm->factor[fun_num]);
+ if (ret) {
+ dev_err(&qm->pdev->dev, "failed to calculate shaper parameter!\n");
+ return ret;
+ }
+ writel(qm->type_rate, qm->io_base + QM_SHAPER_CFG);
+ for (i = ALG_TYPE_0; i <= ALG_TYPE_1; i++) {
+ /* The base number of queue reuse for different alg type */
+ ret = qm_set_vft_common(qm, SHAPER_VFT, fun_num, i, 1);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
/* The config should be conducted after qm_dev_mem_reset() */
static int qm_set_sqc_cqc_vft(struct hisi_qm *qm, u32 fun_num, u32 base,
u32 number)
@@ -865,7 +1111,21 @@ static int qm_set_sqc_cqc_vft(struct hisi_qm *qm, u32 fun_num, u32 base,
return ret;
}
+ /* init default shaper qos val */
+ if (qm->ver >= QM_HW_V3) {
+ ret = qm_shaper_init_vft(qm, fun_num);
+ if (ret)
+ goto back_sqc_cqc;
+ }
+
return 0;
+back_sqc_cqc:
+ for (i = SQC_VFT; i <= CQC_VFT; i++) {
+ ret = qm_set_vft_common(qm, i, fun_num, 0, 0);
+ if (ret)
+ return ret;
+ }
+ return ret;
}
static int qm_get_vft_v2(struct hisi_qm *qm, u32 *base, u32 *number)
@@ -1570,16 +1830,9 @@ static ssize_t qm_cmd_write(struct file *filp, const char __user *buffer,
if (count > QM_DBG_WRITE_LEN)
return -ENOSPC;
- cmd_buf = kzalloc(count + 1, GFP_KERNEL);
- if (!cmd_buf)
- return -ENOMEM;
-
- if (copy_from_user(cmd_buf, buffer, count)) {
- kfree(cmd_buf);
- return -EFAULT;
- }
-
- cmd_buf[count] = '\0';
+ cmd_buf = memdup_user_nul(buffer, count);
+ if (IS_ERR(cmd_buf))
+ return PTR_ERR(cmd_buf);
cmd_buf_tmp = strchr(cmd_buf, '\n');
if (cmd_buf_tmp) {
@@ -1623,13 +1876,9 @@ static void qm_hw_error_init_v1(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe)
writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK);
}
-static void qm_hw_error_init_v2(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe)
+static void qm_hw_error_cfg(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe)
{
- u32 irq_enable = ce | nfe | fe;
- u32 irq_unmask = ~irq_enable;
-
qm->error_mask = ce | nfe | fe;
-
/* clear QM hw residual error source */
writel(QM_ABNORMAL_INT_SOURCE_CLR,
qm->io_base + QM_ABNORMAL_INT_SOURCE);
@@ -1639,6 +1888,14 @@ static void qm_hw_error_init_v2(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe)
writel(QM_RAS_CE_TIMES_PER_IRQ, qm->io_base + QM_RAS_CE_THRESHOLD);
writel(nfe, qm->io_base + QM_RAS_NFE_ENABLE);
writel(fe, qm->io_base + QM_RAS_FE_ENABLE);
+}
+
+static void qm_hw_error_init_v2(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe)
+{
+ u32 irq_enable = ce | nfe | fe;
+ u32 irq_unmask = ~irq_enable;
+
+ qm_hw_error_cfg(qm, ce, nfe, fe);
irq_unmask &= readl(qm->io_base + QM_ABNORMAL_INT_MASK);
writel(irq_unmask, qm->io_base + QM_ABNORMAL_INT_MASK);
@@ -1649,6 +1906,28 @@ static void qm_hw_error_uninit_v2(struct hisi_qm *qm)
writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK);
}
+/*
+ * V3 variant of the QM hardware error setup: apply the common configuration
+ * through qm_hw_error_cfg(), write the master OOO shutdown selection, then
+ * clear the @ce, @nfe and @fe bits in QM_ABNORMAL_INT_MASK.
+ */
+static void qm_hw_error_init_v3(struct hisi_qm *qm, u32 ce, u32 nfe, u32 fe)
+{
+ u32 irq_enable = ce | nfe | fe;
+ u32 irq_unmask = ~irq_enable;
+
+ qm_hw_error_cfg(qm, ce, nfe, fe);
+
+ /* enable close master ooo when hardware error happened */
+ /* every @nfe bit except QM_DB_RANDOM_INVALID is selected */
+ writel(nfe & (~QM_DB_RANDOM_INVALID), qm->io_base + QM_OOO_SHUTDOWN_SEL);
+
+ /* keep the bits already cleared in the mask register, clear the enabled ones */
+ irq_unmask &= readl(qm->io_base + QM_ABNORMAL_INT_MASK);
+ writel(irq_unmask, qm->io_base + QM_ABNORMAL_INT_MASK);
+}
+
+/*
+ * V3 variant of the QM hardware error teardown: write
+ * QM_ABNORMAL_INT_MASK_VALUE to the abnormal interrupt mask register and
+ * zero the master OOO shutdown selection.
+ */
+static void qm_hw_error_uninit_v3(struct hisi_qm *qm)
+{
+ writel(QM_ABNORMAL_INT_MASK_VALUE, qm->io_base + QM_ABNORMAL_INT_MASK);
+
+ /* disable close master ooo when hardware error happened */
+ writel(0x0, qm->io_base + QM_OOO_SHUTDOWN_SEL);
+}
+
static void qm_log_hw_error(struct hisi_qm *qm, u32 error_status)
{
const struct hisi_qm_hw_error *err;
@@ -1715,15 +1994,371 @@ static enum acc_err_result qm_hw_error_handle_v2(struct hisi_qm *qm)
return ACC_ERR_RECOVERED;
}
+/* Read the QM abnormal interrupt status register. */
+static u32 qm_get_hw_error_status(struct hisi_qm *qm)
+{
+	u32 status = readl(qm->io_base + QM_ABNORMAL_INT_STATUS);
+
+	return status;
+}
+
+/* Fetch the device error status through the driver's err_ini callback. */
+static u32 qm_get_dev_err_status(struct hisi_qm *qm)
+{
+	u32 dev_status = qm->err_ini->get_dev_hw_err_status(qm);
+
+	return dev_status;
+}
+
+/*
+ * Check if the error causes the master ooo block.
+ *
+ * Always 0 for a VF. Before QM_HW_V3 the result is non-zero when the QM
+ * status has QM_ECC_MBIT set or the device status overlaps
+ * err_info.ecc_2bits_mask. From V3 on, the QM status is matched against
+ * the value currently held in QM_OOO_SHUTDOWN_SEL, and any device status
+ * bit outside err_info.dev_ce_mask counts.
+ *
+ * Return: non-zero if such an error is pending, 0 otherwise.
+ */
+static int qm_check_dev_error(struct hisi_qm *qm)
+{
+ u32 val, dev_val;
+
+ if (qm->fun_type == QM_HW_VF)
+ return 0;
+
+ val = qm_get_hw_error_status(qm);
+ dev_val = qm_get_dev_err_status(qm);
+
+ if (qm->ver < QM_HW_V3)
+ return (val & QM_ECC_MBIT) ||
+ (dev_val & qm->err_info.ecc_2bits_mask);
+
+ return (val & readl(qm->io_base + QM_OOO_SHUTDOWN_SEL)) ||
+ (dev_val & (~qm->err_info.dev_ce_mask));
+}
+
+/*
+ * Issue a QM_MB_CMD_DST mailbox operation for function @fun_num and read
+ * the 64-bit message back from the mailbox data registers into @msg.
+ *
+ * The mailbox lock is held across both the operation and the register
+ * reads. @msg is left untouched on failure.
+ *
+ * Return: 0 on success, or the error from qm_mb_nolock().
+ */
+static int qm_get_mb_cmd(struct hisi_qm *qm, u64 *msg, u16 fun_num)
+{
+ struct qm_mailbox mailbox;
+ int ret;
+
+ qm_mb_pre_init(&mailbox, QM_MB_CMD_DST, 0, fun_num, 0);
+ mutex_lock(&qm->mailbox_lock);
+ ret = qm_mb_nolock(qm, &mailbox);
+ if (ret)
+ goto err_unlock;
+
+ /* low word from ADDR_L, high word from ADDR_H */
+ *msg = readl(qm->io_base + QM_MB_CMD_DATA_ADDR_L) |
+ ((u64)readl(qm->io_base + QM_MB_CMD_DATA_ADDR_H) << 32);
+
+err_unlock:
+ mutex_unlock(&qm->mailbox_lock);
+ return ret;
+}
+
+/*
+ * Clear inter-function command interrupt sources.
+ *
+ * On a PF, @vf_mask is first written to QM_IFC_INT_SOURCE_P. In every case
+ * QM_IFC_INT_SOURCE_MASK is then set in QM_IFC_INT_SOURCE_V.
+ */
+static void qm_clear_cmd_interrupt(struct hisi_qm *qm, u64 vf_mask)
+{
+ u32 val;
+
+ if (qm->fun_type == QM_HW_PF)
+ writeq(vf_mask, qm->io_base + QM_IFC_INT_SOURCE_P);
+
+ val = readl(qm->io_base + QM_IFC_INT_SOURCE_V);
+ val |= QM_IFC_INT_SOURCE_MASK;
+ writel(val, qm->io_base + QM_IFC_INT_SOURCE_V);
+}
+
+/*
+ * Fetch the pending message from VF @vf_id and log it when it reports a
+ * failure or carries an unknown command. QM_VF_PREPARE_DONE and
+ * QM_VF_START_DONE need no action here.
+ */
+static void qm_handle_vf_msg(struct hisi_qm *qm, u32 vf_id)
+{
+ struct device *dev = &qm->pdev->dev;
+ u32 cmd;
+ u64 msg;
+ int ret;
+
+ ret = qm_get_mb_cmd(qm, &msg, vf_id);
+ if (ret) {
+ dev_err(dev, "failed to get msg from VF(%u)!\n", vf_id);
+ return;
+ }
+
+ /* the command code is taken from the low 32 bits of the message */
+ cmd = msg & QM_MB_CMD_DATA_MASK;
+ switch (cmd) {
+ case QM_VF_PREPARE_FAIL:
+ dev_err(dev, "failed to stop VF(%u)!\n", vf_id);
+ break;
+ case QM_VF_START_FAIL:
+ dev_err(dev, "failed to start VF(%u)!\n", vf_id);
+ break;
+ case QM_VF_PREPARE_DONE:
+ case QM_VF_START_DONE:
+ break;
+ default:
+ dev_err(dev, "unsupported cmd %u sent by VF(%u)!\n", cmd, vf_id);
+ break;
+ }
+}
+
+/*
+ * Wait until every enabled VF has sent a command to the PF, then process
+ * the received messages and acknowledge them.
+ *
+ * Polls QM_IFC_INT_SOURCE_P, sleeping QM_WAIT_DST_ACK ms between reads,
+ * and gives up once the retry counter exceeds QM_MAX_PF_WAIT_COUNT.
+ * Messages from VFs that did respond are handled even on timeout.
+ *
+ * Return: 0 if all VFs responded, if no VFs are enabled, or on hardware
+ * older than QM_HW_V3; -EBUSY on timeout.
+ */
+static int qm_wait_vf_prepare_finish(struct hisi_qm *qm)
+{
+ struct device *dev = &qm->pdev->dev;
+ u32 vfs_num = qm->vfs_num;
+ int cnt = 0;
+ int ret = 0;
+ u64 val;
+ u32 i;
+
+ if (!qm->vfs_num || qm->ver < QM_HW_V3)
+ return 0;
+
+ while (true) {
+ val = readq(qm->io_base + QM_IFC_INT_SOURCE_P);
+ /* All VFs send command to PF, break */
+ /* bits 1..vfs_num are the per-VF bits checked here */
+ if ((val & GENMASK(vfs_num, 1)) == GENMASK(vfs_num, 1))
+ break;
+
+ if (++cnt > QM_MAX_PF_WAIT_COUNT) {
+ ret = -EBUSY;
+ break;
+ }
+
+ msleep(QM_WAIT_DST_ACK);
+ }
+
+ /* PF check VFs msg */
+ for (i = 1; i <= vfs_num; i++) {
+ if (val & BIT(i))
+ qm_handle_vf_msg(qm, i);
+ else
+ dev_err(dev, "VF(%u) not ping PF!\n", i);
+ }
+
+ /* PF clear interrupt to ack VFs */
+ qm_clear_cmd_interrupt(qm, val);
+
+ return ret;
+}
+
+/*
+ * Raise the inter-function interrupt from the PF side: replace the
+ * QM_IFC_SEND_ALL_VFS field of QM_IFC_INT_CFG with @fun_num, then set
+ * QM_IFC_INT_SET_MASK in QM_IFC_INT_SET_P.
+ * NOTE(review): the field presumably selects the target VF, with the
+ * all-ones value addressing every VF - confirm against the hardware spec.
+ */
+static void qm_trigger_vf_interrupt(struct hisi_qm *qm, u32 fun_num)
+{
+ u32 val;
+
+ val = readl(qm->io_base + QM_IFC_INT_CFG);
+ val &= ~QM_IFC_SEND_ALL_VFS;
+ val |= fun_num;
+ writel(val, qm->io_base + QM_IFC_INT_CFG);
+
+ val = readl(qm->io_base + QM_IFC_INT_SET_P);
+ val |= QM_IFC_INT_SET_MASK;
+ writel(val, qm->io_base + QM_IFC_INT_SET_P);
+}
+
+/*
+ * Raise the inter-function interrupt from the VF side by setting
+ * QM_IFC_INT_SET_MASK in QM_IFC_INT_SET_V.
+ */
+static void qm_trigger_pf_interrupt(struct hisi_qm *qm)
+{
+ u32 val;
+
+ val = readl(qm->io_base + QM_IFC_INT_SET_V);
+ val |= QM_IFC_INT_SET_MASK;
+ writel(val, qm->io_base + QM_IFC_INT_SET_V);
+}
+
+/*
+ * Send @cmd to VF @fun_num through the mailbox and wait for its response.
+ *
+ * After the QM_MB_CMD_SRC mailbox operation the VF is interrupted and
+ * QM_IFC_READY_STATUS is polled every QM_WAIT_DST_ACK ms until the VF's
+ * bit reads clear. The mailbox lock is held for the whole exchange.
+ *
+ * Return: 0 on success, the qm_mb_nolock() error if the mailbox operation
+ * fails, or -ETIMEDOUT if the VF does not respond in time.
+ */
+static int qm_ping_single_vf(struct hisi_qm *qm, u64 cmd, u32 fun_num)
+{
+ struct device *dev = &qm->pdev->dev;
+ struct qm_mailbox mailbox;
+ int cnt = 0;
+ u64 val;
+ int ret;
+
+ qm_mb_pre_init(&mailbox, QM_MB_CMD_SRC, cmd, fun_num, 0);
+ mutex_lock(&qm->mailbox_lock);
+ ret = qm_mb_nolock(qm, &mailbox);
+ if (ret) {
+ dev_err(dev, "failed to send command to vf(%u)!\n", fun_num);
+ goto err_unlock;
+ }
+
+ qm_trigger_vf_interrupt(qm, fun_num);
+ while (true) {
+ msleep(QM_WAIT_DST_ACK);
+ val = readq(qm->io_base + QM_IFC_READY_STATUS);
+ /* if VF respond, PF notifies VF successfully. */
+ /* success path: ret is still 0; the label is shared with the error path */
+ if (!(val & BIT(fun_num)))
+ goto err_unlock;
+
+ if (++cnt > QM_MAX_PF_WAIT_COUNT) {
+ dev_err(dev, "failed to get response from VF(%u)!\n", fun_num);
+ ret = -ETIMEDOUT;
+ break;
+ }
+ }
+
+err_unlock:
+ mutex_unlock(&qm->mailbox_lock);
+ return ret;
+}
+
+/*
+ * Send @cmd to all VFs through the mailbox and wait for every enabled VF
+ * to respond.
+ *
+ * The mailbox is addressed with QM_MB_PING_ALL_VFS. QM_IFC_READY_STATUS is
+ * then polled every QM_WAIT_DST_ACK ms until bits 1..vfs_num read clear.
+ * The mailbox lock is held while sending and polling, and released before
+ * the non-responding VFs are logged.
+ *
+ * Return: 0 on success, the qm_mb_nolock() error if the mailbox operation
+ * fails, or -ETIMEDOUT if some VF does not respond in time.
+ */
+static int qm_ping_all_vfs(struct hisi_qm *qm, u64 cmd)
+{
+ struct device *dev = &qm->pdev->dev;
+ u32 vfs_num = qm->vfs_num;
+ struct qm_mailbox mailbox;
+ u64 val = 0;
+ int cnt = 0;
+ int ret;
+ u32 i;
+
+ qm_mb_pre_init(&mailbox, QM_MB_CMD_SRC, cmd, QM_MB_PING_ALL_VFS, 0);
+ mutex_lock(&qm->mailbox_lock);
+ /* PF sends command to all VFs by mailbox */
+ ret = qm_mb_nolock(qm, &mailbox);
+ if (ret) {
+ dev_err(dev, "failed to send command to VFs!\n");
+ mutex_unlock(&qm->mailbox_lock);
+ return ret;
+ }
+
+ qm_trigger_vf_interrupt(qm, QM_IFC_SEND_ALL_VFS);
+ while (true) {
+ msleep(QM_WAIT_DST_ACK);
+ val = readq(qm->io_base + QM_IFC_READY_STATUS);
+ /* If all VFs acked, PF notifies VFs successfully. */
+ if (!(val & GENMASK(vfs_num, 1))) {
+ mutex_unlock(&qm->mailbox_lock);
+ return 0;
+ }
+
+ if (++cnt > QM_MAX_PF_WAIT_COUNT)
+ break;
+ }
+
+ mutex_unlock(&qm->mailbox_lock);
+
+ /* Check which vf respond timeout. */
+ /* val holds the last status read; a bit still set marks a silent VF */
+ for (i = 1; i <= vfs_num; i++) {
+ if (val & BIT(i))
+ dev_err(dev, "failed to get response from VF(%u)!\n", i);
+ }
+
+ return -ETIMEDOUT;
+}
+
+/*
+ * Send @cmd to the PF through the mailbox and wait for its response.
+ *
+ * After the QM_MB_CMD_SRC mailbox operation the PF is interrupted and
+ * QM_IFC_INT_SET_V is polled every QM_WAIT_DST_ACK ms until the checked bit
+ * reads clear. The mailbox lock is held for the whole exchange.
+ *
+ * Return: 0 on success, the qm_mb_nolock() error if the mailbox operation
+ * fails, or -ETIMEDOUT if the PF does not respond in time.
+ */
+static int qm_ping_pf(struct hisi_qm *qm, u64 cmd)
+{
+ struct qm_mailbox mailbox;
+ int cnt = 0;
+ u32 val;
+ int ret;
+
+ qm_mb_pre_init(&mailbox, QM_MB_CMD_SRC, cmd, 0, 0);
+ mutex_lock(&qm->mailbox_lock);
+ ret = qm_mb_nolock(qm, &mailbox);
+ if (ret) {
+ dev_err(&qm->pdev->dev, "failed to send command to PF!\n");
+ goto unlock;
+ }
+
+ qm_trigger_pf_interrupt(qm);
+ /* Waiting for PF response */
+ while (true) {
+ msleep(QM_WAIT_DST_ACK);
+ val = readl(qm->io_base + QM_IFC_INT_SET_V);
+ /*
+ * NOTE(review): the SET_V register is tested with the STATUS mask;
+ * QM_IFC_INT_STATUS_MASK and QM_IFC_INT_SET_MASK are both BIT(0),
+ * so the result is the same - confirm the intended mask name.
+ */
+ if (!(val & QM_IFC_INT_STATUS_MASK))
+ break;
+
+ if (++cnt > QM_MAX_VF_WAIT_COUNT) {
+ ret = -ETIMEDOUT;
+ break;
+ }
+ }
+
+unlock:
+ mutex_unlock(&qm->mailbox_lock);
+ return ret;
+}
+
static int qm_stop_qp(struct hisi_qp *qp)
{
return qm_mb(qp->qm, QM_MB_CMD_STOP_QP, 0, qp->qp_id, 0);
}
+/*
+ * qm_set_msi() - write the MSI mask register of the device.
+ * @qm: queue manager whose PCI device is configured.
+ * @set: true writes 0 to the mask register, false writes
+ *       ACC_PEH_MSI_DISABLE.
+ *
+ * On the disable path, unless one of the ECC multi-bit error flags is set,
+ * the function waits 1 ms and then checks QM_PEH_DFX_INFO0.
+ *
+ * Return: 0 on success, -EFAULT if QM_PEH_DFX_INFO0 reads non-zero after
+ * the delay on the disable path.
+ */
+static int qm_set_msi(struct hisi_qm *qm, bool set)
+{
+	struct pci_dev *pdev = qm->pdev;
+
+	if (set) {
+		pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_MASK_64,
+				       0);
+		return 0;
+	}
+
+	pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_MASK_64,
+			       ACC_PEH_MSI_DISABLE);
+
+	/* With an ECC multi-bit error recorded the register check is skipped. */
+	if (qm->err_status.is_qm_ecc_mbit || qm->err_status.is_dev_ecc_mbit)
+		return 0;
+
+	mdelay(1);
+
+	return readl(qm->io_base + QM_PEH_DFX_INFO0) ? -EFAULT : 0;
+}
+
+/*
+ * qm_wait_msi_finish() - wait for outstanding MSI activity to drain.
+ * @qm: queue manager whose PCI device is inspected.
+ *
+ * Three waits are performed in sequence; each one only warns on timeout:
+ *  1. the MSI pending config register reads zero,
+ *  2. the QM_PEH_DFX_MASK bits in QM_PEH_DFX_INFO0 are clear,
+ *  3. the QM_PEH_MSI_FINISH_MASK bits in QM_PEH_DFX_INFO1 are clear.
+ */
+static void qm_wait_msi_finish(struct hisi_qm *qm)
+{
+	struct pci_dev *pdev = qm->pdev;
+	u32 pending = ~0;
+	int tries = 0;
+	u32 status;
+	int err;
+
+	/* Re-read the pending register with a 1us pause between attempts. */
+	for (;; udelay(1)) {
+		pci_read_config_dword(pdev, pdev->msi_cap +
+				      PCI_MSI_PENDING_64, &pending);
+		if (!pending)
+			break;
+
+		if (++tries > MAX_WAIT_COUNTS) {
+			pci_warn(pdev, "failed to empty MSI PENDING!\n");
+			break;
+		}
+	}
+
+	err = readl_relaxed_poll_timeout(qm->io_base + QM_PEH_DFX_INFO0,
+					 status, !(status & QM_PEH_DFX_MASK),
+					 POLL_PERIOD, POLL_TIMEOUT);
+	if (err)
+		pci_warn(pdev, "failed to empty PEH MSI!\n");
+
+	err = readl_relaxed_poll_timeout(qm->io_base + QM_PEH_DFX_INFO1,
+					 status,
+					 !(status & QM_PEH_MSI_FINISH_MASK),
+					 POLL_PERIOD, POLL_TIMEOUT);
+	if (err)
+		pci_warn(pdev, "failed to finish MSI operation!\n");
+}
+
+/*
+ * qm_set_msi_v3() - set or clear QM_MSI_CAP_ENABLE in the MSI capability.
+ * @qm: queue manager whose PCI device is configured.
+ * @set: true to set the enable bit, false to clear it.
+ *
+ * When enabling, the capability is re-read up to MAX_WAIT_COUNTS times
+ * (1us apart) until the bit reads back as set. When disabling, the function
+ * delays WAIT_PERIOD_US_MIN and then calls qm_wait_msi_finish().
+ *
+ * Return: 0 on success, -ETIMEDOUT if the enable bit never read back set.
+ */
+static int qm_set_msi_v3(struct hisi_qm *qm, bool set)
+{
+	struct pci_dev *pdev = qm->pdev;
+	u32 cap, i;
+
+	pci_read_config_dword(pdev, pdev->msi_cap, &cap);
+	cap = set ? (cap | QM_MSI_CAP_ENABLE) : (cap & ~QM_MSI_CAP_ENABLE);
+	pci_write_config_dword(pdev, pdev->msi_cap, cap);
+
+	if (!set) {
+		udelay(WAIT_PERIOD_US_MIN);
+		qm_wait_msi_finish(qm);
+		return 0;
+	}
+
+	for (i = 0; i < MAX_WAIT_COUNTS; i++) {
+		pci_read_config_dword(pdev, pdev->msi_cap, &cap);
+		if (cap & QM_MSI_CAP_ENABLE)
+			return 0;
+
+		udelay(1);
+	}
+
+	return -ETIMEDOUT;
+}
+
static const struct hisi_qm_hw_ops qm_hw_ops_v1 = {
.qm_db = qm_db_v1,
.get_irq_num = qm_get_irq_num_v1,
.hw_error_init = qm_hw_error_init_v1,
+ .set_msi = qm_set_msi,
};
static const struct hisi_qm_hw_ops qm_hw_ops_v2 = {
@@ -1733,16 +2368,20 @@ static const struct hisi_qm_hw_ops qm_hw_ops_v2 = {
.hw_error_init = qm_hw_error_init_v2,
.hw_error_uninit = qm_hw_error_uninit_v2,
.hw_error_handle = qm_hw_error_handle_v2,
+ .set_msi = qm_set_msi,
};
static const struct hisi_qm_hw_ops qm_hw_ops_v3 = {
.get_vft = qm_get_vft_v2,
.qm_db = qm_db_v2,
- .get_irq_num = qm_get_irq_num_v2,
- .hw_error_init = qm_hw_error_init_v2,
- .hw_error_uninit = qm_hw_error_uninit_v2,
+ .get_irq_num = qm_get_irq_num_v3,
+ .hw_error_init = qm_hw_error_init_v3,
+ .hw_error_uninit = qm_hw_error_uninit_v3,
.hw_error_handle = qm_hw_error_handle_v2,
.stop_qp = qm_stop_qp,
+ .set_msi = qm_set_msi_v3,
+ .ping_all_vfs = qm_ping_all_vfs,
+ .ping_pf = qm_ping_pf,
};
static void *qm_get_avail_sqe(struct hisi_qp *qp)
@@ -2017,11 +2656,8 @@ static int qm_drain_qp(struct hisi_qp *qp)
int ret = 0, i = 0;
void *addr;
- /*
- * No need to judge if ECC multi-bit error occurs because the
- * master OOO will be blocked.
- */
- if (qm->err_status.is_qm_ecc_mbit || qm->err_status.is_dev_ecc_mbit)
+ /* No need to judge if master OOO is blocked. */
+ if (qm_check_dev_error(qm))
return 0;
/* Kunpeng930 supports drain qp by device */
@@ -2290,6 +2926,23 @@ static void hisi_qm_uacce_stop_queue(struct uacce_queue *q)
hisi_qm_stop_qp(q->priv);
}
+/*
+ * hisi_qm_is_q_updated() - uacce callback reporting new completion entries.
+ * @q: uacce queue; q->priv is used as the struct hisi_qp.
+ *
+ * Walks the completion queue from qp_status.cq_head for as long as the
+ * entry's phase (QM_CQE_PHASE) equals qp_status.cqc_phase, calling
+ * qm_cq_head_update() for each such entry.
+ *
+ * Return: 1 if at least one entry matched, 0 otherwise.
+ */
+static int hisi_qm_is_q_updated(struct uacce_queue *q)
+{
+ struct hisi_qp *qp = q->priv;
+ struct qm_cqe *cqe = qp->cqe + qp->qp_status.cq_head;
+ int updated = 0;
+
+ while (QM_CQE_PHASE(cqe) == qp->qp_status.cqc_phase) {
+ /* make sure to read data from memory */
+ dma_rmb();
+ qm_cq_head_update(qp);
+ /* re-read the head: qm_cq_head_update() presumably advances it - confirm */
+ cqe = qp->cqe + qp->qp_status.cq_head;
+ updated = 1;
+ }
+
+ return updated;
+}
+
static void qm_set_sqctype(struct uacce_queue *q, u16 type)
{
struct hisi_qm *qm = q->uacce->priv;
@@ -2335,6 +2988,7 @@ static const struct uacce_ops uacce_qm_ops = {
.stop_queue = hisi_qm_uacce_stop_queue,
.mmap = hisi_qm_uacce_mmap,
.ioctl = hisi_qm_uacce_ioctl,
+ .is_q_updated = hisi_qm_is_q_updated,
};
static int qm_alloc_uacce(struct hisi_qm *qm)
@@ -2530,62 +3184,6 @@ static int hisi_qp_memory_init(struct hisi_qm *qm, size_t dma_size, int id)
return 0;
}
-static int hisi_qm_memory_init(struct hisi_qm *qm)
-{
- struct device *dev = &qm->pdev->dev;
- size_t qp_dma_size, off = 0;
- int i, ret = 0;
-
-#define QM_INIT_BUF(qm, type, num) do { \
- (qm)->type = ((qm)->qdma.va + (off)); \
- (qm)->type##_dma = (qm)->qdma.dma + (off); \
- off += QMC_ALIGN(sizeof(struct qm_##type) * (num)); \
-} while (0)
-
- idr_init(&qm->qp_idr);
- qm->qdma.size = QMC_ALIGN(sizeof(struct qm_eqe) * QM_EQ_DEPTH) +
- QMC_ALIGN(sizeof(struct qm_aeqe) * QM_Q_DEPTH) +
- QMC_ALIGN(sizeof(struct qm_sqc) * qm->qp_num) +
- QMC_ALIGN(sizeof(struct qm_cqc) * qm->qp_num);
- qm->qdma.va = dma_alloc_coherent(dev, qm->qdma.size, &qm->qdma.dma,
- GFP_ATOMIC);
- dev_dbg(dev, "allocate qm dma buf size=%zx)\n", qm->qdma.size);
- if (!qm->qdma.va)
- return -ENOMEM;
-
- QM_INIT_BUF(qm, eqe, QM_EQ_DEPTH);
- QM_INIT_BUF(qm, aeqe, QM_Q_DEPTH);
- QM_INIT_BUF(qm, sqc, qm->qp_num);
- QM_INIT_BUF(qm, cqc, qm->qp_num);
-
- qm->qp_array = kcalloc(qm->qp_num, sizeof(struct hisi_qp), GFP_KERNEL);
- if (!qm->qp_array) {
- ret = -ENOMEM;
- goto err_alloc_qp_array;
- }
-
- /* one more page for device or qp statuses */
- qp_dma_size = qm->sqe_size * QM_Q_DEPTH +
- sizeof(struct qm_cqe) * QM_Q_DEPTH;
- qp_dma_size = PAGE_ALIGN(qp_dma_size);
- for (i = 0; i < qm->qp_num; i++) {
- ret = hisi_qp_memory_init(qm, qp_dma_size, i);
- if (ret)
- goto err_init_qp_mem;
-
- dev_dbg(dev, "allocate qp dma buf size=%zx)\n", qp_dma_size);
- }
-
- return ret;
-
-err_init_qp_mem:
- hisi_qp_memory_uninit(qm, i);
-err_alloc_qp_array:
- dma_free_coherent(dev, qm->qdma.size, qm->qdma.va, qm->qdma.dma);
-
- return ret;
-}
-
static void hisi_qm_pre_init(struct hisi_qm *qm)
{
struct pci_dev *pdev = qm->pdev;
@@ -2604,6 +3202,34 @@ static void hisi_qm_pre_init(struct hisi_qm *qm)
qm->misc_ctl = false;
}
+/*
+ * qm_cmd_uninit() - set QM_IFC_INT_DISABLE in QM_IFC_INT_MASK.
+ * @qm: queue manager to operate on.
+ *
+ * Does nothing for hardware versions below QM_HW_V3.
+ */
+static void qm_cmd_uninit(struct hisi_qm *qm)
+{
+	u32 mask;
+
+	if (qm->ver >= QM_HW_V3) {
+		mask = readl(qm->io_base + QM_IFC_INT_MASK);
+		writel(mask | QM_IFC_INT_DISABLE,
+		       qm->io_base + QM_IFC_INT_MASK);
+	}
+}
+
+/*
+ * qm_cmd_init() - clear the command interrupt source and unmask it.
+ * @qm: queue manager to operate on.
+ *
+ * Does nothing for hardware versions below QM_HW_V3. Otherwise the
+ * interrupt source is cleared first, then QM_IFC_INT_DISABLE is removed
+ * from QM_IFC_INT_MASK.
+ */
+static void qm_cmd_init(struct hisi_qm *qm)
+{
+	u32 mask;
+
+	if (qm->ver < QM_HW_V3)
+		return;
+
+	/* Clear communication interrupt source */
+	qm_clear_cmd_interrupt(qm, QM_IFC_INT_SOURCE_CLR);
+
+	/* Enable pf to vf communication reg. */
+	mask = readl(qm->io_base + QM_IFC_INT_MASK);
+	mask &= ~QM_IFC_INT_DISABLE;
+	writel(mask, qm->io_base + QM_IFC_INT_MASK);
+}
+
static void qm_put_pci_res(struct hisi_qm *qm)
{
struct pci_dev *pdev = qm->pdev;
@@ -2635,6 +3261,8 @@ void hisi_qm_uninit(struct hisi_qm *qm)
struct pci_dev *pdev = qm->pdev;
struct device *dev = &pdev->dev;
+ qm_cmd_uninit(qm);
+ kfree(qm->factor);
down_write(&qm->qps_lock);
if (!qm_avail_state(qm, QM_CLOSE)) {
@@ -2826,6 +3454,8 @@ static int __hisi_qm_start(struct hisi_qm *qm)
if (ret)
return ret;
+ qm_init_prefetch(qm);
+
writel(0x0, qm->io_base + QM_VF_EQ_INT_MASK);
writel(0x0, qm->io_base + QM_VF_AEQ_INT_MASK);
@@ -3034,79 +3664,6 @@ static int qm_debugfs_atomic64_get(void *data, u64 *val)
DEFINE_DEBUGFS_ATTRIBUTE(qm_atomic64_ops, qm_debugfs_atomic64_get,
qm_debugfs_atomic64_set, "%llu\n");
-/**
- * hisi_qm_debug_init() - Initialize qm related debugfs files.
- * @qm: The qm for which we want to add debugfs files.
- *
- * Create qm related debugfs files.
- */
-void hisi_qm_debug_init(struct hisi_qm *qm)
-{
- struct qm_dfx *dfx = &qm->debug.dfx;
- struct dentry *qm_d;
- void *data;
- int i;
-
- qm_d = debugfs_create_dir("qm", qm->debug.debug_root);
- qm->debug.qm_d = qm_d;
-
- /* only show this in PF */
- if (qm->fun_type == QM_HW_PF) {
- qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM);
- for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++)
- qm_create_debugfs_file(qm, qm_d, i);
- }
-
- debugfs_create_file("regs", 0444, qm->debug.qm_d, qm, &qm_regs_fops);
-
- debugfs_create_file("cmd", 0444, qm->debug.qm_d, qm, &qm_cmd_fops);
-
- debugfs_create_file("status", 0444, qm->debug.qm_d, qm,
- &qm_status_fops);
- for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) {
- data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset);
- debugfs_create_file(qm_dfx_files[i].name,
- 0644,
- qm_d,
- data,
- &qm_atomic64_ops);
- }
-}
-EXPORT_SYMBOL_GPL(hisi_qm_debug_init);
-
-/**
- * hisi_qm_debug_regs_clear() - clear qm debug related registers.
- * @qm: The qm for which we want to clear its debug registers.
- */
-void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
-{
- struct qm_dfx_registers *regs;
- int i;
-
- /* clear current_qm */
- writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF);
- writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF);
-
- /* clear current_q */
- writel(0x0, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
- writel(0x0, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
-
- /*
- * these registers are reading and clearing, so clear them after
- * reading them.
- */
- writel(0x1, qm->io_base + QM_DFX_CNT_CLR_CE);
-
- regs = qm_dfx_regs;
- for (i = 0; i < CNT_CYC_REGS_NUM; i++) {
- readl(qm->io_base + regs->reg_offset);
- regs++;
- }
-
- writel(0x0, qm->io_base + QM_DFX_CNT_CLR_CE);
-}
-EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear);
-
static void qm_hw_error_init(struct hisi_qm *qm)
{
struct hisi_qm_err_info *err_info = &qm->err_info;
@@ -3362,6 +3919,360 @@ static int qm_clear_vft_config(struct hisi_qm *qm)
return 0;
}
+/*
+ * qm_func_shaper_enable() - record a function's qos and program its shaper.
+ * @qm: queue manager owning the factor table.
+ * @fun_index: function index; must not exceed the device's total VF count.
+ * @qos: qos value stored in qm->factor[fun_index].func_qos.
+ *
+ * The shaper parameters are computed from qos * QM_QOS_RATE and a
+ * SHAPER_VFT entry is written for ALG_TYPE_0 through ALG_TYPE_1.
+ *
+ * Return: 0 on success, -EINVAL if the index is out of range, the
+ * parameter calculation fails or a VFT write fails.
+ */
+static int qm_func_shaper_enable(struct hisi_qm *qm, u32 fun_index, u32 qos)
+{
+	struct device *dev = &qm->pdev->dev;
+	u32 ir = qos * QM_QOS_RATE;
+	int total_vfs = pci_sriov_get_totalvfs(qm->pdev);
+	int alg_type;
+
+	if (fun_index > total_vfs)
+		return -EINVAL;
+
+	qm->factor[fun_index].func_qos = qos;
+
+	if (qm_get_shaper_para(ir, &qm->factor[fun_index])) {
+		dev_err(dev, "failed to calculate shaper parameter!\n");
+		return -EINVAL;
+	}
+
+	for (alg_type = ALG_TYPE_0; alg_type <= ALG_TYPE_1; alg_type++) {
+		/* The base number of queue reuse for different alg type */
+		if (!qm_set_vft_common(qm, SHAPER_VFT, fun_index, alg_type, 1))
+			continue;
+
+		dev_err(dev, "type: %d, failed to set shaper vft!\n", alg_type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/*
+ * qm_get_shaper_vft_qos() - read back a function's shaper rate from hardware.
+ * @qm: queue manager whose VFT configuration registers are used.
+ * @fun_index: function index, also used to index qm->factor[].
+ *
+ * Reads the SHAPER_VFT entry of @fun_index, recomputes the rate from its
+ * cir_b/cir_u/cir_s fields and compares it with the rate derived from the
+ * software-recorded func_qos.
+ *
+ * Return: the recorded rate (func_qos * QM_QOS_RATE) when hardware and
+ * software agree within QM_QOS_MIN_ERROR_RATE; 0 on any failure, including
+ * a register poll timeout or no qos being recorded for this function.
+ */
+static u32 qm_get_shaper_vft_qos(struct hisi_qm *qm, u32 fun_index)
+{
+	u64 cir_u = 0, cir_b = 0, cir_s = 0;
+	u64 shaper_vft, ir_calc, ir;
+	unsigned int val;
+	u32 error_rate;
+	int ret;
+
+	/* Wait until the VFT configuration interface reports ready. */
+	ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val,
+					 val & BIT(0), POLL_PERIOD,
+					 POLL_TIMEOUT);
+	if (ret)
+		return 0;
+
+	writel(0x1, qm->io_base + QM_VFT_CFG_OP_WR);
+	writel(SHAPER_VFT, qm->io_base + QM_VFT_CFG_TYPE);
+	writel(fun_index, qm->io_base + QM_VFT_CFG);
+
+	writel(0x0, qm->io_base + QM_VFT_CFG_RDY);
+	writel(0x1, qm->io_base + QM_VFT_CFG_OP_ENABLE);
+
+	/* Wait for the operation started above to complete. */
+	ret = readl_relaxed_poll_timeout(qm->io_base + QM_VFT_CFG_RDY, val,
+					 val & BIT(0), POLL_PERIOD,
+					 POLL_TIMEOUT);
+	if (ret)
+		return 0;
+
+	shaper_vft = readl(qm->io_base + QM_VFT_CFG_DATA_L) |
+		     ((u64)readl(qm->io_base + QM_VFT_CFG_DATA_H) << 32);
+
+	cir_b = shaper_vft & QM_SHAPER_CIR_B_MASK;
+	cir_u = shaper_vft & QM_SHAPER_CIR_U_MASK;
+	cir_u = cir_u >> QM_SHAPER_FACTOR_CIR_U_SHIFT;
+
+	cir_s = shaper_vft & QM_SHAPER_CIR_S_MASK;
+	cir_s = cir_s >> QM_SHAPER_FACTOR_CIR_S_SHIFT;
+
+	ir_calc = acc_shaper_para_calc(cir_b, cir_u, cir_s);
+
+	ir = qm->factor[fun_index].func_qos * QM_QOS_RATE;
+	/*
+	 * func_qos is 0 when no qos has been recorded for this function (for
+	 * example after the factor table has been zeroed). Bail out with the
+	 * failure value instead of dividing by zero below.
+	 */
+	if (!ir)
+		return 0;
+
+	error_rate = QM_QOS_EXPAND_RATE * (u32)abs(ir_calc - ir) / ir;
+	if (error_rate > QM_QOS_MIN_ERROR_RATE) {
+		pci_err(qm->pdev, "error_rate: %u, get function qos is error!\n", error_rate);
+		return 0;
+	}
+
+	return ir;
+}
+
+/*
+ * qm_vf_get_qos() - look up a function's qos and send it to that VF.
+ * @qm: queue manager performing the lookup and the mailbox send.
+ * @fun_num: function number whose qos is requested.
+ *
+ * The value from qm_get_shaper_vft_qos() is packed into a QM_PF_SET_QOS
+ * mailbox command and sent with qm_ping_single_vf(). Failures are only
+ * logged.
+ */
+static void qm_vf_get_qos(struct hisi_qm *qm, u32 fun_num)
+{
+	struct device *dev = &qm->pdev->dev;
+	u32 qos = qm_get_shaper_vft_qos(qm, fun_num);
+	u64 mb_cmd;
+
+	if (!qos) {
+		dev_err(dev, "function(%u) failed to get qos by PF!\n", fun_num);
+		return;
+	}
+
+	mb_cmd = QM_PF_SET_QOS | (u64)qos << QM_MB_CMD_DATA_SHIFT;
+	if (qm_ping_single_vf(qm, mb_cmd, fun_num))
+		dev_err(dev, "failed to send cmd to VF(%u)!\n", fun_num);
+}
+
+/*
+ * qm_vf_read_qos() - ask the PF for this function's qos and wait for it.
+ * @qm: queue manager of the requesting function.
+ *
+ * Clears qm->mb_qos, sends QM_VF_GET_QOS through the ping_pf hook when one
+ * is installed, then polls qm->mb_qos (sleeping QM_WAIT_DST_ACK between
+ * checks) until it becomes non-zero.
+ *
+ * Return: 0 once qm->mb_qos is non-zero, the ping_pf() error if the request
+ * could not be sent, -ETIMEDOUT if no value arrived in time.
+ */
+static int qm_vf_read_qos(struct hisi_qm *qm)
+{
+	int cnt = 0;
+	/* Stays 0 when no ping_pf hook exists; was returned uninitialized. */
+	int ret = 0;
+
+	/* reset mailbox qos val */
+	qm->mb_qos = 0;
+
+	/* vf ping pf to get function qos */
+	if (qm->ops->ping_pf) {
+		ret = qm->ops->ping_pf(qm, QM_VF_GET_QOS);
+		if (ret) {
+			pci_err(qm->pdev, "failed to send cmd to PF to get qos!\n");
+			return ret;
+		}
+	}
+
+	while (true) {
+		msleep(QM_WAIT_DST_ACK);
+		if (qm->mb_qos)
+			break;
+
+		if (++cnt > QM_MAX_VF_WAIT_COUNT) {
+			pci_err(qm->pdev, "PF ping VF timeout!\n");
+			return -ETIMEDOUT;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * qm_algqos_read() - debugfs read handler for the "alg_qos" file.
+ * @filp: file whose private_data is the struct hisi_qm.
+ * @buf: user buffer to fill.
+ * @count: size of @buf.
+ * @pos: file position.
+ *
+ * A PF reads its own rate with qm_get_shaper_vft_qos(qm, 0); any other
+ * function obtains it through qm_vf_read_qos(). The rate divided by
+ * QM_QOS_RATE is printed as "%u\n". QM_RESETTING is held for the duration
+ * of the call.
+ *
+ * Return: result of simple_read_from_buffer(), -EAGAIN if QM_RESETTING was
+ * already set, or the qm_vf_read_qos() error.
+ */
+static ssize_t qm_algqos_read(struct file *filp, char __user *buf,
+			      size_t count, loff_t *pos)
+{
+	struct hisi_qm *qm = filp->private_data;
+	char tbuf[QM_DBG_READ_LEN];
+	u32 qos_val, ir;
+	int ret;
+
+	/* Mailbox and reset cannot be operated at the same time */
+	if (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
+		pci_err(qm->pdev, "dev resetting, read alg qos failed!\n");
+		return -EAGAIN;
+	}
+
+	if (qm->fun_type != QM_HW_PF) {
+		ret = qm_vf_read_qos(qm);
+		if (ret)
+			goto out_clear;
+		ir = qm->mb_qos;
+	} else {
+		ir = qm_get_shaper_vft_qos(qm, 0);
+	}
+
+	qos_val = ir / QM_QOS_RATE;
+	ret = scnprintf(tbuf, QM_DBG_READ_LEN, "%u\n", qos_val);
+	ret = simple_read_from_buffer(buf, count, pos, tbuf, ret);
+
+out_clear:
+	clear_bit(QM_RESETTING, &qm->misc_ctl);
+	return ret;
+}
+
+/*
+ * qm_qos_value_init() - parse a decimal qos string.
+ * @buf: NUL-terminated string that must consist of digits only.
+ * @val: receives the parsed value.
+ *
+ * Return: 0 on success, -EINVAL if @buf contains a non-digit character or
+ * no number could be converted (for example an empty string).
+ */
+static ssize_t qm_qos_value_init(const char *buf, unsigned long *val)
+{
+	size_t buflen = strlen(buf);
+	size_t i;
+	int ret;
+
+	for (i = 0; i < buflen; i++) {
+		if (!isdigit(buf[i]))
+			return -EINVAL;
+	}
+
+	/* @val is unsigned long, so the conversion must be %lu, not %ld. */
+	ret = sscanf(buf, "%lu", val);
+	if (ret != QM_QOS_VAL_NUM)
+		return -EINVAL;
+
+	return 0;
+}
+
+/*
+ * qm_algqos_write() - debugfs write handler for the "alg_qos" file.
+ * @filp: file whose private_data is the struct hisi_qm.
+ * @buf: user buffer holding "<bdf> <qos>".
+ * @count: number of bytes in @buf; must be below QM_DBG_READ_LEN.
+ * @pos: file position; only writes at position 0 are processed.
+ *
+ * The first token is parsed as "%d:%x:%d.%d" and the function index is
+ * derived as device * 8 + function; the first two fields are parsed but not
+ * otherwise used here. The second token is the qos value, which must be
+ * a decimal number in 1..QM_QOS_MAX_VAL. QM_RESETTING is held for the
+ * duration of the call.
+ *
+ * Return: @count on success, 0 for a write at a non-zero position, -EINVAL
+ * when called on a VF or on malformed input, -EAGAIN if QM_RESETTING was
+ * already set, -ENOSPC if @count is too large, or the
+ * simple_write_to_buffer() error.
+ */
+static ssize_t qm_algqos_write(struct file *filp, const char __user *buf,
+			       size_t count, loff_t *pos)
+{
+	struct hisi_qm *qm = filp->private_data;
+	char tbuf[QM_DBG_READ_LEN];
+	int tmp1, bus, device, function;
+	char tbuf_bdf[QM_DBG_READ_LEN] = {0};
+	/*
+	 * Sized like tbuf: an unbounded "%s" can copy a token almost as long
+	 * as the whole input, so a shorter buffer could be overrun.
+	 */
+	char val_buf[QM_DBG_READ_LEN] = {0};
+	unsigned int fun_index;
+	unsigned long val = 0;
+	int len, ret;
+
+	if (qm->fun_type == QM_HW_VF)
+		return -EINVAL;
+
+	/* Mailbox and reset cannot be operated at the same time */
+	if (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
+		pci_err(qm->pdev, "dev resetting, write alg qos failed!\n");
+		return -EAGAIN;
+	}
+
+	if (*pos != 0) {
+		ret = 0;
+		goto err_get_status;
+	}
+
+	if (count >= QM_DBG_READ_LEN) {
+		ret = -ENOSPC;
+		goto err_get_status;
+	}
+
+	len = simple_write_to_buffer(tbuf, QM_DBG_READ_LEN - 1, pos, buf, count);
+	if (len < 0) {
+		ret = len;
+		goto err_get_status;
+	}
+
+	tbuf[len] = '\0';
+	ret = sscanf(tbuf, "%s %s", tbuf_bdf, val_buf);
+	if (ret != QM_QOS_PARAM_NUM) {
+		ret = -EINVAL;
+		goto err_get_status;
+	}
+
+	/* Keep the original length limit on the qos token. */
+	if (strlen(val_buf) >= QM_QOS_VAL_MAX_LEN)
+		ret = -EINVAL;
+	else
+		ret = qm_qos_value_init(val_buf, &val);
+	if (val == 0 || val > QM_QOS_MAX_VAL || ret) {
+		pci_err(qm->pdev, "input qos value is error, please set 1~1000!\n");
+		ret = -EINVAL;
+		goto err_get_status;
+	}
+
+	ret = sscanf(tbuf_bdf, "%d:%x:%d.%d", &tmp1, &bus, &device, &function);
+	if (ret != QM_QOS_BDF_PARAM_NUM) {
+		pci_err(qm->pdev, "input pci bdf value is error!\n");
+		ret = -EINVAL;
+		goto err_get_status;
+	}
+
+	/* Out-of-range results are rejected by qm_func_shaper_enable(). */
+	fun_index = device * 8 + function;
+
+	ret = qm_func_shaper_enable(qm, fun_index, val);
+	if (ret) {
+		pci_err(qm->pdev, "failed to enable function shaper!\n");
+		ret = -EINVAL;
+		goto err_get_status;
+	}
+
+	ret = count;
+
+err_get_status:
+	clear_bit(QM_RESETTING, &qm->misc_ctl);
+	return ret;
+}
+
+/*
+ * File operations of the "alg_qos" debugfs file: reads are served by
+ * qm_algqos_read(), writes by qm_algqos_write().
+ */
+static const struct file_operations qm_algqos_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = qm_algqos_read,
+ .write = qm_algqos_write,
+};
+
+/**
+ * hisi_qm_set_algqos_init() - Create the "alg_qos" debugfs file.
+ * @qm: The qm for which we want to add the debugfs file.
+ *
+ * The file is created under qm->debug.debug_root with mode 0644 on a PF
+ * and 0444 otherwise.
+ */
+static void hisi_qm_set_algqos_init(struct hisi_qm *qm)
+{
+	debugfs_create_file("alg_qos",
+			    qm->fun_type == QM_HW_PF ? 0644 : 0444,
+			    qm->debug.debug_root, qm, &qm_algqos_fops);
+}
+
+/**
+ * hisi_qm_debug_init() - Initialize qm related debugfs files.
+ * @qm: The qm for which we want to add debugfs files.
+ *
+ * Creates the "qm" directory under qm->debug.debug_root and populates it
+ * with the "regs", "cmd" and "status" files plus one file per entry of
+ * qm_dfx_files. A PF additionally gets the CURRENT_QM file in the debug
+ * root and the remaining qm_create_debugfs_file() entries in the "qm"
+ * directory. Hardware from QM_HW_V3 on also gets the alg qos file.
+ */
+void hisi_qm_debug_init(struct hisi_qm *qm)
+{
+	struct qm_dfx *dfx = &qm->debug.dfx;
+	struct dentry *qm_d = debugfs_create_dir("qm", qm->debug.debug_root);
+	void *data;
+	int i;
+
+	qm->debug.qm_d = qm_d;
+
+	/* only show this in PF */
+	if (qm->fun_type == QM_HW_PF) {
+		qm_create_debugfs_file(qm, qm->debug.debug_root, CURRENT_QM);
+		for (i = CURRENT_Q; i < DEBUG_FILE_NUM; i++)
+			qm_create_debugfs_file(qm, qm_d, i);
+	}
+
+	debugfs_create_file("regs", 0444, qm_d, qm, &qm_regs_fops);
+	debugfs_create_file("cmd", 0600, qm_d, qm, &qm_cmd_fops);
+	debugfs_create_file("status", 0444, qm_d, qm, &qm_status_fops);
+
+	/* Each dfx file is backed by the counter at its offset inside dfx. */
+	for (i = 0; i < ARRAY_SIZE(qm_dfx_files); i++) {
+		data = (atomic64_t *)((uintptr_t)dfx + qm_dfx_files[i].offset);
+		debugfs_create_file(qm_dfx_files[i].name, 0644, qm_d, data,
+				    &qm_atomic64_ops);
+	}
+
+	if (qm->ver >= QM_HW_V3)
+		hisi_qm_set_algqos_init(qm);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_debug_init);
+
+/**
+ * hisi_qm_debug_regs_clear() - clear qm debug related registers.
+ * @qm: The qm for which we want to clear its debug registers.
+ *
+ * Writes zero to the four DFX selection registers, then reads the first
+ * CNT_CYC_REGS_NUM entries of qm_dfx_regs while QM_DFX_CNT_CLR_CE is set
+ * to 1, and finally writes QM_DFX_CNT_CLR_CE back to 0.
+ */
+void hisi_qm_debug_regs_clear(struct hisi_qm *qm)
+{
+ struct qm_dfx_registers *regs;
+ int i;
+
+ /* clear current_qm */
+ writel(0x0, qm->io_base + QM_DFX_MB_CNT_VF);
+ writel(0x0, qm->io_base + QM_DFX_DB_CNT_VF);
+
+ /* clear current_q */
+ writel(0x0, qm->io_base + QM_DFX_SQE_CNT_VF_SQN);
+ writel(0x0, qm->io_base + QM_DFX_CQE_CNT_VF_CQN);
+
+ /*
+ * these registers are reading and clearing, so clear them after
+ * reading them.
+ */
+ writel(0x1, qm->io_base + QM_DFX_CNT_CLR_CE);
+
+ /* the values read here are discarded; only the read itself matters */
+ regs = qm_dfx_regs;
+ for (i = 0; i < CNT_CYC_REGS_NUM; i++) {
+ readl(qm->io_base + regs->reg_offset);
+ regs++;
+ }
+
+ /* clear clear_enable */
+ writel(0x0, qm->io_base + QM_DFX_CNT_CLR_CE);
+}
+EXPORT_SYMBOL_GPL(hisi_qm_debug_regs_clear);
+
/**
* hisi_qm_sriov_enable() - enable virtual functions
* @pdev: the PCIe device
@@ -3416,6 +4327,7 @@ EXPORT_SYMBOL_GPL(hisi_qm_sriov_enable);
int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen)
{
struct hisi_qm *qm = pci_get_drvdata(pdev);
+ int total_vfs = pci_sriov_get_totalvfs(qm->pdev);
if (pci_vfs_assigned(pdev)) {
pci_err(pdev, "Failed to disable VFs as VFs are assigned!\n");
@@ -3429,6 +4341,9 @@ int hisi_qm_sriov_disable(struct pci_dev *pdev, bool is_frozen)
}
pci_disable_sriov(pdev);
+ /* clear vf function shaper configure array */
+ memset(qm->factor + 1, 0, sizeof(struct qm_shaper_factor) * total_vfs);
+
return qm_clear_vft_config(qm);
}
EXPORT_SYMBOL_GPL(hisi_qm_sriov_disable);
@@ -3527,17 +4442,15 @@ pci_ers_result_t hisi_qm_dev_err_detected(struct pci_dev *pdev,
}
EXPORT_SYMBOL_GPL(hisi_qm_dev_err_detected);
-static u32 qm_get_hw_error_status(struct hisi_qm *qm)
-{
- return readl(qm->io_base + QM_ABNORMAL_INT_STATUS);
-}
-
static int qm_check_req_recv(struct hisi_qm *qm)
{
struct pci_dev *pdev = qm->pdev;
int ret;
u32 val;
+ if (qm->ver >= QM_HW_V3)
+ return 0;
+
writel(ACC_VENDOR_ID_VALUE, qm->io_base + QM_PEH_VENDOR_ID);
ret = readl_relaxed_poll_timeout(qm->io_base + QM_PEH_VENDOR_ID, val,
(val == ACC_VENDOR_ID_VALUE),
@@ -3608,28 +4521,6 @@ static int qm_set_vf_mse(struct hisi_qm *qm, bool set)
return -ETIMEDOUT;
}
-static int qm_set_msi(struct hisi_qm *qm, bool set)
-{
- struct pci_dev *pdev = qm->pdev;
-
- if (set) {
- pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_MASK_64,
- 0);
- } else {
- pci_write_config_dword(pdev, pdev->msi_cap + PCI_MSI_MASK_64,
- ACC_PEH_MSI_DISABLE);
- if (qm->err_status.is_qm_ecc_mbit ||
- qm->err_status.is_dev_ecc_mbit)
- return 0;
-
- mdelay(1);
- if (readl(qm->io_base + QM_PEH_DFX_INFO0))
- return -EFAULT;
- }
-
- return 0;
-}
-
static int qm_vf_reset_prepare(struct hisi_qm *qm,
enum qm_stop_reason stop_reason)
{
@@ -3660,14 +4551,35 @@ stop_fail:
return ret;
}
-static int qm_reset_prepare_ready(struct hisi_qm *qm)
+static int qm_try_stop_vfs(struct hisi_qm *qm, u64 cmd,
+ enum qm_stop_reason stop_reason)
{
struct pci_dev *pdev = qm->pdev;
- struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev));
+ int ret;
+
+ if (!qm->vfs_num)
+ return 0;
+
+ /* Kunpeng930 supports to notify VFs to stop before PF reset */
+ if (qm->ops->ping_all_vfs) {
+ ret = qm->ops->ping_all_vfs(qm, cmd);
+ if (ret)
+ pci_err(pdev, "failed to send cmd to all VFs before PF reset!\n");
+ } else {
+ ret = qm_vf_reset_prepare(qm, stop_reason);
+ if (ret)
+ pci_err(pdev, "failed to prepare reset, ret = %d.\n", ret);
+ }
+
+ return ret;
+}
+
+static int qm_wait_reset_finish(struct hisi_qm *qm)
+{
int delay = 0;
/* All reset requests need to be queued for processing */
- while (test_and_set_bit(QM_RESETTING, &pf_qm->misc_ctl)) {
+ while (test_and_set_bit(QM_RESETTING, &qm->misc_ctl)) {
msleep(++delay);
if (delay > QM_RESET_WAIT_TIMEOUT)
return -EBUSY;
@@ -3676,6 +4588,32 @@ static int qm_reset_prepare_ready(struct hisi_qm *qm)
return 0;
}
+/*
+ * qm_reset_prepare_ready() - wait until this function may start a reset.
+ * @qm: queue manager about to be reset.
+ *
+ * Below QM_HW_V3 the wait is done on the PF's queue manager, because PF and
+ * VF on the host do not support resetting at the same time on Kunpeng920;
+ * from QM_HW_V3 on it is done on @qm itself.
+ *
+ * Return: result of qm_wait_reset_finish().
+ */
+static int qm_reset_prepare_ready(struct hisi_qm *qm)
+{
+	struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(qm->pdev));
+
+	return qm_wait_reset_finish(qm->ver < QM_HW_V3 ? pf_qm : qm);
+}
+
+/*
+ * qm_reset_bit_clear() - clear the QM_RESETTING flag after a reset.
+ * @qm: queue manager that finished (or aborted) its reset.
+ *
+ * The flag of @qm is always cleared; below QM_HW_V3 the flag of the PF's
+ * queue manager is cleared as well.
+ */
+static void qm_reset_bit_clear(struct hisi_qm *qm)
+{
+	struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(qm->pdev));
+
+	if (qm->ver < QM_HW_V3)
+		clear_bit(QM_RESETTING, &pf_qm->misc_ctl);
+
+	clear_bit(QM_RESETTING, &qm->misc_ctl);
+}
+
static int qm_controller_reset_prepare(struct hisi_qm *qm)
{
struct pci_dev *pdev = qm->pdev;
@@ -3687,22 +4625,25 @@ static int qm_controller_reset_prepare(struct hisi_qm *qm)
return ret;
}
- if (qm->vfs_num) {
- ret = qm_vf_reset_prepare(qm, QM_SOFT_RESET);
- if (ret) {
- pci_err(pdev, "Fails to stop VFs!\n");
- clear_bit(QM_RESETTING, &qm->misc_ctl);
- return ret;
- }
- }
+ /* PF obtains the information of VF by querying the register. */
+ qm_cmd_uninit(qm);
+
+ /* Whether VFs stop successfully, soft reset will continue. */
+ ret = qm_try_stop_vfs(qm, QM_PF_SRST_PREPARE, QM_SOFT_RESET);
+ if (ret)
+ pci_err(pdev, "failed to stop vfs by pf in soft reset.\n");
ret = hisi_qm_stop(qm, QM_SOFT_RESET);
if (ret) {
pci_err(pdev, "Fails to stop QM!\n");
- clear_bit(QM_RESETTING, &qm->misc_ctl);
+ qm_reset_bit_clear(qm);
return ret;
}
+ ret = qm_wait_vf_prepare_finish(qm);
+ if (ret)
+ pci_err(pdev, "failed to stop by vfs in soft reset!\n");
+
clear_bit(QM_RST_SCHED, &qm->misc_ctl);
return 0;
@@ -3712,6 +4653,10 @@ static void qm_dev_ecc_mbit_handle(struct hisi_qm *qm)
{
u32 nfe_enb = 0;
+ /* Kunpeng930 hardware automatically close master ooo when NFE occurs */
+ if (qm->ver >= QM_HW_V3)
+ return;
+
if (!qm->err_status.is_dev_ecc_mbit &&
qm->err_status.is_qm_ecc_mbit &&
qm->err_ini->close_axi_master_ooo) {
@@ -3748,7 +4693,7 @@ static int qm_soft_reset(struct hisi_qm *qm)
}
}
- ret = qm_set_msi(qm, false);
+ ret = qm->ops->set_msi(qm, false);
if (ret) {
pci_err(pdev, "Fails to disable PEH MSI bit.\n");
return ret;
@@ -3770,6 +4715,9 @@ static int qm_soft_reset(struct hisi_qm *qm)
return ret;
}
+ if (qm->err_ini->close_sva_prefetch)
+ qm->err_ini->close_sva_prefetch(qm);
+
ret = qm_set_pf_mse(qm, false);
if (ret) {
pci_err(pdev, "Fails to disable pf MSE bit.\n");
@@ -3830,9 +4778,32 @@ restart_fail:
return ret;
}
-static u32 qm_get_dev_err_status(struct hisi_qm *qm)
+static int qm_try_start_vfs(struct hisi_qm *qm, enum qm_mb_cmd cmd)
{
- return qm->err_ini->get_dev_hw_err_status(qm);
+ struct pci_dev *pdev = qm->pdev;
+ int ret;
+
+ if (!qm->vfs_num)
+ return 0;
+
+ ret = qm_vf_q_assign(qm, qm->vfs_num);
+ if (ret) {
+ pci_err(pdev, "failed to assign VFs, ret = %d.\n", ret);
+ return ret;
+ }
+
+ /* Kunpeng930 supports to notify VFs to start after PF reset. */
+ if (qm->ops->ping_all_vfs) {
+ ret = qm->ops->ping_all_vfs(qm, cmd);
+ if (ret)
+ pci_warn(pdev, "failed to send cmd to all VFs after PF reset!\n");
+ } else {
+ ret = qm_vf_reset_done(qm);
+ if (ret)
+ pci_warn(pdev, "failed to start vfs, ret = %d.\n", ret);
+ }
+
+ return ret;
}
static int qm_dev_hw_init(struct hisi_qm *qm)
@@ -3844,6 +4815,12 @@ static void qm_restart_prepare(struct hisi_qm *qm)
{
u32 value;
+ if (qm->err_ini->open_sva_prefetch)
+ qm->err_ini->open_sva_prefetch(qm);
+
+ if (qm->ver >= QM_HW_V3)
+ return;
+
if (!qm->err_status.is_qm_ecc_mbit &&
!qm->err_status.is_dev_ecc_mbit)
return;
@@ -3863,15 +4840,15 @@ static void qm_restart_prepare(struct hisi_qm *qm)
/* clear AM Reorder Buffer ecc mbit source */
writel(ACC_ROB_ECC_ERR_MULTPL, qm->io_base + ACC_AM_ROB_ECC_INT_STS);
-
- if (qm->err_ini->open_axi_master_ooo)
- qm->err_ini->open_axi_master_ooo(qm);
}
static void qm_restart_done(struct hisi_qm *qm)
{
u32 value;
+ if (qm->ver >= QM_HW_V3)
+ goto clear_flags;
+
if (!qm->err_status.is_qm_ecc_mbit &&
!qm->err_status.is_dev_ecc_mbit)
return;
@@ -3881,6 +4858,7 @@ static void qm_restart_done(struct hisi_qm *qm)
value |= qm->err_info.msi_wr_port;
writel(value, qm->io_base + ACC_AM_CFG_PORT_WR_EN);
+clear_flags:
qm->err_status.is_qm_ecc_mbit = false;
qm->err_status.is_dev_ecc_mbit = false;
}
@@ -3890,7 +4868,7 @@ static int qm_controller_reset_done(struct hisi_qm *qm)
struct pci_dev *pdev = qm->pdev;
int ret;
- ret = qm_set_msi(qm, true);
+ ret = qm->ops->set_msi(qm, true);
if (ret) {
pci_err(pdev, "Fails to enable PEH MSI bit!\n");
return ret;
@@ -3917,6 +4895,9 @@ static int qm_controller_reset_done(struct hisi_qm *qm)
}
qm_restart_prepare(qm);
+ hisi_qm_dev_err_init(qm);
+ if (qm->err_ini->open_axi_master_ooo)
+ qm->err_ini->open_axi_master_ooo(qm);
ret = qm_restart(qm);
if (ret) {
@@ -3924,24 +4905,18 @@ static int qm_controller_reset_done(struct hisi_qm *qm)
return ret;
}
- if (qm->vfs_num) {
- ret = qm_vf_q_assign(qm, qm->vfs_num);
- if (ret) {
- pci_err(pdev, "Failed to assign queue!\n");
- return ret;
- }
- }
+ ret = qm_try_start_vfs(qm, QM_PF_RESET_DONE);
+ if (ret)
+ pci_err(pdev, "failed to start vfs by pf in soft reset.\n");
- ret = qm_vf_reset_done(qm);
- if (ret) {
- pci_err(pdev, "Failed to start VFs!\n");
- return -EPERM;
- }
+ ret = qm_wait_vf_prepare_finish(qm);
+ if (ret)
+ pci_err(pdev, "failed to start by vfs in soft reset!\n");
- hisi_qm_dev_err_init(qm);
+ qm_cmd_init(qm);
qm_restart_done(qm);
- clear_bit(QM_RESETTING, &qm->misc_ctl);
+ qm_reset_bit_clear(qm);
return 0;
}
@@ -3962,13 +4937,13 @@ static int qm_controller_reset(struct hisi_qm *qm)
ret = qm_soft_reset(qm);
if (ret) {
pci_err(pdev, "Controller reset failed (%d)\n", ret);
- clear_bit(QM_RESETTING, &qm->misc_ctl);
+ qm_reset_bit_clear(qm);
return ret;
}
ret = qm_controller_reset_done(qm);
if (ret) {
- clear_bit(QM_RESETTING, &qm->misc_ctl);
+ qm_reset_bit_clear(qm);
return ret;
}
@@ -4005,21 +4980,6 @@ pci_ers_result_t hisi_qm_dev_slot_reset(struct pci_dev *pdev)
}
EXPORT_SYMBOL_GPL(hisi_qm_dev_slot_reset);
-/* check the interrupt is ecc-mbit error or not */
-static int qm_check_dev_error(struct hisi_qm *qm)
-{
- int ret;
-
- if (qm->fun_type == QM_HW_VF)
- return 0;
-
- ret = qm_get_hw_error_status(qm) & QM_ECC_MBIT;
- if (ret)
- return ret;
-
- return (qm_get_dev_err_status(qm) & qm->err_info.ecc_2bits_mask);
-}
-
void hisi_qm_reset_prepare(struct pci_dev *pdev)
{
struct hisi_qm *pf_qm = pci_get_drvdata(pci_physfn(pdev));
@@ -4045,14 +5005,13 @@ void hisi_qm_reset_prepare(struct pci_dev *pdev)
return;
}
- if (qm->vfs_num) {
- ret = qm_vf_reset_prepare(qm, QM_FLR);
- if (ret) {
- pci_err(pdev, "Failed to prepare reset, ret = %d.\n",
- ret);
- return;
- }
- }
+ /* PF obtains the information of VF by querying the register. */
+ if (qm->fun_type == QM_HW_PF)
+ qm_cmd_uninit(qm);
+
+ ret = qm_try_stop_vfs(qm, QM_PF_FLR_PREPARE, QM_FLR);
+ if (ret)
+ pci_err(pdev, "failed to stop vfs by pf in FLR.\n");
ret = hisi_qm_stop(qm, QM_FLR);
if (ret) {
@@ -4060,6 +5019,10 @@ void hisi_qm_reset_prepare(struct pci_dev *pdev)
return;
}
+ ret = qm_wait_vf_prepare_finish(qm);
+ if (ret)
+ pci_err(pdev, "failed to stop by vfs in FLR!\n");
+
pci_info(pdev, "FLR resetting...\n");
}
EXPORT_SYMBOL_GPL(hisi_qm_reset_prepare);
@@ -4085,42 +5048,38 @@ void hisi_qm_reset_done(struct pci_dev *pdev)
struct hisi_qm *qm = pci_get_drvdata(pdev);
int ret;
- hisi_qm_dev_err_init(pf_qm);
-
- ret = qm_restart(qm);
- if (ret) {
- pci_err(pdev, "Failed to start QM, ret = %d.\n", ret);
- goto flr_done;
- }
-
if (qm->fun_type == QM_HW_PF) {
ret = qm_dev_hw_init(qm);
if (ret) {
pci_err(pdev, "Failed to init PF, ret = %d.\n", ret);
goto flr_done;
}
+ }
- if (!qm->vfs_num)
- goto flr_done;
-
- ret = qm_vf_q_assign(qm, qm->vfs_num);
- if (ret) {
- pci_err(pdev, "Failed to assign VFs, ret = %d.\n", ret);
- goto flr_done;
- }
+ hisi_qm_dev_err_init(pf_qm);
- ret = qm_vf_reset_done(qm);
- if (ret) {
- pci_err(pdev, "Failed to start VFs, ret = %d.\n", ret);
- goto flr_done;
- }
+ ret = qm_restart(qm);
+ if (ret) {
+ pci_err(pdev, "Failed to start QM, ret = %d.\n", ret);
+ goto flr_done;
}
+ ret = qm_try_start_vfs(qm, QM_PF_RESET_DONE);
+ if (ret)
+ pci_err(pdev, "failed to start vfs by pf in FLR.\n");
+
+ ret = qm_wait_vf_prepare_finish(qm);
+ if (ret)
+ pci_err(pdev, "failed to start by vfs in FLR!\n");
+
flr_done:
+ if (qm->fun_type == QM_HW_PF)
+ qm_cmd_init(qm);
+
if (qm_flr_reset_complete(pdev))
pci_info(pdev, "FLR reset complete\n");
- clear_bit(QM_RESETTING, &qm->misc_ctl);
+ qm_reset_bit_clear(qm);
}
EXPORT_SYMBOL_GPL(hisi_qm_reset_done);
@@ -4149,7 +5108,7 @@ static int qm_irq_register(struct hisi_qm *qm)
if (ret)
return ret;
- if (qm->ver != QM_HW_V1) {
+ if (qm->ver > QM_HW_V1) {
ret = request_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR),
qm_aeq_irq, 0, qm->dev_name, qm);
if (ret)
@@ -4164,8 +5123,18 @@ static int qm_irq_register(struct hisi_qm *qm)
}
}
+ if (qm->ver > QM_HW_V2) {
+ ret = request_irq(pci_irq_vector(pdev, QM_CMD_EVENT_IRQ_VECTOR),
+ qm_mb_cmd_irq, 0, qm->dev_name, qm);
+ if (ret)
+ goto err_mb_cmd_irq;
+ }
+
return 0;
+err_mb_cmd_irq:
+ if (qm->fun_type == QM_HW_PF)
+ free_irq(pci_irq_vector(pdev, QM_ABNORMAL_EVENT_IRQ_VECTOR), qm);
err_abonormal_irq:
free_irq(pci_irq_vector(pdev, QM_AEQ_EVENT_IRQ_VECTOR), qm);
err_aeq_irq:
@@ -4202,6 +5171,183 @@ static void hisi_qm_controller_reset(struct work_struct *rst_work)
}
+/*
+ * Prepare this function for a reset announced by the PF: check that a reset
+ * may begin, stop the QM, save the PCI state and report the outcome to the
+ * PF through ops->ping_pf().
+ */
+static void qm_pf_reset_vf_prepare(struct hisi_qm *qm,
+				   enum qm_stop_reason stop_reason)
+{
+	struct pci_dev *pdev = qm->pdev;
+	struct device *dev = &pdev->dev;
+	enum qm_mb_cmd reply = QM_VF_PREPARE_FAIL;
+	int rc;
+
+	rc = qm_reset_prepare_ready(qm);
+	if (rc) {
+		dev_err(dev, "reset prepare not ready!\n");
+		atomic_set(&qm->status.flags, QM_STOP);
+	} else {
+		rc = hisi_qm_stop(qm, stop_reason);
+		if (rc) {
+			dev_err(dev, "failed to stop QM, ret = %d.\n", rc);
+			atomic_set(&qm->status.flags, QM_STOP);
+		} else {
+			reply = QM_VF_PREPARE_DONE;
+		}
+	}
+
+	/* PCI state is saved and the PF is pinged on success and failure alike. */
+	pci_save_state(pdev);
+	rc = qm->ops->ping_pf(qm, reply);
+	if (rc)
+		dev_warn(dev, "PF responds timeout in reset prepare!\n");
+}
+
+/*
+ * Restore the PCI state, restart the QM, report the result to the PF and
+ * finally call qm_reset_bit_clear().
+ */
+static void qm_pf_reset_vf_done(struct hisi_qm *qm)
+{
+	struct pci_dev *pdev = qm->pdev;
+	enum qm_mb_cmd reply;
+	int rc;
+
+	pci_restore_state(pdev);
+
+	rc = hisi_qm_start(qm);
+	if (rc)
+		dev_err(&pdev->dev, "failed to start QM, ret = %d.\n", rc);
+	reply = rc ? QM_VF_START_FAIL : QM_VF_START_DONE;
+
+	/* The PF is told about the outcome even when the restart failed. */
+	if (qm->ops->ping_pf(qm, reply))
+		dev_warn(&pdev->dev, "PF responds timeout in reset done!\n");
+
+	qm_reset_bit_clear(qm);
+}
+
+/*
+ * Poll QM_IFC_INT_SOURCE_V until it reads BIT(0), then fetch the mailbox
+ * message and check that it carries QM_PF_RESET_DONE.
+ *
+ * Return: 0 on success, -ETIMEDOUT if polling times out, the error from
+ * qm_get_mb_cmd() if the message cannot be read, or -EINVAL if another
+ * command was received.
+ */
+static int qm_wait_pf_reset_finish(struct hisi_qm *qm)
+{
+	struct device *dev = &qm->pdev->dev;
+	u32 status, cmd;
+	u64 msg;
+	int rc;
+
+	/* Wait for reset to finish */
+	rc = readl_relaxed_poll_timeout(qm->io_base + QM_IFC_INT_SOURCE_V,
+					status, status == BIT(0),
+					QM_VF_RESET_WAIT_US,
+					QM_VF_RESET_WAIT_TIMEOUT_US);
+	/* hardware completion status should be available by this time */
+	if (rc) {
+		dev_err(dev, "couldn't get reset done status from PF, timeout!\n");
+		return -ETIMEDOUT;
+	}
+
+	/*
+	 * The interrupt is cleared to ack the PF whether or not the message
+	 * was read successfully.
+	 */
+	rc = qm_get_mb_cmd(qm, &msg, 0);
+	qm_clear_cmd_interrupt(qm, 0);
+	if (rc) {
+		dev_err(dev, "failed to get msg from PF in reset done!\n");
+		return rc;
+	}
+
+	cmd = msg & QM_MB_CMD_DATA_MASK;
+	if (cmd == QM_PF_RESET_DONE)
+		return 0;
+
+	dev_err(dev, "the cmd(%u) is not reset done!\n", cmd);
+	return -EINVAL;
+}
+
+/*
+ * Run the whole reset sequence in response to a reset request from the PF:
+ * prepare, wait for the PF to finish, then restart. qm_cmd_init() is called
+ * again on both the success and the failure path.
+ */
+static void qm_pf_reset_vf_process(struct hisi_qm *qm,
+				   enum qm_stop_reason stop_reason)
+{
+	struct device *dev = &qm->pdev->dev;
+
+	dev_info(dev, "device reset start...\n");
+
+	/* The message is obtained by querying the register during resetting */
+	qm_cmd_uninit(qm);
+	qm_pf_reset_vf_prepare(qm, stop_reason);
+
+	if (qm_wait_pf_reset_finish(qm)) {
+		/* No valid "reset done" from the PF: only undo the setup. */
+		qm_cmd_init(qm);
+		qm_reset_bit_clear(qm);
+		return;
+	}
+
+	qm_pf_reset_vf_done(qm);
+	qm_cmd_init(qm);
+
+	dev_info(dev, "device reset done.\n");
+}
+
+/*
+ * Fetch the mailbox message sent by function @fun_num, ack it by clearing
+ * the interrupt, and dispatch on the command it carries.
+ */
+static void qm_handle_cmd_msg(struct hisi_qm *qm, u32 fun_num)
+{
+	struct device *dev = &qm->pdev->dev;
+	u32 cmd;
+	u64 msg;
+	int rc;
+
+	/*
+	 * The interrupt is cleared to ack the source whether or not the
+	 * message could be read through the mailbox.
+	 */
+	rc = qm_get_mb_cmd(qm, &msg, fun_num);
+	qm_clear_cmd_interrupt(qm, BIT(fun_num));
+	if (rc) {
+		dev_err(dev, "failed to get msg from source!\n");
+		return;
+	}
+
+	cmd = msg & QM_MB_CMD_DATA_MASK;
+	switch (cmd) {
+	case QM_PF_FLR_PREPARE:
+		qm_pf_reset_vf_process(qm, QM_FLR);
+		return;
+	case QM_PF_SRST_PREPARE:
+		qm_pf_reset_vf_process(qm, QM_SOFT_RESET);
+		return;
+	case QM_VF_GET_QOS:
+		qm_vf_get_qos(qm, fun_num);
+		return;
+	case QM_PF_SET_QOS:
+		/* The value sits above the command field of the message. */
+		qm->mb_qos = msg >> QM_MB_CMD_DATA_SHIFT;
+		return;
+	default:
+		dev_err(dev, "unsupported cmd %u sent by function(%u)!\n", cmd, fun_num);
+		return;
+	}
+}
+
+/*
+ * Work handler for mailbox command interrupts.
+ *
+ * On a PF, QM_IFC_INT_SOURCE_P is read and every function 1..vfs_num whose
+ * bit is set has its message handled. On any other function type the
+ * message from function 0 is handled.
+ */
+static void qm_cmd_process(struct work_struct *cmd_process)
+{
+	struct hisi_qm *qm = container_of(cmd_process,
+					struct hisi_qm, cmd_process);
+	u32 vfs_num = qm->vfs_num;
+	u64 val;
+	u32 i;
+
+	if (qm->fun_type != QM_HW_PF) {
+		qm_handle_cmd_msg(qm, 0);
+		return;
+	}
+
+	val = readq(qm->io_base + QM_IFC_INT_SOURCE_P);
+	if (!val)
+		return;
+
+	for (i = 1; i <= vfs_num; i++) {
+		/*
+		 * val is 64 bits wide, so the mask must be too: BIT() is
+		 * only unsigned long and would be shifted out of range for
+		 * i >= 32 on a 32-bit build.
+		 */
+		if (val & BIT_ULL(i))
+			qm_handle_cmd_msg(qm, i);
+	}
+}
+
/**
* hisi_qm_alg_register() - Register alg to crypto and add qm to qm_list.
* @qm: The qm needs add.
@@ -4212,11 +5358,9 @@ static void hisi_qm_controller_reset(struct work_struct *rst_work)
*/
int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list)
{
+ struct device *dev = &qm->pdev->dev;
int flag = 0;
int ret = 0;
- /* HW V2 not support both use uacce sva mode and hardware crypto algs */
- if (qm->ver <= QM_HW_V2 && qm->use_sva)
- return 0;
mutex_lock(&qm_list->lock);
if (list_empty(&qm_list->list))
@@ -4224,6 +5368,11 @@ int hisi_qm_alg_register(struct hisi_qm *qm, struct hisi_qm_list *qm_list)
list_add_tail(&qm->list, &qm_list->list);
mutex_unlock(&qm_list->lock);
+ if (qm->ver <= QM_HW_V2 && qm->use_sva) {
+ dev_info(dev, "HW V2 not both use uacce sva mode and hardware crypto algs.\n");
+ return 0;
+ }
+
if (flag) {
ret = qm_list->register_to_crypto(qm);
if (ret) {
@@ -4248,13 +5397,13 @@ EXPORT_SYMBOL_GPL(hisi_qm_alg_register);
*/
void hisi_qm_alg_unregister(struct hisi_qm *qm, struct hisi_qm_list *qm_list)
{
- if (qm->ver <= QM_HW_V2 && qm->use_sva)
- return;
-
mutex_lock(&qm_list->lock);
list_del(&qm->list);
mutex_unlock(&qm_list->lock);
+ if (qm->ver <= QM_HW_V2 && qm->use_sva)
+ return;
+
if (list_empty(&qm_list->list))
qm_list->unregister_from_crypto(qm);
}
@@ -4389,6 +5538,94 @@ err_disable_pcidev:
return ret;
}
+/*
+ * Initialise the work items of @qm; which ones are set up depends on the
+ * function type and the hardware version.
+ */
+static void hisi_qm_init_work(struct hisi_qm *qm)
+{
+	bool is_pf = qm->fun_type == QM_HW_PF;
+	bool has_cmd_work = qm->ver > QM_HW_V2;
+
+	INIT_WORK(&qm->work, qm_work_process);
+
+	/* The controller reset work is only set up on a PF. */
+	if (is_pf)
+		INIT_WORK(&qm->rst_work, hisi_qm_controller_reset);
+
+	/* The mailbox command work exists only on hardware newer than V2. */
+	if (has_cmd_work)
+		INIT_WORK(&qm->cmd_process, qm_cmd_process);
+}
+
+/*
+ * Allocate the qp array and initialise the per-queue memory of every qp.
+ *
+ * Return: 0 on success, -ENOMEM if the qp array cannot be allocated, or the
+ * error returned by hisi_qp_memory_init(). On a per-queue failure
+ * hisi_qp_memory_uninit() is called with the index that failed.
+ */
+static int hisi_qp_alloc_memory(struct hisi_qm *qm)
+{
+	struct device *dev = &qm->pdev->dev;
+	size_t qp_dma_size;
+	int i, ret;
+
+	qm->qp_array = kcalloc(qm->qp_num, sizeof(struct hisi_qp), GFP_KERNEL);
+	if (!qm->qp_array)
+		return -ENOMEM;
+
+	/* one more page for device or qp statuses */
+	qp_dma_size = qm->sqe_size * QM_Q_DEPTH +
+		      sizeof(struct qm_cqe) * QM_Q_DEPTH;
+	qp_dma_size = PAGE_ALIGN(qp_dma_size) + PAGE_SIZE;
+
+	/* The size is the same for every queue, so it is logged once. */
+	dev_dbg(dev, "allocate qp dma buf size=%zx\n", qp_dma_size);
+
+	for (i = 0; i < qm->qp_num; i++) {
+		ret = hisi_qp_memory_init(qm, qp_dma_size, i);
+		if (ret)
+			goto err_init_qp_mem;
+	}
+
+	return 0;
+
+err_init_qp_mem:
+	hisi_qp_memory_uninit(qm, i);
+
+	return ret;
+}
+
+/*
+ * Allocate the memory owned by @qm: the shaper factor array (total_vfs + 1
+ * entries), one coherent DMA buffer that is carved up into the eqe, aeqe,
+ * sqc and cqc areas, and the per-queue memory.
+ *
+ * Return: 0 on success, -ENOMEM on allocation failure, or the error from
+ * hisi_qp_alloc_memory(). Everything set up here is released on failure.
+ */
+static int hisi_qm_memory_init(struct hisi_qm *qm)
+{
+	struct device *dev = &qm->pdev->dev;
+	int ret, total_vfs;
+	size_t off = 0;
+
+	total_vfs = pci_sriov_get_totalvfs(qm->pdev);
+	qm->factor = kcalloc(total_vfs + 1, sizeof(struct qm_shaper_factor), GFP_KERNEL);
+	if (!qm->factor)
+		return -ENOMEM;
+
+/* Point (qm)->type and (qm)->type_dma at the current offset, then advance it. */
+#define QM_INIT_BUF(qm, type, num) do { \
+	(qm)->type = ((qm)->qdma.va + (off)); \
+	(qm)->type##_dma = (qm)->qdma.dma + (off); \
+	off += QMC_ALIGN(sizeof(struct qm_##type) * (num)); \
+} while (0)
+
+	idr_init(&qm->qp_idr);
+	qm->qdma.size = QMC_ALIGN(sizeof(struct qm_eqe) * QM_EQ_DEPTH) +
+			QMC_ALIGN(sizeof(struct qm_aeqe) * QM_Q_DEPTH) +
+			QMC_ALIGN(sizeof(struct qm_sqc) * qm->qp_num) +
+			QMC_ALIGN(sizeof(struct qm_cqc) * qm->qp_num);
+	qm->qdma.va = dma_alloc_coherent(dev, qm->qdma.size, &qm->qdma.dma,
+					 GFP_ATOMIC);
+	dev_dbg(dev, "allocate qm dma buf size=%zx\n", qm->qdma.size);
+	if (!qm->qdma.va) {
+		ret = -ENOMEM;
+		goto err_destroy_idr;
+	}
+
+	/* The carving order must match the size computation above. */
+	QM_INIT_BUF(qm, eqe, QM_EQ_DEPTH);
+	QM_INIT_BUF(qm, aeqe, QM_Q_DEPTH);
+	QM_INIT_BUF(qm, sqc, qm->qp_num);
+	QM_INIT_BUF(qm, cqc, qm->qp_num);
+
+	ret = hisi_qp_alloc_memory(qm);
+	if (ret)
+		goto err_alloc_qp_array;
+
+	return 0;
+
+err_alloc_qp_array:
+	dma_free_coherent(dev, qm->qdma.size, qm->qdma.va, qm->qdma.dma);
+err_destroy_idr:
+	/* Pair the idr_init() above so a failed init leaves nothing behind. */
+	idr_destroy(&qm->qp_idr);
+	kfree(qm->factor);
+
+	return ret;
+}
+
/**
* hisi_qm_init() - Initialize configures about qm.
* @qm: The qm needing init.
@@ -4426,10 +5663,8 @@ int hisi_qm_init(struct hisi_qm *qm)
if (ret)
goto err_alloc_uacce;
- INIT_WORK(&qm->work, qm_work_process);
- if (qm->fun_type == QM_HW_PF)
- INIT_WORK(&qm->rst_work, hisi_qm_controller_reset);
-
+ hisi_qm_init_work(qm);
+ qm_cmd_init(qm);
atomic_set(&qm->status.flags, QM_INIT);
return 0;
diff --git a/drivers/crypto/hisilicon/qm.h b/drivers/crypto/hisilicon/qm.h
index acefdf8b3a50..035eaf8c442d 100644
--- a/drivers/crypto/hisilicon/qm.h
+++ b/drivers/crypto/hisilicon/qm.h
@@ -76,6 +76,9 @@
#define QM_Q_DEPTH 1024
#define QM_MIN_QNUM 2
#define HISI_ACC_SGL_SGE_NR_MAX 255
+#define QM_SHAPER_CFG 0x100164
+#define QM_SHAPER_ENABLE BIT(30)
+#define QM_SHAPER_TYPE1_OFFSET 10
/* page number for queue file region */
#define QM_DOORBELL_PAGE_NR 1
@@ -148,6 +151,14 @@ struct qm_debug {
struct debugfs_file files[DEBUG_FILE_NUM];
};
+struct qm_shaper_factor { /* hisi_qm_memory_init() allocates an array of total_vfs + 1 of these as qm->factor */
+ u32 func_qos; /* presumably the QoS value configured for one function - TODO confirm */
+ u64 cir_b; /* NOTE(review): cir_b, cir_u and cir_s look like shaper rate parameters - confirm against the QoS code */
+ u64 cir_u;
+ u64 cir_s;
+ u64 cbs_s; /* NOTE(review): looks like a shaper burst-size parameter - confirm */
+};
+
struct qm_dma {
void *va;
dma_addr_t dma;
@@ -188,6 +199,8 @@ struct hisi_qm_err_ini {
void (*clear_dev_hw_err_status)(struct hisi_qm *qm, u32 err_sts);
void (*open_axi_master_ooo)(struct hisi_qm *qm);
void (*close_axi_master_ooo)(struct hisi_qm *qm);
+ void (*open_sva_prefetch)(struct hisi_qm *qm);
+ void (*close_sva_prefetch)(struct hisi_qm *qm);
void (*log_dev_hw_err)(struct hisi_qm *qm, u32 err_sts);
void (*err_info_init)(struct hisi_qm *qm);
};
@@ -248,6 +261,7 @@ struct hisi_qm {
struct workqueue_struct *wq;
struct work_struct work;
struct work_struct rst_work;
+ struct work_struct cmd_process;
const char *algs;
bool use_sva;
@@ -259,6 +273,9 @@ struct hisi_qm {
resource_size_t db_phys_base;
struct uacce_device *uacce;
int mode;
+ struct qm_shaper_factor *factor;
+ u32 mb_qos;
+ u32 type_rate;
};
struct hisi_qp_status {
diff --git a/drivers/crypto/hisilicon/sec2/sec.h b/drivers/crypto/hisilicon/sec2/sec.h
index dfdce2f21e65..018415b9840a 100644
--- a/drivers/crypto/hisilicon/sec2/sec.h
+++ b/drivers/crypto/hisilicon/sec2/sec.h
@@ -13,14 +13,14 @@ struct sec_alg_res {
dma_addr_t pbuf_dma;
u8 *c_ivin;
dma_addr_t c_ivin_dma;
+ u8 *a_ivin;
+ dma_addr_t a_ivin_dma;
u8 *out_mac;
dma_addr_t out_mac_dma;
};
/* Cipher request of SEC private */
struct sec_cipher_req {
- struct hisi_acc_hw_sgl *c_in;
- dma_addr_t c_in_dma;
struct hisi_acc_hw_sgl *c_out;
dma_addr_t c_out_dma;
u8 *c_ivin;
@@ -33,15 +33,25 @@ struct sec_cipher_req {
struct sec_aead_req {
u8 *out_mac;
dma_addr_t out_mac_dma;
+ u8 *a_ivin;
+ dma_addr_t a_ivin_dma;
struct aead_request *aead_req;
};
/* SEC request of Crypto */
struct sec_req {
- struct sec_sqe sec_sqe;
+ union {
+ struct sec_sqe sec_sqe;
+ struct sec_sqe3 sec_sqe3;
+ };
struct sec_ctx *ctx;
struct sec_qp_ctx *qp_ctx;
+ /**
+ * Common parameter of the SEC request.
+ */
+ struct hisi_acc_hw_sgl *in;
+ dma_addr_t in_dma;
struct sec_cipher_req c_req;
struct sec_aead_req aead_req;
struct list_head backlog_head;
@@ -81,7 +91,9 @@ struct sec_auth_ctx {
u8 a_key_len;
u8 mac_len;
u8 a_alg;
+ bool fallback;
struct crypto_shash *hash_tfm;
+ struct crypto_aead *fallback_aead_tfm;
};
/* SEC cipher context which cipher's relatives */
@@ -94,6 +106,10 @@ struct sec_cipher_ctx {
u8 c_mode;
u8 c_alg;
u8 c_key_len;
+
+ /* add software support */
+ bool fallback;
+ struct crypto_sync_skcipher *fbtfm;
};
/* SEC queue context which defines queue's relatives */
@@ -137,6 +153,7 @@ struct sec_ctx {
bool pbuf_supported;
struct sec_cipher_ctx c_ctx;
struct sec_auth_ctx a_ctx;
+ u8 type_supported;
struct device *dev;
};
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.c b/drivers/crypto/hisilicon/sec2/sec_crypto.c
index 133aede8bf07..6a45bd23b363 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.c
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2019 HiSilicon Limited. */
#include <crypto/aes.h>
+#include <crypto/aead.h>
#include <crypto/algapi.h>
#include <crypto/authenc.h>
#include <crypto/des.h>
@@ -21,6 +22,7 @@
#define SEC_PRIORITY 4001
#define SEC_XTS_MIN_KEY_SIZE (2 * AES_MIN_KEY_SIZE)
+#define SEC_XTS_MID_KEY_SIZE (3 * AES_MIN_KEY_SIZE)
#define SEC_XTS_MAX_KEY_SIZE (2 * AES_MAX_KEY_SIZE)
#define SEC_DES3_2KEY_SIZE (2 * DES_KEY_SIZE)
#define SEC_DES3_3KEY_SIZE (3 * DES_KEY_SIZE)
@@ -37,10 +39,23 @@
#define SEC_AEAD_ALG_OFFSET 11
#define SEC_AUTH_OFFSET 6
+#define SEC_DE_OFFSET_V3 9
+#define SEC_SCENE_OFFSET_V3 5
+#define SEC_CKEY_OFFSET_V3 13
+#define SEC_SRC_SGL_OFFSET_V3 11
+#define SEC_DST_SGL_OFFSET_V3 14
+#define SEC_CALG_OFFSET_V3 4
+#define SEC_AKEY_OFFSET_V3 9
+#define SEC_MAC_OFFSET_V3 4
+#define SEC_AUTH_ALG_OFFSET_V3 15
+#define SEC_CIPHER_AUTH_V3 0xbf
+#define SEC_AUTH_CIPHER_V3 0x40
#define SEC_FLAG_OFFSET 7
#define SEC_FLAG_MASK 0x0780
#define SEC_TYPE_MASK 0x0F
#define SEC_DONE_MASK 0x0001
+#define SEC_ICV_MASK 0x000E
+#define SEC_SQE_LEN_RATE_MASK 0x3
#define SEC_TOTAL_IV_SZ (SEC_IV_SIZE * QM_Q_DEPTH)
#define SEC_SGL_SGE_NR 128
@@ -66,6 +81,25 @@
#define SEC_SQE_CFLAG 2
#define SEC_SQE_AEAD_FLAG 3
#define SEC_SQE_DONE 0x1
+#define SEC_ICV_ERR 0x2
+#define MIN_MAC_LEN 4
+#define MAC_LEN_MASK 0x1U
+#define MAX_INPUT_DATA_LEN 0xFFFE00
+#define BITS_MASK 0xFF
+#define BYTE_BITS 0x8
+#define SEC_XTS_NAME_SZ 0x3
+#define IV_CM_CAL_NUM 2
+#define IV_CL_MASK 0x7
+#define IV_CL_MIN 2
+#define IV_CL_MID 4
+#define IV_CL_MAX 8
+#define IV_FLAGS_OFFSET 0x6
+#define IV_CM_OFFSET 0x3
+#define IV_LAST_BYTE1 1
+#define IV_LAST_BYTE2 2
+#define IV_LAST_BYTE_MASK 0xFF
+#define IV_CTR_INIT 0x1
+#define IV_BYTE_OFFSET 0x8
/* Get an en/de-cipher queue cyclically to balance load over queues of TFM */
static inline int sec_alloc_queue_id(struct sec_ctx *ctx, struct sec_req *req)
@@ -124,22 +158,59 @@ static void sec_free_req_id(struct sec_req *req)
mutex_unlock(&qp_ctx->req_lock);
}
-static int sec_aead_verify(struct sec_req *req)
+static u8 pre_parse_finished_bd(struct bd_status *status, void *resp)
{
- struct aead_request *aead_req = req->aead_req.aead_req;
- struct crypto_aead *tfm = crypto_aead_reqtfm(aead_req);
- size_t authsize = crypto_aead_authsize(tfm);
- u8 *mac_out = req->aead_req.out_mac;
- u8 *mac = mac_out + SEC_MAX_MAC_LEN;
- struct scatterlist *sgl = aead_req->src;
- size_t sz;
+ struct sec_sqe *bd = resp;
+
+ status->done = le16_to_cpu(bd->type2.done_flag) & SEC_DONE_MASK;
+ status->icv = (le16_to_cpu(bd->type2.done_flag) & SEC_ICV_MASK) >> 1;
+ status->flag = (le16_to_cpu(bd->type2.done_flag) &
+ SEC_FLAG_MASK) >> SEC_FLAG_OFFSET;
+ status->tag = le16_to_cpu(bd->type2.tag);
+ status->err_type = bd->type2.error_type;
+
+ return bd->type_cipher_auth & SEC_TYPE_MASK;
+}
+
+/*
+ * Decode the completion fields of a struct sec_sqe3 response into @status.
+ *
+ * Return: the BD type, i.e. bd_param masked with SEC_TYPE_MASK.
+ */
+static u8 pre_parse_finished_bd3(struct bd_status *status, void *resp)
+{
+	struct sec_sqe3 *sqe3 = resp;
+	u16 done_flag = le16_to_cpu(sqe3->done_flag);
+
+	/* done, icv and flag are all bit fields of the same 16-bit word. */
+	status->done = done_flag & SEC_DONE_MASK;
+	status->icv = (done_flag & SEC_ICV_MASK) >> 1;
+	status->flag = (done_flag & SEC_FLAG_MASK) >> SEC_FLAG_OFFSET;
+	status->tag = le64_to_cpu(sqe3->tag);
+	status->err_type = sqe3->error_type;
+
+	return le32_to_cpu(sqe3->bd_param) & SEC_TYPE_MASK;
+}
+
+static int sec_cb_status_check(struct sec_req *req,
+ struct bd_status *status)
+{
+ struct sec_ctx *ctx = req->ctx;
- sz = sg_pcopy_to_buffer(sgl, sg_nents(sgl), mac, authsize,
- aead_req->cryptlen + aead_req->assoclen -
- authsize);
- if (unlikely(sz != authsize || memcmp(mac_out, mac, sz))) {
- dev_err(req->ctx->dev, "aead verify failure!\n");
- return -EBADMSG;
+ if (unlikely(req->err_type || status->done != SEC_SQE_DONE)) {
+ dev_err_ratelimited(ctx->dev, "err_type[%d], done[%u]\n",
+ req->err_type, status->done);
+ return -EIO;
+ }
+
+ if (unlikely(ctx->alg_type == SEC_SKCIPHER)) {
+ if (unlikely(status->flag != SEC_SQE_CFLAG)) {
+ dev_err_ratelimited(ctx->dev, "flag[%u]\n",
+ status->flag);
+ return -EIO;
+ }
+ } else if (unlikely(ctx->alg_type == SEC_AEAD)) {
+ if (unlikely(status->flag != SEC_SQE_AEAD_FLAG ||
+ status->icv == SEC_ICV_ERR)) {
+ dev_err_ratelimited(ctx->dev,
+ "flag[%u], icv[%u]\n",
+ status->flag, status->icv);
+ return -EBADMSG;
+ }
}
return 0;
@@ -149,43 +220,38 @@ static void sec_req_cb(struct hisi_qp *qp, void *resp)
{
struct sec_qp_ctx *qp_ctx = qp->qp_ctx;
struct sec_dfx *dfx = &qp_ctx->ctx->sec->debug.dfx;
- struct sec_sqe *bd = resp;
+ u8 type_supported = qp_ctx->ctx->type_supported;
+ struct bd_status status;
struct sec_ctx *ctx;
struct sec_req *req;
- u16 done, flag;
- int err = 0;
+ int err;
u8 type;
- type = bd->type_cipher_auth & SEC_TYPE_MASK;
- if (unlikely(type != SEC_BD_TYPE2)) {
+ if (type_supported == SEC_BD_TYPE2) {
+ type = pre_parse_finished_bd(&status, resp);
+ req = qp_ctx->req_list[status.tag];
+ } else {
+ type = pre_parse_finished_bd3(&status, resp);
+ req = (void *)(uintptr_t)status.tag;
+ }
+
+ if (unlikely(type != type_supported)) {
atomic64_inc(&dfx->err_bd_cnt);
pr_err("err bd type [%d]\n", type);
return;
}
- req = qp_ctx->req_list[le16_to_cpu(bd->type2.tag)];
if (unlikely(!req)) {
atomic64_inc(&dfx->invalid_req_cnt);
atomic_inc(&qp->qp_status.used);
return;
}
- req->err_type = bd->type2.error_type;
+
+ req->err_type = status.err_type;
ctx = req->ctx;
- done = le16_to_cpu(bd->type2.done_flag) & SEC_DONE_MASK;
- flag = (le16_to_cpu(bd->type2.done_flag) &
- SEC_FLAG_MASK) >> SEC_FLAG_OFFSET;
- if (unlikely(req->err_type || done != SEC_SQE_DONE ||
- (ctx->alg_type == SEC_SKCIPHER && flag != SEC_SQE_CFLAG) ||
- (ctx->alg_type == SEC_AEAD && flag != SEC_SQE_AEAD_FLAG))) {
- dev_err_ratelimited(ctx->dev,
- "err_type[%d],done[%d],flag[%d]\n",
- req->err_type, done, flag);
- err = -EIO;
+ err = sec_cb_status_check(req, &status);
+ if (err)
atomic64_inc(&dfx->done_flag_cnt);
- }
-
- if (ctx->alg_type == SEC_AEAD && !req->c_req.encrypt)
- err = sec_aead_verify(req);
atomic64_inc(&dfx->recv_cnt);
@@ -253,6 +319,30 @@ static void sec_free_civ_resource(struct device *dev, struct sec_alg_res *res)
res->c_ivin, res->c_ivin_dma);
}
+/*
+ * Allocate one coherent buffer of SEC_TOTAL_IV_SZ bytes and hand each of the
+ * QM_Q_DEPTH entries of @res a SEC_IV_SIZE slice of it; entry 0 owns the
+ * start of the buffer.
+ *
+ * Return: 0 on success, -ENOMEM if the allocation fails.
+ */
+static int sec_alloc_aiv_resource(struct device *dev, struct sec_alg_res *res)
+{
+	struct sec_alg_res *slot;
+	int idx;
+
+	res->a_ivin = dma_alloc_coherent(dev, SEC_TOTAL_IV_SZ,
+					 &res->a_ivin_dma, GFP_KERNEL);
+	if (!res->a_ivin)
+		return -ENOMEM;
+
+	for (idx = 1; idx < QM_Q_DEPTH; idx++) {
+		slot = &res[idx];
+		slot->a_ivin_dma = res->a_ivin_dma + idx * SEC_IV_SIZE;
+		slot->a_ivin = res->a_ivin + idx * SEC_IV_SIZE;
+	}
+
+	return 0;
+}
+
+/* Free the buffer allocated by sec_alloc_aiv_resource(), if there is one. */
+static void sec_free_aiv_resource(struct device *dev, struct sec_alg_res *res)
+{
+	if (!res->a_ivin)
+		return;
+
+	dma_free_coherent(dev, SEC_TOTAL_IV_SZ, res->a_ivin, res->a_ivin_dma);
+}
+
static int sec_alloc_mac_resource(struct device *dev, struct sec_alg_res *res)
{
int i;
@@ -335,9 +425,13 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx,
return ret;
if (ctx->alg_type == SEC_AEAD) {
+ ret = sec_alloc_aiv_resource(dev, res);
+ if (ret)
+ goto alloc_aiv_fail;
+
ret = sec_alloc_mac_resource(dev, res);
if (ret)
- goto alloc_fail;
+ goto alloc_mac_fail;
}
if (ctx->pbuf_supported) {
ret = sec_alloc_pbuf_resource(dev, res);
@@ -352,7 +446,10 @@ static int sec_alg_resource_alloc(struct sec_ctx *ctx,
alloc_pbuf_fail:
if (ctx->alg_type == SEC_AEAD)
sec_free_mac_resource(dev, qp_ctx->res);
-alloc_fail:
+alloc_mac_fail:
+ if (ctx->alg_type == SEC_AEAD)
+ sec_free_aiv_resource(dev, res);
+alloc_aiv_fail:
sec_free_civ_resource(dev, res);
return ret;
}
@@ -382,10 +479,11 @@ static int sec_create_qp_ctx(struct hisi_qm *qm, struct sec_ctx *ctx,
qp = ctx->qps[qp_ctx_id];
qp->req_type = 0;
qp->qp_ctx = qp_ctx;
- qp->req_cb = sec_req_cb;
qp_ctx->qp = qp;
qp_ctx->ctx = ctx;
+ qp->req_cb = sec_req_cb;
+
mutex_init(&qp_ctx->req_lock);
idr_init(&qp_ctx->req_idr);
INIT_LIST_HEAD(&qp_ctx->backlog);
@@ -536,6 +634,26 @@ static void sec_auth_uninit(struct sec_ctx *ctx)
a_ctx->a_key, a_ctx->a_key_dma);
}
+/*
+ * Clear the fallback flag and, for algorithms whose name starts with "xts",
+ * allocate a synchronous skcipher to fall back on.
+ *
+ * Return: 0 on success or when no fallback is needed, otherwise the error
+ * from crypto_alloc_sync_skcipher().
+ */
+static int sec_skcipher_fbtfm_init(struct crypto_skcipher *tfm)
+{
+	struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
+	const char *alg_name = crypto_tfm_alg_name(&tfm->base);
+	bool is_xts = !strncmp(alg_name, "xts", SEC_XTS_NAME_SZ);
+
+	c_ctx->fallback = false;
+	if (likely(!is_xts))
+		return 0;
+
+	c_ctx->fbtfm = crypto_alloc_sync_skcipher(alg_name, 0,
+						  CRYPTO_ALG_NEED_FALLBACK);
+	if (!IS_ERR(c_ctx->fbtfm))
+		return 0;
+
+	pr_err("failed to alloc fallback tfm!\n");
+	return PTR_ERR(c_ctx->fbtfm);
+}
+
static int sec_skcipher_init(struct crypto_skcipher *tfm)
{
struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -557,8 +675,14 @@ static int sec_skcipher_init(struct crypto_skcipher *tfm)
if (ret)
goto err_cipher_init;
+ ret = sec_skcipher_fbtfm_init(tfm);
+ if (ret)
+ goto err_fbtfm_init;
+
return 0;
+err_fbtfm_init:
+ sec_cipher_uninit(ctx);
err_cipher_init:
sec_ctx_base_uninit(ctx);
return ret;
@@ -568,6 +692,9 @@ static void sec_skcipher_uninit(struct crypto_skcipher *tfm)
{
struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
+ if (ctx->c_ctx.fbtfm)
+ crypto_free_sync_skcipher(ctx->c_ctx.fbtfm);
+
sec_cipher_uninit(ctx);
sec_ctx_base_uninit(ctx);
}
@@ -607,6 +734,9 @@ static int sec_skcipher_aes_sm4_setkey(struct sec_cipher_ctx *c_ctx,
case SEC_XTS_MIN_KEY_SIZE:
c_ctx->c_key_len = SEC_CKEY_128BIT;
break;
+ case SEC_XTS_MID_KEY_SIZE:
+ c_ctx->fallback = true;
+ break;
case SEC_XTS_MAX_KEY_SIZE:
c_ctx->c_key_len = SEC_CKEY_256BIT;
break;
@@ -615,19 +745,25 @@ static int sec_skcipher_aes_sm4_setkey(struct sec_cipher_ctx *c_ctx,
return -EINVAL;
}
} else {
- switch (keylen) {
- case AES_KEYSIZE_128:
- c_ctx->c_key_len = SEC_CKEY_128BIT;
- break;
- case AES_KEYSIZE_192:
- c_ctx->c_key_len = SEC_CKEY_192BIT;
- break;
- case AES_KEYSIZE_256:
- c_ctx->c_key_len = SEC_CKEY_256BIT;
- break;
- default:
- pr_err("hisi_sec2: aes key error!\n");
+ if (c_ctx->c_alg == SEC_CALG_SM4 &&
+ keylen != AES_KEYSIZE_128) {
+ pr_err("hisi_sec2: sm4 key error!\n");
return -EINVAL;
+ } else {
+ switch (keylen) {
+ case AES_KEYSIZE_128:
+ c_ctx->c_key_len = SEC_CKEY_128BIT;
+ break;
+ case AES_KEYSIZE_192:
+ c_ctx->c_key_len = SEC_CKEY_192BIT;
+ break;
+ case AES_KEYSIZE_256:
+ c_ctx->c_key_len = SEC_CKEY_256BIT;
+ break;
+ default:
+ pr_err("hisi_sec2: aes key error!\n");
+ return -EINVAL;
+ }
}
}
@@ -672,7 +808,13 @@ static int sec_skcipher_setkey(struct crypto_skcipher *tfm, const u8 *key,
}
memcpy(c_ctx->c_key, key, keylen);
-
+ if (c_ctx->fallback) {
+ ret = crypto_sync_skcipher_setkey(c_ctx->fbtfm, key, keylen);
+ if (ret) {
+ dev_err(dev, "failed to set fallback skcipher key!\n");
+ return ret;
+ }
+ }
return 0;
}
@@ -686,22 +828,30 @@ static int sec_setkey_##name(struct crypto_skcipher *tfm, const u8 *key,\
GEN_SEC_SETKEY_FUNC(aes_ecb, SEC_CALG_AES, SEC_CMODE_ECB)
GEN_SEC_SETKEY_FUNC(aes_cbc, SEC_CALG_AES, SEC_CMODE_CBC)
GEN_SEC_SETKEY_FUNC(aes_xts, SEC_CALG_AES, SEC_CMODE_XTS)
-
+GEN_SEC_SETKEY_FUNC(aes_ofb, SEC_CALG_AES, SEC_CMODE_OFB)
+GEN_SEC_SETKEY_FUNC(aes_cfb, SEC_CALG_AES, SEC_CMODE_CFB)
+GEN_SEC_SETKEY_FUNC(aes_ctr, SEC_CALG_AES, SEC_CMODE_CTR)
GEN_SEC_SETKEY_FUNC(3des_ecb, SEC_CALG_3DES, SEC_CMODE_ECB)
GEN_SEC_SETKEY_FUNC(3des_cbc, SEC_CALG_3DES, SEC_CMODE_CBC)
-
GEN_SEC_SETKEY_FUNC(sm4_xts, SEC_CALG_SM4, SEC_CMODE_XTS)
GEN_SEC_SETKEY_FUNC(sm4_cbc, SEC_CALG_SM4, SEC_CMODE_CBC)
+GEN_SEC_SETKEY_FUNC(sm4_ofb, SEC_CALG_SM4, SEC_CMODE_OFB)
+GEN_SEC_SETKEY_FUNC(sm4_cfb, SEC_CALG_SM4, SEC_CMODE_CFB)
+GEN_SEC_SETKEY_FUNC(sm4_ctr, SEC_CALG_SM4, SEC_CMODE_CTR)
static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req,
struct scatterlist *src)
{
- struct aead_request *aead_req = req->aead_req.aead_req;
+ struct sec_aead_req *a_req = &req->aead_req;
+ struct aead_request *aead_req = a_req->aead_req;
struct sec_cipher_req *c_req = &req->c_req;
struct sec_qp_ctx *qp_ctx = req->qp_ctx;
struct device *dev = ctx->dev;
int copy_size, pbuf_length;
int req_id = req->req_id;
+ struct crypto_aead *tfm;
+ size_t authsize;
+ u8 *mac_offset;
if (ctx->alg_type == SEC_AEAD)
copy_size = aead_req->cryptlen + aead_req->assoclen;
@@ -709,15 +859,20 @@ static int sec_cipher_pbuf_map(struct sec_ctx *ctx, struct sec_req *req,
copy_size = c_req->c_len;
pbuf_length = sg_copy_to_buffer(src, sg_nents(src),
- qp_ctx->res[req_id].pbuf,
- copy_size);
+ qp_ctx->res[req_id].pbuf, copy_size);
if (unlikely(pbuf_length != copy_size)) {
dev_err(dev, "copy src data to pbuf error!\n");
return -EINVAL;
}
+ if (!c_req->encrypt && ctx->alg_type == SEC_AEAD) {
+ tfm = crypto_aead_reqtfm(aead_req);
+ authsize = crypto_aead_authsize(tfm);
+ mac_offset = qp_ctx->res[req_id].pbuf + copy_size - authsize;
+ memcpy(a_req->out_mac, mac_offset, authsize);
+ }
- c_req->c_in_dma = qp_ctx->res[req_id].pbuf_dma;
- c_req->c_out_dma = c_req->c_in_dma;
+ req->in_dma = qp_ctx->res[req_id].pbuf_dma;
+ c_req->c_out_dma = req->in_dma;
return 0;
}
@@ -728,7 +883,6 @@ static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req,
struct aead_request *aead_req = req->aead_req.aead_req;
struct sec_cipher_req *c_req = &req->c_req;
struct sec_qp_ctx *qp_ctx = req->qp_ctx;
- struct device *dev = ctx->dev;
int copy_size, pbuf_length;
int req_id = req->req_id;
@@ -738,10 +892,29 @@ static void sec_cipher_pbuf_unmap(struct sec_ctx *ctx, struct sec_req *req,
copy_size = c_req->c_len;
pbuf_length = sg_copy_from_buffer(dst, sg_nents(dst),
- qp_ctx->res[req_id].pbuf,
- copy_size);
+ qp_ctx->res[req_id].pbuf, copy_size);
if (unlikely(pbuf_length != copy_size))
- dev_err(dev, "copy pbuf data to dst error!\n");
+ dev_err(ctx->dev, "copy pbuf data to dst error!\n");
+}
+
+/*
+ * Copy the MAC found at the tail of the source scatterlist (the last
+ * authsize bytes of assoclen + cryptlen) into req->out_mac.
+ *
+ * Return: 0 on success, -EINVAL if fewer than authsize bytes were copied.
+ */
+static int sec_aead_mac_init(struct sec_aead_req *req)
+{
+	struct aead_request *areq = req->aead_req;
+	struct scatterlist *src = areq->src;
+	size_t mac_len = crypto_aead_authsize(crypto_aead_reqtfm(areq));
+	off_t mac_pos = areq->assoclen + areq->cryptlen - mac_len;
+	size_t copied;
+
+	/* Copy input mac */
+	copied = sg_pcopy_to_buffer(src, sg_nents(src), req->out_mac,
+				    mac_len, mac_pos);
+
+	return unlikely(copied != mac_len) ? -EINVAL : 0;
}
static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
@@ -755,37 +928,48 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
int ret;
if (req->use_pbuf) {
- ret = sec_cipher_pbuf_map(ctx, req, src);
c_req->c_ivin = res->pbuf + SEC_PBUF_IV_OFFSET;
c_req->c_ivin_dma = res->pbuf_dma + SEC_PBUF_IV_OFFSET;
if (ctx->alg_type == SEC_AEAD) {
+ a_req->a_ivin = res->a_ivin;
+ a_req->a_ivin_dma = res->a_ivin_dma;
a_req->out_mac = res->pbuf + SEC_PBUF_MAC_OFFSET;
a_req->out_mac_dma = res->pbuf_dma +
SEC_PBUF_MAC_OFFSET;
}
+ ret = sec_cipher_pbuf_map(ctx, req, src);
return ret;
}
c_req->c_ivin = res->c_ivin;
c_req->c_ivin_dma = res->c_ivin_dma;
if (ctx->alg_type == SEC_AEAD) {
+ a_req->a_ivin = res->a_ivin;
+ a_req->a_ivin_dma = res->a_ivin_dma;
a_req->out_mac = res->out_mac;
a_req->out_mac_dma = res->out_mac_dma;
}
- c_req->c_in = hisi_acc_sg_buf_map_to_hw_sgl(dev, src,
- qp_ctx->c_in_pool,
- req->req_id,
- &c_req->c_in_dma);
-
- if (IS_ERR(c_req->c_in)) {
+ req->in = hisi_acc_sg_buf_map_to_hw_sgl(dev, src,
+ qp_ctx->c_in_pool,
+ req->req_id,
+ &req->in_dma);
+ if (IS_ERR(req->in)) {
dev_err(dev, "fail to dma map input sgl buffers!\n");
- return PTR_ERR(c_req->c_in);
+ return PTR_ERR(req->in);
+ }
+
+ if (!c_req->encrypt && ctx->alg_type == SEC_AEAD) {
+ ret = sec_aead_mac_init(a_req);
+ if (unlikely(ret)) {
+ dev_err(dev, "fail to init mac data for ICV!\n");
+ return ret;
+ }
}
if (dst == src) {
- c_req->c_out = c_req->c_in;
- c_req->c_out_dma = c_req->c_in_dma;
+ c_req->c_out = req->in;
+ c_req->c_out_dma = req->in_dma;
} else {
c_req->c_out = hisi_acc_sg_buf_map_to_hw_sgl(dev, dst,
qp_ctx->c_out_pool,
@@ -794,7 +978,7 @@ static int sec_cipher_map(struct sec_ctx *ctx, struct sec_req *req,
if (IS_ERR(c_req->c_out)) {
dev_err(dev, "fail to dma map output sgl buffers!\n");
- hisi_acc_sg_buf_unmap(dev, src, c_req->c_in);
+ hisi_acc_sg_buf_unmap(dev, src, req->in);
return PTR_ERR(c_req->c_out);
}
}
@@ -812,7 +996,7 @@ static void sec_cipher_unmap(struct sec_ctx *ctx, struct sec_req *req,
sec_cipher_pbuf_unmap(ctx, req, dst);
} else {
if (dst != src)
- hisi_acc_sg_buf_unmap(dev, src, c_req->c_in);
+ hisi_acc_sg_buf_unmap(dev, src, req->in);
hisi_acc_sg_buf_unmap(dev, dst, c_req->c_out);
}
@@ -883,6 +1067,28 @@ static int sec_aead_auth_set_key(struct sec_auth_ctx *ctx,
return 0;
}
+/*
+ * Forward the authsize to the fallback AEAD tfm when one is present;
+ * otherwise there is nothing to do and 0 is returned.
+ */
+static int sec_aead_setauthsize(struct crypto_aead *aead, unsigned int authsize)
+{
+	struct sec_ctx *ctx = crypto_aead_ctx(aead);
+	struct crypto_aead *fallback = ctx->a_ctx.fallback_aead_tfm;
+
+	if (likely(!fallback))
+		return 0;
+
+	return crypto_aead_setauthsize(fallback, authsize);
+}
+
+/*
+ * Mirror the request flags of @tfm onto the fallback AEAD tfm and set the
+ * key on it.
+ *
+ * Return: the result of crypto_aead_setkey() on the fallback tfm.
+ */
+static int sec_aead_fallback_setkey(struct sec_auth_ctx *a_ctx,
+				    struct crypto_aead *tfm, const u8 *key,
+				    unsigned int keylen)
+{
+	struct crypto_aead *fallback = a_ctx->fallback_aead_tfm;
+	u32 req_flags = crypto_aead_get_flags(tfm) & CRYPTO_TFM_REQ_MASK;
+
+	crypto_aead_clear_flags(fallback, CRYPTO_TFM_REQ_MASK);
+	crypto_aead_set_flags(fallback, req_flags);
+
+	return crypto_aead_setkey(fallback, key, keylen);
+}
+
static int sec_aead_setkey(struct crypto_aead *tfm, const u8 *key,
const u32 keylen, const enum sec_hash_alg a_alg,
const enum sec_calg c_alg,
@@ -891,6 +1097,7 @@ static int sec_aead_setkey(struct crypto_aead *tfm, const u8 *key,
{
struct sec_ctx *ctx = crypto_aead_ctx(tfm);
struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
+ struct sec_auth_ctx *a_ctx = &ctx->a_ctx;
struct device *dev = ctx->dev;
struct crypto_authenc_keys keys;
int ret;
@@ -900,6 +1107,23 @@ static int sec_aead_setkey(struct crypto_aead *tfm, const u8 *key,
ctx->a_ctx.mac_len = mac_len;
c_ctx->c_mode = c_mode;
+ if (c_mode == SEC_CMODE_CCM || c_mode == SEC_CMODE_GCM) {
+ ret = sec_skcipher_aes_sm4_setkey(c_ctx, keylen, c_mode);
+ if (ret) {
+ dev_err(dev, "set sec aes ccm cipher key err!\n");
+ return ret;
+ }
+ memcpy(c_ctx->c_key, key, keylen);
+
+ if (unlikely(a_ctx->fallback_aead_tfm)) {
+ ret = sec_aead_fallback_setkey(a_ctx, tfm, key, keylen);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+ }
+
if (crypto_authenc_extractkeys(&keys, key, keylen))
goto bad_key;
@@ -915,6 +1139,12 @@ static int sec_aead_setkey(struct crypto_aead *tfm, const u8 *key,
goto bad_key;
}
+ if ((ctx->a_ctx.mac_len & SEC_SQE_LEN_RATE_MASK) ||
+ (ctx->a_ctx.a_key_len & SEC_SQE_LEN_RATE_MASK)) {
+ dev_err(dev, "MAC or AUTH key length error!\n");
+ goto bad_key;
+ }
+
return 0;
bad_key:
@@ -936,6 +1166,14 @@ GEN_SEC_AEAD_SETKEY_FUNC(aes_cbc_sha256, SEC_A_HMAC_SHA256,
SEC_CALG_AES, SEC_HMAC_SHA256_MAC, SEC_CMODE_CBC)
GEN_SEC_AEAD_SETKEY_FUNC(aes_cbc_sha512, SEC_A_HMAC_SHA512,
SEC_CALG_AES, SEC_HMAC_SHA512_MAC, SEC_CMODE_CBC)
+GEN_SEC_AEAD_SETKEY_FUNC(aes_ccm, 0, SEC_CALG_AES,
+ SEC_HMAC_CCM_MAC, SEC_CMODE_CCM)
+GEN_SEC_AEAD_SETKEY_FUNC(aes_gcm, 0, SEC_CALG_AES,
+ SEC_HMAC_GCM_MAC, SEC_CMODE_GCM)
+GEN_SEC_AEAD_SETKEY_FUNC(sm4_ccm, 0, SEC_CALG_SM4,
+ SEC_HMAC_CCM_MAC, SEC_CMODE_CCM)
+GEN_SEC_AEAD_SETKEY_FUNC(sm4_gcm, 0, SEC_CALG_SM4,
+ SEC_HMAC_GCM_MAC, SEC_CMODE_GCM)
static int sec_aead_sgl_map(struct sec_ctx *ctx, struct sec_req *req)
{
@@ -998,7 +1236,7 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
sec_sqe->type2.c_key_addr = cpu_to_le64(c_ctx->c_key_dma);
sec_sqe->type2.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma);
- sec_sqe->type2.data_src_addr = cpu_to_le64(c_req->c_in_dma);
+ sec_sqe->type2.data_src_addr = cpu_to_le64(req->in_dma);
sec_sqe->type2.data_dst_addr = cpu_to_le64(c_req->c_out_dma);
sec_sqe->type2.icvw_kmode |= cpu_to_le16(((u16)c_ctx->c_mode) <<
@@ -1014,29 +1252,86 @@ static int sec_skcipher_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
cipher = SEC_CIPHER_DEC << SEC_CIPHER_OFFSET;
sec_sqe->type_cipher_auth = bd_type | cipher;
- if (req->use_pbuf)
+ /* Set destination and source address type */
+ if (req->use_pbuf) {
sa_type = SEC_PBUF << SEC_SRC_SGL_OFFSET;
- else
+ da_type = SEC_PBUF << SEC_DST_SGL_OFFSET;
+ } else {
sa_type = SEC_SGL << SEC_SRC_SGL_OFFSET;
+ da_type = SEC_SGL << SEC_DST_SGL_OFFSET;
+ }
+
+ sec_sqe->sdm_addr_type |= da_type;
scene = SEC_COMM_SCENE << SEC_SCENE_OFFSET;
- if (c_req->c_in_dma != c_req->c_out_dma)
+ if (req->in_dma != c_req->c_out_dma)
de = 0x1 << SEC_DE_OFFSET;
sec_sqe->sds_sa_type = (de | scene | sa_type);
- /* Just set DST address type */
- if (req->use_pbuf)
- da_type = SEC_PBUF << SEC_DST_SGL_OFFSET;
- else
- da_type = SEC_SGL << SEC_DST_SGL_OFFSET;
- sec_sqe->sdm_addr_type |= da_type;
-
sec_sqe->type2.clen_ivhlen |= cpu_to_le32(c_req->c_len);
sec_sqe->type2.tag = cpu_to_le16((u16)req->req_id);
return 0;
}
+/*
+ * Fill a type-3 BD (struct sec_sqe3) for the cipher part of a request.
+ *
+ * The descriptor embedded in @req is zeroed and then loaded with the key,
+ * IV, source and destination DMA addresses, the cipher algorithm, mode and
+ * key length, the direction, the buffer address types and the cipher length.
+ * The address of @req itself is stored in the tag field.
+ *
+ * Always returns 0.
+ */
+static int sec_skcipher_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req)
+{
+	struct sec_sqe3 *sec_sqe3 = &req->sec_sqe3;
+	struct sec_cipher_ctx *c_ctx = &ctx->c_ctx;
+	struct sec_cipher_req *c_req = &req->c_req;
+	u32 bd_param = 0;
+	u16 cipher;
+
+	memset(sec_sqe3, 0, sizeof(struct sec_sqe3));
+
+	sec_sqe3->c_key_addr = cpu_to_le64(c_ctx->c_key_dma);
+	sec_sqe3->no_scene.c_ivin_addr = cpu_to_le64(c_req->c_ivin_dma);
+	sec_sqe3->data_src_addr = cpu_to_le64(req->in_dma);
+	sec_sqe3->data_dst_addr = cpu_to_le64(c_req->c_out_dma);
+
+	sec_sqe3->c_mode_alg = ((u8)c_ctx->c_alg << SEC_CALG_OFFSET_V3) |
+			       c_ctx->c_mode;
+	sec_sqe3->c_icv_key |= cpu_to_le16(((u16)c_ctx->c_key_len) <<
+					   SEC_CKEY_OFFSET_V3);
+
+	if (c_req->encrypt)
+		cipher = SEC_CIPHER_ENC;
+	else
+		cipher = SEC_CIPHER_DEC;
+	sec_sqe3->c_icv_key |= cpu_to_le16(cipher);
+
+	/* Source and destination always use the same address type */
+	if (req->use_pbuf) {
+		bd_param |= SEC_PBUF << SEC_SRC_SGL_OFFSET_V3;
+		bd_param |= SEC_PBUF << SEC_DST_SGL_OFFSET_V3;
+	} else {
+		bd_param |= SEC_SGL << SEC_SRC_SGL_OFFSET_V3;
+		bd_param |= SEC_SGL << SEC_DST_SGL_OFFSET_V3;
+	}
+
+	bd_param |= SEC_COMM_SCENE << SEC_SCENE_OFFSET_V3;
+	/* Set the DE bit only when input and output buffers differ */
+	if (req->in_dma != c_req->c_out_dma)
+		bd_param |= 0x1 << SEC_DE_OFFSET_V3;
+
+	bd_param |= SEC_BD_TYPE3;
+	sec_sqe3->bd_param = cpu_to_le32(bd_param);
+
+	sec_sqe3->c_len_ivin |= cpu_to_le32(c_req->c_len);
+
+	/*
+	 * cpu_to_le64() expects an integer operand, so convert the request
+	 * pointer through unsigned long instead of handing the byte-order
+	 * helper a raw pointer.
+	 */
+	sec_sqe3->tag = cpu_to_le64((unsigned long)req);
+
+	return 0;
+}
+
+/*
+ * Add @nums to a big-endian counter held in @counter.
+ *
+ * @counter: counter bytes, most significant byte first
+ * @bits: length of @counter in bytes (despite the name)
+ * @nums: value to add
+ *
+ * The addition starts at the last byte and carries towards byte 0; a carry
+ * out of byte 0 is dropped. A zero-length counter is left untouched.
+ */
+static void ctr_iv_inc(__u8 *counter, __u8 bits, __u32 nums)
+{
+	/*
+	 * Test before decrementing: with bits == 0 a do/while would wrap the
+	 * index to 255 and write outside the buffer. Skipping the pass when
+	 * nums == 0 changes nothing, because adding zero stores each byte
+	 * back unchanged (assuming BITS_MASK keeps a whole byte).
+	 */
+	while (bits && nums) {
+		--bits;
+		nums += counter[bits];
+		counter[bits] = nums & BITS_MASK;
+		nums >>= BYTE_BITS;
+	}
+}
+
static void sec_update_iv(struct sec_req *req, enum sec_alg_type alg_type)
{
struct aead_request *aead_req = req->aead_req.aead_req;
@@ -1060,10 +1355,17 @@ static void sec_update_iv(struct sec_req *req, enum sec_alg_type alg_type)
cryptlen = aead_req->cryptlen;
}
- sz = sg_pcopy_to_buffer(sgl, sg_nents(sgl), iv, iv_size,
- cryptlen - iv_size);
- if (unlikely(sz != iv_size))
- dev_err(req->ctx->dev, "copy output iv error!\n");
+ if (req->ctx->c_ctx.c_mode == SEC_CMODE_CBC) {
+ sz = sg_pcopy_to_buffer(sgl, sg_nents(sgl), iv, iv_size,
+ cryptlen - iv_size);
+ if (unlikely(sz != iv_size))
+ dev_err(req->ctx->dev, "copy output iv error!\n");
+ } else {
+ sz = cryptlen / iv_size;
+ if (cryptlen % iv_size)
+ sz += 1;
+ ctr_iv_inc(iv, iv_size, sz);
+ }
}
static struct sec_req *sec_back_req_clear(struct sec_ctx *ctx,
@@ -1094,8 +1396,9 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req,
sec_free_req_id(req);
- /* IV output at encrypto of CBC mode */
- if (!err && ctx->c_ctx.c_mode == SEC_CMODE_CBC && req->c_req.encrypt)
+ /* IV output at encrypto of CBC/CTR mode */
+ if (!err && (ctx->c_ctx.c_mode == SEC_CMODE_CBC ||
+ ctx->c_ctx.c_mode == SEC_CMODE_CTR) && req->c_req.encrypt)
sec_update_iv(req, SEC_SKCIPHER);
while (1) {
@@ -1112,12 +1415,125 @@ static void sec_skcipher_callback(struct sec_ctx *ctx, struct sec_req *req,
sk_req->base.complete(&sk_req->base, err);
}
-static void sec_aead_copy_iv(struct sec_ctx *ctx, struct sec_req *req)
+/*
+ * Prepare the CCM cipher IV and derive the CCM auth IV from it.
+ *
+ * On entry c_req->c_ivin holds the IV copied from the request. The trailing
+ * counter field of the cipher IV is cleared and its last byte is set to
+ * IV_CTR_INIT. The auth IV is a copy of the cipher IV whose first byte is
+ * replaced by the flags byte and whose last two bytes receive the low
+ * 16 bits of the payload length.
+ */
+static void set_aead_auth_iv(struct sec_ctx *ctx, struct sec_req *req)
+{
+	struct aead_request *aead_req = req->aead_req.aead_req;
+	struct sec_cipher_req *c_req = &req->c_req;
+	struct sec_aead_req *a_req = &req->aead_req;
+	size_t authsize = ctx->a_ctx.mac_len;
+	u32 data_size = aead_req->cryptlen;
+	u8 flags;
+	u8 cm, cl;
+
+	/* the specification has been checked in aead_iv_demension_check() */
+	cl = c_req->c_ivin[0] + 1;
+	/* clear the whole counter field, then seed its last byte */
+	memset(&c_req->c_ivin[ctx->c_ctx.ivsize - cl], 0, cl);
+	c_req->c_ivin[ctx->c_ctx.ivsize - IV_LAST_BYTE1] = IV_CTR_INIT;
+
+	/* the last 3 bits are L' */
+	flags = c_req->c_ivin[0] & IV_CL_MASK;
+
+	/* M' occupies bit3~bit5, the AAD flag is bit6 */
+	cm = (authsize - IV_CM_CAL_NUM) / IV_CM_CAL_NUM;
+	flags |= cm << IV_CM_OFFSET;
+	if (aead_req->assoclen)
+		flags |= 0x01 << IV_FLAGS_OFFSET;
+
+	memcpy(a_req->a_ivin, c_req->c_ivin, ctx->c_ctx.ivsize);
+	a_req->a_ivin[0] = flags;
+
+	/*
+	 * the last 32 bits are the counter's initial value,
+	 * but the nonce uses the first 16 bits;
+	 * the tail 16 bits are filled with the cipher length
+	 */
+	if (!c_req->encrypt)
+		data_size = aead_req->cryptlen - authsize;
+
+	a_req->a_ivin[ctx->c_ctx.ivsize - IV_LAST_BYTE1] =
+		data_size & IV_LAST_BYTE_MASK;
+	data_size >>= IV_BYTE_OFFSET;
+	a_req->a_ivin[ctx->c_ctx.ivsize - IV_LAST_BYTE2] =
+		data_size & IV_LAST_BYTE_MASK;
+}
+
+/*
+ * Copy the request IV into the cipher IV buffer and, for CCM and GCM,
+ * prepare the matching auth IV.
+ *
+ * For both CCM and GCM the tfm's current authsize is stored in
+ * ctx->a_ctx.mac_len before the auth IV is built.
+ * NOTE(review): mac_len lives in the context rather than in the request;
+ * if one context can serve concurrent requests this is an unsynchronized
+ * write - confirm.
+ */
+static void sec_aead_set_iv(struct sec_ctx *ctx, struct sec_req *req)
{
struct aead_request *aead_req = req->aead_req.aead_req;
+ struct crypto_aead *tfm = crypto_aead_reqtfm(aead_req);
+ size_t authsize = crypto_aead_authsize(tfm);
struct sec_cipher_req *c_req = &req->c_req;
+ struct sec_aead_req *a_req = &req->aead_req;
memcpy(c_req->c_ivin, aead_req->iv, ctx->c_ctx.ivsize);
+
+ if (ctx->c_ctx.c_mode == SEC_CMODE_CCM) {
+ /*
+ * CCM 16-byte Cipher_IV: {1B_Flag,13B_IV,2B_counter},
+ * the counter must be set to 0x01
+ */
+ ctx->a_ctx.mac_len = authsize;
+ /* CCM 16-byte Auth_IV: {1B_AFlag,13B_IV,2B_Ptext_length} */
+ set_aead_auth_iv(ctx, req);
+ }
+
+ /* GCM 12-byte Cipher_IV == Auth_IV */
+ if (ctx->c_ctx.c_mode == SEC_CMODE_GCM) {
+ ctx->a_ctx.mac_len = authsize;
+ memcpy(a_req->a_ivin, c_req->c_ivin, SEC_AIV_SIZE);
+ }
+}
+
+/*
+ * Fill the authentication fields of a type-2 BD for CCM/GCM requests.
+ *
+ * @ctx: auth context; only mac_len is read
+ * @dir: non-zero selects the SEC_CIPHER_AUTH sequence, zero SEC_AUTH_CIPHER
+ * @req: request providing the auth IV and MAC DMA addresses
+ * @sec_sqe: descriptor to update; its cipher key address must already be
+ *           set because it is reused as the auth key address
+ */
+static void sec_auth_bd_fill_xcm(struct sec_auth_ctx *ctx, int dir,
+				 struct sec_req *req, struct sec_sqe *sec_sqe)
+{
+	struct sec_sqe_type2 *bd2 = &sec_sqe->type2;
+	struct sec_aead_req *aead = &req->aead_req;
+	unsigned int aad_len = aead->aead_req->assoclen;
+
+	/* C_ICV_Len is MAC size, 0x4 ~ 0x10 */
+	bd2->icvw_kmode |= cpu_to_le16((u16)ctx->mac_len);
+
+	/* mode set to CCM/GCM, don't set {A_Alg, AKey_Len, MAC_Len} */
+	bd2->a_key_addr = bd2->c_key_addr;
+	bd2->a_ivin_addr = cpu_to_le64(aead->a_ivin_dma);
+	sec_sqe->type_cipher_auth |= SEC_NO_AUTH << SEC_AUTH_OFFSET;
+
+	if (!dir)
+		sec_sqe->sds_sa_type |= SEC_AUTH_CIPHER;
+	else
+		sec_sqe->sds_sa_type &= SEC_CIPHER_AUTH;
+
+	/* the AAD sits at offset 0, the cipher data right behind it */
+	bd2->alen_ivllen = cpu_to_le32(aad_len);
+	bd2->auth_src_offset = cpu_to_le16(0x0);
+	bd2->cipher_src_offset = cpu_to_le16((u16)aad_len);
+
+	bd2->mac_addr = cpu_to_le64(aead->out_mac_dma);
+}
+
+/*
+ * Fill the authentication fields of a type-3 BD for CCM/GCM requests.
+ *
+ * @ctx: auth context; only mac_len is read here
+ * @dir: non-zero selects the SEC_CIPHER_AUTH_V3 sequence, zero the
+ * SEC_AUTH_CIPHER_V3 one (sec_aead_bd_fill_v3() passes c_req.encrypt)
+ * @req: request providing the auth IV and MAC DMA addresses
+ * @sqe3: descriptor to update; its cipher key address must already be set
+ * because it is reused as the auth key address
+ */
+static void sec_auth_bd_fill_xcm_v3(struct sec_auth_ctx *ctx, int dir,
+ struct sec_req *req, struct sec_sqe3 *sqe3)
+{
+ struct sec_aead_req *a_req = &req->aead_req;
+ struct aead_request *aq = a_req->aead_req;
+
+ /* C_ICV_Len is MAC size, 0x4 ~ 0x10 */
+ sqe3->c_icv_key |= cpu_to_le16((u16)ctx->mac_len << SEC_MAC_OFFSET_V3);
+
+ /* mode set to CCM/GCM, don't set {A_Alg, AKey_Len, MAC_Len} */
+ sqe3->a_key_addr = sqe3->c_key_addr;
+ sqe3->auth_ivin.a_ivin_addr = cpu_to_le64(a_req->a_ivin_dma);
+ sqe3->auth_mac_key |= SEC_NO_AUTH;
+
+ if (dir)
+ sqe3->huk_iv_seq &= SEC_CIPHER_AUTH_V3;
+ else
+ sqe3->huk_iv_seq |= SEC_AUTH_CIPHER_V3;
+
+ /* auth length is the AAD length; cipher data starts right after it */
+ sqe3->a_len_key = cpu_to_le32(aq->assoclen);
+ sqe3->auth_src_offset = cpu_to_le16(0x0);
+ sqe3->cipher_src_offset = cpu_to_le16((u16)aq->assoclen);
+ sqe3->mac_addr = cpu_to_le64(a_req->out_mac_dma);
}
static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir,
@@ -1139,13 +1555,13 @@ static void sec_auth_bd_fill_ex(struct sec_auth_ctx *ctx, int dir,
sec_sqe->type2.mac_key_alg |=
cpu_to_le32((u32)(ctx->a_alg) << SEC_AEAD_ALG_OFFSET);
- sec_sqe->type_cipher_auth |= SEC_AUTH_TYPE1 << SEC_AUTH_OFFSET;
-
- if (dir)
+ if (dir) {
+ sec_sqe->type_cipher_auth |= SEC_AUTH_TYPE1 << SEC_AUTH_OFFSET;
sec_sqe->sds_sa_type &= SEC_CIPHER_AUTH;
- else
+ } else {
+ sec_sqe->type_cipher_auth |= SEC_AUTH_TYPE2 << SEC_AUTH_OFFSET;
sec_sqe->sds_sa_type |= SEC_AUTH_CIPHER;
-
+ }
sec_sqe->type2.alen_ivllen = cpu_to_le32(c_req->c_len + aq->assoclen);
sec_sqe->type2.cipher_src_offset = cpu_to_le16((u16)aq->assoclen);
@@ -1165,7 +1581,68 @@ static int sec_aead_bd_fill(struct sec_ctx *ctx, struct sec_req *req)
return ret;
}
- sec_auth_bd_fill_ex(auth_ctx, req->c_req.encrypt, req, sec_sqe);
+ if (ctx->c_ctx.c_mode == SEC_CMODE_CCM ||
+ ctx->c_ctx.c_mode == SEC_CMODE_GCM)
+ sec_auth_bd_fill_xcm(auth_ctx, req->c_req.encrypt, req, sec_sqe);
+ else
+ sec_auth_bd_fill_ex(auth_ctx, req->c_req.encrypt, req, sec_sqe);
+
+ return 0;
+}
+
+/*
+ * Fill the authentication fields of a type-3 BD for the non-CCM/GCM AEAD
+ * modes (sec_aead_bd_fill_v3() uses it for everything else).
+ *
+ * @ctx: auth context providing the key DMA address, MAC length, key length
+ *       and auth algorithm
+ * @dir: non-zero for encryption, zero for decryption (the caller passes
+ *       req->c_req.encrypt)
+ * @req: request providing the cipher length, AAD length and MAC address
+ * @sqe3: descriptor to update
+ */
+static void sec_auth_bd_fill_ex_v3(struct sec_auth_ctx *ctx, int dir,
+				   struct sec_req *req, struct sec_sqe3 *sqe3)
+{
+	struct sec_aead_req *a_req = &req->aead_req;
+	struct sec_cipher_req *c_req = &req->c_req;
+	struct aead_request *aq = a_req->aead_req;
+
+	sqe3->a_key_addr = cpu_to_le64(ctx->a_key_dma);
+
+	/* MAC and key lengths are programmed in units of SEC_SQE_LEN_RATE */
+	sqe3->auth_mac_key |=
+		cpu_to_le32((u32)(ctx->mac_len /
+		SEC_SQE_LEN_RATE) << SEC_MAC_OFFSET_V3);
+
+	sqe3->auth_mac_key |=
+		cpu_to_le32((u32)(ctx->a_key_len /
+		SEC_SQE_LEN_RATE) << SEC_AKEY_OFFSET_V3);
+
+	sqe3->auth_mac_key |=
+		cpu_to_le32((u32)(ctx->a_alg) << SEC_AUTH_ALG_OFFSET_V3);
+
+	/*
+	 * Mirror the type-2 path (sec_auth_bd_fill_ex): auth type 1 when
+	 * encrypting, auth type 2 when decrypting. Using type 1 in both
+	 * directions leaves the decrypt path on the wrong auth type.
+	 */
+	if (dir) {
+		sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE1);
+		sqe3->huk_iv_seq &= SEC_CIPHER_AUTH_V3;
+	} else {
+		sqe3->auth_mac_key |= cpu_to_le32((u32)SEC_AUTH_TYPE2);
+		sqe3->huk_iv_seq |= SEC_AUTH_CIPHER_V3;
+	}
+
+	/* authentication covers the AAD followed by the cipher data */
+	sqe3->a_len_key = cpu_to_le32(c_req->c_len + aq->assoclen);
+
+	sqe3->cipher_src_offset = cpu_to_le16((u16)aq->assoclen);
+
+	sqe3->mac_addr = cpu_to_le64(a_req->out_mac_dma);
+}
+
+/*
+ * Fill a type-3 BD for an AEAD request.
+ *
+ * The cipher fields are filled first by sec_skcipher_bd_fill_v3(); the
+ * auth fields are then added by the CCM/GCM helper or by the generic
+ * helper, depending on the cipher mode.
+ *
+ * Returns 0 on success or the error reported by the cipher fill step.
+ */
+static int sec_aead_bd_fill_v3(struct sec_ctx *ctx, struct sec_req *req)
+{
+ struct sec_auth_ctx *auth_ctx = &ctx->a_ctx;
+ struct sec_sqe3 *sec_sqe3 = &req->sec_sqe3;
+ int ret;
+
+ ret = sec_skcipher_bd_fill_v3(ctx, req);
+ if (unlikely(ret)) {
+ dev_err(ctx->dev, "skcipher bd3 fill is error!\n");
+ return ret;
+ }
+
+ /* the request direction (c_req.encrypt) is passed on as "dir" */
+ if (ctx->c_ctx.c_mode == SEC_CMODE_CCM ||
+ ctx->c_ctx.c_mode == SEC_CMODE_GCM)
+ sec_auth_bd_fill_xcm_v3(auth_ctx, req->c_req.encrypt,
+ req, sec_sqe3);
+ else
+ sec_auth_bd_fill_ex_v3(auth_ctx, req->c_req.encrypt,
+ req, sec_sqe3);
return 0;
}
@@ -1254,7 +1731,8 @@ static int sec_process(struct sec_ctx *ctx, struct sec_req *req)
goto err_uninit_req;
/* Output IV as decrypto */
- if (ctx->c_ctx.c_mode == SEC_CMODE_CBC && !req->c_req.encrypt)
+ if (!req->c_req.encrypt && (ctx->c_ctx.c_mode == SEC_CMODE_CBC ||
+ ctx->c_ctx.c_mode == SEC_CMODE_CTR))
sec_update_iv(req, ctx->alg_type);
ret = ctx->req_op->bd_send(ctx, req);
@@ -1296,20 +1774,51 @@ static const struct sec_req_op sec_skcipher_req_ops = {
static const struct sec_req_op sec_aead_req_ops = {
.buf_map = sec_aead_sgl_map,
.buf_unmap = sec_aead_sgl_unmap,
- .do_transfer = sec_aead_copy_iv,
+ .do_transfer = sec_aead_set_iv,
.bd_fill = sec_aead_bd_fill,
.bd_send = sec_bd_send,
.callback = sec_aead_callback,
.process = sec_process,
};
+/*
+ * Request operations for skcipher requests using type-3 BDs.
+ * sec_skcipher_ctx_init() installs this table when qm.ver is not below
+ * QM_HW_V3. Only .bd_fill differs from sec_skcipher_req_ops.
+ */
+static const struct sec_req_op sec_skcipher_req_ops_v3 = {
+ .buf_map = sec_skcipher_sgl_map,
+ .buf_unmap = sec_skcipher_sgl_unmap,
+ .do_transfer = sec_skcipher_copy_iv,
+ .bd_fill = sec_skcipher_bd_fill_v3,
+ .bd_send = sec_bd_send,
+ .callback = sec_skcipher_callback,
+ .process = sec_process,
+};
+
+/*
+ * Request operations for AEAD requests using type-3 BDs.
+ * sec_aead_init() installs this table when qm.ver is not below QM_HW_V3.
+ * Only .bd_fill differs from sec_aead_req_ops.
+ */
+static const struct sec_req_op sec_aead_req_ops_v3 = {
+ .buf_map = sec_aead_sgl_map,
+ .buf_unmap = sec_aead_sgl_unmap,
+ .do_transfer = sec_aead_set_iv,
+ .bd_fill = sec_aead_bd_fill_v3,
+ .bd_send = sec_bd_send,
+ .callback = sec_aead_callback,
+ .process = sec_process,
+};
+
static int sec_skcipher_ctx_init(struct crypto_skcipher *tfm)
{
struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
+ int ret;
- ctx->req_op = &sec_skcipher_req_ops;
+ ret = sec_skcipher_init(tfm);
+ if (ret)
+ return ret;
- return sec_skcipher_init(tfm);
+ if (ctx->sec->qm.ver < QM_HW_V3) {
+ ctx->type_supported = SEC_BD_TYPE2;
+ ctx->req_op = &sec_skcipher_req_ops;
+ } else {
+ ctx->type_supported = SEC_BD_TYPE3;
+ ctx->req_op = &sec_skcipher_req_ops_v3;
+ }
+
+ return ret;
}
static void sec_skcipher_ctx_exit(struct crypto_skcipher *tfm)
@@ -1325,15 +1834,22 @@ static int sec_aead_init(struct crypto_aead *tfm)
crypto_aead_set_reqsize(tfm, sizeof(struct sec_req));
ctx->alg_type = SEC_AEAD;
ctx->c_ctx.ivsize = crypto_aead_ivsize(tfm);
- if (ctx->c_ctx.ivsize > SEC_IV_SIZE) {
- dev_err(ctx->dev, "get error aead iv size!\n");
+ if (ctx->c_ctx.ivsize < SEC_AIV_SIZE ||
+ ctx->c_ctx.ivsize > SEC_IV_SIZE) {
+ pr_err("get error aead iv size!\n");
return -EINVAL;
}
- ctx->req_op = &sec_aead_req_ops;
ret = sec_ctx_base_init(ctx);
if (ret)
return ret;
+ if (ctx->sec->qm.ver < QM_HW_V3) {
+ ctx->type_supported = SEC_BD_TYPE2;
+ ctx->req_op = &sec_aead_req_ops;
+ } else {
+ ctx->type_supported = SEC_BD_TYPE3;
+ ctx->req_op = &sec_aead_req_ops_v3;
+ }
ret = sec_auth_init(ctx);
if (ret)
@@ -1391,6 +1907,41 @@ static void sec_aead_ctx_exit(struct crypto_aead *tfm)
sec_aead_exit(tfm);
}
+/*
+ * Context init for the CCM/GCM AEAD algorithms.
+ *
+ * Runs the common AEAD init and then allocates a fallback AEAD transform
+ * with the same cra_name. The fallback flag starts out cleared.
+ *
+ * Returns 0 on success or a negative errno; if the fallback cannot be
+ * allocated the common AEAD state is torn down again.
+ */
+static int sec_aead_xcm_ctx_init(struct crypto_aead *tfm)
+{
+	struct sec_ctx *ctx = crypto_aead_ctx(tfm);
+	struct sec_auth_ctx *a_ctx = &ctx->a_ctx;
+	struct crypto_aead *fb_tfm;
+	int ret = sec_aead_init(tfm);
+
+	if (ret) {
+		dev_err(ctx->dev, "hisi_sec2: aead xcm init error!\n");
+		return ret;
+	}
+
+	fb_tfm = crypto_alloc_aead(crypto_aead_alg(tfm)->base.cra_name, 0,
+				   CRYPTO_ALG_NEED_FALLBACK |
+				   CRYPTO_ALG_ASYNC);
+	/* the handle (or error pointer) is recorded before it is checked */
+	a_ctx->fallback_aead_tfm = fb_tfm;
+	if (IS_ERR(fb_tfm)) {
+		dev_err(ctx->dev, "aead driver alloc fallback tfm error!\n");
+		sec_aead_exit(tfm);
+		return PTR_ERR(a_ctx->fallback_aead_tfm);
+	}
+
+	a_ctx->fallback = false;
+	return 0;
+}
+
+/*
+ * Context exit for the CCM/GCM AEAD algorithms: release the fallback
+ * transform first, then tear down the common AEAD state.
+ */
+static void sec_aead_xcm_ctx_exit(struct crypto_aead *tfm)
+{
+	struct sec_ctx *sec = crypto_aead_ctx(tfm);
+	struct crypto_aead *fb_tfm = sec->a_ctx.fallback_aead_tfm;
+
+	crypto_free_aead(fb_tfm);
+	sec_aead_exit(tfm);
+}
+
static int sec_aead_sha1_ctx_init(struct crypto_aead *tfm)
{
return sec_aead_ctx_init(tfm, "sha1");
@@ -1429,6 +1980,14 @@ static int sec_skcipher_cryptlen_ckeck(struct sec_ctx *ctx,
ret = -EINVAL;
}
break;
+ case SEC_CMODE_CFB:
+ case SEC_CMODE_OFB:
+ case SEC_CMODE_CTR:
+ if (unlikely(ctx->sec->qm.ver < QM_HW_V3)) {
+ dev_err(dev, "skcipher HW version error!\n");
+ ret = -EINVAL;
+ }
+ break;
default:
ret = -EINVAL;
}
@@ -1442,7 +2001,8 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
struct device *dev = ctx->dev;
u8 c_alg = ctx->c_ctx.c_alg;
- if (unlikely(!sk_req->src || !sk_req->dst)) {
+ if (unlikely(!sk_req->src || !sk_req->dst ||
+ sk_req->cryptlen > MAX_INPUT_DATA_LEN)) {
dev_err(dev, "skcipher input param error!\n");
return -EINVAL;
}
@@ -1468,6 +2028,37 @@ static int sec_skcipher_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
return -EINVAL;
}
+/*
+ * Run an skcipher request synchronously on the fallback transform.
+ *
+ * @ctx: driver context holding the fallback tfm (c_ctx.fbtfm)
+ * @sreq: original request; its src, dst, cryptlen, iv and flags are reused
+ * @encrypt: true to encrypt, false to decrypt
+ *
+ * Returns -EINVAL when no fallback tfm is set, otherwise the result of the
+ * fallback operation. The on-stack sub-request is zeroed before returning.
+ */
+static int sec_skcipher_soft_crypto(struct sec_ctx *ctx,
+				    struct skcipher_request *sreq, bool encrypt)
+{
+	struct crypto_sync_skcipher *fb_tfm = ctx->c_ctx.fbtfm;
+	int rc;
+
+	SYNC_SKCIPHER_REQUEST_ON_STACK(subreq, fb_tfm);
+
+	if (!fb_tfm) {
+		dev_err(ctx->dev, "failed to check fallback tfm\n");
+		return -EINVAL;
+	}
+
+	skcipher_request_set_sync_tfm(subreq, fb_tfm);
+
+	/* software need sync mode to do crypto */
+	skcipher_request_set_callback(subreq, sreq->base.flags, NULL, NULL);
+	skcipher_request_set_crypt(subreq, sreq->src, sreq->dst,
+				   sreq->cryptlen, sreq->iv);
+
+	rc = encrypt ? crypto_skcipher_encrypt(subreq) :
+		       crypto_skcipher_decrypt(subreq);
+
+	skcipher_request_zero(subreq);
+
+	return rc;
+}
+
static int sec_skcipher_crypto(struct skcipher_request *sk_req, bool encrypt)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(sk_req);
@@ -1475,8 +2066,11 @@ static int sec_skcipher_crypto(struct skcipher_request *sk_req, bool encrypt)
struct sec_ctx *ctx = crypto_skcipher_ctx(tfm);
int ret;
- if (!sk_req->cryptlen)
+ if (!sk_req->cryptlen) {
+ if (ctx->c_ctx.c_mode == SEC_CMODE_XTS)
+ return -EINVAL;
return 0;
+ }
req->flag = sk_req->base.flags;
req->c_req.sk_req = sk_req;
@@ -1487,6 +2081,9 @@ static int sec_skcipher_crypto(struct skcipher_request *sk_req, bool encrypt)
if (unlikely(ret))
return -EINVAL;
+ if (unlikely(ctx->c_ctx.fallback))
+ return sec_skcipher_soft_crypto(ctx, sk_req, encrypt);
+
return ctx->req_op->process(ctx, req);
}
@@ -1507,7 +2104,9 @@ static int sec_skcipher_decrypt(struct skcipher_request *sk_req)
.cra_name = sec_cra_name,\
.cra_driver_name = "hisi_sec_"sec_cra_name,\
.cra_priority = SEC_PRIORITY,\
- .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,\
+ .cra_flags = CRYPTO_ALG_ASYNC |\
+ CRYPTO_ALG_ALLOCATES_MEMORY |\
+ CRYPTO_ALG_NEED_FALLBACK,\
.cra_blocksize = blk_size,\
.cra_ctxsize = sizeof(struct sec_ctx),\
.cra_module = THIS_MODULE,\
@@ -1541,11 +2140,11 @@ static struct skcipher_alg sec_skciphers[] = {
AES_BLOCK_SIZE, AES_BLOCK_SIZE)
SEC_SKCIPHER_ALG("ecb(des3_ede)", sec_setkey_3des_ecb,
- SEC_DES3_2KEY_SIZE, SEC_DES3_3KEY_SIZE,
+ SEC_DES3_3KEY_SIZE, SEC_DES3_3KEY_SIZE,
DES3_EDE_BLOCK_SIZE, 0)
SEC_SKCIPHER_ALG("cbc(des3_ede)", sec_setkey_3des_cbc,
- SEC_DES3_2KEY_SIZE, SEC_DES3_3KEY_SIZE,
+ SEC_DES3_3KEY_SIZE, SEC_DES3_3KEY_SIZE,
DES3_EDE_BLOCK_SIZE, DES3_EDE_BLOCK_SIZE)
SEC_SKCIPHER_ALG("xts(sm4)", sec_setkey_sm4_xts,
@@ -1557,6 +2156,90 @@ static struct skcipher_alg sec_skciphers[] = {
AES_BLOCK_SIZE, AES_BLOCK_SIZE)
};
+/*
+ * Additional skcipher algorithms (OFB, CFB and CTR for AES and SM4).
+ * sec_register_to_crypto() registers them only when qm->ver > QM_HW_V2.
+ * NOTE(review): the two size arguments after the setkey callback look
+ * like the minimum and maximum key size - confirm against the
+ * SEC_SKCIPHER_ALG definition.
+ */
+static struct skcipher_alg sec_skciphers_v3[] = {
+ SEC_SKCIPHER_ALG("ofb(aes)", sec_setkey_aes_ofb,
+ AES_MIN_KEY_SIZE, AES_MAX_KEY_SIZE,
+ SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE)
+
+ SEC_SKCIPHER_ALG("cfb(aes)", sec_setkey_aes_cfb,
+ AES_MIN_KEY_SIZE, AES_MAX_KEY_SIZE,
+ SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE)
+
+ SEC_SKCIPHER_ALG("ctr(aes)", sec_setkey_aes_ctr,
+ AES_MIN_KEY_SIZE, AES_MAX_KEY_SIZE,
+ SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE)
+
+ SEC_SKCIPHER_ALG("ofb(sm4)", sec_setkey_sm4_ofb,
+ AES_MIN_KEY_SIZE, AES_MIN_KEY_SIZE,
+ SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE)
+
+ SEC_SKCIPHER_ALG("cfb(sm4)", sec_setkey_sm4_cfb,
+ AES_MIN_KEY_SIZE, AES_MIN_KEY_SIZE,
+ SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE)
+
+ SEC_SKCIPHER_ALG("ctr(sm4)", sec_setkey_sm4_ctr,
+ AES_MIN_KEY_SIZE, AES_MIN_KEY_SIZE,
+ SEC_MIN_BLOCK_SZ, AES_BLOCK_SIZE)
+};
+
+/*
+ * Validate the length-field size encoded in the first IV byte of a CCM
+ * request.
+ *
+ * The size is iv[0] + 1 and must lie in [IV_CL_MIN, IV_CL_MAX]. For sizes
+ * below IV_CL_MID, cryptlen must also fit into that many bytes.
+ *
+ * Returns 0 if valid, -EINVAL for a size out of range, -EOVERFLOW when
+ * cryptlen does not fit.
+ */
+static int aead_iv_demension_check(struct aead_request *aead_req)
+{
+	u8 len_bytes = aead_req->iv[0] + 1;
+
+	if (len_bytes < IV_CL_MIN || len_bytes > IV_CL_MAX)
+		return -EINVAL;
+
+	/* wide length fields are not checked against cryptlen */
+	if (len_bytes >= IV_CL_MID)
+		return 0;
+
+	return (aead_req->cryptlen >> (BYTE_BITS * len_bytes)) ? -EOVERFLOW : 0;
+}
+
+/*
+ * Check an AEAD request against the limits enforced by this driver and
+ * record the cipher length in sreq->c_req.c_len.
+ *
+ * Checks, in order: total and AAD length limits, the MAC length rules for
+ * GCM and CCM, the CCM IV length field, and block alignment of the cipher
+ * length in CBC mode.
+ *
+ * Returns 0 if the request is acceptable, otherwise -EINVAL or the error
+ * from aead_iv_demension_check().
+ */
+static int sec_aead_spec_check(struct sec_ctx *ctx, struct sec_req *sreq)
+{
+	struct aead_request *req = sreq->aead_req.aead_req;
+	size_t authsize = crypto_aead_authsize(crypto_aead_reqtfm(req));
+	struct device *dev = ctx->dev;
+	u8 c_mode = ctx->c_ctx.c_mode;
+	bool bad_mac = false;
+	int ret;
+
+	if (unlikely(req->assoclen > SEC_MAX_AAD_LEN ||
+		     req->cryptlen + req->assoclen > MAX_INPUT_DATA_LEN)) {
+		dev_err(dev, "aead input spec error!\n");
+		return -EINVAL;
+	}
+
+	/* GCM and CCM each have their own MAC length rule */
+	if (c_mode == SEC_CMODE_GCM)
+		bad_mac = authsize < DES_BLOCK_SIZE;
+	else if (c_mode == SEC_CMODE_CCM)
+		bad_mac = authsize < MIN_MAC_LEN ||
+			  (authsize & MAC_LEN_MASK);
+
+	if (unlikely(bad_mac)) {
+		dev_err(dev, "aead input mac length error!\n");
+		return -EINVAL;
+	}
+
+	if (c_mode == SEC_CMODE_CCM) {
+		ret = aead_iv_demension_check(req);
+		if (ret) {
+			dev_err(dev, "aead input iv param error!\n");
+			return ret;
+		}
+	}
+
+	/* on decryption cryptlen includes the MAC, which is not cipher data */
+	sreq->c_req.c_len = sreq->c_req.encrypt ? req->cryptlen :
+						  req->cryptlen - authsize;
+
+	if (c_mode == SEC_CMODE_CBC &&
+	    unlikely(sreq->c_req.c_len & (AES_BLOCK_SIZE - 1))) {
+		dev_err(dev, "aead crypto length error!\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
{
struct aead_request *req = sreq->aead_req.aead_req;
@@ -1565,34 +2248,61 @@ static int sec_aead_param_check(struct sec_ctx *ctx, struct sec_req *sreq)
struct device *dev = ctx->dev;
u8 c_alg = ctx->c_ctx.c_alg;
- if (unlikely(!req->src || !req->dst || !req->cryptlen ||
- req->assoclen > SEC_MAX_AAD_LEN)) {
+ if (unlikely(!req->src || !req->dst)) {
dev_err(dev, "aead input param error!\n");
return -EINVAL;
}
+ if (ctx->sec->qm.ver == QM_HW_V2) {
+ if (unlikely(!req->cryptlen || (!sreq->c_req.encrypt &&
+ req->cryptlen <= authsize))) {
+ dev_err(dev, "Kunpeng920 not support 0 length!\n");
+ ctx->a_ctx.fallback = true;
+ return -EINVAL;
+ }
+ }
+
+ /* Support AES or SM4 */
+ if (unlikely(c_alg != SEC_CALG_AES && c_alg != SEC_CALG_SM4)) {
+ dev_err(dev, "aead crypto alg error!\n");
+ return -EINVAL;
+ }
+
+ if (unlikely(sec_aead_spec_check(ctx, sreq)))
+ return -EINVAL;
+
if (ctx->pbuf_supported && (req->cryptlen + req->assoclen) <=
SEC_PBUF_SZ)
sreq->use_pbuf = true;
else
sreq->use_pbuf = false;
- /* Support AES only */
- if (unlikely(c_alg != SEC_CALG_AES)) {
- dev_err(dev, "aead crypto alg error!\n");
- return -EINVAL;
- }
- if (sreq->c_req.encrypt)
- sreq->c_req.c_len = req->cryptlen;
- else
- sreq->c_req.c_len = req->cryptlen - authsize;
+ return 0;
+}
- if (unlikely(sreq->c_req.c_len & (AES_BLOCK_SIZE - 1))) {
- dev_err(dev, "aead crypto length error!\n");
+static int sec_aead_soft_crypto(struct sec_ctx *ctx,
+ struct aead_request *aead_req,
+ bool encrypt)
+{
+ struct aead_request *subreq = aead_request_ctx(aead_req);
+ struct sec_auth_ctx *a_ctx = &ctx->a_ctx;
+ struct device *dev = ctx->dev;
+
+ /* Kunpeng920 aead mode not support input 0 size */
+ if (!a_ctx->fallback_aead_tfm) {
+ dev_err(dev, "aead fallback tfm is NULL!\n");
return -EINVAL;
}
- return 0;
+ aead_request_set_tfm(subreq, a_ctx->fallback_aead_tfm);
+ aead_request_set_callback(subreq, aead_req->base.flags,
+ aead_req->base.complete, aead_req->base.data);
+ aead_request_set_crypt(subreq, aead_req->src, aead_req->dst,
+ aead_req->cryptlen, aead_req->iv);
+ aead_request_set_ad(subreq, aead_req->assoclen);
+
+ return encrypt ? crypto_aead_encrypt(subreq) :
+ crypto_aead_decrypt(subreq);
}
static int sec_aead_crypto(struct aead_request *a_req, bool encrypt)
@@ -1608,8 +2318,11 @@ static int sec_aead_crypto(struct aead_request *a_req, bool encrypt)
req->ctx = ctx;
ret = sec_aead_param_check(ctx, req);
- if (unlikely(ret))
+ if (unlikely(ret)) {
+ if (ctx->a_ctx.fallback)
+ return sec_aead_soft_crypto(ctx, a_req, encrypt);
return -EINVAL;
+ }
return ctx->req_op->process(ctx, req);
}
@@ -1624,14 +2337,16 @@ static int sec_aead_decrypt(struct aead_request *a_req)
return sec_aead_crypto(a_req, false);
}
-#define SEC_AEAD_GEN_ALG(sec_cra_name, sec_set_key, ctx_init,\
+#define SEC_AEAD_ALG(sec_cra_name, sec_set_key, ctx_init,\
ctx_exit, blk_size, iv_size, max_authsize)\
{\
.base = {\
.cra_name = sec_cra_name,\
.cra_driver_name = "hisi_sec_"sec_cra_name,\
.cra_priority = SEC_PRIORITY,\
- .cra_flags = CRYPTO_ALG_ASYNC | CRYPTO_ALG_ALLOCATES_MEMORY,\
+ .cra_flags = CRYPTO_ALG_ASYNC |\
+ CRYPTO_ALG_ALLOCATES_MEMORY |\
+ CRYPTO_ALG_NEED_FALLBACK,\
.cra_blocksize = blk_size,\
.cra_ctxsize = sizeof(struct sec_ctx),\
.cra_module = THIS_MODULE,\
@@ -1639,28 +2354,46 @@ static int sec_aead_decrypt(struct aead_request *a_req)
.init = ctx_init,\
.exit = ctx_exit,\
.setkey = sec_set_key,\
+ .setauthsize = sec_aead_setauthsize,\
.decrypt = sec_aead_decrypt,\
.encrypt = sec_aead_encrypt,\
.ivsize = iv_size,\
.maxauthsize = max_authsize,\
}
-#define SEC_AEAD_ALG(algname, keyfunc, aead_init, blksize, ivsize, authsize)\
- SEC_AEAD_GEN_ALG(algname, keyfunc, aead_init,\
- sec_aead_ctx_exit, blksize, ivsize, authsize)
-
static struct aead_alg sec_aeads[] = {
SEC_AEAD_ALG("authenc(hmac(sha1),cbc(aes))",
sec_setkey_aes_cbc_sha1, sec_aead_sha1_ctx_init,
- AES_BLOCK_SIZE, AES_BLOCK_SIZE, SHA1_DIGEST_SIZE),
+ sec_aead_ctx_exit, AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE, SHA1_DIGEST_SIZE),
SEC_AEAD_ALG("authenc(hmac(sha256),cbc(aes))",
sec_setkey_aes_cbc_sha256, sec_aead_sha256_ctx_init,
- AES_BLOCK_SIZE, AES_BLOCK_SIZE, SHA256_DIGEST_SIZE),
+ sec_aead_ctx_exit, AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE, SHA256_DIGEST_SIZE),
SEC_AEAD_ALG("authenc(hmac(sha512),cbc(aes))",
sec_setkey_aes_cbc_sha512, sec_aead_sha512_ctx_init,
- AES_BLOCK_SIZE, AES_BLOCK_SIZE, SHA512_DIGEST_SIZE),
+ sec_aead_ctx_exit, AES_BLOCK_SIZE,
+ AES_BLOCK_SIZE, SHA512_DIGEST_SIZE),
+
+ SEC_AEAD_ALG("ccm(aes)", sec_setkey_aes_ccm, sec_aead_xcm_ctx_init,
+ sec_aead_xcm_ctx_exit, SEC_MIN_BLOCK_SZ,
+ AES_BLOCK_SIZE, AES_BLOCK_SIZE),
+
+ SEC_AEAD_ALG("gcm(aes)", sec_setkey_aes_gcm, sec_aead_xcm_ctx_init,
+ sec_aead_xcm_ctx_exit, SEC_MIN_BLOCK_SZ,
+ SEC_AIV_SIZE, AES_BLOCK_SIZE)
+};
+
+/*
+ * SM4 CCM/GCM AEAD algorithms. sec_register_to_crypto() registers them
+ * only when qm->ver > QM_HW_V2. Arguments follow SEC_AEAD_ALG: name,
+ * setkey, ctx init, ctx exit, block size, IV size, max auth size.
+ */
+static struct aead_alg sec_aeads_v3[] = {
+ SEC_AEAD_ALG("ccm(sm4)", sec_setkey_sm4_ccm, sec_aead_xcm_ctx_init,
+ sec_aead_xcm_ctx_exit, SEC_MIN_BLOCK_SZ,
+ AES_BLOCK_SIZE, AES_BLOCK_SIZE),
+
+ SEC_AEAD_ALG("gcm(sm4)", sec_setkey_sm4_gcm, sec_aead_xcm_ctx_init,
+ sec_aead_xcm_ctx_exit, SEC_MIN_BLOCK_SZ,
+ SEC_AIV_SIZE, AES_BLOCK_SIZE)
};
int sec_register_to_crypto(struct hisi_qm *qm)
@@ -1673,16 +2406,45 @@ int sec_register_to_crypto(struct hisi_qm *qm)
if (ret)
return ret;
+ if (qm->ver > QM_HW_V2) {
+ ret = crypto_register_skciphers(sec_skciphers_v3,
+ ARRAY_SIZE(sec_skciphers_v3));
+ if (ret)
+ goto reg_skcipher_fail;
+ }
+
ret = crypto_register_aeads(sec_aeads, ARRAY_SIZE(sec_aeads));
if (ret)
- crypto_unregister_skciphers(sec_skciphers,
- ARRAY_SIZE(sec_skciphers));
+ goto reg_aead_fail;
+ if (qm->ver > QM_HW_V2) {
+ ret = crypto_register_aeads(sec_aeads_v3, ARRAY_SIZE(sec_aeads_v3));
+ if (ret)
+ goto reg_aead_v3_fail;
+ }
+ return ret;
+
+reg_aead_v3_fail:
+ crypto_unregister_aeads(sec_aeads, ARRAY_SIZE(sec_aeads));
+reg_aead_fail:
+ if (qm->ver > QM_HW_V2)
+ crypto_unregister_skciphers(sec_skciphers_v3,
+ ARRAY_SIZE(sec_skciphers_v3));
+reg_skcipher_fail:
+ crypto_unregister_skciphers(sec_skciphers,
+ ARRAY_SIZE(sec_skciphers));
return ret;
}
void sec_unregister_from_crypto(struct hisi_qm *qm)
{
+ if (qm->ver > QM_HW_V2)
+ crypto_unregister_aeads(sec_aeads_v3,
+ ARRAY_SIZE(sec_aeads_v3));
+ crypto_unregister_aeads(sec_aeads, ARRAY_SIZE(sec_aeads));
+
+ if (qm->ver > QM_HW_V2)
+ crypto_unregister_skciphers(sec_skciphers_v3,
+ ARRAY_SIZE(sec_skciphers_v3));
crypto_unregister_skciphers(sec_skciphers,
ARRAY_SIZE(sec_skciphers));
- crypto_unregister_aeads(sec_aeads, ARRAY_SIZE(sec_aeads));
}
diff --git a/drivers/crypto/hisilicon/sec2/sec_crypto.h b/drivers/crypto/hisilicon/sec2/sec_crypto.h
index 9c78edac56a4..9f71c358a6d3 100644
--- a/drivers/crypto/hisilicon/sec2/sec_crypto.h
+++ b/drivers/crypto/hisilicon/sec2/sec_crypto.h
@@ -4,9 +4,11 @@
#ifndef __HISI_SEC_V2_CRYPTO_H
#define __HISI_SEC_V2_CRYPTO_H
+#define SEC_AIV_SIZE 12
#define SEC_IV_SIZE 24
#define SEC_MAX_KEY_SIZE 64
#define SEC_COMM_SCENE 0
+#define SEC_MIN_BLOCK_SZ 1
enum sec_calg {
SEC_CALG_3DES = 0x1,
@@ -21,6 +23,11 @@ enum sec_hash_alg {
};
enum sec_mac_len {
+ SEC_HMAC_CCM_MAC = 16,
+ SEC_HMAC_GCM_MAC = 16,
+ SEC_SM3_MAC = 32,
+ SEC_HMAC_SM3_MAC = 32,
+ SEC_HMAC_MD5_MAC = 16,
SEC_HMAC_SHA1_MAC = 20,
SEC_HMAC_SHA256_MAC = 32,
SEC_HMAC_SHA512_MAC = 64,
@@ -29,7 +36,11 @@ enum sec_mac_len {
enum sec_cmode {
SEC_CMODE_ECB = 0x0,
SEC_CMODE_CBC = 0x1,
+ SEC_CMODE_CFB = 0x2,
+ SEC_CMODE_OFB = 0x3,
SEC_CMODE_CTR = 0x4,
+ SEC_CMODE_CCM = 0x5,
+ SEC_CMODE_GCM = 0x6,
SEC_CMODE_XTS = 0x7,
};
@@ -44,6 +55,7 @@ enum sec_ckey_type {
enum sec_bd_type {
SEC_BD_TYPE1 = 0x1,
SEC_BD_TYPE2 = 0x2,
+ SEC_BD_TYPE3 = 0x3,
};
enum sec_auth {
@@ -63,6 +75,24 @@ enum sec_addr_type {
SEC_PRP = 0x2,
};
+/*
+ * Status fields of a BD in CPU-native types.
+ * NOTE(review): presumably unpacked from a completed descriptor so that
+ * type-2 and type-3 BDs can share one completion path; the code using it
+ * is outside this view - confirm.
+ */
+struct bd_status {
+ u64 tag;
+ u8 done;
+ u8 err_type;
+ u16 flag;
+ u16 icv;
+};
+
+/*
+ * NOTE(review): presumably the values for the auth_pad field of
+ * struct bd3_stream_scene - confirm against the hardware spec.
+ */
+enum {
+ AUTHPAD_PAD,
+ AUTHPAD_NOPAD,
+};
+
+/*
+ * NOTE(review): presumably the values for the ai_gen field of
+ * sec_sqe3.auth_mac_key - confirm against the hardware spec.
+ */
+enum {
+ AIGEN_GEN,
+ AIGEN_NOGEN,
+};
+
struct sec_sqe_type2 {
/*
* mac_len: 0~4 bits
@@ -209,6 +239,169 @@ struct sec_sqe {
struct sec_sqe_type2 type2;
};
+/*
+ * Auth IV view of the first union in struct sec_sqe3.
+ * sec_auth_bd_fill_xcm_v3() stores the auth IV DMA address in a_ivin_addr.
+ */
+struct bd3_auth_ivin {
+ __le64 a_ivin_addr;
+ __le32 rsvd0;
+ __le32 rsvd1;
+} __packed __aligned(4);
+
+/*
+ * Skip-data view of the first union in struct sec_sqe3 (it shares storage
+ * with struct bd3_auth_ivin).
+ */
+struct bd3_skip_data {
+ __le32 rsvd0;
+
+ /*
+ * gran_num: 0~15 bits
+ * reserved: 16~31 bits
+ */
+ __le32 gran_num;
+
+ /*
+ * src_skip_data_len: 0~24 bits
+ * reserved: 25~31 bits
+ */
+ __le32 src_skip_data_len;
+
+ /*
+ * dst_skip_data_len: 0~24 bits
+ * reserved: 25~31 bits
+ */
+ __le32 dst_skip_data_len;
+};
+
+/*
+ * Stream-scene view of the second union in struct sec_sqe3 (it shares
+ * storage with struct bd3_no_scene).
+ */
+struct bd3_stream_scene {
+ __le64 c_ivin_addr;
+ __le64 long_a_data_len;
+
+ /*
+ * auth_pad: 0~1 bits
+ * stream_protocol: 2~4 bits
+ * reserved: 5~7 bits
+ */
+ __u8 stream_auth_pad;
+ __u8 plaintext_type;
+ __le16 pad_len_1p3;
+} __packed __aligned(4);
+
+/*
+ * No-scene view of the second union in struct sec_sqe3.
+ * sec_skcipher_bd_fill_v3() stores the cipher IV DMA address in
+ * c_ivin_addr.
+ */
+struct bd3_no_scene {
+ __le64 c_ivin_addr;
+ __le32 rsvd0;
+ __le32 rsvd1;
+ __le32 rsvd2;
+} __packed __aligned(4);
+
+/*
+ * Checksum view of the last union in struct sec_sqe3 (it shares storage
+ * with kek_key_addr_h and struct bd3_tls_type_back).
+ */
+struct bd3_check_sum {
+ __u8 rsvd0;
+ __u8 hac_sva_status;
+ __le16 check_sum_i;
+};
+
+/*
+ * TLS write-back view of the last union in struct sec_sqe3 (it shares
+ * storage with kek_key_addr_h and struct bd3_check_sum).
+ */
+struct bd3_tls_type_back {
+ __u8 tls_1p3_type_back;
+ __u8 hac_sva_status;
+ __le16 pad_len_1p3_back;
+};
+
+/*
+ * Layout of a type-3 BD (SEC_BD_TYPE3).
+ *
+ * Filled by sec_skcipher_bd_fill_v3() for the cipher part and by the
+ * *_v3 auth helpers for the AEAD part. Multi-byte fields are stored
+ * little-endian; the bit ranges of packed fields are listed above each
+ * member.
+ */
+struct sec_sqe3 {
+ /*
+ * type: 0~3 bit
+ * bd_invalid: 4 bit
+ * scene: 5~8 bit
+ * de: 9~10 bit
+ * src_addr_type: 11~13 bit
+ * dst_addr_type: 14~16 bit
+ * mac_addr_type: 17~19 bit
+ * reserved: 20~31 bits
+ */
+ __le32 bd_param;
+
+ /*
+ * cipher: 0~1 bits
+ * ci_gen: 2~3 bit
+ * c_icv_len: 4~9 bit
+ * c_width: 10~12 bits
+ * c_key_len: 13~15 bits
+ */
+ __le16 c_icv_key;
+
+ /*
+ * c_mode : 0~3 bits
+ * c_alg : 4~7 bits
+ */
+ __u8 c_mode_alg;
+
+ /*
+ * nonce_len : 0~3 bits
+ * huk : 4 bits
+ * cal_iv_addr_en : 5 bits
+ * seq : 6 bits
+ * reserved : 7 bits
+ */
+ __u8 huk_iv_seq;
+
+ /* sec_skcipher_bd_fill_v3() stores the request address here */
+ __le64 tag;
+ __le64 data_src_addr;
+ __le64 a_key_addr;
+ union {
+ struct bd3_auth_ivin auth_ivin;
+ struct bd3_skip_data skip_data;
+ };
+
+ __le64 c_key_addr;
+
+ /*
+ * auth: 0~1 bits
+ * ai_gen: 2~3 bits
+ * mac_len: 4~8 bits
+ * akey_len: 9~14 bits
+ * a_alg: 15~20 bits
+ * key_sel: 21~24 bits
+ * updata_key: 25 bits
+ * reserved: 26~31 bits
+ */
+ __le32 auth_mac_key;
+ __le32 salt;
+ __le16 auth_src_offset;
+ __le16 cipher_src_offset;
+
+ /*
+ * auth_len: 0~23 bit
+ * auth_key_offset: 24~31 bits
+ */
+ __le32 a_len_key;
+
+ /*
+ * cipher_len: 0~23 bit
+ * auth_ivin_offset: 24~31 bits
+ */
+ __le32 c_len_ivin;
+ __le64 data_dst_addr;
+ __le64 mac_addr;
+ union {
+ struct bd3_stream_scene stream_scene;
+ struct bd3_no_scene no_scene;
+ };
+
+ /*
+ * done: 0 bit
+ * icv: 1~3 bit
+ * csc: 4~6 bit
+ * flag: 7~10 bit
+ * reserved: 11~15 bit
+ */
+ __le16 done_flag;
+ __u8 error_type;
+ __u8 warning_type;
+ union {
+ __le32 mac_i;
+ __le32 kek_key_addr_l;
+ };
+ union {
+ __le32 kek_key_addr_h;
+ struct bd3_check_sum check_sum;
+ struct bd3_tls_type_back tls_type_back;
+ };
+ __le32 counter;
+} __packed __aligned(4);
+
int sec_register_to_crypto(struct hisi_qm *qm);
void sec_unregister_from_crypto(struct hisi_qm *qm);
#endif
diff --git a/drivers/crypto/hisilicon/sec2/sec_main.c b/drivers/crypto/hisilicon/sec2/sec_main.c
index 6f0062d4408c..d120ce3e34ed 100644
--- a/drivers/crypto/hisilicon/sec2/sec_main.c
+++ b/drivers/crypto/hisilicon/sec2/sec_main.c
@@ -52,6 +52,7 @@
#define SEC_RAS_CE_ENB_MSK 0x88
#define SEC_RAS_FE_ENB_MSK 0x0
#define SEC_RAS_NFE_ENB_MSK 0x7c177
+#define SEC_OOO_SHUTDOWN_SEL 0x301014
#define SEC_RAS_DISABLE 0x0
#define SEC_MEM_START_INIT_REG 0x301100
#define SEC_MEM_INIT_DONE_REG 0x301104
@@ -84,6 +85,12 @@
#define SEC_USER1_SMMU_MASK (~SEC_USER1_SVA_SET)
#define SEC_CORE_INT_STATUS_M_ECC BIT(2)
+#define SEC_PREFETCH_CFG 0x301130
+#define SEC_SVA_TRANS 0x301EC4
+#define SEC_PREFETCH_ENABLE (~(BIT(0) | BIT(1) | BIT(11)))
+#define SEC_PREFETCH_DISABLE BIT(1)
+#define SEC_SVA_DISABLE_READY (BIT(7) | BIT(11))
+
#define SEC_DELAY_10_US 10
#define SEC_POLL_TIMEOUT_US 1000
#define SEC_DBGFS_VAL_MAX_LEN 20
@@ -91,6 +98,7 @@
#define SEC_SQE_MASK_OFFSET 64
#define SEC_SQE_MASK_LEN 48
+#define SEC_SHAPER_TYPE_RATE 128
struct sec_hw_error {
u32 int_msk;
@@ -331,6 +339,42 @@ static u8 sec_get_endian(struct hisi_qm *qm)
return SEC_64BE;
}
+/*
+ * sec_open_sva_prefetch() - switch on SVA prefetch for the SEC engine.
+ * @qm: queue-management handle; registers are reached through qm->io_base.
+ *
+ * Returns immediately on hardware older than QM_HW_V3. Otherwise
+ * SEC_PREFETCH_CFG is ANDed with the SEC_PREFETCH_ENABLE mask and the same
+ * register is then polled (SEC_DELAY_10_US between reads, giving up after
+ * SEC_POLL_TIMEOUT_US) until the SEC_PREFETCH_DISABLE bit reads back as zero.
+ * A poll failure is only logged; nothing is reported to the caller.
+ */
+static void sec_open_sva_prefetch(struct hisi_qm *qm)
+{
+	u32 cfg;
+
+	if (qm->ver < QM_HW_V3)
+		return;
+
+	/* Enable prefetch */
+	cfg = readl_relaxed(qm->io_base + SEC_PREFETCH_CFG) & SEC_PREFETCH_ENABLE;
+	writel(cfg, qm->io_base + SEC_PREFETCH_CFG);
+
+	/* Wait for the disable bit to drop before declaring success. */
+	if (readl_relaxed_poll_timeout(qm->io_base + SEC_PREFETCH_CFG,
+				       cfg, !(cfg & SEC_PREFETCH_DISABLE),
+				       SEC_DELAY_10_US, SEC_POLL_TIMEOUT_US))
+		pci_err(qm->pdev, "failed to open sva prefetch\n");
+}
+
+/*
+ * sec_close_sva_prefetch() - switch off SVA prefetch for the SEC engine.
+ * @qm: queue-management handle; registers are reached through qm->io_base.
+ *
+ * Does nothing on hardware older than QM_HW_V3. sec_open_sva_prefetch()
+ * applies the same version gate, so on such hardware prefetch was never
+ * turned on and there is nothing to undo.
+ *
+ * Otherwise the SEC_PREFETCH_DISABLE bit is set in SEC_PREFETCH_CFG and
+ * SEC_SVA_TRANS is polled (SEC_DELAY_10_US between reads, giving up after
+ * SEC_POLL_TIMEOUT_US) until none of the SEC_SVA_DISABLE_READY bits are set.
+ * A poll failure is only logged; nothing is reported to the caller.
+ */
+static void sec_close_sva_prefetch(struct hisi_qm *qm)
+{
+	u32 val;
+	int ret;
+
+	/* Keep the close path symmetric with the open path. */
+	if (qm->ver < QM_HW_V3)
+		return;
+
+	val = readl_relaxed(qm->io_base + SEC_PREFETCH_CFG);
+	val |= SEC_PREFETCH_DISABLE;
+	writel(val, qm->io_base + SEC_PREFETCH_CFG);
+
+	ret = readl_relaxed_poll_timeout(qm->io_base + SEC_SVA_TRANS,
+					 val, !(val & SEC_SVA_DISABLE_READY),
+					 SEC_DELAY_10_US, SEC_POLL_TIMEOUT_US);
+	if (ret)
+		pci_err(qm->pdev, "failed to close sva prefetch\n");
+}
+
static int sec_engine_init(struct hisi_qm *qm)
{
int ret;
@@ -430,53 +474,60 @@ static void sec_debug_regs_clear(struct hisi_qm *qm)
hisi_qm_debug_regs_clear(qm);
}
-static void sec_hw_error_enable(struct hisi_qm *qm)
+static void sec_master_ooo_ctrl(struct hisi_qm *qm, bool enable)
{
- u32 val;
+ u32 val1, val2;
+
+ val1 = readl(qm->io_base + SEC_CONTROL_REG);
+ if (enable) {
+ val1 |= SEC_AXI_SHUTDOWN_ENABLE;
+ val2 = SEC_RAS_NFE_ENB_MSK;
+ } else {
+ val1 &= SEC_AXI_SHUTDOWN_DISABLE;
+ val2 = 0x0;
+ }
+
+ if (qm->ver > QM_HW_V2)
+ writel(val2, qm->io_base + SEC_OOO_SHUTDOWN_SEL);
+
+ writel(val1, qm->io_base + SEC_CONTROL_REG);
+}
+static void sec_hw_error_enable(struct hisi_qm *qm)
+{
if (qm->ver == QM_HW_V1) {
writel(SEC_CORE_INT_DISABLE, qm->io_base + SEC_CORE_INT_MASK);
pci_info(qm->pdev, "V1 not support hw error handle\n");
return;
}
- val = readl(qm->io_base + SEC_CONTROL_REG);
-
/* clear SEC hw error source if having */
writel(SEC_CORE_INT_CLEAR, qm->io_base + SEC_CORE_INT_SOURCE);
- /* enable SEC hw error interrupts */
- writel(SEC_CORE_INT_ENABLE, qm->io_base + SEC_CORE_INT_MASK);
-
/* enable RAS int */
writel(SEC_RAS_CE_ENB_MSK, qm->io_base + SEC_RAS_CE_REG);
writel(SEC_RAS_FE_ENB_MSK, qm->io_base + SEC_RAS_FE_REG);
writel(SEC_RAS_NFE_ENB_MSK, qm->io_base + SEC_RAS_NFE_REG);
- /* enable SEC block master OOO when m-bit error occur */
- val = val | SEC_AXI_SHUTDOWN_ENABLE;
+ /* enable SEC block master OOO when nfe occurs on Kunpeng930 */
+ sec_master_ooo_ctrl(qm, true);
- writel(val, qm->io_base + SEC_CONTROL_REG);
+ /* enable SEC hw error interrupts */
+ writel(SEC_CORE_INT_ENABLE, qm->io_base + SEC_CORE_INT_MASK);
}
static void sec_hw_error_disable(struct hisi_qm *qm)
{
- u32 val;
+ /* disable SEC hw error interrupts */
+ writel(SEC_CORE_INT_DISABLE, qm->io_base + SEC_CORE_INT_MASK);
- val = readl(qm->io_base + SEC_CONTROL_REG);
+ /* disable SEC block master OOO when nfe occurs on Kunpeng930 */
+ sec_master_ooo_ctrl(qm, false);
/* disable RAS int */
writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_CE_REG);
writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_FE_REG);
writel(SEC_RAS_DISABLE, qm->io_base + SEC_RAS_NFE_REG);
-
- /* disable SEC hw error interrupts */
- writel(SEC_CORE_INT_DISABLE, qm->io_base + SEC_CORE_INT_MASK);
-
- /* disable SEC block master OOO when m-bit error occur */
- val = val & SEC_AXI_SHUTDOWN_DISABLE;
-
- writel(val, qm->io_base + SEC_CONTROL_REG);
}
static u32 sec_clear_enable_read(struct sec_debug_file *file)
@@ -743,6 +794,8 @@ static const struct hisi_qm_err_ini sec_err_ini = {
.clear_dev_hw_err_status = sec_clear_hw_err_status,
.log_dev_hw_err = sec_log_hw_error,
.open_axi_master_ooo = sec_open_axi_master_ooo,
+ .open_sva_prefetch = sec_open_sva_prefetch,
+ .close_sva_prefetch = sec_close_sva_prefetch,
.err_info_init = sec_err_info_init,
};
@@ -758,6 +811,7 @@ static int sec_pf_probe_init(struct sec_dev *sec)
if (ret)
return ret;
+ sec_open_sva_prefetch(qm);
hisi_qm_dev_err_init(qm);
sec_debug_regs_clear(qm);
@@ -821,6 +875,7 @@ static void sec_qm_uninit(struct hisi_qm *qm)
static int sec_probe_init(struct sec_dev *sec)
{
+ u32 type_rate = SEC_SHAPER_TYPE_RATE;
struct hisi_qm *qm = &sec->qm;
int ret;
@@ -828,6 +883,11 @@ static int sec_probe_init(struct sec_dev *sec)
ret = sec_pf_probe_init(sec);
if (ret)
return ret;
+ /* enable shaper type 0 */
+ if (qm->ver >= QM_HW_V3) {
+ type_rate |= QM_SHAPER_ENABLE;
+ qm->type_rate = type_rate;
+ }
}
return 0;
diff --git a/drivers/crypto/hisilicon/zip/zip_main.c b/drivers/crypto/hisilicon/zip/zip_main.c
index 2178b40e9f82..f8482ceebf2a 100644
--- a/drivers/crypto/hisilicon/zip/zip_main.c
+++ b/drivers/crypto/hisilicon/zip/zip_main.c
@@ -68,6 +68,7 @@
#define HZIP_CORE_INT_RAS_CE_ENABLE 0x1
#define HZIP_CORE_INT_RAS_NFE_ENB 0x301164
#define HZIP_CORE_INT_RAS_FE_ENB 0x301168
+#define HZIP_OOO_SHUTDOWN_SEL 0x30120C
#define HZIP_CORE_INT_RAS_NFE_ENABLE 0x1FFE
#define HZIP_SRAM_ECC_ERR_NUM_SHIFT 16
#define HZIP_SRAM_ECC_ERR_ADDR_SHIFT 24
@@ -96,6 +97,16 @@
#define HZIP_RD_CNT_CLR_CE_EN (HZIP_CNT_CLR_CE_EN | \
HZIP_RO_CNT_CLR_CE_EN)
+#define HZIP_PREFETCH_CFG 0x3011B0
+#define HZIP_SVA_TRANS 0x3011C4
+#define HZIP_PREFETCH_ENABLE (~(BIT(26) | BIT(17) | BIT(0)))
+#define HZIP_SVA_PREFETCH_DISABLE BIT(26)
+#define HZIP_SVA_DISABLE_READY (BIT(26) | BIT(30))
+#define HZIP_SHAPER_RATE_COMPRESS 252
+#define HZIP_SHAPER_RATE_DECOMPRESS 229
+#define HZIP_DELAY_1_US 1
+#define HZIP_POLL_TIMEOUT_US 1000
+
static const char hisi_zip_name[] = "hisi_zip";
static struct dentry *hzip_debugfs_root;
@@ -262,6 +273,45 @@ int zip_create_qps(struct hisi_qp **qps, int qp_num, int node)
return hisi_qm_alloc_qps_node(&zip_devices, qp_num, 0, node, qps);
}
+/*
+ * hisi_zip_open_sva_prefetch() - switch on SVA prefetch for the ZIP engine.
+ * @qm: queue-management handle; registers are reached through qm->io_base.
+ *
+ * Returns immediately on hardware older than QM_HW_V3. Otherwise
+ * HZIP_PREFETCH_CFG is ANDed with the HZIP_PREFETCH_ENABLE mask and the same
+ * register is then polled (HZIP_DELAY_1_US between reads, giving up after
+ * HZIP_POLL_TIMEOUT_US) until the HZIP_SVA_PREFETCH_DISABLE bit reads back
+ * as zero. A poll failure is only logged.
+ */
+static void hisi_zip_open_sva_prefetch(struct hisi_qm *qm)
+{
+	u32 cfg;
+
+	if (qm->ver < QM_HW_V3)
+		return;
+
+	/* Enable prefetch */
+	cfg = readl_relaxed(qm->io_base + HZIP_PREFETCH_CFG) & HZIP_PREFETCH_ENABLE;
+	writel(cfg, qm->io_base + HZIP_PREFETCH_CFG);
+
+	/* Wait for the disable bit to drop before declaring success. */
+	if (readl_relaxed_poll_timeout(qm->io_base + HZIP_PREFETCH_CFG,
+				       cfg, !(cfg & HZIP_SVA_PREFETCH_DISABLE),
+				       HZIP_DELAY_1_US, HZIP_POLL_TIMEOUT_US))
+		pci_err(qm->pdev, "failed to open sva prefetch\n");
+}
+
+/*
+ * hisi_zip_close_sva_prefetch() - switch off SVA prefetch for the ZIP engine.
+ * @qm: queue-management handle; registers are reached through qm->io_base.
+ *
+ * Returns immediately on hardware older than QM_HW_V3. Otherwise the
+ * HZIP_SVA_PREFETCH_DISABLE bit is set in HZIP_PREFETCH_CFG and
+ * HZIP_SVA_TRANS is polled (HZIP_DELAY_1_US between reads, giving up after
+ * HZIP_POLL_TIMEOUT_US) until none of the HZIP_SVA_DISABLE_READY bits are
+ * set. A poll failure is only logged.
+ */
+static void hisi_zip_close_sva_prefetch(struct hisi_qm *qm)
+{
+	u32 reg;
+
+	if (qm->ver < QM_HW_V3)
+		return;
+
+	/* Request the disable ... */
+	reg = readl_relaxed(qm->io_base + HZIP_PREFETCH_CFG) | HZIP_SVA_PREFETCH_DISABLE;
+	writel(reg, qm->io_base + HZIP_PREFETCH_CFG);
+
+	/* ... then wait on the translation status register. */
+	if (readl_relaxed_poll_timeout(qm->io_base + HZIP_SVA_TRANS,
+				       reg, !(reg & HZIP_SVA_DISABLE_READY),
+				       HZIP_DELAY_1_US, HZIP_POLL_TIMEOUT_US))
+		pci_err(qm->pdev, "failed to close sva prefetch\n");
+}
+
static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm)
{
void __iomem *base = qm->io_base;
@@ -312,10 +362,27 @@ static int hisi_zip_set_user_domain_and_cache(struct hisi_qm *qm)
return 0;
}
-static void hisi_zip_hw_error_enable(struct hisi_qm *qm)
+static void hisi_zip_master_ooo_ctrl(struct hisi_qm *qm, bool enable)
{
- u32 val;
+ u32 val1, val2;
+
+ val1 = readl(qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
+ if (enable) {
+ val1 |= HZIP_AXI_SHUTDOWN_ENABLE;
+ val2 = HZIP_CORE_INT_RAS_NFE_ENABLE;
+ } else {
+ val1 &= ~HZIP_AXI_SHUTDOWN_ENABLE;
+ val2 = 0x0;
+ }
+
+ if (qm->ver > QM_HW_V2)
+ writel(val2, qm->io_base + HZIP_OOO_SHUTDOWN_SEL);
+ writel(val1, qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
+}
+
+static void hisi_zip_hw_error_enable(struct hisi_qm *qm)
+{
if (qm->ver == QM_HW_V1) {
writel(HZIP_CORE_INT_MASK_ALL,
qm->io_base + HZIP_CORE_INT_MASK_REG);
@@ -333,26 +400,20 @@ static void hisi_zip_hw_error_enable(struct hisi_qm *qm)
writel(HZIP_CORE_INT_RAS_NFE_ENABLE,
qm->io_base + HZIP_CORE_INT_RAS_NFE_ENB);
+ /* enable ZIP block master OOO when nfe occurs on Kunpeng930 */
+ hisi_zip_master_ooo_ctrl(qm, true);
+
/* enable ZIP hw error interrupts */
writel(0, qm->io_base + HZIP_CORE_INT_MASK_REG);
-
- /* enable ZIP block master OOO when m-bit error occur */
- val = readl(qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
- val = val | HZIP_AXI_SHUTDOWN_ENABLE;
- writel(val, qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
}
static void hisi_zip_hw_error_disable(struct hisi_qm *qm)
{
- u32 val;
-
/* disable ZIP hw error interrupts */
writel(HZIP_CORE_INT_MASK_ALL, qm->io_base + HZIP_CORE_INT_MASK_REG);
- /* disable ZIP block master OOO when m-bit error occur */
- val = readl(qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
- val = val & ~HZIP_AXI_SHUTDOWN_ENABLE;
- writel(val, qm->io_base + HZIP_SOFT_CTRL_ZIP_CONTROL);
+ /* disable ZIP block master OOO when nfe occurs on Kunpeng930 */
+ hisi_zip_master_ooo_ctrl(qm, false);
}
static inline struct hisi_qm *file_to_qm(struct ctrl_debug_file *file)
@@ -684,6 +745,8 @@ static const struct hisi_qm_err_ini hisi_zip_err_ini = {
.log_dev_hw_err = hisi_zip_log_hw_error,
.open_axi_master_ooo = hisi_zip_open_axi_master_ooo,
.close_axi_master_ooo = hisi_zip_close_axi_master_ooo,
+ .open_sva_prefetch = hisi_zip_open_sva_prefetch,
+ .close_sva_prefetch = hisi_zip_close_sva_prefetch,
.err_info_init = hisi_zip_err_info_init,
};
@@ -702,6 +765,7 @@ static int hisi_zip_pf_probe_init(struct hisi_zip *hisi_zip)
qm->err_ini->err_info_init(qm);
hisi_zip_set_user_domain_and_cache(qm);
+ hisi_zip_open_sva_prefetch(qm);
hisi_qm_dev_err_init(qm);
hisi_zip_debug_regs_clear(qm);
@@ -761,6 +825,7 @@ static void hisi_zip_qm_uninit(struct hisi_qm *qm)
static int hisi_zip_probe_init(struct hisi_zip *hisi_zip)
{
+ u32 type_rate = HZIP_SHAPER_RATE_COMPRESS;
struct hisi_qm *qm = &hisi_zip->qm;
int ret;
@@ -768,6 +833,14 @@ static int hisi_zip_probe_init(struct hisi_zip *hisi_zip)
ret = hisi_zip_pf_probe_init(hisi_zip);
if (ret)
return ret;
+ /* enable shaper type 0 */
+ if (qm->ver >= QM_HW_V3) {
+ type_rate |= QM_SHAPER_ENABLE;
+
+ /* ZIP need to enable shaper type 1 */
+ type_rate |= HZIP_SHAPER_RATE_DECOMPRESS << QM_SHAPER_TYPE1_OFFSET;
+ qm->type_rate = type_rate;
+ }
}
return 0;
diff --git a/drivers/crypto/ixp4xx_crypto.c b/drivers/crypto/ixp4xx_crypto.c
index 0616e369522e..35fc5ee70491 100644
--- a/drivers/crypto/ixp4xx_crypto.c
+++ b/drivers/crypto/ixp4xx_crypto.c
@@ -15,6 +15,7 @@
#include <linux/spinlock.h>
#include <linux/gfp.h>
#include <linux/module.h>
+#include <linux/of.h>
#include <crypto/ctr.h>
#include <crypto/internal/des.h>
@@ -71,15 +72,11 @@
#define MOD_AES256 (0x0a00 | KEYLEN_256)
#define MAX_IVLEN 16
-#define NPE_ID 2 /* NPE C */
#define NPE_QLEN 16
/* Space for registering when the first
* NPE_QLEN crypt_ctl are busy */
#define NPE_QLEN_TOTAL 64
-#define SEND_QID 29
-#define RECV_QID 30
-
#define CTL_FLAG_UNUSED 0x0000
#define CTL_FLAG_USED 0x1000
#define CTL_FLAG_PERFORM_ABLK 0x0001
@@ -136,7 +133,7 @@ struct crypt_ctl {
u32 crypto_ctx; /* NPE Crypto Param structure address */
/* Used by Host: 4*4 bytes*/
- unsigned ctl_flags;
+ unsigned int ctl_flags;
union {
struct skcipher_request *ablk_req;
struct aead_request *aead_req;
@@ -149,6 +146,9 @@ struct crypt_ctl {
struct ablk_ctx {
struct buffer_desc *src;
struct buffer_desc *dst;
+ u8 iv[MAX_IVLEN];
+ bool encrypt;
+ struct skcipher_request fallback_req; // keep at the end
};
struct aead_ctx {
@@ -181,9 +181,10 @@ struct ixp_ctx {
u8 enckey[MAX_KEYLEN];
u8 salt[MAX_IVLEN];
u8 nonce[CTR_RFC3686_NONCE_SIZE];
- unsigned salted;
+ unsigned int salted;
atomic_t configuring;
struct completion completion;
+ struct crypto_skcipher *fallback_tfm;
};
struct ixp_alg {
@@ -209,6 +210,7 @@ static const struct ix_hash_algo hash_alg_md5 = {
.icv = "\x01\x23\x45\x67\x89\xAB\xCD\xEF"
"\xFE\xDC\xBA\x98\x76\x54\x32\x10",
};
+
static const struct ix_hash_algo hash_alg_sha1 = {
.cfgword = 0x00000005,
.icv = "\x67\x45\x23\x01\xEF\xCD\xAB\x89\x98\xBA"
@@ -216,16 +218,17 @@ static const struct ix_hash_algo hash_alg_sha1 = {
};
static struct npe *npe_c;
-static struct dma_pool *buffer_pool = NULL;
-static struct dma_pool *ctx_pool = NULL;
-static struct crypt_ctl *crypt_virt = NULL;
+static unsigned int send_qid;
+static unsigned int recv_qid;
+static struct dma_pool *buffer_pool;
+static struct dma_pool *ctx_pool;
+
+static struct crypt_ctl *crypt_virt;
static dma_addr_t crypt_phys;
static int support_aes = 1;
-#define DRIVER_NAME "ixp4xx_crypto"
-
static struct platform_device *pdev;
static inline dma_addr_t crypt_virt2phys(struct crypt_ctl *virt)
@@ -240,12 +243,12 @@ static inline struct crypt_ctl *crypt_phys2virt(dma_addr_t phys)
static inline u32 cipher_cfg_enc(struct crypto_tfm *tfm)
{
- return container_of(tfm->__crt_alg, struct ixp_alg,crypto.base)->cfg_enc;
+ return container_of(tfm->__crt_alg, struct ixp_alg, crypto.base)->cfg_enc;
}
static inline u32 cipher_cfg_dec(struct crypto_tfm *tfm)
{
- return container_of(tfm->__crt_alg, struct ixp_alg,crypto.base)->cfg_dec;
+ return container_of(tfm->__crt_alg, struct ixp_alg, crypto.base)->cfg_dec;
}
static inline const struct ix_hash_algo *ix_hash(struct crypto_tfm *tfm)
@@ -256,6 +259,7 @@ static inline const struct ix_hash_algo *ix_hash(struct crypto_tfm *tfm)
static int setup_crypt_desc(void)
{
struct device *dev = &pdev->dev;
+
BUILD_BUG_ON(sizeof(struct crypt_ctl) != 64);
crypt_virt = dma_alloc_coherent(dev,
NPE_QLEN * sizeof(struct crypt_ctl),
@@ -269,7 +273,7 @@ static DEFINE_SPINLOCK(desc_lock);
static struct crypt_ctl *get_crypt_desc(void)
{
int i;
- static int idx = 0;
+ static int idx;
unsigned long flags;
spin_lock_irqsave(&desc_lock, flags);
@@ -286,7 +290,7 @@ static struct crypt_ctl *get_crypt_desc(void)
idx = 0;
crypt_virt[i].ctl_flags = CTL_FLAG_USED;
spin_unlock_irqrestore(&desc_lock, flags);
- return crypt_virt +i;
+ return crypt_virt + i;
} else {
spin_unlock_irqrestore(&desc_lock, flags);
return NULL;
@@ -314,7 +318,7 @@ static struct crypt_ctl *get_crypt_desc_emerg(void)
idx = NPE_QLEN;
crypt_virt[i].ctl_flags = CTL_FLAG_USED;
spin_unlock_irqrestore(&emerg_lock, flags);
- return crypt_virt +i;
+ return crypt_virt + i;
} else {
spin_unlock_irqrestore(&emerg_lock, flags);
return NULL;
@@ -330,7 +334,7 @@ static void free_buf_chain(struct device *dev, struct buffer_desc *buf,
buf1 = buf->next;
phys1 = buf->phys_next;
- dma_unmap_single(dev, buf->phys_next, buf->buf_len, buf->dir);
+ dma_unmap_single(dev, buf->phys_addr, buf->buf_len, buf->dir);
dma_pool_free(buffer_pool, buf, phys);
buf = buf1;
phys = phys1;
@@ -348,8 +352,8 @@ static void finish_scattered_hmac(struct crypt_ctl *crypt)
int decryptlen = req->assoclen + req->cryptlen - authsize;
if (req_ctx->encrypt) {
- scatterwalk_map_and_copy(req_ctx->hmac_virt,
- req->dst, decryptlen, authsize, 1);
+ scatterwalk_map_and_copy(req_ctx->hmac_virt, req->dst,
+ decryptlen, authsize, 1);
}
dma_pool_free(buffer_pool, req_ctx->hmac_virt, crypt->icv_rev_aes);
}
@@ -372,19 +376,33 @@ static void one_packet(dma_addr_t phys)
free_buf_chain(dev, req_ctx->src, crypt->src_buf);
free_buf_chain(dev, req_ctx->dst, crypt->dst_buf);
- if (req_ctx->hmac_virt) {
+ if (req_ctx->hmac_virt)
finish_scattered_hmac(crypt);
- }
+
req->base.complete(&req->base, failed);
break;
}
case CTL_FLAG_PERFORM_ABLK: {
struct skcipher_request *req = crypt->data.ablk_req;
struct ablk_ctx *req_ctx = skcipher_request_ctx(req);
+ struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
+ unsigned int ivsize = crypto_skcipher_ivsize(tfm);
+ unsigned int offset;
+
+ if (ivsize > 0) {
+ offset = req->cryptlen - ivsize;
+ if (req_ctx->encrypt) {
+ scatterwalk_map_and_copy(req->iv, req->dst,
+ offset, ivsize, 0);
+ } else {
+ memcpy(req->iv, req_ctx->iv, ivsize);
+ memzero_explicit(req_ctx->iv, ivsize);
+ }
+ }
- if (req_ctx->dst) {
+ if (req_ctx->dst)
free_buf_chain(dev, req_ctx->dst, crypt->dst_buf);
- }
+
free_buf_chain(dev, req_ctx->src, crypt->src_buf);
req->base.complete(&req->base, failed);
break;
@@ -392,14 +410,14 @@ static void one_packet(dma_addr_t phys)
case CTL_FLAG_GEN_ICV:
ctx = crypto_tfm_ctx(crypt->data.tfm);
dma_pool_free(ctx_pool, crypt->regist_ptr,
- crypt->regist_buf->phys_addr);
+ crypt->regist_buf->phys_addr);
dma_pool_free(buffer_pool, crypt->regist_buf, crypt->src_buf);
if (atomic_dec_and_test(&ctx->configuring))
complete(&ctx->completion);
break;
case CTL_FLAG_GEN_REVAES:
ctx = crypto_tfm_ctx(crypt->data.tfm);
- *(u32*)ctx->decrypt.npe_ctx &= cpu_to_be32(~CIPH_ENCR);
+ *(u32 *)ctx->decrypt.npe_ctx &= cpu_to_be32(~CIPH_ENCR);
if (atomic_dec_and_test(&ctx->configuring))
complete(&ctx->completion);
break;
@@ -418,8 +436,8 @@ static void crypto_done_action(unsigned long arg)
{
int i;
- for(i=0; i<4; i++) {
- dma_addr_t phys = qmgr_get_entry(RECV_QID);
+ for (i = 0; i < 4; i++) {
+ dma_addr_t phys = qmgr_get_entry(recv_qid);
if (!phys)
return;
one_packet(phys);
@@ -429,15 +447,52 @@ static void crypto_done_action(unsigned long arg)
static int init_ixp_crypto(struct device *dev)
{
- int ret = -ENODEV;
+ struct device_node *np = dev->of_node;
u32 msg[2] = { 0, 0 };
+ int ret = -ENODEV;
+ u32 npe_id;
- if (! ( ~(*IXP4XX_EXP_CFG2) & (IXP4XX_FEATURE_HASH |
- IXP4XX_FEATURE_AES | IXP4XX_FEATURE_DES))) {
- printk(KERN_ERR "ixp_crypto: No HW crypto available\n");
- return ret;
+ dev_info(dev, "probing...\n");
+
+ /* Locate the NPE and queue manager to use from device tree */
+ if (IS_ENABLED(CONFIG_OF) && np) {
+ struct of_phandle_args queue_spec;
+ struct of_phandle_args npe_spec;
+
+ ret = of_parse_phandle_with_fixed_args(np, "intel,npe-handle",
+ 1, 0, &npe_spec);
+ if (ret) {
+ dev_err(dev, "no NPE engine specified\n");
+ return -ENODEV;
+ }
+ npe_id = npe_spec.args[0];
+
+ ret = of_parse_phandle_with_fixed_args(np, "queue-rx", 1, 0,
+ &queue_spec);
+ if (ret) {
+ dev_err(dev, "no rx queue phandle\n");
+ return -ENODEV;
+ }
+ recv_qid = queue_spec.args[0];
+
+ ret = of_parse_phandle_with_fixed_args(np, "queue-txready", 1, 0,
+ &queue_spec);
+ if (ret) {
+ dev_err(dev, "no txready queue phandle\n");
+ return -ENODEV;
+ }
+ send_qid = queue_spec.args[0];
+ } else {
+ /*
+ * Hardcoded engine when using platform data, this goes away
+ * when we switch to using DT only.
+ */
+ npe_id = 2;
+ send_qid = 29;
+ recv_qid = 30;
}
- npe_c = npe_request(NPE_ID);
+
+ npe_c = npe_request(npe_id);
if (!npe_c)
return ret;
@@ -455,10 +510,9 @@ static int init_ixp_crypto(struct device *dev)
goto npe_error;
}
- switch ((msg[1]>>16) & 0xff) {
+ switch ((msg[1] >> 16) & 0xff) {
case 3:
- printk(KERN_WARNING "Firmware of %s lacks AES support\n",
- npe_name(npe_c));
+ dev_warn(dev, "Firmware of %s lacks AES support\n", npe_name(npe_c));
support_aes = 0;
break;
case 4:
@@ -466,8 +520,7 @@ static int init_ixp_crypto(struct device *dev)
support_aes = 1;
break;
default:
- printk(KERN_ERR "Firmware of %s lacks crypto support\n",
- npe_name(npe_c));
+ dev_err(dev, "Firmware of %s lacks crypto support\n", npe_name(npe_c));
ret = -ENODEV;
goto npe_release;
}
@@ -475,35 +528,34 @@ static int init_ixp_crypto(struct device *dev)
* so assure it is large enough
*/
BUILD_BUG_ON(SHA1_DIGEST_SIZE > sizeof(struct buffer_desc));
- buffer_pool = dma_pool_create("buffer", dev,
- sizeof(struct buffer_desc), 32, 0);
+ buffer_pool = dma_pool_create("buffer", dev, sizeof(struct buffer_desc),
+ 32, 0);
ret = -ENOMEM;
- if (!buffer_pool) {
+ if (!buffer_pool)
goto err;
- }
- ctx_pool = dma_pool_create("context", dev,
- NPE_CTX_LEN, 16, 0);
- if (!ctx_pool) {
+
+ ctx_pool = dma_pool_create("context", dev, NPE_CTX_LEN, 16, 0);
+ if (!ctx_pool)
goto err;
- }
- ret = qmgr_request_queue(SEND_QID, NPE_QLEN_TOTAL, 0, 0,
+
+ ret = qmgr_request_queue(send_qid, NPE_QLEN_TOTAL, 0, 0,
"ixp_crypto:out", NULL);
if (ret)
goto err;
- ret = qmgr_request_queue(RECV_QID, NPE_QLEN, 0, 0,
+ ret = qmgr_request_queue(recv_qid, NPE_QLEN, 0, 0,
"ixp_crypto:in", NULL);
if (ret) {
- qmgr_release_queue(SEND_QID);
+ qmgr_release_queue(send_qid);
goto err;
}
- qmgr_set_irq(RECV_QID, QUEUE_IRQ_SRC_NOT_EMPTY, irqhandler, NULL);
+ qmgr_set_irq(recv_qid, QUEUE_IRQ_SRC_NOT_EMPTY, irqhandler, NULL);
tasklet_init(&crypto_done_tasklet, crypto_done_action, 0);
- qmgr_enable_irq(RECV_QID);
+ qmgr_enable_irq(recv_qid);
return 0;
npe_error:
- printk(KERN_ERR "%s not responding\n", npe_name(npe_c));
+ dev_err(dev, "%s not responding\n", npe_name(npe_c));
ret = -EIO;
err:
dma_pool_destroy(ctx_pool);
@@ -515,22 +567,20 @@ npe_release:
static void release_ixp_crypto(struct device *dev)
{
- qmgr_disable_irq(RECV_QID);
+ qmgr_disable_irq(recv_qid);
tasklet_kill(&crypto_done_tasklet);
- qmgr_release_queue(SEND_QID);
- qmgr_release_queue(RECV_QID);
+ qmgr_release_queue(send_qid);
+ qmgr_release_queue(recv_qid);
dma_pool_destroy(ctx_pool);
dma_pool_destroy(buffer_pool);
npe_release(npe_c);
- if (crypt_virt) {
- dma_free_coherent(dev,
- NPE_QLEN * sizeof(struct crypt_ctl),
- crypt_virt, crypt_phys);
- }
+ if (crypt_virt)
+ dma_free_coherent(dev, NPE_QLEN * sizeof(struct crypt_ctl),
+ crypt_virt, crypt_phys);
}
static void reset_sa_dir(struct ix_sa_dir *dir)
@@ -543,9 +593,9 @@ static void reset_sa_dir(struct ix_sa_dir *dir)
static int init_sa_dir(struct ix_sa_dir *dir)
{
dir->npe_ctx = dma_pool_alloc(ctx_pool, GFP_KERNEL, &dir->npe_ctx_phys);
- if (!dir->npe_ctx) {
+ if (!dir->npe_ctx)
return -ENOMEM;
- }
+
reset_sa_dir(dir);
return 0;
}
@@ -566,15 +616,31 @@ static int init_tfm(struct crypto_tfm *tfm)
if (ret)
return ret;
ret = init_sa_dir(&ctx->decrypt);
- if (ret) {
+ if (ret)
free_sa_dir(&ctx->encrypt);
- }
+
return ret;
}
+/*
+ * init_tfm_ablk() - per-transform setup for the skcipher algorithms.
+ * @tfm: transform being instantiated.
+ *
+ * Allocates a fallback skcipher under the same algorithm name (with
+ * CRYPTO_ALG_NEED_FALLBACK in the mask), sizes the request context as
+ * struct ablk_ctx plus the fallback's own request context, and then runs the
+ * common init_tfm() setup.
+ *
+ * Return: 0 on success, the PTR_ERR() of a failed fallback allocation, or
+ * the error from init_tfm(). On the latter the fallback is released again.
+ */
static int init_tfm_ablk(struct crypto_skcipher *tfm)
{
-	crypto_skcipher_set_reqsize(tfm, sizeof(struct ablk_ctx));
+	struct crypto_tfm *ctfm = crypto_skcipher_tfm(tfm);
+	struct ixp_ctx *ctx = crypto_tfm_ctx(ctfm);
+	const char *name = crypto_tfm_alg_name(ctfm);
+	int ret;
+
+	ctx->fallback_tfm = crypto_alloc_skcipher(name, 0, CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(ctx->fallback_tfm)) {
+		pr_err("ERROR: Cannot allocate fallback for %s %ld\n",
+			name, PTR_ERR(ctx->fallback_tfm));
+		return PTR_ERR(ctx->fallback_tfm);
+	}
+
+	pr_info("Fallback for %s is %s\n",
+		 crypto_tfm_alg_driver_name(&tfm->base),
+		 crypto_tfm_alg_driver_name(crypto_skcipher_tfm(ctx->fallback_tfm))
+		 );
+
+	crypto_skcipher_set_reqsize(tfm, sizeof(struct ablk_ctx) + crypto_skcipher_reqsize(ctx->fallback_tfm));
-	return init_tfm(crypto_skcipher_tfm(tfm));
+
+	ret = init_tfm(ctfm);
+	if (ret) {
+		/*
+		 * The only other release of the fallback is exit_tfm_ablk().
+		 * NOTE(review): the crypto core is understood not to call the
+		 * exit hook after a failed init - confirm. The pointer is
+		 * cleared so a later free cannot touch it twice either way.
+		 */
+		crypto_free_skcipher(ctx->fallback_tfm);
+		ctx->fallback_tfm = NULL;
+	}
+
+	return ret;
}
@@ -587,12 +653,17 @@ static int init_tfm_aead(struct crypto_aead *tfm)
static void exit_tfm(struct crypto_tfm *tfm)
{
struct ixp_ctx *ctx = crypto_tfm_ctx(tfm);
+
free_sa_dir(&ctx->encrypt);
free_sa_dir(&ctx->decrypt);
}
static void exit_tfm_ablk(struct crypto_skcipher *tfm)
{
+ struct crypto_tfm *ctfm = crypto_skcipher_tfm(tfm);
+ struct ixp_ctx *ctx = crypto_tfm_ctx(ctfm);
+
+ crypto_free_skcipher(ctx->fallback_tfm);
exit_tfm(crypto_skcipher_tfm(tfm));
}
@@ -602,7 +673,8 @@ static void exit_tfm_aead(struct crypto_aead *tfm)
}
static int register_chain_var(struct crypto_tfm *tfm, u8 xpad, u32 target,
- int init_len, u32 ctx_addr, const u8 *key, int key_len)
+ int init_len, u32 ctx_addr, const u8 *key,
+ int key_len)
{
struct ixp_ctx *ctx = crypto_tfm_ctx(tfm);
struct crypt_ctl *crypt;
@@ -629,9 +701,8 @@ static int register_chain_var(struct crypto_tfm *tfm, u8 xpad, u32 target,
memcpy(pad, key, key_len);
memset(pad + key_len, 0, HMAC_PAD_BLOCKLEN - key_len);
- for (i = 0; i < HMAC_PAD_BLOCKLEN; i++) {
+ for (i = 0; i < HMAC_PAD_BLOCKLEN; i++)
pad[i] ^= xpad;
- }
crypt->data.tfm = tfm;
crypt->regist_ptr = pad;
@@ -652,13 +723,13 @@ static int register_chain_var(struct crypto_tfm *tfm, u8 xpad, u32 target,
buf->phys_addr = pad_phys;
atomic_inc(&ctx->configuring);
- qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt));
- BUG_ON(qmgr_stat_overflow(SEND_QID));
+ qmgr_put_entry(send_qid, crypt_virt2phys(crypt));
+ BUG_ON(qmgr_stat_overflow(send_qid));
return 0;
}
-static int setup_auth(struct crypto_tfm *tfm, int encrypt, unsigned authsize,
- const u8 *key, int key_len, unsigned digest_len)
+static int setup_auth(struct crypto_tfm *tfm, int encrypt, unsigned int authsize,
+ const u8 *key, int key_len, unsigned int digest_len)
{
u32 itarget, otarget, npe_ctx_addr;
unsigned char *cinfo;
@@ -673,11 +744,11 @@ static int setup_auth(struct crypto_tfm *tfm, int encrypt, unsigned authsize,
algo = ix_hash(tfm);
/* write cfg word to cryptinfo */
- cfgword = algo->cfgword | ( authsize << 6); /* (authsize/4) << 8 */
+ cfgword = algo->cfgword | (authsize << 6); /* (authsize/4) << 8 */
#ifndef __ARMEB__
cfgword ^= 0xAA000000; /* change the "byte swap" flags */
#endif
- *(u32*)cinfo = cpu_to_be32(cfgword);
+ *(u32 *)cinfo = cpu_to_be32(cfgword);
cinfo += sizeof(cfgword);
/* write ICV to cryptinfo */
@@ -697,11 +768,11 @@ static int setup_auth(struct crypto_tfm *tfm, int encrypt, unsigned authsize,
dir->npe_mode |= NPE_OP_HASH_VERIFY;
ret = register_chain_var(tfm, HMAC_OPAD_VALUE, otarget,
- init_len, npe_ctx_addr, key, key_len);
+ init_len, npe_ctx_addr, key, key_len);
if (ret)
return ret;
return register_chain_var(tfm, HMAC_IPAD_VALUE, itarget,
- init_len, npe_ctx_addr, key, key_len);
+ init_len, npe_ctx_addr, key, key_len);
}
static int gen_rev_aes_key(struct crypto_tfm *tfm)
@@ -711,10 +782,10 @@ static int gen_rev_aes_key(struct crypto_tfm *tfm)
struct ix_sa_dir *dir = &ctx->decrypt;
crypt = get_crypt_desc_emerg();
- if (!crypt) {
+ if (!crypt)
return -EAGAIN;
- }
- *(u32*)dir->npe_ctx |= cpu_to_be32(CIPH_ENCR);
+
+ *(u32 *)dir->npe_ctx |= cpu_to_be32(CIPH_ENCR);
crypt->data.tfm = tfm;
crypt->crypt_offs = 0;
@@ -727,13 +798,13 @@ static int gen_rev_aes_key(struct crypto_tfm *tfm)
crypt->ctl_flags |= CTL_FLAG_GEN_REVAES;
atomic_inc(&ctx->configuring);
- qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt));
- BUG_ON(qmgr_stat_overflow(SEND_QID));
+ qmgr_put_entry(send_qid, crypt_virt2phys(crypt));
+ BUG_ON(qmgr_stat_overflow(send_qid));
return 0;
}
-static int setup_cipher(struct crypto_tfm *tfm, int encrypt,
- const u8 *key, int key_len)
+static int setup_cipher(struct crypto_tfm *tfm, int encrypt, const u8 *key,
+ int key_len)
{
u8 *cinfo;
u32 cipher_cfg;
@@ -753,9 +824,15 @@ static int setup_cipher(struct crypto_tfm *tfm, int encrypt,
}
if (cipher_cfg & MOD_AES) {
switch (key_len) {
- case 16: keylen_cfg = MOD_AES128; break;
- case 24: keylen_cfg = MOD_AES192; break;
- case 32: keylen_cfg = MOD_AES256; break;
+ case 16:
+ keylen_cfg = MOD_AES128;
+ break;
+ case 24:
+ keylen_cfg = MOD_AES192;
+ break;
+ case 32:
+ keylen_cfg = MOD_AES256;
+ break;
default:
return -EINVAL;
}
@@ -766,31 +843,31 @@ static int setup_cipher(struct crypto_tfm *tfm, int encrypt,
return err;
}
/* write cfg word to cryptinfo */
- *(u32*)cinfo = cpu_to_be32(cipher_cfg);
+ *(u32 *)cinfo = cpu_to_be32(cipher_cfg);
cinfo += sizeof(cipher_cfg);
/* write cipher key to cryptinfo */
memcpy(cinfo, key, key_len);
/* NPE wants keylen set to DES3_EDE_KEY_SIZE even for single DES */
if (key_len < DES3_EDE_KEY_SIZE && !(cipher_cfg & MOD_AES)) {
- memset(cinfo + key_len, 0, DES3_EDE_KEY_SIZE -key_len);
+ memset(cinfo + key_len, 0, DES3_EDE_KEY_SIZE - key_len);
key_len = DES3_EDE_KEY_SIZE;
}
dir->npe_ctx_idx = sizeof(cipher_cfg) + key_len;
dir->npe_mode |= NPE_OP_CRYPT_ENABLE;
- if ((cipher_cfg & MOD_AES) && !encrypt) {
+ if ((cipher_cfg & MOD_AES) && !encrypt)
return gen_rev_aes_key(tfm);
- }
+
return 0;
}
static struct buffer_desc *chainup_buffers(struct device *dev,
- struct scatterlist *sg, unsigned nbytes,
+ struct scatterlist *sg, unsigned int nbytes,
struct buffer_desc *buf, gfp_t flags,
enum dma_data_direction dir)
{
for (; nbytes > 0; sg = sg_next(sg)) {
- unsigned len = min(nbytes, sg->length);
+ unsigned int len = min(nbytes, sg->length);
struct buffer_desc *next_buf;
dma_addr_t next_buf_phys;
void *ptr;
@@ -817,7 +894,7 @@ static struct buffer_desc *chainup_buffers(struct device *dev,
}
static int ablk_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int key_len)
+ unsigned int key_len)
{
struct ixp_ctx *ctx = crypto_skcipher_ctx(tfm);
int ret;
@@ -838,7 +915,12 @@ static int ablk_setkey(struct crypto_skcipher *tfm, const u8 *key,
out:
if (!atomic_dec_and_test(&ctx->configuring))
wait_for_completion(&ctx->completion);
- return ret;
+ if (ret)
+ return ret;
+ crypto_skcipher_clear_flags(ctx->fallback_tfm, CRYPTO_TFM_REQ_MASK);
+ crypto_skcipher_set_flags(ctx->fallback_tfm, tfm->base.crt_flags & CRYPTO_TFM_REQ_MASK);
+
+ return crypto_skcipher_setkey(ctx->fallback_tfm, key, key_len);
}
static int ablk_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
@@ -849,7 +931,7 @@ static int ablk_des3_setkey(struct crypto_skcipher *tfm, const u8 *key,
}
static int ablk_rfc3686_setkey(struct crypto_skcipher *tfm, const u8 *key,
- unsigned int key_len)
+ unsigned int key_len)
{
struct ixp_ctx *ctx = crypto_skcipher_ctx(tfm);
@@ -858,17 +940,36 @@ static int ablk_rfc3686_setkey(struct crypto_skcipher *tfm, const u8 *key,
return -EINVAL;
memcpy(ctx->nonce, key + (key_len - CTR_RFC3686_NONCE_SIZE),
- CTR_RFC3686_NONCE_SIZE);
+ CTR_RFC3686_NONCE_SIZE);
key_len -= CTR_RFC3686_NONCE_SIZE;
return ablk_setkey(tfm, key, key_len);
}
+/*
+ * ixp4xx_cipher_fallback() - run a cipher request on the fallback transform.
+ * @areq:    the request as submitted by the caller.
+ * @encrypt: non-zero to encrypt, zero to decrypt.
+ *
+ * Uses the fallback_req embedded in the request's ablk_ctx as a sub-request
+ * bound to the context's fallback_tfm. The sub-request carries over the
+ * caller's flags, completion callback and callback data, and the same
+ * src/dst scatterlists, length and IV.
+ *
+ * Return: whatever the fallback's encrypt or decrypt call returns.
+ */
+static int ixp4xx_cipher_fallback(struct skcipher_request *areq, int encrypt)
+{
+	struct ablk_ctx *rctx = skcipher_request_ctx(areq);
+	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(areq);
+	struct ixp_ctx *ctx = crypto_skcipher_ctx(tfm);
+	struct skcipher_request *subreq = &rctx->fallback_req;
+
+	skcipher_request_set_tfm(subreq, ctx->fallback_tfm);
+	skcipher_request_set_callback(subreq, areq->base.flags,
+				      areq->base.complete, areq->base.data);
+	skcipher_request_set_crypt(subreq, areq->src, areq->dst,
+				   areq->cryptlen, areq->iv);
+
+	if (encrypt)
+		return crypto_skcipher_encrypt(subreq);
+
+	return crypto_skcipher_decrypt(subreq);
+}
+
static int ablk_perform(struct skcipher_request *req, int encrypt)
{
struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
struct ixp_ctx *ctx = crypto_skcipher_ctx(tfm);
- unsigned ivsize = crypto_skcipher_ivsize(tfm);
+ unsigned int ivsize = crypto_skcipher_ivsize(tfm);
struct ix_sa_dir *dir;
struct crypt_ctl *crypt;
unsigned int nbytes = req->cryptlen;
@@ -876,15 +977,20 @@ static int ablk_perform(struct skcipher_request *req, int encrypt)
struct ablk_ctx *req_ctx = skcipher_request_ctx(req);
struct buffer_desc src_hook;
struct device *dev = &pdev->dev;
+ unsigned int offset;
gfp_t flags = req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP ?
GFP_KERNEL : GFP_ATOMIC;
- if (qmgr_stat_full(SEND_QID))
+ if (sg_nents(req->src) > 1 || sg_nents(req->dst) > 1)
+ return ixp4xx_cipher_fallback(req, encrypt);
+
+ if (qmgr_stat_full(send_qid))
return -EAGAIN;
if (atomic_read(&ctx->configuring))
return -EAGAIN;
dir = encrypt ? &ctx->encrypt : &ctx->decrypt;
+ req_ctx->encrypt = encrypt;
crypt = get_crypt_desc();
if (!crypt)
@@ -900,14 +1006,19 @@ static int ablk_perform(struct skcipher_request *req, int encrypt)
BUG_ON(ivsize && !req->iv);
memcpy(crypt->iv, req->iv, ivsize);
+ if (ivsize > 0 && !encrypt) {
+ offset = req->cryptlen - ivsize;
+ scatterwalk_map_and_copy(req_ctx->iv, req->src, offset, ivsize, 0);
+ }
if (req->src != req->dst) {
struct buffer_desc dst_hook;
+
crypt->mode |= NPE_OP_NOT_IN_PLACE;
/* This was never tested by Intel
* for more than one dst buffer, I think. */
req_ctx->dst = NULL;
if (!chainup_buffers(dev, req->dst, nbytes, &dst_hook,
- flags, DMA_FROM_DEVICE))
+ flags, DMA_FROM_DEVICE))
goto free_buf_dest;
src_direction = DMA_TO_DEVICE;
req_ctx->dst = dst_hook.next;
@@ -916,23 +1027,23 @@ static int ablk_perform(struct skcipher_request *req, int encrypt)
req_ctx->dst = NULL;
}
req_ctx->src = NULL;
- if (!chainup_buffers(dev, req->src, nbytes, &src_hook,
- flags, src_direction))
+ if (!chainup_buffers(dev, req->src, nbytes, &src_hook, flags,
+ src_direction))
goto free_buf_src;
req_ctx->src = src_hook.next;
crypt->src_buf = src_hook.phys_next;
crypt->ctl_flags |= CTL_FLAG_PERFORM_ABLK;
- qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt));
- BUG_ON(qmgr_stat_overflow(SEND_QID));
+ qmgr_put_entry(send_qid, crypt_virt2phys(crypt));
+ BUG_ON(qmgr_stat_overflow(send_qid));
return -EINPROGRESS;
free_buf_src:
free_buf_chain(dev, req_ctx->src, crypt->src_buf);
free_buf_dest:
- if (req->src != req->dst) {
+ if (req->src != req->dst)
free_buf_chain(dev, req_ctx->dst, crypt->dst_buf);
- }
+
crypt->ctl_flags = CTL_FLAG_UNUSED;
return -ENOMEM;
}
@@ -956,7 +1067,7 @@ static int ablk_rfc3686_crypt(struct skcipher_request *req)
int ret;
/* set up counter block */
- memcpy(iv, ctx->nonce, CTR_RFC3686_NONCE_SIZE);
+ memcpy(iv, ctx->nonce, CTR_RFC3686_NONCE_SIZE);
memcpy(iv + CTR_RFC3686_NONCE_SIZE, info, CTR_RFC3686_IV_SIZE);
/* initialize counter portion of counter block */
@@ -970,12 +1081,12 @@ static int ablk_rfc3686_crypt(struct skcipher_request *req)
}
static int aead_perform(struct aead_request *req, int encrypt,
- int cryptoffset, int eff_cryptlen, u8 *iv)
+ int cryptoffset, int eff_cryptlen, u8 *iv)
{
struct crypto_aead *tfm = crypto_aead_reqtfm(req);
struct ixp_ctx *ctx = crypto_aead_ctx(tfm);
- unsigned ivsize = crypto_aead_ivsize(tfm);
- unsigned authsize = crypto_aead_authsize(tfm);
+ unsigned int ivsize = crypto_aead_ivsize(tfm);
+ unsigned int authsize = crypto_aead_authsize(tfm);
struct ix_sa_dir *dir;
struct crypt_ctl *crypt;
unsigned int cryptlen;
@@ -987,7 +1098,7 @@ static int aead_perform(struct aead_request *req, int encrypt,
enum dma_data_direction src_direction = DMA_BIDIRECTIONAL;
unsigned int lastlen;
- if (qmgr_stat_full(SEND_QID))
+ if (qmgr_stat_full(send_qid))
return -EAGAIN;
if (atomic_read(&ctx->configuring))
return -EAGAIN;
@@ -998,7 +1109,7 @@ static int aead_perform(struct aead_request *req, int encrypt,
} else {
dir = &ctx->decrypt;
/* req->cryptlen includes the authsize when decrypting */
- cryptlen = req->cryptlen -authsize;
+ cryptlen = req->cryptlen - authsize;
eff_cryptlen -= authsize;
}
crypt = get_crypt_desc();
@@ -1058,12 +1169,12 @@ static int aead_perform(struct aead_request *req, int encrypt,
/* The 12 hmac bytes are scattered,
* we need to copy them into a safe buffer */
req_ctx->hmac_virt = dma_pool_alloc(buffer_pool, flags,
- &crypt->icv_rev_aes);
+ &crypt->icv_rev_aes);
if (unlikely(!req_ctx->hmac_virt))
goto free_buf_dst;
if (!encrypt) {
scatterwalk_map_and_copy(req_ctx->hmac_virt,
- req->src, cryptlen, authsize, 0);
+ req->src, cryptlen, authsize, 0);
}
req_ctx->encrypt = encrypt;
} else {
@@ -1071,8 +1182,8 @@ static int aead_perform(struct aead_request *req, int encrypt,
}
crypt->ctl_flags |= CTL_FLAG_PERFORM_AEAD;
- qmgr_put_entry(SEND_QID, crypt_virt2phys(crypt));
- BUG_ON(qmgr_stat_overflow(SEND_QID));
+ qmgr_put_entry(send_qid, crypt_virt2phys(crypt));
+ BUG_ON(qmgr_stat_overflow(send_qid));
return -EINPROGRESS;
free_buf_dst:
@@ -1086,7 +1197,7 @@ free_buf_src:
static int aead_setup(struct crypto_aead *tfm, unsigned int authsize)
{
struct ixp_ctx *ctx = crypto_aead_ctx(tfm);
- unsigned digest_len = crypto_aead_maxauthsize(tfm);
+ unsigned int digest_len = crypto_aead_maxauthsize(tfm);
int ret;
if (!ctx->enckey_len && !ctx->authkey_len)
@@ -1104,11 +1215,11 @@ static int aead_setup(struct crypto_aead *tfm, unsigned int authsize)
if (ret)
goto out;
ret = setup_auth(&tfm->base, 0, authsize, ctx->authkey,
- ctx->authkey_len, digest_len);
+ ctx->authkey_len, digest_len);
if (ret)
goto out;
ret = setup_auth(&tfm->base, 1, authsize, ctx->authkey,
- ctx->authkey_len, digest_len);
+ ctx->authkey_len, digest_len);
out:
if (!atomic_dec_and_test(&ctx->configuring))
wait_for_completion(&ctx->completion);
@@ -1119,13 +1230,13 @@ static int aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
{
int max = crypto_aead_maxauthsize(tfm) >> 2;
- if ((authsize>>2) < 1 || (authsize>>2) > max || (authsize & 3))
+ if ((authsize >> 2) < 1 || (authsize >> 2) > max || (authsize & 3))
return -EINVAL;
return aead_setup(tfm, authsize);
}
static int aead_setkey(struct crypto_aead *tfm, const u8 *key,
- unsigned int keylen)
+ unsigned int keylen)
{
struct ixp_ctx *ctx = crypto_aead_ctx(tfm);
struct crypto_authenc_keys keys;
@@ -1364,43 +1475,33 @@ static struct ixp_aead_alg ixp4xx_aeads[] = {
#define IXP_POSTFIX "-ixp4xx"
-static const struct platform_device_info ixp_dev_info __initdata = {
- .name = DRIVER_NAME,
- .id = 0,
- .dma_mask = DMA_BIT_MASK(32),
-};
-
-static int __init ixp_module_init(void)
+static int ixp_crypto_probe(struct platform_device *_pdev)
{
+ struct device *dev = &_pdev->dev;
int num = ARRAY_SIZE(ixp4xx_algos);
int i, err;
- pdev = platform_device_register_full(&ixp_dev_info);
- if (IS_ERR(pdev))
- return PTR_ERR(pdev);
+ pdev = _pdev;
- err = init_ixp_crypto(&pdev->dev);
- if (err) {
- platform_device_unregister(pdev);
+ err = init_ixp_crypto(dev);
+ if (err)
return err;
- }
- for (i=0; i< num; i++) {
+
+ for (i = 0; i < num; i++) {
struct skcipher_alg *cra = &ixp4xx_algos[i].crypto;
if (snprintf(cra->base.cra_driver_name, CRYPTO_MAX_ALG_NAME,
- "%s"IXP_POSTFIX, cra->base.cra_name) >=
- CRYPTO_MAX_ALG_NAME)
- {
+ "%s"IXP_POSTFIX, cra->base.cra_name) >=
+ CRYPTO_MAX_ALG_NAME)
continue;
- }
- if (!support_aes && (ixp4xx_algos[i].cfg_enc & MOD_AES)) {
+ if (!support_aes && (ixp4xx_algos[i].cfg_enc & MOD_AES))
continue;
- }
/* block ciphers */
cra->base.cra_flags = CRYPTO_ALG_KERN_DRIVER_ONLY |
CRYPTO_ALG_ASYNC |
- CRYPTO_ALG_ALLOCATES_MEMORY;
+ CRYPTO_ALG_ALLOCATES_MEMORY |
+ CRYPTO_ALG_NEED_FALLBACK;
if (!cra->setkey)
cra->setkey = ablk_setkey;
if (!cra->encrypt)
@@ -1415,7 +1516,7 @@ static int __init ixp_module_init(void)
cra->base.cra_alignmask = 3;
cra->base.cra_priority = 300;
if (crypto_register_skcipher(cra))
- printk(KERN_ERR "Failed to register '%s'\n",
+ dev_err(&pdev->dev, "Failed to register '%s'\n",
cra->base.cra_name);
else
ixp4xx_algos[i].registered = 1;
@@ -1448,7 +1549,7 @@ static int __init ixp_module_init(void)
cra->base.cra_priority = 300;
if (crypto_register_aead(cra))
- printk(KERN_ERR "Failed to register '%s'\n",
+ dev_err(&pdev->dev, "Failed to register '%s'\n",
cra->base.cra_driver_name);
else
ixp4xx_aeads[i].registered = 1;
@@ -1456,7 +1557,7 @@ static int __init ixp_module_init(void)
return 0;
}
-static void __exit ixp_module_exit(void)
+static int ixp_crypto_remove(struct platform_device *pdev)
{
int num = ARRAY_SIZE(ixp4xx_algos);
int i;
@@ -1466,16 +1567,30 @@ static void __exit ixp_module_exit(void)
crypto_unregister_aead(&ixp4xx_aeads[i].crypto);
}
- for (i=0; i< num; i++) {
+ for (i = 0; i < num; i++) {
if (ixp4xx_algos[i].registered)
crypto_unregister_skcipher(&ixp4xx_algos[i].crypto);
}
release_ixp_crypto(&pdev->dev);
- platform_device_unregister(pdev);
+
+ return 0;
}
+static const struct of_device_id ixp4xx_crypto_of_match[] = {
+ {
+ .compatible = "intel,ixp4xx-crypto",
+ },
+ {},
+};
-module_init(ixp_module_init);
-module_exit(ixp_module_exit);
+static struct platform_driver ixp_crypto_driver = {
+ .probe = ixp_crypto_probe,
+ .remove = ixp_crypto_remove,
+ .driver = {
+ .name = "ixp4xx_crypto",
+ .of_match_table = ixp4xx_crypto_of_match,
+ },
+};
+module_platform_driver(ixp_crypto_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Christian Hohnstaedt <chohnstaedt@innominate.com>");
diff --git a/drivers/crypto/marvell/cesa/cesa.h b/drivers/crypto/marvell/cesa/cesa.h
index c1007f2ba79c..d215a6bed6bc 100644
--- a/drivers/crypto/marvell/cesa/cesa.h
+++ b/drivers/crypto/marvell/cesa/cesa.h
@@ -66,7 +66,7 @@
#define CESA_SA_ST_ACT_1 BIT(1)
/*
- * CESA_SA_FPGA_INT_STATUS looks like a FPGA leftover and is documented only
+ * CESA_SA_FPGA_INT_STATUS looks like an FPGA leftover and is documented only
* in Errata 4.12. It looks like that it was part of an IRQ-controller in FPGA
* and someone forgot to remove it while switching to the core and moving to
* CESA_SA_INT_STATUS.
diff --git a/drivers/crypto/marvell/octeontx2/Makefile b/drivers/crypto/marvell/octeontx2/Makefile
index b9c6201019e0..c242d22008c3 100644
--- a/drivers/crypto/marvell/octeontx2/Makefile
+++ b/drivers/crypto/marvell/octeontx2/Makefile
@@ -1,10 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += octeontx2-cpt.o octeontx2-cptvf.o
+obj-$(CONFIG_CRYPTO_DEV_OCTEONTX2_CPT) += rvu_cptpf.o rvu_cptvf.o
-octeontx2-cpt-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \
- otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o
-octeontx2-cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o otx2_cptlf.o \
- otx2_cpt_mbox_common.o otx2_cptvf_reqmgr.o \
- otx2_cptvf_algs.o
+rvu_cptpf-objs := otx2_cptpf_main.o otx2_cptpf_mbox.o \
+ otx2_cpt_mbox_common.o otx2_cptpf_ucode.o otx2_cptlf.o \
+ cn10k_cpt.o
+rvu_cptvf-objs := otx2_cptvf_main.o otx2_cptvf_mbox.o otx2_cptlf.o \
+ otx2_cpt_mbox_common.o otx2_cptvf_reqmgr.o \
+ otx2_cptvf_algs.o cn10k_cpt.o
ccflags-y += -I$(srctree)/drivers/net/ethernet/marvell/octeontx2/af
diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.c b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c
new file mode 100644
index 000000000000..1499ef75b5c2
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2021 Marvell. */
+
+#include <linux/soc/marvell/octeontx2/asm.h>
+#include "otx2_cptpf.h"
+#include "otx2_cptvf.h"
+#include "otx2_cptlf.h"
+#include "cn10k_cpt.h"
+
+static struct cpt_hw_ops otx2_hw_ops = {
+ .send_cmd = otx2_cpt_send_cmd,
+ .cpt_get_compcode = otx2_cpt_get_compcode,
+ .cpt_get_uc_compcode = otx2_cpt_get_uc_compcode,
+};
+
+static struct cpt_hw_ops cn10k_hw_ops = {
+ .send_cmd = cn10k_cpt_send_cmd,
+ .cpt_get_compcode = cn10k_cpt_get_compcode,
+ .cpt_get_uc_compcode = cn10k_cpt_get_uc_compcode,
+};
+
+void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num,
+ struct otx2_cptlf_info *lf)
+{
+ void __iomem *lmtline = lf->lmtline;
+ u64 val = (lf->slot & 0x7FF);
+ u64 tar_addr = 0;
+
+ /* tar_addr<6:4> = Size of first LMTST - 1 in units of 128b. */
+ tar_addr |= (__force u64)lf->ioreg |
+ (((OTX2_CPT_INST_SIZE/16) - 1) & 0x7) << 4;
+ /*
+ * Make sure memory areas pointed to by CPT_INST_S
+ * are flushed before the instruction is sent to CPT
+ */
+ dma_wmb();
+
+ /* Copy CPT command to LMTLINE */
+ memcpy_toio(lmtline, cptinst, insts_num * OTX2_CPT_INST_SIZE);
+ cn10k_lmt_flush(val, tar_addr);
+}
+
+int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf)
+{
+ struct pci_dev *pdev = cptpf->pdev;
+ resource_size_t size;
+ u64 lmt_base;
+
+ if (!test_bit(CN10K_LMTST, &cptpf->cap_flag)) {
+ cptpf->lfs.ops = &otx2_hw_ops;
+ return 0;
+ }
+
+ cptpf->lfs.ops = &cn10k_hw_ops;
+ lmt_base = readq(cptpf->reg_base + RVU_PF_LMTLINE_ADDR);
+ if (!lmt_base) {
+ dev_err(&pdev->dev, "PF LMTLINE address not configured\n");
+ return -ENOMEM;
+ }
+ size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM);
+ size -= ((1 + cptpf->max_vfs) * MBOX_SIZE);
+ cptpf->lfs.lmt_base = devm_ioremap_wc(&pdev->dev, lmt_base, size);
+ if (!cptpf->lfs.lmt_base) {
+ dev_err(&pdev->dev,
+ "Mapping of PF LMTLINE address failed\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf)
+{
+ struct pci_dev *pdev = cptvf->pdev;
+ resource_size_t offset, size;
+
+ if (!test_bit(CN10K_LMTST, &cptvf->cap_flag)) {
+ cptvf->lfs.ops = &otx2_hw_ops;
+ return 0;
+ }
+
+ cptvf->lfs.ops = &cn10k_hw_ops;
+ offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM);
+ size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM);
+ /* Map VF LMTLINE region */
+ cptvf->lfs.lmt_base = devm_ioremap_wc(&pdev->dev, offset, size);
+ if (!cptvf->lfs.lmt_base) {
+ dev_err(&pdev->dev, "Unable to map BAR4\n");
+ return -ENOMEM;
+ }
+
+ return 0;
+}
diff --git a/drivers/crypto/marvell/octeontx2/cn10k_cpt.h b/drivers/crypto/marvell/octeontx2/cn10k_cpt.h
new file mode 100644
index 000000000000..c091392b47e0
--- /dev/null
+++ b/drivers/crypto/marvell/octeontx2/cn10k_cpt.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only
+ * Copyright (C) 2021 Marvell.
+ */
+#ifndef __CN10K_CPT_H
+#define __CN10K_CPT_H
+
+#include "otx2_cpt_common.h"
+#include "otx2_cptpf.h"
+#include "otx2_cptvf.h"
+
+static inline u8 cn10k_cpt_get_compcode(union otx2_cpt_res_s *result)
+{
+ return ((struct cn10k_cpt_res_s *)result)->compcode;
+}
+
+static inline u8 cn10k_cpt_get_uc_compcode(union otx2_cpt_res_s *result)
+{
+ return ((struct cn10k_cpt_res_s *)result)->uc_compcode;
+}
+
+static inline u8 otx2_cpt_get_compcode(union otx2_cpt_res_s *result)
+{
+ return ((struct cn9k_cpt_res_s *)result)->compcode;
+}
+
+static inline u8 otx2_cpt_get_uc_compcode(union otx2_cpt_res_s *result)
+{
+ return ((struct cn9k_cpt_res_s *)result)->uc_compcode;
+}
+
+void cn10k_cpt_send_cmd(union otx2_cpt_inst_s *cptinst, u32 insts_num,
+ struct otx2_cptlf_info *lf);
+int cn10k_cptpf_lmtst_init(struct otx2_cptpf_dev *cptpf);
+int cn10k_cptvf_lmtst_init(struct otx2_cptvf_dev *cptvf);
+
+#endif /* __CN10K_CPT_H */
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
index ecedd91a8d85..c5445b05f53c 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_common.h
@@ -25,6 +25,10 @@
#define OTX2_CPT_NAME_LENGTH 64
#define OTX2_CPT_DMA_MINALIGN 128
+/* HW capability flags */
+#define CN10K_MBOX 0
+#define CN10K_LMTST 1
+
#define BAD_OTX2_CPT_ENG_TYPE OTX2_CPT_MAX_ENG_TYPES
enum otx2_cpt_eng_type {
@@ -116,6 +120,25 @@ static inline u64 otx2_cpt_read64(void __iomem *reg_base, u64 blk, u64 slot,
OTX2_CPT_RVU_FUNC_ADDR_S(blk, slot, offs));
}
+static inline bool is_dev_otx2(struct pci_dev *pdev)
+{
+ if (pdev->device == OTX2_CPT_PCI_PF_DEVICE_ID ||
+ pdev->device == OTX2_CPT_PCI_VF_DEVICE_ID)
+ return true;
+
+ return false;
+}
+
+static inline void otx2_cpt_set_hw_caps(struct pci_dev *pdev,
+ unsigned long *cap_flag)
+{
+ if (!is_dev_otx2(pdev)) {
+ __set_bit(CN10K_MBOX, cap_flag);
+ __set_bit(CN10K_LMTST, cap_flag);
+ }
+}
+
+
int otx2_cpt_send_ready_msg(struct otx2_mbox *mbox, struct pci_dev *pdev);
int otx2_cpt_send_mbox_msg(struct otx2_mbox *mbox, struct pci_dev *pdev);
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h b/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h
index ecafc42f37a2..6f947978e4e8 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cpt_hw_types.h
@@ -10,6 +10,8 @@
/* Device IDs */
#define OTX2_CPT_PCI_PF_DEVICE_ID 0xA0FD
#define OTX2_CPT_PCI_VF_DEVICE_ID 0xA0FE
+#define CN10K_CPT_PCI_PF_DEVICE_ID 0xA0F2
+#define CN10K_CPT_PCI_VF_DEVICE_ID 0xA0F3
/* Mailbox interrupts offset */
#define OTX2_CPT_PF_MBOX_INT 6
@@ -25,6 +27,7 @@
*/
#define OTX2_CPT_VF_MSIX_VECTORS 1
#define OTX2_CPT_VF_INTR_MBOX_MASK BIT(0)
+#define CN10K_CPT_VF_MBOX_REGION (0xC0000)
/* CPT LF MSIX vectors */
#define OTX2_CPT_LF_MSIX_VECTORS 2
@@ -135,7 +138,7 @@ enum otx2_cpt_comp_e {
OTX2_CPT_COMP_E_FAULT = 0x02,
OTX2_CPT_COMP_E_HWERR = 0x04,
OTX2_CPT_COMP_E_INSTERR = 0x05,
- OTX2_CPT_COMP_E_LAST_ENTRY = 0x06
+ OTX2_CPT_COMP_E_WARN = 0x06
};
/*
@@ -266,13 +269,22 @@ union otx2_cpt_inst_s {
union otx2_cpt_res_s {
u64 u[2];
- struct {
+ struct cn9k_cpt_res_s {
u64 compcode:8;
u64 uc_compcode:8;
u64 doneint:1;
u64 reserved_17_63:47;
u64 reserved_64_127;
} s;
+
+ struct cn10k_cpt_res_s {
+ u64 compcode:7;
+ u64 doneint:1;
+ u64 uc_compcode:8;
+ u64 rlen:16;
+ u64 spi:32;
+ u64 esn;
+ } cn10k;
};
/*
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c
index 34aba1532761..c8350fcd60fa 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.c
@@ -379,9 +379,14 @@ int otx2_cptlf_init(struct otx2_cptlfs_info *lfs, u8 eng_grp_mask, int pri,
for (slot = 0; slot < lfs->lfs_num; slot++) {
lfs->lf[slot].lfs = lfs;
lfs->lf[slot].slot = slot;
- lfs->lf[slot].lmtline = lfs->reg_base +
- OTX2_CPT_RVU_FUNC_ADDR_S(BLKADDR_LMT, slot,
+ if (lfs->lmt_base)
+ lfs->lf[slot].lmtline = lfs->lmt_base +
+ (slot * LMTLINE_SIZE);
+ else
+ lfs->lf[slot].lmtline = lfs->reg_base +
+ OTX2_CPT_RVU_FUNC_ADDR_S(BLKADDR_LMT, slot,
OTX2_CPT_LMT_LF_LMTLINEX(0));
+
lfs->lf[slot].ioreg = lfs->reg_base +
OTX2_CPT_RVU_FUNC_ADDR_S(BLKADDR_CPT0, slot,
OTX2_CPT_LF_NQX(0));
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
index ab1678fc564d..b691b6c1d5c4 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptlf.h
@@ -84,12 +84,22 @@ struct otx2_cptlf_info {
struct otx2_cptlf_wqe *wqe; /* Tasklet work info */
};
+struct cpt_hw_ops {
+ void (*send_cmd)(union otx2_cpt_inst_s *cptinst, u32 insts_num,
+ struct otx2_cptlf_info *lf);
+ u8 (*cpt_get_compcode)(union otx2_cpt_res_s *result);
+ u8 (*cpt_get_uc_compcode)(union otx2_cpt_res_s *result);
+};
+
struct otx2_cptlfs_info {
/* Registers start address of VF/PF LFs are attached to */
void __iomem *reg_base;
+#define LMTLINE_SIZE 128
+ void __iomem *lmt_base;
struct pci_dev *pdev; /* Device LFs are attached to */
struct otx2_cptlf_info lf[OTX2_CPT_MAX_LFS_NUM];
struct otx2_mbox *mbox;
+ struct cpt_hw_ops *ops;
u8 are_lfs_attached; /* Whether CPT LFs are attached */
u8 lfs_num; /* Number of CPT LFs */
u8 kcrypto_eng_grp_num; /* Kernel crypto engine group number */
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h
index e19af1356f12..5ebba86c65d9 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf.h
@@ -47,6 +47,7 @@ struct otx2_cptpf_dev {
struct workqueue_struct *flr_wq;
struct cptpf_flr_work *flr_work;
+ unsigned long cap_flag;
u8 pf_id; /* RVU PF number */
u8 max_vfs; /* Maximum number of VFs supported by CPT */
u8 enabled_vfs; /* Number of enabled VFs */
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
index 58f47e3ab62e..146a55ac4b9b 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_main.c
@@ -6,10 +6,11 @@
#include "otx2_cpt_common.h"
#include "otx2_cptpf_ucode.h"
#include "otx2_cptpf.h"
+#include "cn10k_cpt.h"
#include "rvu_reg.h"
-#define OTX2_CPT_DRV_NAME "octeontx2-cpt"
-#define OTX2_CPT_DRV_STRING "Marvell OcteonTX2 CPT Physical Function Driver"
+#define OTX2_CPT_DRV_NAME "rvu_cptpf"
+#define OTX2_CPT_DRV_STRING "Marvell RVU CPT Physical Function Driver"
static void cptpf_enable_vfpf_mbox_intr(struct otx2_cptpf_dev *cptpf,
int num_vfs)
@@ -62,45 +63,66 @@ static void cptpf_disable_vfpf_mbox_intr(struct otx2_cptpf_dev *cptpf,
}
}
-static void cptpf_enable_vf_flr_intrs(struct otx2_cptpf_dev *cptpf)
+static void cptpf_enable_vf_flr_me_intrs(struct otx2_cptpf_dev *cptpf,
+ int num_vfs)
{
- /* Clear interrupt if any */
+ /* Clear FLR interrupt if any */
otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(0),
- ~0x0ULL);
- otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(1),
- ~0x0ULL);
+ INTR_MASK(num_vfs));
/* Enable VF FLR interrupts */
otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
- RVU_PF_VFFLR_INT_ENA_W1SX(0), ~0x0ULL);
+ RVU_PF_VFFLR_INT_ENA_W1SX(0), INTR_MASK(num_vfs));
+ /* Clear ME interrupt if any */
+ otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFME_INTX(0),
+ INTR_MASK(num_vfs));
+ /* Enable VF ME interrupts */
+ otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
+ RVU_PF_VFME_INT_ENA_W1SX(0), INTR_MASK(num_vfs));
+
+ if (num_vfs <= 64)
+ return;
+
+ otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(1),
+ INTR_MASK(num_vfs - 64));
+ otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
+ RVU_PF_VFFLR_INT_ENA_W1SX(1), INTR_MASK(num_vfs - 64));
+
+ otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFME_INTX(1),
+ INTR_MASK(num_vfs - 64));
otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
- RVU_PF_VFFLR_INT_ENA_W1SX(1), ~0x0ULL);
+ RVU_PF_VFME_INT_ENA_W1SX(1), INTR_MASK(num_vfs - 64));
}
-static void cptpf_disable_vf_flr_intrs(struct otx2_cptpf_dev *cptpf,
+static void cptpf_disable_vf_flr_me_intrs(struct otx2_cptpf_dev *cptpf,
int num_vfs)
{
int vector;
/* Disable VF FLR interrupts */
otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
- RVU_PF_VFFLR_INT_ENA_W1CX(0), ~0x0ULL);
+ RVU_PF_VFFLR_INT_ENA_W1CX(0), INTR_MASK(num_vfs));
+ vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFFLR0);
+ free_irq(vector, cptpf);
+
+ /* Disable VF ME interrupts */
otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
- RVU_PF_VFFLR_INT_ENA_W1CX(1), ~0x0ULL);
+ RVU_PF_VFME_INT_ENA_W1CX(0), INTR_MASK(num_vfs));
+ vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFME0);
+ free_irq(vector, cptpf);
- /* Clear interrupt if any */
- otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(0),
- ~0x0ULL);
- otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0, RVU_PF_VFFLR_INTX(1),
- ~0x0ULL);
+ if (num_vfs <= 64)
+ return;
- vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFFLR0);
+ otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
+ RVU_PF_VFFLR_INT_ENA_W1CX(1), INTR_MASK(num_vfs - 64));
+ vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFFLR1);
free_irq(vector, cptpf);
- if (num_vfs > 64) {
- vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFFLR1);
- free_irq(vector, cptpf);
- }
+ otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
+ RVU_PF_VFME_INT_ENA_W1CX(1), INTR_MASK(num_vfs - 64));
+ vector = pci_irq_vector(cptpf->pdev, RVU_PF_INT_VEC_VFME1);
+ free_irq(vector, cptpf);
}
static void cptpf_flr_wq_handler(struct work_struct *work)
@@ -172,11 +194,38 @@ static irqreturn_t cptpf_vf_flr_intr(int __always_unused irq, void *arg)
return IRQ_HANDLED;
}
+static irqreturn_t cptpf_vf_me_intr(int __always_unused irq, void *arg)
+{
+ struct otx2_cptpf_dev *cptpf = arg;
+ int reg, vf, num_reg = 1;
+ u64 intr;
+
+ if (cptpf->max_vfs > 64)
+ num_reg = 2;
+
+ for (reg = 0; reg < num_reg; reg++) {
+ intr = otx2_cpt_read64(cptpf->reg_base, BLKADDR_RVUM, 0,
+ RVU_PF_VFME_INTX(reg));
+ if (!intr)
+ continue;
+ for (vf = 0; vf < 64; vf++) {
+ if (!(intr & BIT_ULL(vf)))
+ continue;
+ otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
+ RVU_PF_VFTRPENDX(reg), BIT_ULL(vf));
+ /* Clear interrupt */
+ otx2_cpt_write64(cptpf->reg_base, BLKADDR_RVUM, 0,
+ RVU_PF_VFME_INTX(reg), BIT_ULL(vf));
+ }
+ }
+ return IRQ_HANDLED;
+}
+
static void cptpf_unregister_vfpf_intr(struct otx2_cptpf_dev *cptpf,
int num_vfs)
{
cptpf_disable_vfpf_mbox_intr(cptpf, num_vfs);
- cptpf_disable_vf_flr_intrs(cptpf, num_vfs);
+ cptpf_disable_vf_flr_me_intrs(cptpf, num_vfs);
}
static int cptpf_register_vfpf_intr(struct otx2_cptpf_dev *cptpf, int num_vfs)
@@ -202,6 +251,15 @@ static int cptpf_register_vfpf_intr(struct otx2_cptpf_dev *cptpf, int num_vfs)
"IRQ registration failed for VFFLR0 irq\n");
goto free_mbox0_irq;
}
+ vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFME0);
+ /* Register VF ME interrupt handler */
+ ret = request_irq(vector, cptpf_vf_me_intr, 0, "CPTPF ME0", cptpf);
+ if (ret) {
+ dev_err(dev,
+ "IRQ registration failed for PFVF mbox0 irq\n");
+ goto free_flr0_irq;
+ }
+
if (num_vfs > 64) {
vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFPF_MBOX1);
ret = request_irq(vector, otx2_cptpf_vfpf_mbox_intr, 0,
@@ -209,7 +267,7 @@ static int cptpf_register_vfpf_intr(struct otx2_cptpf_dev *cptpf, int num_vfs)
if (ret) {
dev_err(dev,
"IRQ registration failed for PFVF mbox1 irq\n");
- goto free_flr0_irq;
+ goto free_me0_irq;
}
vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFFLR1);
/* Register VF FLR interrupt handler */
@@ -220,15 +278,30 @@ static int cptpf_register_vfpf_intr(struct otx2_cptpf_dev *cptpf, int num_vfs)
"IRQ registration failed for VFFLR1 irq\n");
goto free_mbox1_irq;
}
+ vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFME1);
+ /* Register VF ME interrupt handler */
+ ret = request_irq(vector, cptpf_vf_me_intr, 0, "CPTPF ME1",
+ cptpf);
+ if (ret) {
+ dev_err(dev,
+ "IRQ registration failed for VFFLR1 irq\n");
+ goto free_flr1_irq;
+ }
}
cptpf_enable_vfpf_mbox_intr(cptpf, num_vfs);
- cptpf_enable_vf_flr_intrs(cptpf);
+ cptpf_enable_vf_flr_me_intrs(cptpf, num_vfs);
return 0;
+free_flr1_irq:
+ vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFFLR1);
+ free_irq(vector, cptpf);
free_mbox1_irq:
vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFPF_MBOX1);
free_irq(vector, cptpf);
+free_me0_irq:
+ vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFME0);
+ free_irq(vector, cptpf);
free_flr0_irq:
vector = pci_irq_vector(pdev, RVU_PF_INT_VEC_VFFLR0);
free_irq(vector, cptpf);
@@ -284,7 +357,11 @@ static int cptpf_vfpf_mbox_init(struct otx2_cptpf_dev *cptpf, int num_vfs)
return -ENOMEM;
/* Map VF-PF mailbox memory */
- vfpf_mbox_base = readq(cptpf->reg_base + RVU_PF_VF_BAR4_ADDR);
+ if (test_bit(CN10K_MBOX, &cptpf->cap_flag))
+ vfpf_mbox_base = readq(cptpf->reg_base + RVU_PF_VF_MBOX_ADDR);
+ else
+ vfpf_mbox_base = readq(cptpf->reg_base + RVU_PF_VF_BAR4_ADDR);
+
if (!vfpf_mbox_base) {
dev_err(dev, "VF-PF mailbox address not configured\n");
err = -ENOMEM;
@@ -365,6 +442,8 @@ static int cptpf_register_afpf_mbox_intr(struct otx2_cptpf_dev *cptpf)
static int cptpf_afpf_mbox_init(struct otx2_cptpf_dev *cptpf)
{
+ struct pci_dev *pdev = cptpf->pdev;
+ resource_size_t offset;
int err;
cptpf->afpf_mbox_wq = alloc_workqueue("cpt_afpf_mailbox",
@@ -373,8 +452,17 @@ static int cptpf_afpf_mbox_init(struct otx2_cptpf_dev *cptpf)
if (!cptpf->afpf_mbox_wq)
return -ENOMEM;
+ offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM);
+ /* Map AF-PF mailbox memory */
+ cptpf->afpf_mbox_base = devm_ioremap_wc(&pdev->dev, offset, MBOX_SIZE);
+ if (!cptpf->afpf_mbox_base) {
+ dev_err(&pdev->dev, "Unable to map BAR4\n");
+ err = -ENOMEM;
+ goto error;
+ }
+
err = otx2_mbox_init(&cptpf->afpf_mbox, cptpf->afpf_mbox_base,
- cptpf->pdev, cptpf->reg_base, MBOX_DIR_PFAF, 1);
+ pdev, cptpf->reg_base, MBOX_DIR_PFAF, 1);
if (err)
goto error;
@@ -570,7 +658,7 @@ static int cptpf_sriov_enable(struct pci_dev *pdev, int num_vfs)
if (ret)
goto disable_intr;
- ret = otx2_cpt_create_eng_grps(cptpf->pdev, &cptpf->eng_grps);
+ ret = otx2_cpt_create_eng_grps(cptpf, &cptpf->eng_grps);
if (ret)
goto disable_intr;
@@ -607,7 +695,6 @@ static int otx2_cptpf_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
struct device *dev = &pdev->dev;
- resource_size_t offset, size;
struct otx2_cptpf_dev *cptpf;
int err;
@@ -644,15 +731,6 @@ static int otx2_cptpf_probe(struct pci_dev *pdev,
if (err)
goto clear_drvdata;
- offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM);
- size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM);
- /* Map AF-PF mailbox memory */
- cptpf->afpf_mbox_base = devm_ioremap_wc(dev, offset, size);
- if (!cptpf->afpf_mbox_base) {
- dev_err(&pdev->dev, "Unable to map BAR4\n");
- err = -ENODEV;
- goto clear_drvdata;
- }
err = pci_alloc_irq_vectors(pdev, RVU_PF_INT_VEC_CNT,
RVU_PF_INT_VEC_CNT, PCI_IRQ_MSIX);
if (err < 0) {
@@ -660,6 +738,7 @@ static int otx2_cptpf_probe(struct pci_dev *pdev,
RVU_PF_INT_VEC_CNT);
goto clear_drvdata;
}
+ otx2_cpt_set_hw_caps(pdev, &cptpf->cap_flag);
/* Initialize AF-PF mailbox */
err = cptpf_afpf_mbox_init(cptpf);
if (err)
@@ -671,6 +750,10 @@ static int otx2_cptpf_probe(struct pci_dev *pdev,
cptpf->max_vfs = pci_sriov_get_totalvfs(pdev);
+ err = cn10k_cptpf_lmtst_init(cptpf);
+ if (err)
+ goto unregister_intr;
+
/* Initialize CPT PF device */
err = cptpf_device_init(cptpf);
if (err)
@@ -719,6 +802,7 @@ static void otx2_cptpf_remove(struct pci_dev *pdev)
/* Supported devices */
static const struct pci_device_id otx2_cpt_id_table[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, OTX2_CPT_PCI_PF_DEVICE_ID) },
+ { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, CN10K_CPT_PCI_PF_DEVICE_ID) },
{ 0, } /* end of table */
};
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
index a531f4c8b441..dff34b3ec09e 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.c
@@ -16,6 +16,8 @@
#define LOADFVC_MAJOR_OP 0x01
#define LOADFVC_MINOR_OP 0x08
+#define CTX_FLUSH_TIMER_CNT 0xFFFFFF
+
struct fw_info_t {
struct list_head ucodes;
};
@@ -666,7 +668,8 @@ static int reserve_engines(struct device *dev,
static void ucode_unload(struct device *dev, struct otx2_cpt_ucode *ucode)
{
if (ucode->va) {
- dma_free_coherent(dev, ucode->size, ucode->va, ucode->dma);
+ dma_free_coherent(dev, OTX2_CPT_UCODE_SZ, ucode->va,
+ ucode->dma);
ucode->va = NULL;
ucode->dma = 0;
ucode->size = 0;
@@ -685,7 +688,7 @@ static int copy_ucode_to_dma_mem(struct device *dev,
u32 i;
/* Allocate DMAable space */
- ucode->va = dma_alloc_coherent(dev, ucode->size, &ucode->dma,
+ ucode->va = dma_alloc_coherent(dev, OTX2_CPT_UCODE_SZ, &ucode->dma,
GFP_KERNEL);
if (!ucode->va)
return -ENOMEM;
@@ -1100,11 +1103,12 @@ int otx2_cpt_get_eng_grp(struct otx2_cpt_eng_grps *eng_grps, int eng_type)
return eng_grp_num;
}
-int otx2_cpt_create_eng_grps(struct pci_dev *pdev,
+int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf,
struct otx2_cpt_eng_grps *eng_grps)
{
struct otx2_cpt_uc_info_t *uc_info[OTX2_CPT_MAX_ETYPES_PER_GRP] = { };
struct otx2_cpt_engines engs[OTX2_CPT_MAX_ETYPES_PER_GRP] = { {0} };
+ struct pci_dev *pdev = cptpf->pdev;
struct fw_info_t fw_info;
int ret;
@@ -1180,6 +1184,23 @@ int otx2_cpt_create_eng_grps(struct pci_dev *pdev,
eng_grps->is_grps_created = true;
cpt_ucode_release_fw(&fw_info);
+
+ if (is_dev_otx2(pdev))
+ return 0;
+ /*
+ * Configure engine group mask to allow context prefetching
+ * for the groups.
+ */
+ otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTL,
+ OTX2_CPT_ALL_ENG_GRPS_MASK << 3 | BIT_ULL(16),
+ BLKADDR_CPT0);
+ /*
+ * Set interval to periodically flush dirty data for the next
+ * CTX cache entry. Set the interval count to maximum supported
+ * value.
+ */
+ otx2_cpt_write_af_reg(&cptpf->afpf_mbox, pdev, CPT_AF_CTX_FLUSH_TIMER,
+ CTX_FLUSH_TIMER_CNT, BLKADDR_CPT0);
return 0;
delete_eng_grp:
@@ -1460,9 +1481,10 @@ int otx2_cpt_discover_eng_capabilities(struct otx2_cptpf_dev *cptpf)
iq_cmd.cptr.s.grp = otx2_cpt_get_eng_grp(&cptpf->eng_grps,
etype);
otx2_cpt_fill_inst(&inst, &iq_cmd, rptr_baddr);
- otx2_cpt_send_cmd(&inst, 1, &cptpf->lfs.lf[0]);
+ lfs->ops->send_cmd(&inst, 1, &cptpf->lfs.lf[0]);
- while (result->s.compcode == OTX2_CPT_COMPLETION_CODE_INIT)
+ while (lfs->ops->cpt_get_compcode(result) ==
+ OTX2_CPT_COMPLETION_CODE_INIT)
cpu_relax();
cptpf->eng_caps[etype].u = be64_to_cpup(rptr);
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h
index 6b0d432de0af..fe019ab730b2 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptpf_ucode.h
@@ -23,11 +23,13 @@
/* Microcode version string length */
#define OTX2_CPT_UCODE_VER_STR_SZ 44
-/* Maximum number of supported engines/cores on OcteonTX2 platform */
-#define OTX2_CPT_MAX_ENGINES 128
+/* Maximum number of supported engines/cores on OcteonTX2/CN10K platform */
+#define OTX2_CPT_MAX_ENGINES 144
#define OTX2_CPT_ENGS_BITMASK_LEN BITS_TO_LONGS(OTX2_CPT_MAX_ENGINES)
+#define OTX2_CPT_UCODE_SZ (64 * 1024)
+
/* Microcode types */
enum otx2_cpt_ucode_type {
OTX2_CPT_AE_UC_TYPE = 1, /* AE-MAIN */
@@ -153,7 +155,7 @@ int otx2_cpt_init_eng_grps(struct pci_dev *pdev,
struct otx2_cpt_eng_grps *eng_grps);
void otx2_cpt_cleanup_eng_grps(struct pci_dev *pdev,
struct otx2_cpt_eng_grps *eng_grps);
-int otx2_cpt_create_eng_grps(struct pci_dev *pdev,
+int otx2_cpt_create_eng_grps(struct otx2_cptpf_dev *cptpf,
struct otx2_cpt_eng_grps *eng_grps);
int otx2_cpt_disable_all_cores(struct otx2_cptpf_dev *cptpf);
int otx2_cpt_get_eng_grp(struct otx2_cpt_eng_grps *eng_grps, int eng_type);
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf.h b/drivers/crypto/marvell/octeontx2/otx2_cptvf.h
index 4f0a169fddbd..4207e2236903 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf.h
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf.h
@@ -19,11 +19,14 @@ struct otx2_cptvf_dev {
struct otx2_mbox pfvf_mbox;
struct work_struct pfvf_mbox_work;
struct workqueue_struct *pfvf_mbox_wq;
+ void *bbuf_base;
+ unsigned long cap_flag;
};
irqreturn_t otx2_cptvf_pfvf_mbox_intr(int irq, void *arg);
void otx2_cptvf_pfvf_mbox_handler(struct work_struct *work);
int otx2_cptvf_send_eng_grp_num_msg(struct otx2_cptvf_dev *cptvf, int eng_type);
int otx2_cptvf_send_kvf_limits_msg(struct otx2_cptvf_dev *cptvf);
+int otx2_cpt_mbox_bbuf_init(struct otx2_cptvf_dev *cptvf, struct pci_dev *pdev);
#endif /* __OTX2_CPTVF_H */
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c
index 47f378731024..3411e664cf50 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_main.c
@@ -5,9 +5,10 @@
#include "otx2_cptvf.h"
#include "otx2_cptlf.h"
#include "otx2_cptvf_algs.h"
+#include "cn10k_cpt.h"
#include <rvu_reg.h>
-#define OTX2_CPTVF_DRV_NAME "octeontx2-cptvf"
+#define OTX2_CPTVF_DRV_NAME "rvu_cptvf"
static void cptvf_enable_pfvf_mbox_intrs(struct otx2_cptvf_dev *cptvf)
{
@@ -70,6 +71,8 @@ static int cptvf_register_interrupts(struct otx2_cptvf_dev *cptvf)
static int cptvf_pfvf_mbox_init(struct otx2_cptvf_dev *cptvf)
{
+ struct pci_dev *pdev = cptvf->pdev;
+ resource_size_t offset, size;
int ret;
cptvf->pfvf_mbox_wq = alloc_workqueue("cpt_pfvf_mailbox",
@@ -78,14 +81,39 @@ static int cptvf_pfvf_mbox_init(struct otx2_cptvf_dev *cptvf)
if (!cptvf->pfvf_mbox_wq)
return -ENOMEM;
+ if (test_bit(CN10K_MBOX, &cptvf->cap_flag)) {
+ /* For cn10k platform, VF mailbox region is in its BAR2
+ * register space
+ */
+ cptvf->pfvf_mbox_base = cptvf->reg_base +
+ CN10K_CPT_VF_MBOX_REGION;
+ } else {
+ offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM);
+ size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM);
+ /* Map PF-VF mailbox memory */
+ cptvf->pfvf_mbox_base = devm_ioremap_wc(&pdev->dev, offset,
+ size);
+ if (!cptvf->pfvf_mbox_base) {
+ dev_err(&pdev->dev, "Unable to map BAR4\n");
+ ret = -ENOMEM;
+ goto free_wqe;
+ }
+ }
+
ret = otx2_mbox_init(&cptvf->pfvf_mbox, cptvf->pfvf_mbox_base,
- cptvf->pdev, cptvf->reg_base, MBOX_DIR_VFPF, 1);
+ pdev, cptvf->reg_base, MBOX_DIR_VFPF, 1);
if (ret)
goto free_wqe;
+ ret = otx2_cpt_mbox_bbuf_init(cptvf, pdev);
+ if (ret)
+ goto destroy_mbox;
+
INIT_WORK(&cptvf->pfvf_mbox_work, otx2_cptvf_pfvf_mbox_handler);
return 0;
+destroy_mbox:
+ otx2_mbox_destroy(&cptvf->pfvf_mbox);
free_wqe:
destroy_workqueue(cptvf->pfvf_mbox_wq);
return ret;
@@ -305,7 +333,6 @@ static int otx2_cptvf_probe(struct pci_dev *pdev,
const struct pci_device_id *ent)
{
struct device *dev = &pdev->dev;
- resource_size_t offset, size;
struct otx2_cptvf_dev *cptvf;
int ret;
@@ -337,15 +364,12 @@ static int otx2_cptvf_probe(struct pci_dev *pdev,
cptvf->reg_base = pcim_iomap_table(pdev)[PCI_PF_REG_BAR_NUM];
- offset = pci_resource_start(pdev, PCI_MBOX_BAR_NUM);
- size = pci_resource_len(pdev, PCI_MBOX_BAR_NUM);
- /* Map PF-VF mailbox memory */
- cptvf->pfvf_mbox_base = devm_ioremap_wc(dev, offset, size);
- if (!cptvf->pfvf_mbox_base) {
- dev_err(&pdev->dev, "Unable to map BAR4\n");
- ret = -ENODEV;
+ otx2_cpt_set_hw_caps(pdev, &cptvf->cap_flag);
+
+ ret = cn10k_cptvf_lmtst_init(cptvf);
+ if (ret)
goto clear_drvdata;
- }
+
/* Initialize PF<=>VF mailbox */
ret = cptvf_pfvf_mbox_init(cptvf);
if (ret)
@@ -392,6 +416,7 @@ static void otx2_cptvf_remove(struct pci_dev *pdev)
/* Supported devices */
static const struct pci_device_id otx2_cptvf_id_table[] = {
{PCI_VDEVICE(CAVIUM, OTX2_CPT_PCI_VF_DEVICE_ID), 0},
+ {PCI_VDEVICE(CAVIUM, CN10K_CPT_PCI_VF_DEVICE_ID), 0},
{ 0, } /* end of table */
};
@@ -405,6 +430,6 @@ static struct pci_driver otx2_cptvf_pci_driver = {
module_pci_driver(otx2_cptvf_pci_driver);
MODULE_AUTHOR("Marvell");
-MODULE_DESCRIPTION("Marvell OcteonTX2 CPT Virtual Function Driver");
+MODULE_DESCRIPTION("Marvell RVU CPT Virtual Function Driver");
MODULE_LICENSE("GPL v2");
MODULE_DEVICE_TABLE(pci, otx2_cptvf_id_table);
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c
index 5d73b711cba6..02cb9e44afd8 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_mbox.c
@@ -5,6 +5,48 @@
#include "otx2_cptvf.h"
#include <rvu_reg.h>
+int otx2_cpt_mbox_bbuf_init(struct otx2_cptvf_dev *cptvf, struct pci_dev *pdev)
+{
+ struct otx2_mbox_dev *mdev;
+ struct otx2_mbox *otx2_mbox;
+
+ cptvf->bbuf_base = devm_kmalloc(&pdev->dev, MBOX_SIZE, GFP_KERNEL);
+ if (!cptvf->bbuf_base)
+ return -ENOMEM;
+ /*
+ * Overwrite mbox mbase to point to bounce buffer, so that PF/VF
+ * prepare all mbox messages in bounce buffer instead of directly
+ * in hw mbox memory.
+ */
+ otx2_mbox = &cptvf->pfvf_mbox;
+ mdev = &otx2_mbox->dev[0];
+ mdev->mbase = cptvf->bbuf_base;
+
+ return 0;
+}
+
+static void otx2_cpt_sync_mbox_bbuf(struct otx2_mbox *mbox, int devid)
+{
+ u16 msgs_offset = ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
+ void *hw_mbase = mbox->hwbase + (devid * MBOX_SIZE);
+ struct otx2_mbox_dev *mdev = &mbox->dev[devid];
+ struct mbox_hdr *hdr;
+ u64 msg_size;
+
+ if (mdev->mbase == hw_mbase)
+ return;
+
+ hdr = hw_mbase + mbox->rx_start;
+ msg_size = hdr->msg_size;
+
+ if (msg_size > mbox->rx_size - msgs_offset)
+ msg_size = mbox->rx_size - msgs_offset;
+
+ /* Copy mbox messages from mbox memory to bounce buffer */
+ memcpy(mdev->mbase + mbox->rx_start,
+ hw_mbase + mbox->rx_start, msg_size + msgs_offset);
+}
+
irqreturn_t otx2_cptvf_pfvf_mbox_intr(int __always_unused irq, void *arg)
{
struct otx2_cptvf_dev *cptvf = arg;
@@ -106,6 +148,7 @@ void otx2_cptvf_pfvf_mbox_handler(struct work_struct *work)
cptvf = container_of(work, struct otx2_cptvf_dev, pfvf_mbox_work);
pfvf_mbox = &cptvf->pfvf_mbox;
+ otx2_cpt_sync_mbox_bbuf(pfvf_mbox, 0);
mdev = &pfvf_mbox->dev[0];
rsp_hdr = (struct mbox_hdr *)(mdev->mbase + pfvf_mbox->rx_start);
if (rsp_hdr->num_msgs == 0)
diff --git a/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c
index d5c1c1b7c7e4..811ded72ce5f 100644
--- a/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c
+++ b/drivers/crypto/marvell/octeontx2/otx2_cptvf_reqmgr.c
@@ -320,7 +320,7 @@ static int process_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
cpt_req->dlen, false);
/* Send CPT command */
- otx2_cpt_send_cmd(&cptinst, 1, lf);
+ lf->lfs->ops->send_cmd(&cptinst, 1, lf);
/*
* We allocate and prepare pending queue entry in critical section
@@ -349,13 +349,14 @@ int otx2_cpt_do_request(struct pci_dev *pdev, struct otx2_cpt_req_info *req,
&lfs->lf[cpu_num]);
}
-static int cpt_process_ccode(struct pci_dev *pdev,
+static int cpt_process_ccode(struct otx2_cptlfs_info *lfs,
union otx2_cpt_res_s *cpt_status,
struct otx2_cpt_inst_info *info,
u32 *res_code)
{
- u8 uc_ccode = cpt_status->s.uc_compcode;
- u8 ccode = cpt_status->s.compcode;
+ u8 uc_ccode = lfs->ops->cpt_get_uc_compcode(cpt_status);
+ u8 ccode = lfs->ops->cpt_get_compcode(cpt_status);
+ struct pci_dev *pdev = lfs->pdev;
switch (ccode) {
case OTX2_CPT_COMP_E_FAULT:
@@ -389,6 +390,7 @@ static int cpt_process_ccode(struct pci_dev *pdev,
return 1;
case OTX2_CPT_COMP_E_GOOD:
+ case OTX2_CPT_COMP_E_WARN:
/*
* Check microcode completion code, it is only valid
* when completion code is CPT_COMP_E::GOOD
@@ -426,7 +428,7 @@ static int cpt_process_ccode(struct pci_dev *pdev,
return 0;
}
-static inline void process_pending_queue(struct pci_dev *pdev,
+static inline void process_pending_queue(struct otx2_cptlfs_info *lfs,
struct otx2_cpt_pending_queue *pqueue)
{
struct otx2_cpt_pending_entry *resume_pentry = NULL;
@@ -436,6 +438,7 @@ static inline void process_pending_queue(struct pci_dev *pdev,
struct otx2_cpt_inst_info *info = NULL;
struct otx2_cpt_req_info *req = NULL;
struct crypto_async_request *areq;
+ struct pci_dev *pdev = lfs->pdev;
u32 res_code, resume_index;
while (1) {
@@ -476,7 +479,7 @@ static inline void process_pending_queue(struct pci_dev *pdev,
goto process_pentry;
}
- if (cpt_process_ccode(pdev, cpt_status, info, &res_code)) {
+ if (cpt_process_ccode(lfs, cpt_status, info, &res_code)) {
spin_unlock_bh(&pqueue->lock);
return;
}
@@ -529,7 +532,7 @@ process_pentry:
void otx2_cpt_post_process(struct otx2_cptlf_wqe *wqe)
{
- process_pending_queue(wqe->lfs->pdev,
+ process_pending_queue(wqe->lfs,
&wqe->lfs->lf[wqe->lf_num].pqueue);
}
diff --git a/drivers/crypto/nx/nx-842-pseries.c b/drivers/crypto/nx/nx-842-pseries.c
index cc8dd3072b8b..1491cbfbc071 100644
--- a/drivers/crypto/nx/nx-842-pseries.c
+++ b/drivers/crypto/nx/nx-842-pseries.c
@@ -264,8 +264,8 @@ static int nx842_validate_result(struct device *dev,
* @inlen: Length of input buffer
* @out: Pointer to output buffer
* @outlen: Length of output buffer
- * @wrkmem: ptr to buffer for working memory, size determined by
- * nx842_pseries_driver.workmem_size
+ * @wmem: ptr to buffer for working memory, size determined by
+ * nx842_pseries_driver.workmem_size
*
* Returns:
* 0 Success, output of length @outlen stored in the buffer at @out
@@ -393,8 +393,8 @@ unlock:
* @inlen: Length of input buffer
* @out: Pointer to output buffer
* @outlen: Length of output buffer
- * @wrkmem: ptr to buffer for working memory, size determined by
- * nx842_pseries_driver.workmem_size
+ * @wmem: ptr to buffer for working memory, size determined by
+ * nx842_pseries_driver.workmem_size
*
* Returns:
* 0 Success, output of length @outlen stored in the buffer at @out
@@ -513,7 +513,7 @@ unlock:
/**
* nx842_OF_set_defaults -- Set default (disabled) values for devdata
*
- * @devdata - struct nx842_devdata to update
+ * @devdata: struct nx842_devdata to update
*
* Returns:
* 0 on success
@@ -538,13 +538,15 @@ static int nx842_OF_set_defaults(struct nx842_devdata *devdata)
* The status field indicates if the device is enabled when the status
* is 'okay'. Otherwise the device driver will be disabled.
*
- * @prop - struct property point containing the maxsyncop for the update
+ * @devdata: struct nx842_devdata to use for dev_info
+ * @prop: struct property point containing the maxsyncop for the update
*
* Returns:
* 0 - Device is available
* -ENODEV - Device is not available
*/
-static int nx842_OF_upd_status(struct property *prop)
+static int nx842_OF_upd_status(struct nx842_devdata *devdata,
+ struct property *prop)
{
const char *status = (const char *)prop->value;
@@ -571,8 +573,8 @@ static int nx842_OF_upd_status(struct property *prop)
* In this example, the maximum byte length of a scatter list is
* 0x0ff0 (4,080).
*
- * @devdata - struct nx842_devdata to update
- * @prop - struct property point containing the maxsyncop for the update
+ * @devdata: struct nx842_devdata to update
+ * @prop: struct property point containing the maxsyncop for the update
*
* Returns:
* 0 on success
@@ -619,8 +621,8 @@ static int nx842_OF_upd_maxsglen(struct nx842_devdata *devdata,
* 0x1000 (4,096) data byte length and 0x1f3 (510) total scatter list
* elements.
*
- * @devdata - struct nx842_devdata to update
- * @prop - struct property point containing the maxsyncop for the update
+ * @devdata: struct nx842_devdata to update
+ * @prop: struct property point containing the maxsyncop for the update
*
* Returns:
* 0 on success
@@ -689,7 +691,6 @@ out:
}
/**
- *
* nx842_OF_upd -- Handle OF properties updates for the device.
*
* Set all properties from the OF tree. Optionally, a new property
@@ -758,7 +759,7 @@ static int nx842_OF_upd(struct property *new_prop)
goto out;
/* Perform property updates */
- ret = nx842_OF_upd_status(status);
+ ret = nx842_OF_upd_status(new_devdata, status);
if (ret)
goto error_out;
@@ -812,8 +813,7 @@ error_out:
*
* @np: notifier block
* @action: notifier action
- * @update: struct pSeries_reconfig_prop_update pointer if action is
- * PSERIES_UPDATE_PROPERTY
+ * @data: struct of_reconfig_data pointer
*
* Returns:
* NOTIFY_OK on success
@@ -1069,6 +1069,7 @@ static const struct vio_device_id nx842_vio_driver_ids[] = {
{"ibm,compression-v1", "ibm,compression"},
{"", ""},
};
+MODULE_DEVICE_TABLE(vio, nx842_vio_driver_ids);
static struct vio_driver nx842_vio_driver = {
.name = KBUILD_MODNAME,
diff --git a/drivers/crypto/nx/nx-aes-cbc.c b/drivers/crypto/nx/nx-aes-cbc.c
index d6314ea9ae89..0e440f704a8f 100644
--- a/drivers/crypto/nx/nx-aes-cbc.c
+++ b/drivers/crypto/nx/nx-aes-cbc.c
@@ -88,7 +88,7 @@ static int cbc_aes_nx_crypt(struct skcipher_request *req,
memcpy(req->iv, csbcpb->cpb.aes_cbc.cv, AES_BLOCK_SIZE);
atomic_inc(&(nx_ctx->stats->aes_ops));
- atomic64_add(csbcpb->csb.processed_byte_count,
+ atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count),
&(nx_ctx->stats->aes_bytes));
processed += to_process;
diff --git a/drivers/crypto/nx/nx-aes-ccm.c b/drivers/crypto/nx/nx-aes-ccm.c
index e7384d107573..3793885f928d 100644
--- a/drivers/crypto/nx/nx-aes-ccm.c
+++ b/drivers/crypto/nx/nx-aes-ccm.c
@@ -391,7 +391,7 @@ static int ccm_nx_decrypt(struct aead_request *req,
/* update stats */
atomic_inc(&(nx_ctx->stats->aes_ops));
- atomic64_add(csbcpb->csb.processed_byte_count,
+ atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count),
&(nx_ctx->stats->aes_bytes));
processed += to_process;
@@ -460,7 +460,7 @@ static int ccm_nx_encrypt(struct aead_request *req,
/* update stats */
atomic_inc(&(nx_ctx->stats->aes_ops));
- atomic64_add(csbcpb->csb.processed_byte_count,
+ atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count),
&(nx_ctx->stats->aes_bytes));
processed += to_process;
diff --git a/drivers/crypto/nx/nx-aes-ctr.c b/drivers/crypto/nx/nx-aes-ctr.c
index 13f518802343..dfa3ad1a12f2 100644
--- a/drivers/crypto/nx/nx-aes-ctr.c
+++ b/drivers/crypto/nx/nx-aes-ctr.c
@@ -102,7 +102,7 @@ static int ctr_aes_nx_crypt(struct skcipher_request *req, u8 *iv)
memcpy(iv, csbcpb->cpb.aes_cbc.cv, AES_BLOCK_SIZE);
atomic_inc(&(nx_ctx->stats->aes_ops));
- atomic64_add(csbcpb->csb.processed_byte_count,
+ atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count),
&(nx_ctx->stats->aes_bytes));
processed += to_process;
@@ -118,7 +118,7 @@ static int ctr3686_aes_nx_crypt(struct skcipher_request *req)
struct nx_crypto_ctx *nx_ctx = crypto_skcipher_ctx(tfm);
u8 iv[16];
- memcpy(iv, nx_ctx->priv.ctr.nonce, CTR_RFC3686_IV_SIZE);
+ memcpy(iv, nx_ctx->priv.ctr.nonce, CTR_RFC3686_NONCE_SIZE);
memcpy(iv + CTR_RFC3686_NONCE_SIZE, req->iv, CTR_RFC3686_IV_SIZE);
iv[12] = iv[13] = iv[14] = 0;
iv[15] = 1;
diff --git a/drivers/crypto/nx/nx-aes-ecb.c b/drivers/crypto/nx/nx-aes-ecb.c
index 7a729dc2bc17..502a565074e9 100644
--- a/drivers/crypto/nx/nx-aes-ecb.c
+++ b/drivers/crypto/nx/nx-aes-ecb.c
@@ -86,7 +86,7 @@ static int ecb_aes_nx_crypt(struct skcipher_request *req,
goto out;
atomic_inc(&(nx_ctx->stats->aes_ops));
- atomic64_add(csbcpb->csb.processed_byte_count,
+ atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count),
&(nx_ctx->stats->aes_bytes));
processed += to_process;
diff --git a/drivers/crypto/nx/nx-aes-gcm.c b/drivers/crypto/nx/nx-aes-gcm.c
index fc9baca13920..4a796318b430 100644
--- a/drivers/crypto/nx/nx-aes-gcm.c
+++ b/drivers/crypto/nx/nx-aes-gcm.c
@@ -382,7 +382,7 @@ static int gcm_aes_nx_crypt(struct aead_request *req, int enc,
NX_CPB_FDM(csbcpb) |= NX_FDM_CONTINUATION;
atomic_inc(&(nx_ctx->stats->aes_ops));
- atomic64_add(csbcpb->csb.processed_byte_count,
+ atomic64_add(be32_to_cpu(csbcpb->csb.processed_byte_count),
&(nx_ctx->stats->aes_bytes));
processed += to_process;
diff --git a/drivers/crypto/nx/nx-common-powernv.c b/drivers/crypto/nx/nx-common-powernv.c
index 446f611726df..655361ba9107 100644
--- a/drivers/crypto/nx/nx-common-powernv.c
+++ b/drivers/crypto/nx/nx-common-powernv.c
@@ -660,8 +660,8 @@ static int nx842_powernv_compress(const unsigned char *in, unsigned int inlen,
* @inlen: input buffer size
* @out: output buffer pointer
* @outlenp: output buffer size pointer
- * @workmem: working memory buffer pointer, size determined by
- * nx842_powernv_driver.workmem_size
+ * @wmem: working memory buffer pointer, size determined by
+ * nx842_powernv_driver.workmem_size
*
* Returns: see @nx842_powernv_exec()
*/
diff --git a/drivers/crypto/nx/nx-sha256.c b/drivers/crypto/nx/nx-sha256.c
index b0ad665e4bda..c3bebf0feabe 100644
--- a/drivers/crypto/nx/nx-sha256.c
+++ b/drivers/crypto/nx/nx-sha256.c
@@ -16,6 +16,11 @@
#include "nx_csbcpb.h"
#include "nx.h"
+struct sha256_state_be {
+ __be32 state[SHA256_DIGEST_SIZE / 4];
+ u64 count;
+ u8 buf[SHA256_BLOCK_SIZE];
+};
static int nx_crypto_ctx_sha256_init(struct crypto_tfm *tfm)
{
@@ -36,7 +41,7 @@ static int nx_crypto_ctx_sha256_init(struct crypto_tfm *tfm)
}
static int nx_sha256_init(struct shash_desc *desc) {
- struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state_be *sctx = shash_desc_ctx(desc);
memset(sctx, 0, sizeof *sctx);
@@ -56,7 +61,7 @@ static int nx_sha256_init(struct shash_desc *desc) {
static int nx_sha256_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state_be *sctx = shash_desc_ctx(desc);
struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
struct nx_sg *out_sg;
@@ -175,7 +180,7 @@ out:
static int nx_sha256_final(struct shash_desc *desc, u8 *out)
{
- struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state_be *sctx = shash_desc_ctx(desc);
struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
struct nx_sg *in_sg, *out_sg;
@@ -245,7 +250,7 @@ out:
static int nx_sha256_export(struct shash_desc *desc, void *out)
{
- struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state_be *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
@@ -254,7 +259,7 @@ static int nx_sha256_export(struct shash_desc *desc, void *out)
static int nx_sha256_import(struct shash_desc *desc, const void *in)
{
- struct sha256_state *sctx = shash_desc_ctx(desc);
+ struct sha256_state_be *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
@@ -268,8 +273,8 @@ struct shash_alg nx_shash_sha256_alg = {
.final = nx_sha256_final,
.export = nx_sha256_export,
.import = nx_sha256_import,
- .descsize = sizeof(struct sha256_state),
- .statesize = sizeof(struct sha256_state),
+ .descsize = sizeof(struct sha256_state_be),
+ .statesize = sizeof(struct sha256_state_be),
.base = {
.cra_name = "sha256",
.cra_driver_name = "sha256-nx",
diff --git a/drivers/crypto/nx/nx-sha512.c b/drivers/crypto/nx/nx-sha512.c
index c29103a1a0b6..1ffb40d2c324 100644
--- a/drivers/crypto/nx/nx-sha512.c
+++ b/drivers/crypto/nx/nx-sha512.c
@@ -15,6 +15,11 @@
#include "nx_csbcpb.h"
#include "nx.h"
+struct sha512_state_be {
+ __be64 state[SHA512_DIGEST_SIZE / 8];
+ u64 count[2];
+ u8 buf[SHA512_BLOCK_SIZE];
+};
static int nx_crypto_ctx_sha512_init(struct crypto_tfm *tfm)
{
@@ -36,7 +41,7 @@ static int nx_crypto_ctx_sha512_init(struct crypto_tfm *tfm)
static int nx_sha512_init(struct shash_desc *desc)
{
- struct sha512_state *sctx = shash_desc_ctx(desc);
+ struct sha512_state_be *sctx = shash_desc_ctx(desc);
memset(sctx, 0, sizeof *sctx);
@@ -56,7 +61,7 @@ static int nx_sha512_init(struct shash_desc *desc)
static int nx_sha512_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
- struct sha512_state *sctx = shash_desc_ctx(desc);
+ struct sha512_state_be *sctx = shash_desc_ctx(desc);
struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
struct nx_sg *out_sg;
@@ -178,7 +183,7 @@ out:
static int nx_sha512_final(struct shash_desc *desc, u8 *out)
{
- struct sha512_state *sctx = shash_desc_ctx(desc);
+ struct sha512_state_be *sctx = shash_desc_ctx(desc);
struct nx_crypto_ctx *nx_ctx = crypto_tfm_ctx(&desc->tfm->base);
struct nx_csbcpb *csbcpb = (struct nx_csbcpb *)nx_ctx->csbcpb;
struct nx_sg *in_sg, *out_sg;
@@ -251,7 +256,7 @@ out:
static int nx_sha512_export(struct shash_desc *desc, void *out)
{
- struct sha512_state *sctx = shash_desc_ctx(desc);
+ struct sha512_state_be *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
@@ -260,7 +265,7 @@ static int nx_sha512_export(struct shash_desc *desc, void *out)
static int nx_sha512_import(struct shash_desc *desc, const void *in)
{
- struct sha512_state *sctx = shash_desc_ctx(desc);
+ struct sha512_state_be *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
@@ -274,8 +279,8 @@ struct shash_alg nx_shash_sha512_alg = {
.final = nx_sha512_final,
.export = nx_sha512_export,
.import = nx_sha512_import,
- .descsize = sizeof(struct sha512_state),
- .statesize = sizeof(struct sha512_state),
+ .descsize = sizeof(struct sha512_state_be),
+ .statesize = sizeof(struct sha512_state_be),
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-nx",
diff --git a/drivers/crypto/nx/nx_csbcpb.h b/drivers/crypto/nx/nx_csbcpb.h
index 493f8490ff94..e64f7e36fb92 100644
--- a/drivers/crypto/nx/nx_csbcpb.h
+++ b/drivers/crypto/nx/nx_csbcpb.h
@@ -140,8 +140,8 @@ struct cop_status_block {
u8 crb_seq_number;
u8 completion_code;
u8 completion_extension;
- u32 processed_byte_count;
- u64 address;
+ __be32 processed_byte_count;
+ __be64 address;
} __packed;
/* Nest accelerator workbook section 4.4 */
diff --git a/drivers/crypto/omap-des.c b/drivers/crypto/omap-des.c
index c9d38bcfd1c7..bc8631363d72 100644
--- a/drivers/crypto/omap-des.c
+++ b/drivers/crypto/omap-des.c
@@ -229,9 +229,8 @@ static int omap_des_hw_init(struct omap_des_dev *dd)
* It may be long delays between requests.
* Device might go to off mode to save power.
*/
- err = pm_runtime_get_sync(dd->dev);
+ err = pm_runtime_resume_and_get(dd->dev);
if (err < 0) {
- pm_runtime_put_noidle(dd->dev);
dev_err(dd->dev, "%s: failed to get_sync(%d)\n", __func__, err);
return err;
}
@@ -994,9 +993,8 @@ static int omap_des_probe(struct platform_device *pdev)
pm_runtime_set_autosuspend_delay(dev, DEFAULT_AUTOSUSPEND_DELAY);
pm_runtime_enable(dev);
- err = pm_runtime_get_sync(dev);
+ err = pm_runtime_resume_and_get(dev);
if (err < 0) {
- pm_runtime_put_noidle(dev);
dev_err(dd->dev, "%s: failed to get_sync(%d)\n", __func__, err);
goto err_get;
}
@@ -1124,9 +1122,8 @@ static int omap_des_resume(struct device *dev)
{
int err;
- err = pm_runtime_get_sync(dev);
+ err = pm_runtime_resume_and_get(dev);
if (err < 0) {
- pm_runtime_put_noidle(dev);
dev_err(dev, "%s: failed to get_sync(%d)\n", __func__, err);
return err;
}
diff --git a/drivers/crypto/omap-sham.c b/drivers/crypto/omap-sham.c
index ae0d320d3c60..dd53ad9987b0 100644
--- a/drivers/crypto/omap-sham.c
+++ b/drivers/crypto/omap-sham.c
@@ -372,7 +372,7 @@ static int omap_sham_hw_init(struct omap_sham_dev *dd)
{
int err;
- err = pm_runtime_get_sync(dd->dev);
+ err = pm_runtime_resume_and_get(dd->dev);
if (err < 0) {
dev_err(dd->dev, "failed to get sync: %d\n", err);
return err;
@@ -2244,7 +2244,7 @@ static int omap_sham_suspend(struct device *dev)
static int omap_sham_resume(struct device *dev)
{
- int err = pm_runtime_get_sync(dev);
+ int err = pm_runtime_resume_and_get(dev);
if (err < 0) {
dev_err(dev, "failed to get sync: %d\n", err);
return err;
diff --git a/drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h b/drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h
index b8f3463be6ef..7eb5daef4f88 100644
--- a/drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h
+++ b/drivers/crypto/qat/qat_common/icp_qat_fw_loader_handle.h
@@ -24,7 +24,7 @@ struct icp_qat_fw_loader_hal_handle {
};
struct icp_qat_fw_loader_chip_info {
- bool sram_visible;
+ int mmp_sram_size;
bool nn;
bool lm2lm3;
u32 lm_size;
diff --git a/drivers/crypto/qat/qat_common/qat_hal.c b/drivers/crypto/qat/qat_common/qat_hal.c
index bd3028126cbe..12ca6b8764aa 100644
--- a/drivers/crypto/qat/qat_common/qat_hal.c
+++ b/drivers/crypto/qat/qat_common/qat_hal.c
@@ -696,7 +696,7 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle,
handle->pci_dev = pci_info->pci_dev;
switch (handle->pci_dev->device) {
case ADF_4XXX_PCI_DEVICE_ID:
- handle->chip_info->sram_visible = false;
+ handle->chip_info->mmp_sram_size = 0;
handle->chip_info->nn = false;
handle->chip_info->lm2lm3 = true;
handle->chip_info->lm_size = ICP_QAT_UCLO_MAX_LMEM_REG_2X;
@@ -730,7 +730,7 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle,
break;
case PCI_DEVICE_ID_INTEL_QAT_C62X:
case PCI_DEVICE_ID_INTEL_QAT_C3XXX:
- handle->chip_info->sram_visible = false;
+ handle->chip_info->mmp_sram_size = 0;
handle->chip_info->nn = true;
handle->chip_info->lm2lm3 = false;
handle->chip_info->lm_size = ICP_QAT_UCLO_MAX_LMEM_REG;
@@ -763,7 +763,7 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle,
+ LOCAL_TO_XFER_REG_OFFSET);
break;
case PCI_DEVICE_ID_INTEL_QAT_DH895XCC:
- handle->chip_info->sram_visible = true;
+ handle->chip_info->mmp_sram_size = 0x40000;
handle->chip_info->nn = true;
handle->chip_info->lm2lm3 = false;
handle->chip_info->lm_size = ICP_QAT_UCLO_MAX_LMEM_REG;
@@ -800,7 +800,7 @@ static int qat_hal_chip_init(struct icp_qat_fw_loader_handle *handle,
goto out_err;
}
- if (handle->chip_info->sram_visible) {
+ if (handle->chip_info->mmp_sram_size > 0) {
sram_bar =
&pci_info->pci_bars[hw_data->get_sram_bar_id(hw_data)];
handle->hal_sram_addr_v = sram_bar->virt_addr;
@@ -1417,7 +1417,11 @@ static int qat_hal_put_rel_wr_xfer(struct icp_qat_fw_loader_handle *handle,
pr_err("QAT: bad xfrAddr=0x%x\n", xfr_addr);
return -EINVAL;
}
- qat_hal_rd_rel_reg(handle, ae, ctx, ICP_GPB_REL, gprnum, &gprval);
+ status = qat_hal_rd_rel_reg(handle, ae, ctx, ICP_GPB_REL, gprnum, &gprval);
+ if (status) {
+ pr_err("QAT: failed to read register");
+ return status;
+ }
gpr_addr = qat_hal_get_reg_addr(ICP_GPB_REL, gprnum);
data16low = 0xffff & data;
data16hi = 0xffff & (data >> 0x10);
diff --git a/drivers/crypto/qat/qat_common/qat_uclo.c b/drivers/crypto/qat/qat_common/qat_uclo.c
index 1fb5fc852f6b..2026cc6be8f0 100644
--- a/drivers/crypto/qat/qat_common/qat_uclo.c
+++ b/drivers/crypto/qat/qat_common/qat_uclo.c
@@ -342,7 +342,6 @@ static int qat_uclo_init_umem_seg(struct icp_qat_fw_loader_handle *handle,
return 0;
}
-#define ICP_DH895XCC_PESRAM_BAR_SIZE 0x80000
static int qat_uclo_init_ae_memory(struct icp_qat_fw_loader_handle *handle,
struct icp_qat_uof_initmem *init_mem)
{
@@ -1546,15 +1545,14 @@ int qat_uclo_wr_mimage(struct icp_qat_fw_loader_handle *handle,
int status = 0;
if (handle->chip_info->fw_auth) {
- if (!qat_uclo_map_auth_fw(handle, addr_ptr, mem_size, &desc))
+ status = qat_uclo_map_auth_fw(handle, addr_ptr, mem_size, &desc);
+ if (!status)
status = qat_uclo_auth_fw(handle, desc);
qat_uclo_ummap_auth_fw(handle, &desc);
} else {
- if (!handle->chip_info->sram_visible) {
- dev_dbg(&handle->pci_dev->dev,
- "QAT MMP fw not loaded for device 0x%x",
- handle->pci_dev->device);
- return status;
+ if (handle->chip_info->mmp_sram_size < mem_size) {
+ pr_err("QAT: MMP size is too large: 0x%x\n", mem_size);
+ return -EFBIG;
}
qat_uclo_wr_sram_by_words(handle, 0, addr_ptr, mem_size);
}
diff --git a/drivers/crypto/qce/Makefile b/drivers/crypto/qce/Makefile
index 14ade8a7d664..2cf8984e1b85 100644
--- a/drivers/crypto/qce/Makefile
+++ b/drivers/crypto/qce/Makefile
@@ -6,3 +6,4 @@ qcrypto-objs := core.o \
qcrypto-$(CONFIG_CRYPTO_DEV_QCE_SHA) += sha.o
qcrypto-$(CONFIG_CRYPTO_DEV_QCE_SKCIPHER) += skcipher.o
+qcrypto-$(CONFIG_CRYPTO_DEV_QCE_AEAD) += aead.o
diff --git a/drivers/crypto/qce/aead.c b/drivers/crypto/qce/aead.c
new file mode 100644
index 000000000000..290e2446a2f3
--- /dev/null
+++ b/drivers/crypto/qce/aead.c
@@ -0,0 +1,847 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright (C) 2021, Linaro Limited. All rights reserved.
+ */
+#include <linux/dma-mapping.h>
+#include <linux/interrupt.h>
+#include <crypto/algapi.h>
+#include <crypto/gcm.h>
+#include <crypto/authenc.h>
+#include <crypto/internal/aead.h>
+#include <crypto/internal/des.h>
+#include <crypto/sha1.h>
+#include <crypto/sha2.h>
+#include <crypto/scatterwalk.h>
+#include "aead.h"
+
+#define CCM_NONCE_ADATA_SHIFT 6
+#define CCM_NONCE_AUTHSIZE_SHIFT 3
+#define MAX_CCM_ADATA_HEADER_LEN 6
+
+static LIST_HEAD(aead_algs);
+
+/*
+ * qce_aead_done() - DMA completion callback for one AEAD request.
+ * @data: the crypto_async_request that was passed to qce_dma_prep_sgs()
+ *
+ * Stops the DMA channels, unmaps the scatterlists, releases every per-request
+ * allocation (scatter tables and, for CCM with associated data, the formatted
+ * AAD buffer), reads the engine status and then handles the tag:
+ *  - encrypt: the computed tag is copied behind the ciphertext in req->dst;
+ *  - non-CCM decrypt: the computed tag is compared with the one in req->src.
+ * The final status is reported through qce->async_req_done().
+ */
+static void qce_aead_done(void *data)
+{
+	struct crypto_async_request *async_req = data;
+	struct aead_request *req = aead_request_cast(async_req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm);
+	struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
+	struct qce_device *qce = tmpl->qce;
+	struct qce_result_dump *result_buf = qce->dma.result_buf;
+	enum dma_data_direction dir_src, dir_dst;
+	bool diff_dst;
+	int error;
+	u32 status;
+	unsigned int totallen;
+	unsigned char tag[SHA256_DIGEST_SIZE] = {0};
+
+	diff_dst = req->src != req->dst;
+	dir_src = diff_dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
+	dir_dst = diff_dst ? DMA_FROM_DEVICE : DMA_BIDIRECTIONAL;
+
+	error = qce_dma_terminate_all(&qce->dma);
+	if (error)
+		dev_dbg(qce->dev, "aead dma termination error (%d)\n",
+			error);
+	if (diff_dst)
+		dma_unmap_sg(qce->dev, rctx->src_sg, rctx->src_nents, dir_src);
+
+	dma_unmap_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
+
+	if (IS_CCM(rctx->flags)) {
+		if (req->assoclen) {
+			sg_free_table(&rctx->src_tbl);
+			if (diff_dst)
+				sg_free_table(&rctx->dst_tbl);
+			/*
+			 * The formatted AAD buffer is allocated per request in
+			 * qce_aead_ccm_prepare_buf_assoclen(); nothing else
+			 * releases it, so it must be freed here.
+			 */
+			kfree(rctx->adata);
+			rctx->adata = NULL;
+		} else {
+			/* in-place CCM decrypt without AAD never allocates dst_tbl */
+			if (!(IS_DECRYPT(rctx->flags) && !diff_dst))
+				sg_free_table(&rctx->dst_tbl);
+		}
+	} else {
+		sg_free_table(&rctx->dst_tbl);
+	}
+
+	error = qce_check_status(qce, &status);
+	if (error < 0 && (error != -EBADMSG))
+		dev_err(qce->dev, "aead operation error (%x)\n", status);
+
+	if (IS_ENCRYPT(rctx->flags)) {
+		/* append the computed tag right after AAD + ciphertext */
+		totallen = req->cryptlen + req->assoclen;
+		if (IS_CCM(rctx->flags))
+			scatterwalk_map_and_copy(rctx->ccmresult_buf, req->dst,
+						 totallen, ctx->authsize, 1);
+		else
+			scatterwalk_map_and_copy(result_buf->auth_iv, req->dst,
+						 totallen, ctx->authsize, 1);
+
+	} else if (!IS_CCM(rctx->flags)) {
+		/* fetch the received tag, which trails the ciphertext in req->src */
+		totallen = req->cryptlen + req->assoclen - ctx->authsize;
+		scatterwalk_map_and_copy(tag, req->src, totallen, ctx->authsize, 0);
+		/* constant-time compare: do not leak how many tag bytes matched */
+		if (crypto_memneq(result_buf->auth_iv, tag, ctx->authsize)) {
+			pr_err("Bad message error\n");
+			error = -EBADMSG;
+		}
+	}
+
+	qce->async_req_done(qce, error);
+}
+
+/*
+ * Append the device-wide result dump buffer to @tbl as a single entry and
+ * return the table entry that was filled in (or the ERR_PTR produced by
+ * qce_sgtable_add()).
+ */
+static struct scatterlist *
+qce_aead_prepare_result_buf(struct sg_table *tbl, struct aead_request *req)
+{
+	struct qce_device *qce = to_aead_tmpl(crypto_aead_reqtfm(req))->qce;
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct scatterlist *result = &rctx->result_sg;
+
+	sg_init_one(result, qce->dma.result_buf, QCE_RESULT_BUF_SZ);
+
+	return qce_sgtable_add(tbl, result, QCE_RESULT_BUF_SZ);
+}
+
+/*
+ * CCM variant of the result buffer setup: the per-request ccmresult_buf
+ * (QCE_BAM_BURST_SIZE bytes) is appended to @tbl instead of the device-wide
+ * result dump. Returns the filled table entry or an ERR_PTR.
+ */
+static struct scatterlist *
+qce_aead_prepare_ccm_result_buf(struct sg_table *tbl, struct aead_request *req)
+{
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct scatterlist *result = &rctx->result_sg;
+
+	sg_init_one(result, rctx->ccmresult_buf, QCE_BAM_BURST_SIZE);
+
+	return qce_sgtable_add(tbl, result, QCE_BAM_BURST_SIZE);
+}
+
+/*
+ * Build rctx->dst_tbl: the destination payload followed by the result buffer.
+ *
+ * For CCM with associated data the table starts with the formatted AAD
+ * (rctx->adata_sg) followed by the message part of req->dst; otherwise the
+ * first rctx->cryptlen + req->assoclen bytes of req->dst are used directly.
+ * On success rctx->dst_sg / rctx->dst_nents describe the table and the last
+ * (result buffer) entry is returned; on failure the table is freed and an
+ * ERR_PTR is returned.
+ */
+static struct scatterlist *
+qce_aead_prepare_dst_buf(struct aead_request *req)
+{
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
+	struct qce_device *qce = tmpl->qce;
+	struct scatterlist *last, *payload, ffwd[2];
+	const bool ccm = IS_CCM(rctx->flags);
+	unsigned int assoclen = req->assoclen;
+	unsigned int totallen = rctx->cryptlen + assoclen;
+	gfp_t gfp;
+	int rc;
+
+	rctx->dst_nents = sg_nents_for_len(req->dst, totallen);
+	if (rctx->dst_nents < 0) {
+		dev_err(qce->dev, "Invalid numbers of dst SG.\n");
+		return ERR_PTR(-EINVAL);
+	}
+	/* extra entries beyond the payload: two for CCM, one otherwise */
+	rctx->dst_nents += ccm ? 2 : 1;
+
+	if (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP)
+		gfp = GFP_KERNEL;
+	else
+		gfp = GFP_ATOMIC;
+
+	rc = sg_alloc_table(&rctx->dst_tbl, rctx->dst_nents, gfp);
+	if (rc)
+		return ERR_PTR(rc);
+
+	if (ccm && assoclen) {
+		/* skip the caller's AAD: only the message part of dst is used */
+		payload = scatterwalk_ffwd(ffwd, req->dst, assoclen);
+
+		/* formatted AAD first ... */
+		last = qce_sgtable_add(&rctx->dst_tbl, &rctx->adata_sg,
+				       rctx->assoclen);
+		if (IS_ERR(last))
+			goto free_tbl;
+
+		/* ... then the message */
+		last = qce_sgtable_add(&rctx->dst_tbl, payload, rctx->cryptlen);
+		if (IS_ERR(last))
+			goto free_tbl;
+
+		totallen = rctx->cryptlen + rctx->assoclen;
+	} else if (totallen) {
+		last = qce_sgtable_add(&rctx->dst_tbl, req->dst, totallen);
+		if (IS_ERR(last))
+			goto free_tbl;
+	}
+
+	last = ccm ? qce_aead_prepare_ccm_result_buf(&rctx->dst_tbl, req) :
+		     qce_aead_prepare_result_buf(&rctx->dst_tbl, req);
+	if (IS_ERR(last))
+		goto free_tbl;
+
+	sg_mark_end(last);
+	rctx->dst_sg = rctx->dst_tbl.sgl;
+	/* payload entries plus the result buffer entry */
+	rctx->dst_nents = sg_nents_for_len(rctx->dst_sg, totallen) + 1;
+
+	return last;
+
+free_tbl:
+	sg_free_table(&rctx->dst_tbl);
+	return last;
+}
+
+/*
+ * qce_aead_ccm_prepare_buf_assoclen() - build src/dst tables for CCM with AAD.
+ * @req: AEAD request; rctx->assoclen holds the (unpadded) AAD length on entry
+ *
+ * Allocates rctx->adata, writes the CCM AAD length header followed by the
+ * associated data into it (zero padded to a 16 byte multiple), and builds
+ * rctx->src_tbl as: formatted AAD, message, and - for in-place encryption -
+ * the CCM result buffer. rctx->assoclen is updated to the padded length.
+ * For src != dst the destination table is built by qce_aead_prepare_dst_buf();
+ * otherwise the destination aliases the source table.
+ *
+ * Return: 0 on success or a negative errno. On failure everything allocated
+ * here (the source table and rctx->adata) has been released again.
+ */
+static int
+qce_aead_ccm_prepare_buf_assoclen(struct aead_request *req)
+{
+	struct scatterlist *sg, *msg_sg, __sg[2];
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	unsigned int assoclen = rctx->assoclen;
+	unsigned int adata_header_len, cryptlen, totallen;
+	gfp_t gfp;
+	bool diff_dst;
+	int ret;
+
+	/* on decrypt the source additionally carries the received MAC */
+	if (IS_DECRYPT(rctx->flags))
+		cryptlen = rctx->cryptlen + ctx->authsize;
+	else
+		cryptlen = rctx->cryptlen;
+	totallen = cryptlen + req->assoclen;
+
+	/* Get the msg */
+	msg_sg = scatterwalk_ffwd(__sg, req->src, req->assoclen);
+
+	rctx->adata = kzalloc((ALIGN(assoclen, 16) + MAX_CCM_ADATA_HEADER_LEN) *
+			      sizeof(unsigned char), GFP_ATOMIC);
+	if (!rctx->adata)
+		return -ENOMEM;
+
+	/*
+	 * Format associated data (RFC3610 and NIST 800-38C)
+	 * Even though specification allows for AAD to be up to 2^64 - 1 bytes,
+	 * the assoclen field in aead_request is unsigned int and thus limits
+	 * the AAD to be up to 2^32 - 1 bytes. So we handle only two scenarios
+	 * while forming the header for AAD.
+	 */
+	if (assoclen < 0xff00) {
+		adata_header_len = 2;
+		*(__be16 *)rctx->adata = cpu_to_be16(assoclen);
+	} else {
+		adata_header_len = 6;
+		*(__be16 *)rctx->adata = cpu_to_be16(0xfffe);
+		*(__be32 *)(rctx->adata + 2) = cpu_to_be32(assoclen);
+	}
+
+	/* Copy the associated data */
+	if (sg_copy_to_buffer(req->src, sg_nents_for_len(req->src, assoclen),
+			      rctx->adata + adata_header_len,
+			      assoclen) != assoclen) {
+		ret = -EINVAL;
+		goto err_free_adata;
+	}
+
+	/* Pad associated data to block size */
+	rctx->assoclen = ALIGN(assoclen + adata_header_len, 16);
+
+	diff_dst = req->src != req->dst;
+
+	/* refuse a source list shorter than AAD + message instead of sizing
+	 * the table from a negative errno
+	 */
+	ret = sg_nents_for_len(req->src, totallen);
+	if (ret < 0)
+		goto err_free_adata;
+	/* one extra entry when src != dst, two when operating in place */
+	rctx->src_nents = ret + (diff_dst ? 1 : 2);
+
+	gfp = (req->base.flags & CRYPTO_TFM_REQ_MAY_SLEEP) ? GFP_KERNEL : GFP_ATOMIC;
+	ret = sg_alloc_table(&rctx->src_tbl, rctx->src_nents, gfp);
+	if (ret)
+		goto err_free_adata;
+
+	/* Associated Data */
+	sg_init_one(&rctx->adata_sg, rctx->adata, rctx->assoclen);
+	sg = qce_sgtable_add(&rctx->src_tbl, &rctx->adata_sg,
+			     rctx->assoclen);
+	if (IS_ERR(sg)) {
+		ret = PTR_ERR(sg);
+		goto err_free;
+	}
+	/* src msg */
+	sg = qce_sgtable_add(&rctx->src_tbl, msg_sg, cryptlen);
+	if (IS_ERR(sg)) {
+		ret = PTR_ERR(sg);
+		goto err_free;
+	}
+	if (!diff_dst) {
+		/*
+		 * For decrypt, when src and dst buffers are same, there is already space
+		 * in the buffer for padded 0's which is output in lieu of
+		 * the MAC that is input. So skip the below.
+		 */
+		if (!IS_DECRYPT(rctx->flags)) {
+			sg = qce_aead_prepare_ccm_result_buf(&rctx->src_tbl, req);
+			if (IS_ERR(sg)) {
+				ret = PTR_ERR(sg);
+				goto err_free;
+			}
+		}
+	}
+	sg_mark_end(sg);
+	rctx->src_sg = rctx->src_tbl.sgl;
+	totallen = cryptlen + rctx->assoclen;
+	rctx->src_nents = sg_nents_for_len(rctx->src_sg, totallen);
+
+	if (diff_dst) {
+		sg = qce_aead_prepare_dst_buf(req);
+		if (IS_ERR(sg)) {
+			ret = PTR_ERR(sg);
+			goto err_free;
+		}
+	} else {
+		/* in place: encryption has the extra result buffer entry */
+		if (IS_ENCRYPT(rctx->flags))
+			rctx->dst_nents = rctx->src_nents + 1;
+		else
+			rctx->dst_nents = rctx->src_nents;
+		rctx->dst_sg = rctx->src_sg;
+	}
+
+	return 0;
+err_free:
+	sg_free_table(&rctx->src_tbl);
+err_free_adata:
+	/* the caller does no cleanup when this function fails */
+	kfree(rctx->adata);
+	rctx->adata = NULL;
+	return ret;
+}
+
+/*
+ * qce_aead_prepare_buf() - set up src/dst scatterlists for the generic case.
+ * @req: AEAD request
+ *
+ * Builds the destination table via qce_aead_prepare_dst_buf(). When src and
+ * dst differ, req->src is used directly as the source; otherwise the source
+ * aliases the destination table minus its trailing result buffer entry.
+ *
+ * Return: 0 on success or a negative errno; no table is left allocated on
+ * failure.
+ */
+static int qce_aead_prepare_buf(struct aead_request *req)
+{
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
+	struct qce_device *qce = tmpl->qce;
+	struct scatterlist *sg;
+	bool diff_dst = req->src != req->dst;
+	unsigned int totallen;
+
+	totallen = rctx->cryptlen + rctx->assoclen;
+
+	sg = qce_aead_prepare_dst_buf(req);
+	if (IS_ERR(sg))
+		return PTR_ERR(sg);
+	if (diff_dst) {
+		rctx->src_nents = sg_nents_for_len(req->src, totallen);
+		if (rctx->src_nents < 0) {
+			dev_err(qce->dev, "Invalid numbers of src SG.\n");
+			/* dst_tbl was allocated above and the caller will not free it */
+			sg_free_table(&rctx->dst_tbl);
+			return -EINVAL;
+		}
+		rctx->src_sg = req->src;
+	} else {
+		/* in place: same list, without the result buffer entry */
+		rctx->src_nents = rctx->dst_nents - 1;
+		rctx->src_sg = rctx->dst_sg;
+	}
+	return 0;
+}
+
+/*
+ * qce_aead_ccm_prepare_buf() - set up src/dst scatterlists for CCM requests.
+ * @req: AEAD request
+ *
+ * Requests with associated data go through
+ * qce_aead_ccm_prepare_buf_assoclen(); encryption without associated data
+ * uses the generic qce_aead_prepare_buf(). The remaining case, decryption
+ * without associated data, reads ciphertext plus MAC straight from req->src
+ * and either builds a destination table (src != dst) or works in place.
+ *
+ * Return: 0 on success or a negative errno.
+ */
+static int qce_aead_ccm_prepare_buf(struct aead_request *req)
+{
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct scatterlist *sg;
+	bool diff_dst = req->src != req->dst;
+	unsigned int cryptlen;
+
+	if (rctx->assoclen)
+		return qce_aead_ccm_prepare_buf_assoclen(req);
+
+	if (IS_ENCRYPT(rctx->flags))
+		return qce_aead_prepare_buf(req);
+
+	/* decrypt: the source carries ciphertext followed by the MAC */
+	cryptlen = rctx->cryptlen + ctx->authsize;
+
+	rctx->src_nents = sg_nents_for_len(req->src, cryptlen);
+	/* a too-short source list must not reach the DMA mapping code */
+	if (rctx->src_nents < 0)
+		return rctx->src_nents;
+	rctx->src_sg = req->src;
+
+	if (diff_dst) {
+		sg = qce_aead_prepare_dst_buf(req);
+		if (IS_ERR(sg))
+			return PTR_ERR(sg);
+	} else {
+		rctx->dst_nents = rctx->src_nents;
+		rctx->dst_sg = rctx->src_sg;
+	}
+
+	return 0;
+}
+
+/*
+ * qce_aead_create_ccm_nonce() - build the CCM B0 block in rctx->ccm_nonce.
+ * @rctx: request context; iv, ivsize, cryptlen, assoclen and flags are read
+ * @ctx:  transform context; authsize is read
+ *
+ * rctx->iv[0] holds L - 1, where L is the number of bytes used to encode the
+ * message length. The nonce is a copy of the IV with the flags byte extended
+ * by the "AAD present" bit and the encoded tag length, and with the message
+ * length stored big endian in the trailing bytes. Unless the request is
+ * RFC4309, the trailing L bytes of the IV itself are cleared as well.
+ *
+ * Return: 0 on success, -EINVAL for a missing IV or an invalid L,
+ * -EOVERFLOW when the message length does not fit in L bytes.
+ */
+static int qce_aead_create_ccm_nonce(struct qce_aead_reqctx *rctx, struct qce_aead_ctx *ctx)
+{
+	unsigned int msglen_size, ivsize;
+	unsigned int i;
+
+	if (!rctx || !rctx->iv)
+		return -EINVAL;
+
+	msglen_size = rctx->iv[0] + 1;
+
+	/* Verify that msg len size is valid */
+	if (msglen_size < 2 || msglen_size > 8)
+		return -EINVAL;
+
+	/*
+	 * A length that does not fit in L bytes would be silently truncated
+	 * in B0 and yield a wrong MAC; reject it like the generic ccm
+	 * template does. cryptlen is 32 bit, so only L < 4 can overflow.
+	 */
+	if (msglen_size < 4 && (rctx->cryptlen >> (8 * msglen_size)))
+		return -EOVERFLOW;
+
+	ivsize = rctx->ivsize;
+
+	/*
+	 * Clear the msglen bytes in IV.
+	 * Else the h/w engine and nonce will use any stray value pending there.
+	 */
+	if (!IS_CCM_RFC4309(rctx->flags)) {
+		for (i = 0; i < msglen_size; i++)
+			rctx->iv[ivsize - i - 1] = 0;
+	}
+
+	/*
+	 * The crypto framework encodes cryptlen as unsigned int. Thus, even though
+	 * spec allows for up to 8 bytes to encode msg_len only 4 bytes are needed.
+	 */
+	if (msglen_size > 4)
+		msglen_size = 4;
+
+	memcpy(&rctx->ccm_nonce[0], rctx->iv, rctx->ivsize);
+	if (rctx->assoclen)
+		rctx->ccm_nonce[0] |= 1 << CCM_NONCE_ADATA_SHIFT;
+	rctx->ccm_nonce[0] |= ((ctx->authsize - 2) / 2) <<
+				CCM_NONCE_AUTHSIZE_SHIFT;
+	/*
+	 * Store the length big endian at the end of the nonce. Extracting the
+	 * bytes by shifting keeps the result independent of CPU endianness.
+	 */
+	for (i = 0; i < msglen_size; i++)
+		rctx->ccm_nonce[QCE_MAX_NONCE - i - 1] =
+			(rctx->cryptlen >> (8 * i)) & 0xff;
+
+	return 0;
+}
+
+/*
+ * qce_aead_async_req_handle() - run one dequeued AEAD request on the engine.
+ * @async_req: the request to process
+ *
+ * Selects the IV (building the 16 byte RFC4309 counter block from salt and
+ * request IV when needed), creates the CCM nonce, prepares and DMA-maps the
+ * scatterlists, queues the DMA descriptors and finally programs and starts
+ * the engine via qce_start(). Completion is reported by qce_aead_done().
+ *
+ * Return: 0 once the request has been started, or a negative errno with all
+ * mappings and per-request allocations undone.
+ */
+static int
+qce_aead_async_req_handle(struct crypto_async_request *async_req)
+{
+	struct aead_request *req = aead_request_cast(async_req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm);
+	struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
+	struct qce_device *qce = tmpl->qce;
+	enum dma_data_direction dir_src, dir_dst;
+	bool diff_dst;
+	int dst_nents, src_nents, ret;
+
+	if (IS_CCM_RFC4309(rctx->flags)) {
+		/* counter block: flags byte (L - 1 = 3), 3 byte salt, 8 byte IV */
+		memset(rctx->ccm_rfc4309_iv, 0, QCE_MAX_IV_SIZE);
+		rctx->ccm_rfc4309_iv[0] = 3;
+		memcpy(&rctx->ccm_rfc4309_iv[1], ctx->ccm4309_salt, QCE_CCM4309_SALT_SIZE);
+		memcpy(&rctx->ccm_rfc4309_iv[4], req->iv, 8);
+		rctx->iv = rctx->ccm_rfc4309_iv;
+		rctx->ivsize = AES_BLOCK_SIZE;
+		/* the trailing 8 bytes of the AAD area hold the IV, not AAD */
+		rctx->assoclen = req->assoclen - 8;
+	} else {
+		rctx->iv = req->iv;
+		rctx->ivsize = crypto_aead_ivsize(tfm);
+		rctx->assoclen = req->assoclen;
+	}
+
+	diff_dst = req->src != req->dst;
+	dir_src = diff_dst ? DMA_TO_DEVICE : DMA_BIDIRECTIONAL;
+	dir_dst = diff_dst ? DMA_FROM_DEVICE : DMA_BIDIRECTIONAL;
+
+	if (IS_CCM(rctx->flags)) {
+		ret = qce_aead_create_ccm_nonce(rctx, ctx);
+		if (ret)
+			return ret;
+		ret = qce_aead_ccm_prepare_buf(req);
+	} else {
+		ret = qce_aead_prepare_buf(req);
+	}
+	if (ret)
+		return ret;
+
+	/* dma_map_sg() reports failure as 0, never as a negative value */
+	dst_nents = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
+	if (!dst_nents) {
+		ret = -EIO;
+		goto error_free;
+	}
+
+	if (diff_dst) {
+		src_nents = dma_map_sg(qce->dev, rctx->src_sg, rctx->src_nents, dir_src);
+		if (!src_nents) {
+			ret = -EIO;
+			goto error_unmap_dst;
+		}
+	} else {
+		/* in place: the source is the destination without the result
+		 * buffer entry, which in-place CCM decrypt does not have
+		 */
+		if (IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags))
+			src_nents = dst_nents;
+		else
+			src_nents = dst_nents - 1;
+	}
+
+	ret = qce_dma_prep_sgs(&qce->dma, rctx->src_sg, src_nents, rctx->dst_sg, dst_nents,
+			       qce_aead_done, async_req);
+	if (ret)
+		goto error_unmap_src;
+
+	qce_dma_issue_pending(&qce->dma);
+
+	ret = qce_start(async_req, tmpl->crypto_alg_type);
+	if (ret)
+		goto error_terminate;
+
+	return 0;
+
+error_terminate:
+	qce_dma_terminate_all(&qce->dma);
+error_unmap_src:
+	/* unmap what was mapped: rctx->src_sg is not req->src for CCM with AAD */
+	if (diff_dst)
+		dma_unmap_sg(qce->dev, rctx->src_sg, rctx->src_nents, dir_src);
+error_unmap_dst:
+	dma_unmap_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
+error_free:
+	/* release exactly what the prepare step allocated for this request */
+	if (IS_CCM(rctx->flags) && rctx->assoclen) {
+		sg_free_table(&rctx->src_tbl);
+		if (diff_dst)
+			sg_free_table(&rctx->dst_tbl);
+		kfree(rctx->adata);
+		rctx->adata = NULL;
+	} else if (!(IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags) && !diff_dst)) {
+		/* in-place CCM decrypt without AAD never allocated dst_tbl */
+		sg_free_table(&rctx->dst_tbl);
+	}
+	return ret;
+}
+
+/*
+ * qce_aead_crypt() - common entry point for AEAD encrypt and decrypt.
+ * @req:     AEAD request
+ * @encrypt: non-zero for encryption, zero for decryption
+ *
+ * Records the direction and payload length in the request context, hands the
+ * request to the software fallback when the engine cannot process it, and
+ * otherwise validates it and queues it on the device.
+ *
+ * Return: the result of the fallback or of the device enqueue operation, or
+ * -EINVAL for a request that fails validation.
+ */
+static int qce_aead_crypt(struct aead_request *req, int encrypt)
+{
+	struct crypto_aead *tfm = crypto_aead_reqtfm(req);
+	struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct qce_alg_template *tmpl = to_aead_tmpl(tfm);
+	unsigned int blocksize = crypto_aead_blocksize(tfm);
+	/*
+	 * ctx->need_fallback describes the programmed key (set by setkey for
+	 * keys the engine cannot handle). It has to stay valid for every
+	 * request issued with that key, so per-request reasons for falling
+	 * back are tracked in a local and the ctx flag is never cleared here.
+	 */
+	bool need_fallback = ctx->need_fallback;
+
+	rctx->flags = tmpl->alg_flags;
+	rctx->flags |= encrypt ? QCE_ENCRYPT : QCE_DECRYPT;
+
+	if (encrypt)
+		rctx->cryptlen = req->cryptlen;
+	else
+		rctx->cryptlen = req->cryptlen - ctx->authsize;
+
+	/* CE does not handle 0 length messages */
+	if (!rctx->cryptlen) {
+		if (!(IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags)))
+			need_fallback = true;
+	}
+
+	/* If fallback is needed, schedule and exit */
+	if (need_fallback) {
+		aead_request_set_tfm(&rctx->fallback_req, ctx->fallback);
+		aead_request_set_callback(&rctx->fallback_req, req->base.flags,
+					  req->base.complete, req->base.data);
+		aead_request_set_crypt(&rctx->fallback_req, req->src,
+				       req->dst, req->cryptlen, req->iv);
+		aead_request_set_ad(&rctx->fallback_req, req->assoclen);
+
+		return encrypt ? crypto_aead_encrypt(&rctx->fallback_req) :
+				 crypto_aead_decrypt(&rctx->fallback_req);
+	}
+
+	/*
+	 * CBC algorithms require message lengths to be
+	 * multiples of block size.
+	 */
+	if (IS_CBC(rctx->flags) && !IS_ALIGNED(rctx->cryptlen, blocksize))
+		return -EINVAL;
+
+	/* RFC4309 supported AAD size 16 bytes/20 bytes */
+	if (IS_CCM_RFC4309(rctx->flags))
+		if (crypto_ipsec_check_assoclen(req->assoclen))
+			return -EINVAL;
+
+	return tmpl->qce->async_req_enqueue(tmpl->qce, &req->base);
+}
+
+/* aead_alg .encrypt hook: run the request in the encrypt direction. */
+static int qce_aead_encrypt(struct aead_request *req)
+{
+	const int encrypt = 1;
+
+	return qce_aead_crypt(req, encrypt);
+}
+
+/* aead_alg .decrypt hook: run the request in the decrypt direction. */
+static int qce_aead_decrypt(struct aead_request *req)
+{
+	const int encrypt = 0;
+
+	return qce_aead_crypt(req, encrypt);
+}
+
+/*
+ * qce_aead_ccm_setkey() - set the AES key for ccm(aes) / rfc4309(ccm(aes)).
+ * @tfm:    AEAD transform
+ * @key:    key material; for RFC4309 the last QCE_CCM4309_SALT_SIZE bytes
+ *          are the salt
+ * @keylen: length of @key in bytes, including the RFC4309 salt if present
+ *
+ * CCM uses the same AES key for encryption and authentication, so the key is
+ * stored as both. AES-192 keys are marked for the software fallback. The
+ * complete key (salt included) is also passed to the fallback transform.
+ *
+ * Return: 0 on success, -EINVAL for an unsupported key length, or the error
+ * returned by the fallback's setkey.
+ */
+static int qce_aead_ccm_setkey(struct crypto_aead *tfm, const u8 *key,
+			       unsigned int keylen)
+{
+	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	unsigned long flags = to_aead_tmpl(tfm)->alg_flags;
+	const unsigned int full_keylen = keylen;
+
+	if (IS_CCM_RFC4309(flags)) {
+		if (keylen < QCE_CCM4309_SALT_SIZE)
+			return -EINVAL;
+		keylen -= QCE_CCM4309_SALT_SIZE;
+		memcpy(ctx->ccm4309_salt, key + keylen, QCE_CCM4309_SALT_SIZE);
+	}
+
+	if (keylen != AES_KEYSIZE_128 && keylen != AES_KEYSIZE_256 && keylen != AES_KEYSIZE_192)
+		return -EINVAL;
+
+	ctx->enc_keylen = keylen;
+	ctx->auth_keylen = keylen;
+
+	memcpy(ctx->enc_key, key, keylen);
+	memcpy(ctx->auth_key, key, keylen);
+
+	/*
+	 * Assign rather than only set: a fallback decision made for a
+	 * previously programmed AES-192 key must not stick to a new key the
+	 * engine can handle.
+	 */
+	ctx->need_fallback = (keylen == AES_KEYSIZE_192);
+
+	return crypto_aead_setkey(ctx->fallback, key, full_keylen);
+}
+
+/*
+ * qce_aead_setkey() - set the keys for the authenc(hmac(...),cbc(...)) algorithms.
+ * @tfm:    AEAD transform
+ * @key:    authenc-encoded key blob (authentication key + encryption key)
+ * @keylen: length of @key in bytes
+ *
+ * Splits the blob, validates the cipher key for the selected cipher and
+ * stores both keys in the transform context. Keys the engine cannot process
+ * (AES-192, 3DES keys with two identical parts) are marked for the software
+ * fallback. The unmodified blob is also passed to the fallback transform.
+ *
+ * Return: 0 on success or a negative errno.
+ */
+static int qce_aead_setkey(struct crypto_aead *tfm, const u8 *key, unsigned int keylen)
+{
+	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	struct crypto_authenc_keys authenc_keys;
+	unsigned long flags = to_aead_tmpl(tfm)->alg_flags;
+	bool need_fallback = false;
+	u32 _key[6];
+	int err;
+
+	err = crypto_authenc_extractkeys(&authenc_keys, key, keylen);
+	if (err)
+		return err;
+
+	if (authenc_keys.enckeylen > QCE_MAX_KEY_SIZE ||
+	    authenc_keys.authkeylen > QCE_MAX_KEY_SIZE)
+		return -EINVAL;
+
+	if (IS_DES(flags)) {
+		err = verify_aead_des_key(tfm, authenc_keys.enckey, authenc_keys.enckeylen);
+		if (err)
+			return err;
+	} else if (IS_3DES(flags)) {
+		err = verify_aead_des3_key(tfm, authenc_keys.enckey, authenc_keys.enckeylen);
+		if (err)
+			return err;
+		/*
+		 * The crypto engine does not support any two keys
+		 * being the same for triple des algorithms. The
+		 * verify_aead_des3_key does not check for all the
+		 * below conditions. Schedule fallback in this case.
+		 */
+		memcpy(_key, authenc_keys.enckey, DES3_EDE_KEY_SIZE);
+		if (!((_key[0] ^ _key[2]) | (_key[1] ^ _key[3])) ||
+		    !((_key[2] ^ _key[4]) | (_key[3] ^ _key[5])) ||
+		    !((_key[0] ^ _key[4]) | (_key[1] ^ _key[5])))
+			need_fallback = true;
+	} else if (IS_AES(flags)) {
+		/* No random key sizes */
+		if (authenc_keys.enckeylen != AES_KEYSIZE_128 &&
+		    authenc_keys.enckeylen != AES_KEYSIZE_192 &&
+		    authenc_keys.enckeylen != AES_KEYSIZE_256)
+			return -EINVAL;
+		if (authenc_keys.enckeylen == AES_KEYSIZE_192)
+			need_fallback = true;
+	}
+
+	/*
+	 * Publish the fallback decision together with the key it belongs to,
+	 * so a decision made for an earlier key never sticks to this one.
+	 */
+	ctx->need_fallback = need_fallback;
+	ctx->enc_keylen = authenc_keys.enckeylen;
+	ctx->auth_keylen = authenc_keys.authkeylen;
+
+	memcpy(ctx->enc_key, authenc_keys.enckey, authenc_keys.enckeylen);
+
+	/* zero the unused tail of the authentication key buffer */
+	memset(ctx->auth_key, 0, sizeof(ctx->auth_key));
+	memcpy(ctx->auth_key, authenc_keys.authkey, authenc_keys.authkeylen);
+
+	return crypto_aead_setkey(ctx->fallback, key, keylen);
+}
+
+/*
+ * aead_alg .setauthsize hook. For CCM the tag length must be an even value
+ * from 4 to 16; RFC4309 additionally requires at least 8 and a multiple of 4.
+ * The accepted value is stored in the context and forwarded to the fallback.
+ */
+static int qce_aead_setauthsize(struct crypto_aead *tfm, unsigned int authsize)
+{
+	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	unsigned long flags = to_aead_tmpl(tfm)->alg_flags;
+
+	if (IS_CCM(flags)) {
+		bool invalid = authsize < 4 || authsize > 16 || (authsize & 1);
+
+		if (!invalid && IS_CCM_RFC4309(flags))
+			invalid = authsize < 8 || (authsize & 3);
+
+		if (invalid)
+			return -EINVAL;
+	}
+
+	ctx->authsize = authsize;
+
+	return crypto_aead_setauthsize(ctx->fallback, authsize);
+}
+
+/*
+ * aead_alg .init hook: allocate the software fallback for the same algorithm
+ * name and size the request context so that the fallback's own request
+ * context fits behind struct qce_aead_reqctx.
+ */
+static int qce_aead_init(struct crypto_aead *tfm)
+{
+	struct qce_aead_ctx *ctx = crypto_aead_ctx(tfm);
+	const char *alg_name = crypto_tfm_alg_name(&tfm->base);
+	unsigned int reqsize;
+
+	ctx->need_fallback = false;
+	ctx->fallback = crypto_alloc_aead(alg_name, 0, CRYPTO_ALG_NEED_FALLBACK);
+	if (IS_ERR(ctx->fallback))
+		return PTR_ERR(ctx->fallback);
+
+	reqsize = sizeof(struct qce_aead_reqctx) +
+		  crypto_aead_reqsize(ctx->fallback);
+	crypto_aead_set_reqsize(tfm, reqsize);
+
+	return 0;
+}
+
+/* aead_alg .exit hook: release the fallback transform taken in qce_aead_init(). */
+static void qce_aead_exit(struct crypto_aead *tfm)
+{
+	struct qce_aead_ctx *tfm_ctx = crypto_aead_ctx(tfm);
+	struct crypto_aead *fallback = tfm_ctx->fallback;
+
+	crypto_free_aead(fallback);
+}
+
+/*
+ * Static description of one AEAD algorithm offered by the driver; consumed by
+ * qce_aead_register_one() to fill in a struct aead_alg.
+ */
+struct qce_aead_def {
+ /* QCE_ALG_* / QCE_MODE_* / QCE_HASH_* bits, copied to tmpl->alg_flags */
+ unsigned long flags;
+ /* generic algorithm name (cra_name) */
+ const char *name;
+ /* driver specific name (cra_driver_name) */
+ const char *drv_name;
+ /* cra_blocksize */
+ unsigned int blocksize;
+ /* copied to alg->chunksize; no aead_def[] entry sets it, so it stays 0 */
+ unsigned int chunksize;
+ /* IV size in bytes as seen by the API user */
+ unsigned int ivsize;
+ /* largest authentication tag size in bytes */
+ unsigned int maxauthsize;
+};
+
+/*
+ * AEAD algorithms registered by qce_aead_register(): HMAC-SHA1/SHA256 with
+ * CBC DES/3DES, HMAC-SHA256 with CBC AES, and AES CCM in its plain and
+ * RFC4309 forms.
+ */
+static const struct qce_aead_def aead_def[] = {
+ {
+ .flags = QCE_ALG_DES | QCE_MODE_CBC | QCE_HASH_SHA1_HMAC,
+ .name = "authenc(hmac(sha1),cbc(des))",
+ .drv_name = "authenc-hmac-sha1-cbc-des-qce",
+ .blocksize = DES_BLOCK_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .maxauthsize = SHA1_DIGEST_SIZE,
+ },
+ {
+ .flags = QCE_ALG_3DES | QCE_MODE_CBC | QCE_HASH_SHA1_HMAC,
+ .name = "authenc(hmac(sha1),cbc(des3_ede))",
+ .drv_name = "authenc-hmac-sha1-cbc-3des-qce",
+ .blocksize = DES3_EDE_BLOCK_SIZE,
+ .ivsize = DES3_EDE_BLOCK_SIZE,
+ .maxauthsize = SHA1_DIGEST_SIZE,
+ },
+ {
+ .flags = QCE_ALG_DES | QCE_MODE_CBC | QCE_HASH_SHA256_HMAC,
+ .name = "authenc(hmac(sha256),cbc(des))",
+ .drv_name = "authenc-hmac-sha256-cbc-des-qce",
+ .blocksize = DES_BLOCK_SIZE,
+ .ivsize = DES_BLOCK_SIZE,
+ .maxauthsize = SHA256_DIGEST_SIZE,
+ },
+ {
+ .flags = QCE_ALG_3DES | QCE_MODE_CBC | QCE_HASH_SHA256_HMAC,
+ .name = "authenc(hmac(sha256),cbc(des3_ede))",
+ .drv_name = "authenc-hmac-sha256-cbc-3des-qce",
+ .blocksize = DES3_EDE_BLOCK_SIZE,
+ .ivsize = DES3_EDE_BLOCK_SIZE,
+ .maxauthsize = SHA256_DIGEST_SIZE,
+ },
+ {
+ .flags = QCE_ALG_AES | QCE_MODE_CBC | QCE_HASH_SHA256_HMAC,
+ .name = "authenc(hmac(sha256),cbc(aes))",
+ .drv_name = "authenc-hmac-sha256-cbc-aes-qce",
+ .blocksize = AES_BLOCK_SIZE,
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = SHA256_DIGEST_SIZE,
+ },
+ {
+ /* CCM is registered with a block size of 1 */
+ .flags = QCE_ALG_AES | QCE_MODE_CCM,
+ .name = "ccm(aes)",
+ .drv_name = "ccm-aes-qce",
+ .blocksize = 1,
+ .ivsize = AES_BLOCK_SIZE,
+ .maxauthsize = AES_BLOCK_SIZE,
+ },
+ {
+ /*
+ * RFC4309 takes an 8 byte IV from the caller; the driver expands it
+ * to a full AES block using the salt taken from the key.
+ */
+ .flags = QCE_ALG_AES | QCE_MODE_CCM | QCE_MODE_CCM_RFC4309,
+ .name = "rfc4309(ccm(aes))",
+ .drv_name = "rfc4309-ccm-aes-qce",
+ .blocksize = 1,
+ .ivsize = 8,
+ .maxauthsize = AES_BLOCK_SIZE,
+ },
+};
+
+/*
+ * qce_aead_register_one() - register a single AEAD algorithm.
+ * @def: static description of the algorithm
+ * @qce: device the algorithm is bound to
+ *
+ * Allocates a template, fills in the embedded struct aead_alg from @def,
+ * registers it with the crypto API and, on success, links the template into
+ * the aead_algs list so qce_aead_unregister() can find it.
+ *
+ * Return: 0 on success, -ENOMEM if the template cannot be allocated, or the
+ * error returned by crypto_register_aead().
+ */
+static int qce_aead_register_one(const struct qce_aead_def *def, struct qce_device *qce)
+{
+	struct qce_alg_template *tmpl;
+	struct aead_alg *alg;
+	int ret;
+
+	tmpl = kzalloc(sizeof(*tmpl), GFP_KERNEL);
+	if (!tmpl)
+		return -ENOMEM;
+
+	alg = &tmpl->alg.aead;
+
+	snprintf(alg->base.cra_name, CRYPTO_MAX_ALG_NAME, "%s", def->name);
+	snprintf(alg->base.cra_driver_name, CRYPTO_MAX_ALG_NAME, "%s",
+		 def->drv_name);
+
+	alg->base.cra_blocksize = def->blocksize;
+	alg->chunksize = def->chunksize;
+	alg->ivsize = def->ivsize;
+	alg->maxauthsize = def->maxauthsize;
+	if (IS_CCM(def->flags))
+		alg->setkey = qce_aead_ccm_setkey;
+	else
+		alg->setkey = qce_aead_setkey;
+	alg->setauthsize = qce_aead_setauthsize;
+	alg->encrypt = qce_aead_encrypt;
+	alg->decrypt = qce_aead_decrypt;
+	alg->init = qce_aead_init;
+	alg->exit = qce_aead_exit;
+
+	alg->base.cra_priority = 300;
+	alg->base.cra_flags = CRYPTO_ALG_ASYNC |
+			      CRYPTO_ALG_ALLOCATES_MEMORY |
+			      CRYPTO_ALG_KERN_DRIVER_ONLY |
+			      CRYPTO_ALG_NEED_FALLBACK;
+	alg->base.cra_ctxsize = sizeof(struct qce_aead_ctx);
+	alg->base.cra_alignmask = 0;
+	alg->base.cra_module = THIS_MODULE;
+
+	INIT_LIST_HEAD(&tmpl->entry);
+	tmpl->crypto_alg_type = CRYPTO_ALG_TYPE_AEAD;
+	tmpl->alg_flags = def->flags;
+	tmpl->qce = qce;
+
+	ret = crypto_register_aead(alg);
+	if (ret) {
+		/*
+		 * alg is embedded in tmpl, so the name must be printed before
+		 * the template is freed.
+		 */
+		dev_err(qce->dev, "%s registration failed\n", alg->base.cra_name);
+		kfree(tmpl);
+		return ret;
+	}
+
+	list_add_tail(&tmpl->entry, &aead_algs);
+	dev_dbg(qce->dev, "%s is registered\n", alg->base.cra_name);
+	return 0;
+}
+
+/*
+ * Unregister and free every AEAD template on the aead_algs list, leaving the
+ * list empty. @qce is not used.
+ */
+static void qce_aead_unregister(struct qce_device *qce)
+{
+	struct qce_alg_template *tmpl;
+
+	while (!list_empty(&aead_algs)) {
+		tmpl = list_first_entry(&aead_algs, struct qce_alg_template,
+					entry);
+		crypto_unregister_aead(&tmpl->alg.aead);
+		list_del(&tmpl->entry);
+		kfree(tmpl);
+	}
+}
+
+/*
+ * Register every entry of aead_def[] for @qce. If one registration fails,
+ * all algorithms registered so far are unregistered again and the error is
+ * returned.
+ */
+static int qce_aead_register(struct qce_device *qce)
+{
+	int ret, i;
+
+	for (i = 0; i < ARRAY_SIZE(aead_def); i++) {
+		ret = qce_aead_register_one(&aead_def[i], qce);
+		if (ret) {
+			qce_aead_unregister(qce);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/*
+ * AEAD operations exported to the qce core (declared extern in aead.h):
+ * algorithm (un)registration and the handler that runs one request.
+ */
+const struct qce_algo_ops aead_ops = {
+ .type = CRYPTO_ALG_TYPE_AEAD,
+ .register_algs = qce_aead_register,
+ .unregister_algs = qce_aead_unregister,
+ .async_req_handle = qce_aead_async_req_handle,
+};
diff --git a/drivers/crypto/qce/aead.h b/drivers/crypto/qce/aead.h
new file mode 100644
index 000000000000..efb8477cc088
--- /dev/null
+++ b/drivers/crypto/qce/aead.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (c) 2021, Linaro Limited. All rights reserved.
+ */
+
+#ifndef _AEAD_H_
+#define _AEAD_H_
+
+#include "common.h"
+#include "core.h"
+
+#define QCE_MAX_KEY_SIZE 64
+#define QCE_CCM4309_SALT_SIZE 3
+
+/* Per-transform (tfm) context of the qce AEAD algorithms. */
+struct qce_aead_ctx {
+ /* cipher key as stored by setkey */
+ u8 enc_key[QCE_MAX_KEY_SIZE];
+ /* authentication key; for CCM a copy of the cipher key */
+ u8 auth_key[QCE_MAX_KEY_SIZE];
+ /* RFC4309 salt, taken from the tail of the key passed to setkey */
+ u8 ccm4309_salt[QCE_CCM4309_SALT_SIZE];
+ /* length of enc_key in bytes */
+ unsigned int enc_keylen;
+ /* length of auth_key in bytes */
+ unsigned int auth_keylen;
+ /* authentication tag size in bytes, set by setauthsize */
+ unsigned int authsize;
+ /* route requests to the software fallback instead of the engine */
+ bool need_fallback;
+ /* software implementation of the same algorithm, allocated at init */
+ struct crypto_aead *fallback;
+};
+
+/* Per-request context of the qce AEAD algorithms. */
+struct qce_aead_reqctx {
+ /* template alg_flags plus QCE_ENCRYPT or QCE_DECRYPT */
+ unsigned long flags;
+ /* IV used for the request: req->iv or ccm_rfc4309_iv */
+ u8 *iv;
+ /* size of *iv in bytes */
+ unsigned int ivsize;
+ /* number of source scatterlist entries */
+ int src_nents;
+ /* number of destination scatterlist entries */
+ int dst_nents;
+ /* single entry describing the result buffer appended to a table */
+ struct scatterlist result_sg;
+ /* single entry describing the formatted CCM associated data (adata) */
+ struct scatterlist adata_sg;
+ /* destination scatter table built for the request */
+ struct sg_table dst_tbl;
+ /* source scatter table, built only for CCM with associated data */
+ struct sg_table src_tbl;
+ /* destination list handed to DMA */
+ struct scatterlist *dst_sg;
+ /* source list handed to DMA */
+ struct scatterlist *src_sg;
+ /* payload length; on decrypt req->cryptlen minus the tag size */
+ unsigned int cryptlen;
+ /* associated data length; padded length once CCM AAD is formatted */
+ unsigned int assoclen;
+ /* kzalloc'd buffer: CCM AAD length header + AAD, zero padded */
+ unsigned char *adata;
+ /* CCM nonce block built by qce_aead_create_ccm_nonce() */
+ u8 ccm_nonce[QCE_MAX_NONCE];
+ /* CCM result buffer; the tag is copied out of it after encryption */
+ u8 ccmresult_buf[QCE_BAM_BURST_SIZE];
+ /* RFC4309 counter block assembled from salt and request IV */
+ u8 ccm_rfc4309_iv[QCE_MAX_IV_SIZE];
+ /*
+ * Request used for the software fallback.
+ * NOTE(review): presumably has to stay the last member, since
+ * qce_aead_init() reserves the fallback's request context right
+ * behind this structure - confirm before reordering.
+ */
+ struct aead_request fallback_req;
+};
+
+/*
+ * Map an AEAD transform to the qce template that embeds its algorithm
+ * definition.
+ */
+static inline struct qce_alg_template *to_aead_tmpl(struct crypto_aead *tfm)
+{
+	return container_of(crypto_aead_alg(tfm), struct qce_alg_template,
+			    alg.aead);
+}
+
+extern const struct qce_algo_ops aead_ops;
+
+#endif /* _AEAD_H_ */
diff --git a/drivers/crypto/qce/common.c b/drivers/crypto/qce/common.c
index dceb9579d87a..7c612ba5068f 100644
--- a/drivers/crypto/qce/common.c
+++ b/drivers/crypto/qce/common.c
@@ -15,6 +15,7 @@
#include "core.h"
#include "regs-v5.h"
#include "sha.h"
+#include "aead.h"
static inline u32 qce_read(struct qce_device *qce, u32 offset)
{
@@ -88,17 +89,20 @@ static void qce_setup_config(struct qce_device *qce)
qce_write(qce, REG_CONFIG, config);
}
-static inline void qce_crypto_go(struct qce_device *qce)
+static inline void qce_crypto_go(struct qce_device *qce, bool result_dump)
{
- qce_write(qce, REG_GOPROC, BIT(GO_SHIFT) | BIT(RESULTS_DUMP_SHIFT));
+ if (result_dump)
+ qce_write(qce, REG_GOPROC, BIT(GO_SHIFT) | BIT(RESULTS_DUMP_SHIFT));
+ else
+ qce_write(qce, REG_GOPROC, BIT(GO_SHIFT));
}
-#ifdef CONFIG_CRYPTO_DEV_QCE_SHA
-static u32 qce_auth_cfg(unsigned long flags, u32 key_size)
+#if defined(CONFIG_CRYPTO_DEV_QCE_SHA) || defined(CONFIG_CRYPTO_DEV_QCE_AEAD)
+static u32 qce_auth_cfg(unsigned long flags, u32 key_size, u32 auth_size)
{
u32 cfg = 0;
- if (IS_AES(flags) && (IS_CCM(flags) || IS_CMAC(flags)))
+ if (IS_CCM(flags) || IS_CMAC(flags))
cfg |= AUTH_ALG_AES << AUTH_ALG_SHIFT;
else
cfg |= AUTH_ALG_SHA << AUTH_ALG_SHIFT;
@@ -116,15 +120,16 @@ static u32 qce_auth_cfg(unsigned long flags, u32 key_size)
cfg |= AUTH_SIZE_SHA256 << AUTH_SIZE_SHIFT;
else if (IS_CMAC(flags))
cfg |= AUTH_SIZE_ENUM_16_BYTES << AUTH_SIZE_SHIFT;
+ else if (IS_CCM(flags))
+ cfg |= (auth_size - 1) << AUTH_SIZE_SHIFT;
if (IS_SHA1(flags) || IS_SHA256(flags))
cfg |= AUTH_MODE_HASH << AUTH_MODE_SHIFT;
- else if (IS_SHA1_HMAC(flags) || IS_SHA256_HMAC(flags) ||
- IS_CBC(flags) || IS_CTR(flags))
+ else if (IS_SHA1_HMAC(flags) || IS_SHA256_HMAC(flags))
cfg |= AUTH_MODE_HMAC << AUTH_MODE_SHIFT;
- else if (IS_AES(flags) && IS_CCM(flags))
+ else if (IS_CCM(flags))
cfg |= AUTH_MODE_CCM << AUTH_MODE_SHIFT;
- else if (IS_AES(flags) && IS_CMAC(flags))
+ else if (IS_CMAC(flags))
cfg |= AUTH_MODE_CMAC << AUTH_MODE_SHIFT;
if (IS_SHA(flags) || IS_SHA_HMAC(flags))
@@ -133,13 +138,11 @@ static u32 qce_auth_cfg(unsigned long flags, u32 key_size)
if (IS_CCM(flags))
cfg |= QCE_MAX_NONCE_WORDS << AUTH_NONCE_NUM_WORDS_SHIFT;
- if (IS_CBC(flags) || IS_CTR(flags) || IS_CCM(flags) ||
- IS_CMAC(flags))
- cfg |= BIT(AUTH_LAST_SHIFT) | BIT(AUTH_FIRST_SHIFT);
-
return cfg;
}
+#endif
+#ifdef CONFIG_CRYPTO_DEV_QCE_SHA
static int qce_setup_regs_ahash(struct crypto_async_request *async_req)
{
struct ahash_request *req = ahash_request_cast(async_req);
@@ -168,7 +171,7 @@ static int qce_setup_regs_ahash(struct crypto_async_request *async_req)
qce_clear_array(qce, REG_AUTH_KEY0, 16);
qce_clear_array(qce, REG_AUTH_BYTECNT0, 4);
- auth_cfg = qce_auth_cfg(rctx->flags, rctx->authklen);
+ auth_cfg = qce_auth_cfg(rctx->flags, rctx->authklen, digestsize);
}
if (IS_SHA_HMAC(rctx->flags) || IS_CMAC(rctx->flags)) {
@@ -196,7 +199,7 @@ static int qce_setup_regs_ahash(struct crypto_async_request *async_req)
qce_write_array(qce, REG_AUTH_BYTECNT0,
(u32 *)rctx->byte_count, 2);
- auth_cfg = qce_auth_cfg(rctx->flags, 0);
+ auth_cfg = qce_auth_cfg(rctx->flags, 0, digestsize);
if (rctx->last_blk)
auth_cfg |= BIT(AUTH_LAST_SHIFT);
@@ -219,13 +222,13 @@ go_proc:
config = qce_config_reg(qce, 1);
qce_write(qce, REG_CONFIG, config);
- qce_crypto_go(qce);
+ qce_crypto_go(qce, true);
return 0;
}
#endif
-#ifdef CONFIG_CRYPTO_DEV_QCE_SKCIPHER
+#if defined(CONFIG_CRYPTO_DEV_QCE_SKCIPHER) || defined(CONFIG_CRYPTO_DEV_QCE_AEAD)
static u32 qce_encr_cfg(unsigned long flags, u32 aes_key_size)
{
u32 cfg = 0;
@@ -271,7 +274,9 @@ static u32 qce_encr_cfg(unsigned long flags, u32 aes_key_size)
return cfg;
}
+#endif
+#ifdef CONFIG_CRYPTO_DEV_QCE_SKCIPHER
static void qce_xts_swapiv(__be32 *dst, const u8 *src, unsigned int ivsize)
{
u8 swap[QCE_AES_IV_LENGTH];
@@ -380,7 +385,156 @@ static int qce_setup_regs_skcipher(struct crypto_async_request *async_req)
config = qce_config_reg(qce, 1);
qce_write(qce, REG_CONFIG, config);
- qce_crypto_go(qce);
+ qce_crypto_go(qce, true);
+
+ return 0;
+}
+#endif
+
+#ifdef CONFIG_CRYPTO_DEV_QCE_AEAD
+/*
+ * Initial SHA-1 state, zero padded to the SHA-256 state size so that both
+ * tables have the same number of words.
+ */
+static const u32 std_iv_sha1[SHA256_DIGEST_SIZE / sizeof(u32)] = {
+ SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4, 0, 0, 0
+};
+
+/* Initial SHA-256 state. */
+static const u32 std_iv_sha256[SHA256_DIGEST_SIZE / sizeof(u32)] = {
+ SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3,
+ SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7
+};
+
+/*
+ * Convert @len bytes of big endian data at @src into CPU-order 32 bit words
+ * in @dst. Only complete words are converted; a trailing partial word leaves
+ * the corresponding @dst word untouched. Returns the word count rounded up,
+ * i.e. including such a partial word.
+ */
+static unsigned int qce_be32_to_cpu_array(u32 *dst, const u8 *src, unsigned int len)
+{
+	const unsigned int full_words = len / sizeof(u32);
+	unsigned int i;
+
+	for (i = 0; i < full_words; i++)
+		dst[i] = be32_to_cpup((const __be32 *)(src + i * sizeof(u32)));
+
+	return DIV_ROUND_UP(len, sizeof(u32));
+}
+
+/*
+ * qce_setup_regs_aead() - program the engine registers for one AEAD request
+ * and start processing.
+ * @async_req: the AEAD request; keys are taken from the transform context,
+ *             IV, nonce, lengths and flags from the request context
+ *
+ * Writes cipher key and IV, authentication key and either the default HMAC
+ * IV or the CCM nonce, then the segment configuration and sizes, and finally
+ * triggers the operation.
+ *
+ * Return: always 0.
+ */
+static int qce_setup_regs_aead(struct crypto_async_request *async_req)
+{
+ struct aead_request *req = aead_request_cast(async_req);
+ struct qce_aead_reqctx *rctx = aead_request_ctx(req);
+ struct qce_aead_ctx *ctx = crypto_tfm_ctx(async_req->tfm);
+ struct qce_alg_template *tmpl = to_aead_tmpl(crypto_aead_reqtfm(req));
+ struct qce_device *qce = tmpl->qce;
+ u32 enckey[QCE_MAX_CIPHER_KEY_SIZE / sizeof(u32)] = {0};
+ u32 enciv[QCE_MAX_IV_SIZE / sizeof(u32)] = {0};
+ u32 authkey[QCE_SHA_HMAC_KEY_SIZE / sizeof(u32)] = {0};
+ u32 authiv[SHA256_DIGEST_SIZE / sizeof(u32)] = {0};
+ u32 authnonce[QCE_MAX_NONCE / sizeof(u32)] = {0};
+ unsigned int enc_keylen = ctx->enc_keylen;
+ unsigned int auth_keylen = ctx->auth_keylen;
+ unsigned int enc_ivsize = rctx->ivsize;
+ unsigned int auth_ivsize = 0;
+ unsigned int enckey_words, enciv_words;
+ unsigned int authkey_words, authiv_words, authnonce_words;
+ unsigned long flags = rctx->flags;
+ u32 encr_cfg, auth_cfg, config, totallen;
+ u32 iv_last_word;
+
+ qce_setup_config(qce);
+
+ /* Write encryption key */
+ enckey_words = qce_be32_to_cpu_array(enckey, ctx->enc_key, enc_keylen);
+ qce_write_array(qce, REG_ENCR_KEY0, enckey, enckey_words);
+
+ /* Write encryption iv */
+ enciv_words = qce_be32_to_cpu_array(enciv, rctx->iv, enc_ivsize);
+ qce_write_array(qce, REG_CNTR0_IV0, enciv, enciv_words);
+
+ /*
+ * CCM: CNTR3 is rewritten with the last IV word incremented by one, the
+ * unmodified IV goes to the CCM internal counter registers, and all
+ * counter mask registers are set to all ones.
+ */
+ if (IS_CCM(rctx->flags)) {
+ iv_last_word = enciv[enciv_words - 1];
+ qce_write(qce, REG_CNTR3_IV3, iv_last_word + 1);
+ qce_write_array(qce, REG_ENCR_CCM_INT_CNTR0, (u32 *)enciv, enciv_words);
+ qce_write(qce, REG_CNTR_MASK, ~0);
+ qce_write(qce, REG_CNTR_MASK0, ~0);
+ qce_write(qce, REG_CNTR_MASK1, ~0);
+ qce_write(qce, REG_CNTR_MASK2, ~0);
+ }
+
+ /* Clear authentication IV and KEY registers of previous values */
+ qce_clear_array(qce, REG_AUTH_IV0, 16);
+ qce_clear_array(qce, REG_AUTH_KEY0, 16);
+
+ /* Clear byte count */
+ qce_clear_array(qce, REG_AUTH_BYTECNT0, 4);
+
+ /* Write authentication key */
+ authkey_words = qce_be32_to_cpu_array(authkey, ctx->auth_key, auth_keylen);
+ qce_write_array(qce, REG_AUTH_KEY0, (u32 *)authkey, authkey_words);
+
+ /* Write initial authentication IV only for HMAC algorithms */
+ if (IS_SHA_HMAC(rctx->flags)) {
+ /* Write default authentication iv */
+ if (IS_SHA1_HMAC(rctx->flags)) {
+ auth_ivsize = SHA1_DIGEST_SIZE;
+ memcpy(authiv, std_iv_sha1, auth_ivsize);
+ } else if (IS_SHA256_HMAC(rctx->flags)) {
+ auth_ivsize = SHA256_DIGEST_SIZE;
+ memcpy(authiv, std_iv_sha256, auth_ivsize);
+ }
+ authiv_words = auth_ivsize / sizeof(u32);
+ qce_write_array(qce, REG_AUTH_IV0, (u32 *)authiv, authiv_words);
+ } else if (IS_CCM(rctx->flags)) {
+ /* Write nonce for CCM algorithms */
+ authnonce_words = qce_be32_to_cpu_array(authnonce, rctx->ccm_nonce, QCE_MAX_NONCE);
+ qce_write_array(qce, REG_AUTH_INFO_NONCE0, authnonce, authnonce_words);
+ }
+
+ /* Set up ENCR_SEG_CFG */
+ encr_cfg = qce_encr_cfg(flags, enc_keylen);
+ if (IS_ENCRYPT(flags))
+ encr_cfg |= BIT(ENCODE_SHIFT);
+ qce_write(qce, REG_ENCR_SEG_CFG, encr_cfg);
+
+ /* Set up AUTH_SEG_CFG */
+ auth_cfg = qce_auth_cfg(rctx->flags, auth_keylen, ctx->authsize);
+ /* the whole request is one segment: both first and last */
+ auth_cfg |= BIT(AUTH_LAST_SHIFT);
+ auth_cfg |= BIT(AUTH_FIRST_SHIFT);
+ /*
+ * Authentication position relative to the cipher: CCM uses BEFORE on
+ * encrypt and AFTER on decrypt, the other algorithms the opposite.
+ */
+ if (IS_ENCRYPT(flags)) {
+ if (IS_CCM(rctx->flags))
+ auth_cfg |= AUTH_POS_BEFORE << AUTH_POS_SHIFT;
+ else
+ auth_cfg |= AUTH_POS_AFTER << AUTH_POS_SHIFT;
+ } else {
+ if (IS_CCM(rctx->flags))
+ auth_cfg |= AUTH_POS_AFTER << AUTH_POS_SHIFT;
+ else
+ auth_cfg |= AUTH_POS_BEFORE << AUTH_POS_SHIFT;
+ }
+ qce_write(qce, REG_AUTH_SEG_CFG, auth_cfg);
+
+ totallen = rctx->cryptlen + rctx->assoclen;
+
+ /* Set the encryption size and start offset */
+ /* CCM decrypt also runs the received tag through the cipher */
+ if (IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags))
+ qce_write(qce, REG_ENCR_SEG_SIZE, rctx->cryptlen + ctx->authsize);
+ else
+ qce_write(qce, REG_ENCR_SEG_SIZE, rctx->cryptlen);
+ /* the cipher segment starts right after the associated data */
+ qce_write(qce, REG_ENCR_SEG_START, rctx->assoclen & 0xffff);
+
+ /* Set the authentication size and start offset */
+ qce_write(qce, REG_AUTH_SEG_SIZE, totallen);
+ qce_write(qce, REG_AUTH_SEG_START, 0);
+
+ /* Write total length */
+ if (IS_CCM(rctx->flags) && IS_DECRYPT(rctx->flags))
+ qce_write(qce, REG_SEG_SIZE, totallen + ctx->authsize);
+ else
+ qce_write(qce, REG_SEG_SIZE, totallen);
+
+ /* get little endianness */
+ config = qce_config_reg(qce, 1);
+ qce_write(qce, REG_CONFIG, config);
+
+ /* Start the process */
+ /* a result dump is requested for every mode except CCM */
+ qce_crypto_go(qce, !IS_CCM(flags));
return 0;
}
@@ -397,6 +551,10 @@ int qce_start(struct crypto_async_request *async_req, u32 type)
case CRYPTO_ALG_TYPE_AHASH:
return qce_setup_regs_ahash(async_req);
#endif
+#ifdef CONFIG_CRYPTO_DEV_QCE_AEAD
+ case CRYPTO_ALG_TYPE_AEAD:
+ return qce_setup_regs_aead(async_req);
+#endif
default:
return -EINVAL;
}
@@ -419,6 +577,8 @@ int qce_check_status(struct qce_device *qce, u32 *status)
*/
if (*status & STATUS_ERRORS || !(*status & BIT(OPERATION_DONE_SHIFT)))
ret = -ENXIO;
+ else if (*status & BIT(MAC_FAILED_SHIFT))
+ ret = -EBADMSG;
return ret;
}
diff --git a/drivers/crypto/qce/common.h b/drivers/crypto/qce/common.h
index 3bc244bcca2d..02e63ad9f245 100644
--- a/drivers/crypto/qce/common.h
+++ b/drivers/crypto/qce/common.h
@@ -11,6 +11,7 @@
#include <crypto/aes.h>
#include <crypto/hash.h>
#include <crypto/internal/skcipher.h>
+#include <crypto/internal/aead.h>
/* xts du size */
#define QCE_SECTOR_SIZE 512
@@ -51,9 +52,11 @@
#define QCE_MODE_CCM BIT(12)
#define QCE_MODE_MASK GENMASK(12, 8)
+#define QCE_MODE_CCM_RFC4309 BIT(13)
+
/* cipher encryption/decryption operations */
-#define QCE_ENCRYPT BIT(13)
-#define QCE_DECRYPT BIT(14)
+#define QCE_ENCRYPT BIT(30)
+#define QCE_DECRYPT BIT(31)
#define IS_DES(flags) (flags & QCE_ALG_DES)
#define IS_3DES(flags) (flags & QCE_ALG_3DES)
@@ -73,6 +76,7 @@
#define IS_CTR(mode) (mode & QCE_MODE_CTR)
#define IS_XTS(mode) (mode & QCE_MODE_XTS)
#define IS_CCM(mode) (mode & QCE_MODE_CCM)
+#define IS_CCM_RFC4309(mode) ((mode) & QCE_MODE_CCM_RFC4309)
#define IS_ENCRYPT(dir) (dir & QCE_ENCRYPT)
#define IS_DECRYPT(dir) (dir & QCE_DECRYPT)
@@ -85,6 +89,7 @@ struct qce_alg_template {
union {
struct skcipher_alg skcipher;
struct ahash_alg ahash;
+ struct aead_alg aead;
} alg;
struct qce_device *qce;
const u8 *hash_zero;
diff --git a/drivers/crypto/qce/core.c b/drivers/crypto/qce/core.c
index 80b75085c265..d3780be44a76 100644
--- a/drivers/crypto/qce/core.c
+++ b/drivers/crypto/qce/core.c
@@ -17,6 +17,7 @@
#include "core.h"
#include "cipher.h"
#include "sha.h"
+#include "aead.h"
#define QCE_MAJOR_VERSION5 0x05
#define QCE_QUEUE_LENGTH 1
@@ -28,6 +29,9 @@ static const struct qce_algo_ops *qce_ops[] = {
#ifdef CONFIG_CRYPTO_DEV_QCE_SHA
&ahash_ops,
#endif
+#ifdef CONFIG_CRYPTO_DEV_QCE_AEAD
+ &aead_ops,
+#endif
};
static void qce_unregister_algs(struct qce_device *qce)
diff --git a/drivers/crypto/qce/skcipher.c b/drivers/crypto/qce/skcipher.c
index c0a0d8c4fce1..8ff10928f581 100644
--- a/drivers/crypto/qce/skcipher.c
+++ b/drivers/crypto/qce/skcipher.c
@@ -72,7 +72,7 @@ qce_skcipher_async_req_handle(struct crypto_async_request *async_req)
struct scatterlist *sg;
bool diff_dst;
gfp_t gfp;
- int ret;
+ int dst_nents, src_nents, ret;
rctx->iv = req->iv;
rctx->ivsize = crypto_skcipher_ivsize(skcipher);
@@ -123,21 +123,26 @@ qce_skcipher_async_req_handle(struct crypto_async_request *async_req)
sg_mark_end(sg);
rctx->dst_sg = rctx->dst_tbl.sgl;
- ret = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
- if (ret < 0)
+ dst_nents = dma_map_sg(qce->dev, rctx->dst_sg, rctx->dst_nents, dir_dst);
+ if (dst_nents < 0) {
+ ret = dst_nents;
goto error_free;
+ }
if (diff_dst) {
- ret = dma_map_sg(qce->dev, req->src, rctx->src_nents, dir_src);
- if (ret < 0)
+ src_nents = dma_map_sg(qce->dev, req->src, rctx->src_nents, dir_src);
+ if (src_nents < 0) {
+ ret = src_nents;
goto error_unmap_dst;
+ }
rctx->src_sg = req->src;
} else {
rctx->src_sg = rctx->dst_sg;
+ src_nents = dst_nents - 1;
}
- ret = qce_dma_prep_sgs(&qce->dma, rctx->src_sg, rctx->src_nents,
- rctx->dst_sg, rctx->dst_nents,
+ ret = qce_dma_prep_sgs(&qce->dma, rctx->src_sg, src_nents,
+ rctx->dst_sg, dst_nents,
qce_skcipher_done, async_req);
if (ret)
goto error_unmap_src;
diff --git a/drivers/crypto/sa2ul.c b/drivers/crypto/sa2ul.c
index 1c6929fb3a13..544d7040cfc5 100644
--- a/drivers/crypto/sa2ul.c
+++ b/drivers/crypto/sa2ul.c
@@ -1698,7 +1698,6 @@ static void sa_aead_dma_in_callback(void *data)
size_t pl, ml;
int i;
int err = 0;
- u16 auth_len;
u32 *mdptr;
sa_sync_from_device(rxd);
@@ -1711,13 +1710,10 @@ static void sa_aead_dma_in_callback(void *data)
for (i = 0; i < (authsize / 4); i++)
mdptr[i + 4] = swab32(mdptr[i + 4]);
- auth_len = req->assoclen + req->cryptlen;
-
if (rxd->enc) {
scatterwalk_map_and_copy(&mdptr[4], req->dst, start, authsize,
1);
} else {
- auth_len -= authsize;
start -= authsize;
scatterwalk_map_and_copy(auth_tag, req->src, start, authsize,
0);
@@ -2300,9 +2296,9 @@ static int sa_dma_init(struct sa_crypto_data *dd)
dd->dma_rx2 = dma_request_chan(dd->dev, "rx2");
if (IS_ERR(dd->dma_rx2)) {
- dma_release_channel(dd->dma_rx1);
- return dev_err_probe(dd->dev, PTR_ERR(dd->dma_rx2),
- "Unable to request rx2 DMA channel\n");
+ ret = dev_err_probe(dd->dev, PTR_ERR(dd->dma_rx2),
+ "Unable to request rx2 DMA channel\n");
+ goto err_dma_rx2;
}
dd->dma_tx = dma_request_chan(dd->dev, "tx");
@@ -2323,28 +2319,31 @@ static int sa_dma_init(struct sa_crypto_data *dd)
if (ret) {
dev_err(dd->dev, "can't configure IN dmaengine slave: %d\n",
ret);
- return ret;
+ goto err_dma_config;
}
ret = dmaengine_slave_config(dd->dma_rx2, &cfg);
if (ret) {
dev_err(dd->dev, "can't configure IN dmaengine slave: %d\n",
ret);
- return ret;
+ goto err_dma_config;
}
ret = dmaengine_slave_config(dd->dma_tx, &cfg);
if (ret) {
dev_err(dd->dev, "can't configure OUT dmaengine slave: %d\n",
ret);
- return ret;
+ goto err_dma_config;
}
return 0;
+err_dma_config:
+ dma_release_channel(dd->dma_tx);
err_dma_tx:
- dma_release_channel(dd->dma_rx1);
dma_release_channel(dd->dma_rx2);
+err_dma_rx2:
+ dma_release_channel(dd->dma_rx1);
return ret;
}
@@ -2385,10 +2384,8 @@ MODULE_DEVICE_TABLE(of, of_match);
static int sa_ul_probe(struct platform_device *pdev)
{
- const struct of_device_id *match;
struct device *dev = &pdev->dev;
struct device_node *node = dev->of_node;
- struct resource *res;
static void __iomem *saul_base;
struct sa_crypto_data *dev_data;
int ret;
@@ -2397,9 +2394,18 @@ static int sa_ul_probe(struct platform_device *pdev)
if (!dev_data)
return -ENOMEM;
+ dev_data->match_data = of_device_get_match_data(dev);
+ if (!dev_data->match_data)
+ return -ENODEV;
+
+ saul_base = devm_platform_ioremap_resource(pdev, 0);
+ if (IS_ERR(saul_base))
+ return PTR_ERR(saul_base);
+
sa_k3_dev = dev;
dev_data->dev = dev;
dev_data->pdev = pdev;
+ dev_data->base = saul_base;
platform_set_drvdata(pdev, dev_data);
dev_set_drvdata(sa_k3_dev, dev_data);
@@ -2408,26 +2414,16 @@ static int sa_ul_probe(struct platform_device *pdev)
if (ret < 0) {
dev_err(&pdev->dev, "%s: failed to get sync: %d\n", __func__,
ret);
+ pm_runtime_disable(dev);
return ret;
}
sa_init_mem(dev_data);
ret = sa_dma_init(dev_data);
if (ret)
- goto disable_pm_runtime;
-
- match = of_match_node(of_match, dev->of_node);
- if (!match) {
- dev_err(dev, "No compatible match found\n");
- return -ENODEV;
- }
- dev_data->match_data = match->data;
+ goto destroy_dma_pool;
spin_lock_init(&dev_data->scid_lock);
- res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- saul_base = devm_ioremap_resource(dev, res);
-
- dev_data->base = saul_base;
if (!dev_data->match_data->skip_engine_control) {
u32 val = SA_EEC_ENCSS_EN | SA_EEC_AUTHSS_EN | SA_EEC_CTXCACH_EN |
@@ -2454,9 +2450,9 @@ release_dma:
dma_release_channel(dev_data->dma_rx1);
dma_release_channel(dev_data->dma_tx);
+destroy_dma_pool:
dma_pool_destroy(dev_data->sc_pool);
-disable_pm_runtime:
pm_runtime_put_sync(&pdev->dev);
pm_runtime_disable(&pdev->dev);
@@ -2467,6 +2463,8 @@ static int sa_ul_remove(struct platform_device *pdev)
{
struct sa_crypto_data *dev_data = platform_get_drvdata(pdev);
+ of_platform_depopulate(&pdev->dev);
+
sa_unregister_algos(&pdev->dev);
dma_release_channel(dev_data->dma_rx2);
diff --git a/drivers/crypto/ux500/hash/hash_core.c b/drivers/crypto/ux500/hash/hash_core.c
index ecb7412e84e3..51a6e1a42434 100644
--- a/drivers/crypto/ux500/hash/hash_core.c
+++ b/drivers/crypto/ux500/hash/hash_core.c
@@ -1011,6 +1011,7 @@ static int hash_hw_final(struct ahash_request *req)
goto out;
}
} else if (req->nbytes == 0 && ctx->keylen > 0) {
+ ret = -EPERM;
dev_err(device_data->dev, "%s: Empty message with keylength > 0, NOT supported\n",
__func__);
goto out;
diff --git a/drivers/soc/ixp4xx/ixp4xx-npe.c b/drivers/soc/ixp4xx/ixp4xx-npe.c
index ec90b44fa0cd..3c158251a58b 100644
--- a/drivers/soc/ixp4xx/ixp4xx-npe.c
+++ b/drivers/soc/ixp4xx/ixp4xx-npe.c
@@ -18,6 +18,7 @@
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/of.h>
+#include <linux/of_platform.h>
#include <linux/platform_device.h>
#include <linux/soc/ixp4xx/npe.h>
@@ -679,6 +680,7 @@ static int ixp4xx_npe_probe(struct platform_device *pdev)
{
int i, found = 0;
struct device *dev = &pdev->dev;
+ struct device_node *np = dev->of_node;
struct resource *res;
for (i = 0; i < NPE_COUNT; i++) {
@@ -711,6 +713,11 @@ static int ixp4xx_npe_probe(struct platform_device *pdev)
if (!found)
return -ENODEV;
+
+ /* Spawn crypto subdevice if using device tree */
+ if (IS_ENABLED(CONFIG_OF) && np)
+ devm_of_platform_populate(dev);
+
return 0;
}
diff --git a/include/crypto/aead.h b/include/crypto/aead.h
index e728469c4ccc..5af914c1ab8e 100644
--- a/include/crypto/aead.h
+++ b/include/crypto/aead.h
@@ -490,7 +490,7 @@ static inline void aead_request_set_callback(struct aead_request *req,
* The memory structure for cipher operation has the following structure:
*
* - AEAD encryption input: assoc data || plaintext
- * - AEAD encryption output: assoc data || cipherntext || auth tag
+ * - AEAD encryption output: assoc data || ciphertext || auth tag
* - AEAD decryption input: assoc data || ciphertext || auth tag
* - AEAD decryption output: assoc data || plaintext
*
diff --git a/include/crypto/algapi.h b/include/crypto/algapi.h
index 86f0748009af..5f6841c73e5a 100644
--- a/include/crypto/algapi.h
+++ b/include/crypto/algapi.h
@@ -96,6 +96,15 @@ struct scatter_walk {
unsigned int offset;
};
+struct crypto_attr_alg {
+ char name[CRYPTO_MAX_ALG_NAME];
+};
+
+struct crypto_attr_type {
+ u32 type;
+ u32 mask;
+};
+
void crypto_mod_put(struct crypto_alg *alg);
int crypto_register_template(struct crypto_template *tmpl);
@@ -118,7 +127,6 @@ void *crypto_spawn_tfm2(struct crypto_spawn *spawn);
struct crypto_attr_type *crypto_get_attr_type(struct rtattr **tb);
int crypto_check_attr_type(struct rtattr **tb, u32 type, u32 *mask_ret);
const char *crypto_attr_alg_name(struct rtattr *rta);
-int crypto_attr_u32(struct rtattr *rta, u32 *num);
int crypto_inst_setname(struct crypto_instance *inst, const char *name,
struct crypto_alg *alg);
diff --git a/include/crypto/engine.h b/include/crypto/engine.h
index 3f06e40d063a..26cac19b0f46 100644
--- a/include/crypto/engine.h
+++ b/include/crypto/engine.h
@@ -28,7 +28,7 @@
* of a failed backlog request
* crypto-engine, in head position to keep order
* @list: link with the global crypto engine list
- * @queue_lock: spinlock to syncronise access to request queue
+ * @queue_lock: spinlock to synchronise access to request queue
* @queue: the crypto queue of the engine
* @rt: whether this queue is set to run as a realtime task
* @prepare_crypt_hardware: a request will soon arrive from the queue
diff --git a/include/crypto/hash.h b/include/crypto/hash.h
index b2bc1e46e86a..f140e4643949 100644
--- a/include/crypto/hash.h
+++ b/include/crypto/hash.h
@@ -458,7 +458,7 @@ int crypto_ahash_finup(struct ahash_request *req);
*
* Return:
* 0 if the message digest was successfully calculated;
- * -EINPROGRESS if data is feeded into hardware (DMA) or queued for later;
+ * -EINPROGRESS if data is fed into hardware (DMA) or queued for later;
* -EBUSY if queue is full and request should be resubmitted later;
* other < 0 if an error occurred
*/
diff --git a/include/crypto/internal/hash.h b/include/crypto/internal/hash.h
index 0a288dddcf5b..25806141db59 100644
--- a/include/crypto/internal/hash.h
+++ b/include/crypto/internal/hash.h
@@ -75,13 +75,7 @@ void crypto_unregister_ahashes(struct ahash_alg *algs, int count);
int ahash_register_instance(struct crypto_template *tmpl,
struct ahash_instance *inst);
-int shash_no_setkey(struct crypto_shash *tfm, const u8 *key,
- unsigned int keylen);
-
-static inline bool crypto_shash_alg_has_setkey(struct shash_alg *alg)
-{
- return alg->setkey != shash_no_setkey;
-}
+bool crypto_shash_alg_has_setkey(struct shash_alg *alg);
static inline bool crypto_shash_alg_needs_key(struct shash_alg *alg)
{
diff --git a/include/linux/crypto.h b/include/linux/crypto.h
index da5e0d74bb2f..855869e1fd32 100644
--- a/include/linux/crypto.h
+++ b/include/linux/crypto.h
@@ -643,32 +643,6 @@ struct crypto_comp {
struct crypto_tfm base;
};
-enum {
- CRYPTOA_UNSPEC,
- CRYPTOA_ALG,
- CRYPTOA_TYPE,
- CRYPTOA_U32,
- __CRYPTOA_MAX,
-};
-
-#define CRYPTOA_MAX (__CRYPTOA_MAX - 1)
-
-/* Maximum number of (rtattr) parameters for each template. */
-#define CRYPTO_MAX_ATTRS 32
-
-struct crypto_attr_alg {
- char name[CRYPTO_MAX_ALG_NAME];
-};
-
-struct crypto_attr_type {
- u32 type;
- u32 mask;
-};
-
-struct crypto_attr_u32 {
- u32 num;
-};
-
/*
* Transform user interface.
*/